diff --git a/include/pressiodemoapps/impl/euler_3d_prob_class.hpp b/include/pressiodemoapps/impl/euler_3d_prob_class.hpp index 550033dc..f95f0790 100644 --- a/include/pressiodemoapps/impl/euler_3d_prob_class.hpp +++ b/include/pressiodemoapps/impl/euler_3d_prob_class.hpp @@ -15,6 +15,10 @@ #include "mixin_directional_flux_balance.hpp" #include "mixin_directional_flux_balance_jacobian.hpp" +#ifdef PRESSIODEMOAPPS_ENABLE_OPENMP +#include +#endif + namespace pressiodemoapps{ namespace ee{ namespace impl{ template @@ -50,8 +54,6 @@ class EigenEuler3dApp // calculate total num of dofs on sample and stencil mesh m_numDofStencilMesh = m_meshObj.stencilMeshSize() * numDofPerCell; m_numDofSampleMesh = m_meshObj.sampleMeshSize() * numDofPerCell; - - allocateStencilValuesContainer(); allocateGhosts(); } @@ -103,6 +105,11 @@ class EigenEuler3dApp jacobian_type * J) const { +#ifdef PRESSIODEMOAPPS_ENABLE_OPENMP +#pragma omp parallel +{ +#endif + // reconstructions values edge_rec_type uMinusHalfNeg, uMinusHalfPos; edge_rec_type uPlusHalfNeg, uPlusHalfPos; @@ -111,76 +118,49 @@ class EigenEuler3dApp flux_type fluxB, fluxF; //back, front y flux_type fluxD, fluxU; //down, up z - // flux jacobians - flux_jac_type fluxJacLNeg, fluxJacLPos; - flux_jac_type fluxJacRNeg, fluxJacRPos; - flux_jac_type fluxJacBNeg, fluxJacBPos; - flux_jac_type fluxJacFNeg, fluxJacFPos; - flux_jac_type fluxJacDNeg, fluxJacDPos; - flux_jac_type fluxJacUNeg, fluxJacUPos; - - fillGhosts(U); - - V.setZero(); +#ifdef PRESSIODEMOAPPS_ENABLE_OPENMP + ::pressiodemoapps::set_zero_omp(V); +#else + ::pressiodemoapps::set_zero(V); +#endif - int nonZerosCountBeforeComputing = 0; if (J){ - nonZerosCountBeforeComputing = J->nonZeros(); +#ifdef PRESSIODEMOAPPS_ENABLE_OPENMP + ::pressiodemoapps::set_zero_omp(*J); +#else ::pressiodemoapps::set_zero(*J); +#endif + } - // near boundary I have be careful because - // the jacobian can only be first order for now - if (m_recEn == InviscidFluxReconstruction::FirstOrder){ - velocityAndJacNearBDCellsImplFirstOrder(U, currentTime, V, *J, - fluxL, fluxR, fluxB, fluxF, fluxD, fluxU, - fluxJacLNeg, fluxJacLPos, - fluxJacRNeg, fluxJacRPos, - fluxJacBNeg, fluxJacBPos, - fluxJacFNeg, fluxJacFPos, - fluxJacDNeg, fluxJacDPos, - fluxJacUNeg, fluxJacUPos, - uMinusHalfNeg, uMinusHalfPos, - uPlusHalfNeg, uPlusHalfPos); - } - - else{ - velocityAndJacNearBDCellsImplDifferentScheme(U, currentTime, V, *J, - fluxL, fluxR, fluxB, fluxF, fluxD, fluxU, - fluxJacLNeg, fluxJacLPos, - fluxJacRNeg, fluxJacRPos, - fluxJacBNeg, fluxJacBPos, - fluxJacFNeg, fluxJacFPos, - fluxJacDNeg, fluxJacDPos, - fluxJacUNeg, fluxJacUPos, - uMinusHalfNeg, uMinusHalfPos, - uPlusHalfNeg, uPlusHalfPos); - } + fillGhosts(U); - velocityAndJacInnerCellsImpl(U, currentTime, V, *J, - fluxL, fluxR, fluxB, fluxF, fluxD, fluxU, - fluxJacLNeg, fluxJacLPos, - fluxJacRNeg, fluxJacRPos, - fluxJacBNeg, fluxJacBPos, - fluxJacFNeg, fluxJacFPos, - fluxJacDNeg, fluxJacDPos, - fluxJacUNeg, fluxJacUPos, - uMinusHalfNeg, uMinusHalfPos, - uPlusHalfNeg, uPlusHalfPos); - - assert(J->nonZeros() == nonZerosCountBeforeComputing); + if (J){ + velocityAndJacobianImpl(U, currentTime, V, *J, + fluxL, fluxR, + fluxB, fluxF, + fluxD, fluxU, + uMinusHalfNeg, uMinusHalfPos, + uPlusHalfNeg, uPlusHalfPos); } else{ - velocityOnlyNearBdCellsImpl(U, currentTime, V, - fluxL, fluxR, fluxB, fluxF, fluxD, fluxU, - uMinusHalfNeg, uMinusHalfPos, - uPlusHalfNeg, uPlusHalfPos); + if (!m_meshObj.isPeriodic()){ + velocityOnlyNearBdCellsImpl(U, currentTime, V, + fluxL, fluxR, fluxB, fluxF, fluxD, fluxU, + uMinusHalfNeg, uMinusHalfPos, + uPlusHalfNeg, uPlusHalfPos); + } velocityOnlyInnerCellsImpl(U, currentTime, V, fluxL, fluxR, fluxB, fluxF, fluxD, fluxU, uMinusHalfNeg, uMinusHalfPos, uPlusHalfNeg, uPlusHalfPos); } + +#ifdef PRESSIODEMOAPPS_ENABLE_OPENMP +}//end omp parallel +#endif + } private: @@ -190,7 +170,7 @@ class EigenEuler3dApp // for inner cells, the Jacobian is of the same scheme // wanted by the user, no special treatment is needed - const scalar_type zero = 0; + const auto zero = static_cast(0); const auto & graph = m_meshObj.graph(); const auto & targetGraphRows = m_meshObj.graphRowsOfCellsAwayFromBd(); for (std::size_t it=0; it void initializeJacobianForNearBoundaryCells(std::vector & trList) { - const scalar_type zero = 0; + + const auto zero = static_cast(0); const auto & graph = m_meshObj.graph(); const auto & targetGraphRows = m_meshObj.graphRowsOfCellsNearBd(); for (std::size_t it=0; it; + using ghost_filler_t = ::pressiodemoapps::ee::impl::Ghost3dSedov< + U_t, MeshType, ghost_container_type>; ghost_filler_t ghF(stencilSize, U, m_meshObj, m_ghostLeft, m_ghostRight, m_ghostBack, m_ghostFront, @@ -272,11 +254,17 @@ class EigenEuler3dApp const auto & rowsBd = m_meshObj.graphRowsOfCellsNearBd(); if (stencilSize==3){ +#ifdef PRESSIODEMOAPPS_ENABLE_OPENMP +#pragma omp for schedule(static) +#endif for (int it=0; it(rowsBd[it], it); } } else if (stencilSize==5){ +#ifdef PRESSIODEMOAPPS_ENABLE_OPENMP +#pragma omp for schedule(static) +#endif for (int it=0; it(rowsBd[it], it); } @@ -286,6 +274,85 @@ class EigenEuler3dApp } } + template + void velocityAndJacobianImpl(const U_t & U, + const scalar_type currentTime, + V_t & V, + jacobian_type & J, + flux_type & fluxL, + flux_type & fluxR, + flux_type & fluxB, + flux_type & fluxF, + flux_type & fluxD, + flux_type & fluxU, + edge_rec_type & uMinusHalfNeg, + edge_rec_type & uMinusHalfPos, + edge_rec_type & uPlusHalfNeg, + edge_rec_type & uPlusHalfPos) const + { + + // flux jacobians + flux_jac_type fluxJacLNeg, fluxJacLPos; + flux_jac_type fluxJacRNeg, fluxJacRPos; + flux_jac_type fluxJacBNeg, fluxJacBPos; + flux_jac_type fluxJacFNeg, fluxJacFPos; + flux_jac_type fluxJacDNeg, fluxJacDPos; + flux_jac_type fluxJacUNeg, fluxJacUPos; + + int nonZerosCountBeforeComputing = 0; + nonZerosCountBeforeComputing = J.nonZeros(); + + // near boundary I have be careful because + // the jacobian can only be first order for now + // only need to do near-BD cells if there are any + if (!m_meshObj.isPeriodic()){ + if (m_recEn == InviscidFluxReconstruction::FirstOrder){ + velocityAndJacNearBDCellsImplFirstOrder(U, currentTime, V, J, + fluxL, fluxR, + fluxB, fluxF, + fluxD, fluxU, + fluxJacLNeg, fluxJacLPos, + fluxJacRNeg, fluxJacRPos, + fluxJacBNeg, fluxJacBPos, + fluxJacFNeg, fluxJacFPos, + fluxJacDNeg, fluxJacDPos, + fluxJacUNeg, fluxJacUPos, + uMinusHalfNeg, uMinusHalfPos, + uPlusHalfNeg, uPlusHalfPos); + } + + else{ + velocityAndJacNearBDCellsImplDifferentScheme(U, currentTime, V, J, + fluxL, fluxR, + fluxB, fluxF, + fluxD, fluxU, + fluxJacLNeg, fluxJacLPos, + fluxJacRNeg, fluxJacRPos, + fluxJacBNeg, fluxJacBPos, + fluxJacFNeg, fluxJacFPos, + fluxJacDNeg, fluxJacDPos, + fluxJacUNeg, fluxJacUPos, + uMinusHalfNeg, uMinusHalfPos, + uPlusHalfNeg, uPlusHalfPos); + } + } + + velocityAndJacInnerCellsImpl(U, currentTime, V, J, + fluxL, fluxR, + fluxB, fluxF, + fluxD, fluxU, + fluxJacLNeg, fluxJacLPos, + fluxJacRNeg, fluxJacRPos, + fluxJacBNeg, fluxJacBPos, + fluxJacFNeg, fluxJacFPos, + fluxJacDNeg, fluxJacDPos, + fluxJacUNeg, fluxJacUPos, + uMinusHalfNeg, uMinusHalfPos, + uPlusHalfNeg, uPlusHalfPos); + + assert(J.nonZeros() == nonZerosCountBeforeComputing); + } + template void velocityAndJacInnerCellsImpl(const U_t & U, const scalar_type currentTime, @@ -390,6 +457,9 @@ class EigenEuler3dApp ); const auto & graphRows = m_meshObj.graphRowsOfCellsAwayFromBd(); +#ifdef PRESSIODEMOAPPS_ENABLE_OPENMP +#pragma omp for schedule(static) +#endif for (decltype(graphRows.size()) it=0; it; stencil_filler_t FillStencilX(reconstructionTypeToStencilSize(m_recEn), U, m_meshObj, m_ghostLeft, m_ghostRight, - m_stencilVals, xAxis); + stencilVals, xAxis); stencil_filler_t FillStencilY(reconstructionTypeToStencilSize(m_recEn), U, m_meshObj, m_ghostBack, m_ghostFront, - m_stencilVals, yAxis); + stencilVals, yAxis); stencil_filler_t FillStencilZ(reconstructionTypeToStencilSize(m_recEn), U, m_meshObj, m_ghostDown, m_ghostUp, - m_stencilVals, zAxis); + stencilVals, zAxis); using functor_type = pda::impl::ComputeDirectionalFluxBalance< @@ -468,7 +541,7 @@ class EigenEuler3dApp m_fluxEn, normalX_, m_gamma, fluxL, fluxR, fluxJacLNeg, fluxJacLPos, fluxJacRNeg, fluxJacRPos, /* end args for flux */ - toReconstructionScheme(m_recEn), m_stencilVals, + toReconstructionScheme(m_recEn), stencilVals, uMinusHalfNeg, uMinusHalfPos, uPlusHalfNeg, uPlusHalfPos /* end args for reconstructor */ ); @@ -480,7 +553,7 @@ class EigenEuler3dApp m_fluxEn, normalY_, m_gamma, fluxB, fluxF, fluxJacBNeg, fluxJacBPos, fluxJacFNeg, fluxJacFPos, /* end args for flux */ - toReconstructionScheme(m_recEn), m_stencilVals, + toReconstructionScheme(m_recEn), stencilVals, uMinusHalfNeg, uMinusHalfPos, uPlusHalfNeg, uPlusHalfPos /* end args for reconstructor */ ); @@ -492,7 +565,7 @@ class EigenEuler3dApp m_fluxEn, normalZ_, m_gamma, fluxD, fluxU, fluxJacDNeg, fluxJacDPos, fluxJacUNeg, fluxJacUPos, /* end args for flux */ - toReconstructionScheme(m_recEn), m_stencilVals, + toReconstructionScheme(m_recEn), stencilVals, uMinusHalfNeg, uMinusHalfPos, uPlusHalfNeg, uPlusHalfPos /* end args for reconstructor */ ); @@ -512,23 +585,29 @@ class EigenEuler3dApp bcCellJacFactorsReflectiveZ[3] = static_cast(-1); const auto & graphRows = m_meshObj.graphRowsOfCellsNearBd(); +#ifdef PRESSIODEMOAPPS_ENABLE_OPENMP +#pragma omp for schedule(static) +#endif for (decltype(graphRows.size()) it=0; it; stencil_filler_t FillStencilX(reconstructionTypeToStencilSize(m_recEn), U, m_meshObj, m_ghostLeft, m_ghostRight, - m_stencilVals, xAxis); + stencilVals, xAxis); stencil_filler_t FillStencilY(reconstructionTypeToStencilSize(m_recEn), U, m_meshObj, m_ghostBack, m_ghostFront, - m_stencilVals, yAxis); + stencilVals, yAxis); stencil_filler_t FillStencilZ(reconstructionTypeToStencilSize(m_recEn), U, m_meshObj, m_ghostDown, m_ghostUp, - m_stencilVals, zAxis); + stencilVals, zAxis); using functor_type = pda::impl::ComputeDirectionalFluxBalance< @@ -833,7 +924,7 @@ class EigenEuler3dApp /* end args for velo */ m_fluxEn, normalX_, m_gamma, fluxL, fluxR, /* end args for flux */ - toReconstructionScheme(m_recEn), m_stencilVals, + toReconstructionScheme(m_recEn), stencilVals, uMinusHalfNeg, uMinusHalfPos, uPlusHalfNeg, uPlusHalfPos /* end args for reconstructor */ ); @@ -842,7 +933,7 @@ class EigenEuler3dApp /* end args for velo */ m_fluxEn, normalY_, m_gamma, fluxB, fluxF, /* end args for flux */ - toReconstructionScheme(m_recEn), m_stencilVals, + toReconstructionScheme(m_recEn), stencilVals, uMinusHalfNeg, uMinusHalfPos, uPlusHalfNeg, uPlusHalfPos /* end args for reconstructor */ ); @@ -851,12 +942,15 @@ class EigenEuler3dApp /* end args for velo */ m_fluxEn, normalZ_, m_gamma, fluxD, fluxU, /* end args for flux */ - toReconstructionScheme(m_recEn), m_stencilVals, + toReconstructionScheme(m_recEn), stencilVals, uMinusHalfNeg, uMinusHalfPos, uPlusHalfNeg, uPlusHalfPos /* end args for reconstructor */ ); const auto & graphRows = m_meshObj.graphRowsOfCellsNearBd(); +#ifdef PRESSIODEMOAPPS_ENABLE_OPENMP +#pragma omp for schedule(static) +#endif for (decltype(graphRows.size()) it=0; it