diff --git a/docs/source/run/parameters.rst b/docs/source/run/parameters.rst index 360d73c057..ddd21708a3 100644 --- a/docs/source/run/parameters.rst +++ b/docs/source/run/parameters.rst @@ -932,6 +932,11 @@ Thereby, "[]" stands for averaging over all particles in the current slice, Averages and totals over all slices are also provided for convenience under the respective ``average`` and ``total`` subcategories. +For the field in-situ diagnostics, the following quantities are calculated per slice and stored: +``[Ex^2], [Ey^2], [Ez^2], [Bx^2], [By^2], [Bz^2], [ExmBy^2], [EypBx^2], [Ez*jz_beam]``. +Thereby, "[]" stands for averaging over all cells in the current slice. +These quantities can be used to calculate the energy stored in the fields. + Additionally, some metadata is also available: ``time, step, n_slices, charge, mass, z_lo, z_hi, normalized_density_factor``. ``time`` and ``step`` refers to the physical time of the simulation and step number of the diff --git a/src/fields/Fields.H b/src/fields/Fields.H index 5862d82d01..7a18b5be44 100644 --- a/src/fields/Fields.H +++ b/src/fields/Fields.H @@ -521,6 +521,8 @@ private: bool m_explicit = false; /** If any plasma species has a neutralizing background */ bool m_any_neutral_background = false; + /** Number of real field properties for in-situ per-slice reduced diagnostics. */ + static constexpr int m_insitu_nrp = 9; /** How often the insitu field diagnostics should be computed and written * Default is 0, meaning no output */ int m_insitu_period {0}; diff --git a/src/fields/Fields.cpp b/src/fields/Fields.cpp index f740db99f2..9851858e3b 100644 --- a/src/fields/Fields.cpp +++ b/src/fields/Fields.cpp @@ -15,6 +15,7 @@ #include "utils/Constants.H" #include "utils/GPUUtil.H" #include "utils/InsituUtil.H" +#include "utils/TemplateUtil.H" #include "particles/particles_utils/ShapeFactors.H" #ifdef HIPACE_USE_OPENPMD # include @@ -205,8 +206,8 @@ Fields::AllocData ( "Must choose a different field insitu file prefix compared to the full diagnostics"); #endif // Allocate memory for in-situ diagnostics - m_insitu_rdata.resize(geom.Domain().length(2)*9, 0.); - m_insitu_sum_rdata.resize(9, 0.); + m_insitu_rdata.resize(geom.Domain().length(2)*m_insitu_nrp, 0.); + m_insitu_sum_rdata.resize(m_insitu_nrp, 0.); } } @@ -725,12 +726,7 @@ Fields::SetBoundaryCondition (amrex::Vector const& geom, const const amrex::Real x = (i * dx + poff_x) * scale; const amrex::Real y = (j * dy + poff_y) * scale; if (x*x + y*y > cutoff_sq) { - return MultipoleTuple{0._rt, - 0._rt, 0._rt, 0._rt, 0._rt, 0._rt, 0._rt, 0._rt, 0._rt, 0._rt, - 0._rt, 0._rt, 0._rt, 0._rt, 0._rt, 0._rt, 0._rt, 0._rt, 0._rt, - 0._rt, 0._rt, 0._rt, 0._rt, 0._rt, 0._rt, 0._rt, 0._rt, 0._rt, - 0._rt, 0._rt, 0._rt, 0._rt, 0._rt, 0._rt, 0._rt, 0._rt, 0._rt - }; + return MakeZeroTuple(MultipoleTuple{}); } amrex::Real s_v = arr_staging_area(i, j); return GetMultipoleCoeffs(s_v, x, y); @@ -1288,14 +1284,8 @@ Fields::InSituComputeDiags (int step, amrex::Real time, int islice, const amrex: amrex::MultiFab& slicemf = getSlices(lev); - // Tuple contains: - // 0, 1, 2, 3, 4, 5, 6, 7 8 - // , , , , , , , , - amrex::ReduceOps reduce_op; - amrex::ReduceData reduce_data(reduce_op); + TypeMultiplier reduce_op; + TypeMultiplier reduce_data(reduce_op); using ReduceTuple = typename decltype(reduce_data)::Type; for ( amrex::MFIter mfi(slicemf, DfltMfi); mfi.isValid(); ++mfi ) { @@ -1304,40 +1294,28 @@ Fields::InSituComputeDiags (int step, amrex::Real time, int islice, const amrex: mfi.tilebox(), reduce_data, [=] AMREX_GPU_DEVICE (int i, int j, int) -> ReduceTuple { - return { - pow<2>(arr(i,j,ExmBy) + arr(i,j,By) * clight), - pow<2>(arr(i,j,EypBx) - arr(i,j,Bx) * clight), - pow<2>(arr(i,j,Ez)), - pow<2>(arr(i,j,Bx)), - pow<2>(arr(i,j,By)), - pow<2>(arr(i,j,Bz)), - pow<2>(arr(i,j,ExmBy)), - pow<2>(arr(i,j,EypBx)), - arr(i,j,Ez)*arr(i,j,jz_beam) + return { // Tuple contains: + pow<2>(arr(i,j,ExmBy) + arr(i,j,By) * clight), // 0 [Ex^2] + pow<2>(arr(i,j,EypBx) - arr(i,j,Bx) * clight), // 1 [Ey^2] + pow<2>(arr(i,j,Ez)), // 2 [Ez^2] + pow<2>(arr(i,j,Bx)), // 3 [Bx^2] + pow<2>(arr(i,j,By)), // 4 [By^2] + pow<2>(arr(i,j,Bz)), // 5 [Bz^2] + pow<2>(arr(i,j,ExmBy)), // 6 [ExmBy^2] + pow<2>(arr(i,j,EypBx)), // 7 [EypBx^2] + arr(i,j,Ez)*arr(i,j,jz_beam) // 8 [Ez*jz_beam] }; }); } ReduceTuple a = reduce_data.value(); - m_insitu_rdata[islice ] = amrex::get<0>(a)*dxdydz; - m_insitu_rdata[islice+1*nslices] = amrex::get<1>(a)*dxdydz; - m_insitu_rdata[islice+2*nslices] = amrex::get<2>(a)*dxdydz; - m_insitu_rdata[islice+3*nslices] = amrex::get<3>(a)*dxdydz; - m_insitu_rdata[islice+4*nslices] = amrex::get<4>(a)*dxdydz; - m_insitu_rdata[islice+5*nslices] = amrex::get<5>(a)*dxdydz; - m_insitu_rdata[islice+6*nslices] = amrex::get<6>(a)*dxdydz; - m_insitu_rdata[islice+7*nslices] = amrex::get<7>(a)*dxdydz; - m_insitu_rdata[islice+8*nslices] = amrex::get<8>(a)*dxdydz; - m_insitu_sum_rdata[0] += amrex::get<0>(a)*dxdydz; - m_insitu_sum_rdata[1] += amrex::get<1>(a)*dxdydz; - m_insitu_sum_rdata[2] += amrex::get<2>(a)*dxdydz; - m_insitu_sum_rdata[3] += amrex::get<3>(a)*dxdydz; - m_insitu_sum_rdata[4] += amrex::get<4>(a)*dxdydz; - m_insitu_sum_rdata[5] += amrex::get<5>(a)*dxdydz; - m_insitu_sum_rdata[6] += amrex::get<6>(a)*dxdydz; - m_insitu_sum_rdata[7] += amrex::get<7>(a)*dxdydz; - m_insitu_sum_rdata[8] += amrex::get<8>(a)*dxdydz; + amrex::constexpr_for<0, m_insitu_nrp>( + [&] (auto idx) { + m_insitu_rdata[islice + idx.value * nslices] = amrex::get(a)*dxdydz; + m_insitu_sum_rdata[idx.value] += amrex::get(a)*dxdydz; + } + ); } void diff --git a/src/fields/OpenBoundary.H b/src/fields/OpenBoundary.H index f6668bc3f6..d5b8b92b9e 100644 --- a/src/fields/OpenBoundary.H +++ b/src/fields/OpenBoundary.H @@ -8,6 +8,8 @@ #ifndef OPEN_BOUNDARY_H_ #define OPEN_BOUNDARY_H_ +#include "utils/TemplateUtil.H" + #include #include @@ -27,50 +29,9 @@ amrex::Real pow (amrex::Real base) { return 0._rt; //shut up compiler } -using MultipoleTuple = amrex::GpuTuple< - amrex::Real, amrex::Real, amrex::Real, - amrex::Real, amrex::Real, amrex::Real, - amrex::Real, amrex::Real, amrex::Real, - amrex::Real, amrex::Real, amrex::Real, - amrex::Real, amrex::Real, amrex::Real, - amrex::Real, amrex::Real, amrex::Real, - amrex::Real, amrex::Real, amrex::Real, - amrex::Real, amrex::Real, amrex::Real, - amrex::Real, amrex::Real, amrex::Real, - amrex::Real, amrex::Real, amrex::Real, - amrex::Real, amrex::Real, amrex::Real, - amrex::Real, amrex::Real, amrex::Real, - amrex::Real>; - -using MultipoleReduceOpList = amrex::TypeList< - amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum, - amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum, - amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum, - amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum, - amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum, - amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum, - amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum, - amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum, - amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum, - amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum, - amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum, - amrex::ReduceOpSum, amrex::ReduceOpSum, amrex::ReduceOpSum, - amrex::ReduceOpSum>; - -using MultipoleReduceTypeList = amrex::TypeList< - amrex::Real, amrex::Real, amrex::Real, - amrex::Real, amrex::Real, amrex::Real, - amrex::Real, amrex::Real, amrex::Real, - amrex::Real, amrex::Real, amrex::Real, - amrex::Real, amrex::Real, amrex::Real, - amrex::Real, amrex::Real, amrex::Real, - amrex::Real, amrex::Real, amrex::Real, - amrex::Real, amrex::Real, amrex::Real, - amrex::Real, amrex::Real, amrex::Real, - amrex::Real, amrex::Real, amrex::Real, - amrex::Real, amrex::Real, amrex::Real, - amrex::Real, amrex::Real, amrex::Real, - amrex::Real>; +using MultipoleTuple = TypeMultiplier; +using MultipoleReduceOpList = TypeMultiplier; +using MultipoleReduceTypeList = TypeMultiplier; // To solve a poisson equation (d^2/dx^2 + d^2/dy^2)phi = source with open boundary conditions for // phi(x,y), the source field at (x',y') is integrated together with the Green's function diff --git a/src/particles/beam/BeamParticleContainer.H b/src/particles/beam/BeamParticleContainer.H index 18af552cbd..4daa81a5cb 100644 --- a/src/particles/beam/BeamParticleContainer.H +++ b/src/particles/beam/BeamParticleContainer.H @@ -301,27 +301,12 @@ private: int m_nslices; /**< number of z slices of the domain */ /** Number of real beam properties for in-situ per-slice reduced diagnostics. */ - int m_insitu_nrp {18}; + static constexpr int m_insitu_nrp = 18; /** Number of int beam properties for in-situ per-slice reduced diagnostics. */ - int m_insitu_nip {1}; - /** Per-slice real beam properties: - * 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, - * sum(w), [x], [x^2], [y], [y^2], [z], [z^2], [ux], [ux^2], [uy], [uy^2], [uz], [uz^2], - * - * 13, 14, 15, 16, 17 - * [x*ux], [y*uy], [z*uz], [ga], [ga^2] - * where [] means average over all particles within slice. - * Per-slice emittance: sqrt( abs( ([x^2]-[x]^2) * ([ux^2]-[ux]^2) - ([x*ux]-[x][ux])^2 ) ). - * Projected emittance: Same as above AFTER averaging all these quantities over slices. - * Energy spread: sqrt([ga^2]-[ga]^2), and same as above. - * Momentum spread: sqrt([uz^2]-[uz]^2), and same as above. - */ + static constexpr int m_insitu_nip = 1; + /** Per-slice real beam properties */ amrex::Vector m_insitu_rdata; - /** Per-slice int beam properties: - * 0 - * Np - * Np: number of particles in this slice - */ + /** Per-slice int beam properties */ amrex::Vector m_insitu_idata; /** Sum of all per-slice real beam properties */ amrex::Vector m_insitu_sum_rdata; diff --git a/src/particles/beam/BeamParticleContainer.cpp b/src/particles/beam/BeamParticleContainer.cpp index aee457f5b9..750a23b024 100644 --- a/src/particles/beam/BeamParticleContainer.cpp +++ b/src/particles/beam/BeamParticleContainer.cpp @@ -12,6 +12,7 @@ #include "Hipace.H" #include "utils/HipaceProfilerWrapper.H" #include "utils/InsituUtil.H" +#include "utils/TemplateUtil.H" #ifdef HIPACE_USE_OPENPMD # include #endif @@ -450,116 +451,71 @@ BeamParticleContainer::InSituComputeDiags (int islice) AMREX_ALWAYS_ASSERT(m_insitu_rdata.size()>0 && m_insitu_idata.size()>0 && m_insitu_sum_rdata.size()>0 && m_insitu_sum_idata.size()>0); - const amrex::Real insitu_radius = m_insitu_radius; + const amrex::Real insitu_radius_sq = m_insitu_radius * m_insitu_radius; const PhysConst phys_const = get_phys_const(); const amrex::Real clight_inv = 1.0_rt/phys_const.c; - const amrex::Real clightsq_inv = 1.0_rt/(phys_const.c*phys_const.c); - - auto const& soa = getBeamSlice(WhichBeamSlice::This).GetStructOfArrays(); - const auto pos_x = soa.GetRealData(BeamIdx::x).data(); - const auto pos_y = soa.GetRealData(BeamIdx::y).data(); - const auto pos_z = soa.GetRealData(BeamIdx::z).data(); - const auto wp = soa.GetRealData(BeamIdx::w).data(); - const auto uxp = soa.GetRealData(BeamIdx::ux).data(); - const auto uyp = soa.GetRealData(BeamIdx::uy).data(); - const auto uzp = soa.GetRealData(BeamIdx::uz).data(); - auto idcpup = soa.GetIdCPUData().data(); - - // Tuple contains: - // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, - // sum(w), , , , , , , , , , , , , - // - // 13, 14, 15, 16, 17, 18 - // , , , , , np - amrex::ReduceOps reduce_op; - amrex::ReduceData reduce_data(reduce_op); + const auto ptd = getBeamSlice(WhichBeamSlice::This).getParticleTileData(); + + TypeMultiplier reduce_op; + TypeMultiplier reduce_data(reduce_op); using ReduceTuple = typename decltype(reduce_data)::Type; reduce_op.eval( getNumParticles(WhichBeamSlice::This), reduce_data, [=] AMREX_GPU_DEVICE (int ip) -> ReduceTuple { - if (amrex::ConstParticleIDWrapper(idcpup[ip]) < 0 || - pos_x[ip]*pos_x[ip] + pos_y[ip]*pos_y[ip] > insitu_radius*insitu_radius) { - return{0._rt, 0._rt, 0._rt, 0._rt, 0._rt, 0._rt, 0._rt, 0._rt, 0._rt, - 0._rt, 0._rt, 0._rt, 0._rt, 0._rt, 0._rt, 0._rt, 0._rt, 0._rt, 0}; + const amrex::Real x = ptd.pos(0, ip); + const amrex::Real y = ptd.pos(1, ip); + const amrex::Real z = ptd.pos(2, ip); + const amrex::Real ux = ptd.rdata(BeamIdx::ux)[ip] * clight_inv; // proper velocity to u + const amrex::Real uy = ptd.rdata(BeamIdx::uy)[ip] * clight_inv; + const amrex::Real uz = ptd.rdata(BeamIdx::uz)[ip] * clight_inv; + const amrex::Real w = ptd.rdata(BeamIdx::w)[ip]; + + if (ptd.id(ip) < 0 || x*x + y*y > insitu_radius_sq) { + return MakeZeroTuple(ReduceTuple{}); } - const amrex::Real gamma = std::sqrt(1.0_rt + uxp[ip]*uxp[ip]*clightsq_inv - + uyp[ip]*uyp[ip]*clightsq_inv - + uzp[ip]*uzp[ip]*clightsq_inv); - return {wp[ip], - wp[ip]*pos_x[ip], - wp[ip]*pos_x[ip]*pos_x[ip], - wp[ip]*pos_y[ip], - wp[ip]*pos_y[ip]*pos_y[ip], - wp[ip]*pos_z[ip], - wp[ip]*pos_z[ip]*pos_z[ip], - wp[ip]*uxp[ip]*clight_inv, - wp[ip]*uxp[ip]*uxp[ip]*clightsq_inv, - wp[ip]*uyp[ip]*clight_inv, - wp[ip]*uyp[ip]*uyp[ip]*clightsq_inv, - wp[ip]*uzp[ip]*clight_inv, - wp[ip]*uzp[ip]*uzp[ip]*clightsq_inv, - wp[ip]*pos_x[ip]*uxp[ip]*clight_inv, - wp[ip]*pos_y[ip]*uyp[ip]*clight_inv, - wp[ip]*pos_z[ip]*uzp[ip]*clight_inv, - wp[ip]*gamma, - wp[ip]*gamma*gamma, - 1}; + const amrex::Real gamma = std::sqrt(1.0_rt + ux*ux + uy*uy + uz*uz); + return { // Tuple contains: + w, // 0 sum(w) + w*x, // 1 [x] + w*x*x, // 2 [x^2] + w*y, // 3 [y] + w*y*y, // 4 [y^2] + w*z, // 5 [z] + w*z*z, // 6 [z^2] + w*ux, // 7 [ux] + w*ux*ux, // 8 [ux^2] + w*uy, // 9 [uy] + w*uy*uy, // 10 [uy^2] + w*uz, // 11 [uz] + w*uz*uz, // 12 [uz^2] + w*x*ux, // 13 [x*ux] + w*y*uy, // 14 [y*uy] + w*z*uz, // 15 [z*uz] + w*gamma, // 16 [ga] + w*gamma*gamma, // 17 [ga^2] + 1 // 18 Np + }; }); ReduceTuple a = reduce_data.value(); - const amrex::Real sum_w0 = amrex::get< 0>(a); - const amrex::Real sum_w_inv = sum_w0<=0._rt ? 0._rt : 1._rt/sum_w0; - - m_insitu_rdata[islice ] = sum_w0; - m_insitu_rdata[islice+ 1*m_nslices] = amrex::get< 1>(a)*sum_w_inv; - m_insitu_rdata[islice+ 2*m_nslices] = amrex::get< 2>(a)*sum_w_inv; - m_insitu_rdata[islice+ 3*m_nslices] = amrex::get< 3>(a)*sum_w_inv; - m_insitu_rdata[islice+ 4*m_nslices] = amrex::get< 4>(a)*sum_w_inv; - m_insitu_rdata[islice+ 5*m_nslices] = amrex::get< 5>(a)*sum_w_inv; - m_insitu_rdata[islice+ 6*m_nslices] = amrex::get< 6>(a)*sum_w_inv; - m_insitu_rdata[islice+ 7*m_nslices] = amrex::get< 7>(a)*sum_w_inv; - m_insitu_rdata[islice+ 8*m_nslices] = amrex::get< 8>(a)*sum_w_inv; - m_insitu_rdata[islice+ 9*m_nslices] = amrex::get< 9>(a)*sum_w_inv; - m_insitu_rdata[islice+10*m_nslices] = amrex::get<10>(a)*sum_w_inv; - m_insitu_rdata[islice+11*m_nslices] = amrex::get<11>(a)*sum_w_inv; - m_insitu_rdata[islice+12*m_nslices] = amrex::get<12>(a)*sum_w_inv; - m_insitu_rdata[islice+13*m_nslices] = amrex::get<13>(a)*sum_w_inv; - m_insitu_rdata[islice+14*m_nslices] = amrex::get<14>(a)*sum_w_inv; - m_insitu_rdata[islice+15*m_nslices] = amrex::get<15>(a)*sum_w_inv; - m_insitu_rdata[islice+16*m_nslices] = amrex::get<16>(a)*sum_w_inv; - m_insitu_rdata[islice+17*m_nslices] = amrex::get<17>(a)*sum_w_inv; - m_insitu_idata[islice ] = amrex::get<18>(a); - - m_insitu_sum_rdata[ 0] += sum_w0; - m_insitu_sum_rdata[ 1] += amrex::get< 1>(a); - m_insitu_sum_rdata[ 2] += amrex::get< 2>(a); - m_insitu_sum_rdata[ 3] += amrex::get< 3>(a); - m_insitu_sum_rdata[ 4] += amrex::get< 4>(a); - m_insitu_sum_rdata[ 5] += amrex::get< 5>(a); - m_insitu_sum_rdata[ 6] += amrex::get< 6>(a); - m_insitu_sum_rdata[ 7] += amrex::get< 7>(a); - m_insitu_sum_rdata[ 8] += amrex::get< 8>(a); - m_insitu_sum_rdata[ 9] += amrex::get< 9>(a); - m_insitu_sum_rdata[10] += amrex::get<10>(a); - m_insitu_sum_rdata[11] += amrex::get<11>(a); - m_insitu_sum_rdata[12] += amrex::get<12>(a); - m_insitu_sum_rdata[13] += amrex::get<13>(a); - m_insitu_sum_rdata[14] += amrex::get<14>(a); - m_insitu_sum_rdata[15] += amrex::get<15>(a); - m_insitu_sum_rdata[16] += amrex::get<16>(a); - m_insitu_sum_rdata[17] += amrex::get<17>(a); - m_insitu_sum_idata[ 0] += amrex::get<18>(a); + const amrex::Real sum_w_inv = amrex::get<0>(a) <= 0._rt ? 0._rt : 1._rt / amrex::get<0>(a); + + amrex::constexpr_for<0, m_insitu_nrp>( + [&] (auto idx) { + m_insitu_rdata[islice + idx.value * m_nslices] = amrex::get(a) * + // sum(w) is not multiplied by sum_w_inv + ( idx.value == 0 ? 1 : sum_w_inv ); + m_insitu_sum_rdata[idx.value] += amrex::get(a); + } + ); + + amrex::constexpr_for<0, m_insitu_nip>( + [&] (auto idx) { + m_insitu_idata[islice + idx.value * m_nslices] = amrex::get(a); + m_insitu_sum_idata[idx.value] += amrex::get(a); + } + ); } void diff --git a/src/particles/plasma/PlasmaParticleContainer.H b/src/particles/plasma/PlasmaParticleContainer.H index 49e32ba8d2..6cdc6795d9 100644 --- a/src/particles/plasma/PlasmaParticleContainer.H +++ b/src/particles/plasma/PlasmaParticleContainer.H @@ -198,23 +198,12 @@ private: std::string m_name; /**< name of the species */ int m_nslices; /**< number of z slices of the domain */ /** Number of real plasma properties for in-situ per-slice reduced diagnostics. */ - int m_insitu_nrp {14}; + static constexpr int m_insitu_nrp = 14; /** Number of int plasma properties for in-situ per-slice reduced diagnostics. */ - int m_insitu_nip {1}; - /** Per-slice real plasma properties: - * 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13 - * sum(w), [x], [x^2], [y], [y^2], [ux], [ux^2], [uy], [uy^2], [x*ux], [y*uy], [ga], [ga^2], [(ga-1)*(1-vz)] - * where [] means average over all particles within slice. - * Per-slice emittance: sqrt( abs( ([x^2]-[x]^2) * ([ux^2]-[ux]^2) - ([x*ux]-[x][ux])^2 ) ). - * Projected emittance: Same as above AFTER averaging all these quantities over slices. - * Energy spread: sqrt([ga^2]-[ga]^2), and same as above. - */ + static constexpr int m_insitu_nip = 1; + /** Per-slice real plasma properties */ amrex::Vector m_insitu_rdata; - /** Per-slice int plasma properties: - * 0 - * Np - * Np: number of particles in this slice - */ + /** Per-slice int plasma properties */ amrex::Vector m_insitu_idata; /** Sum of all per-slice real plasma properties */ amrex::Vector m_insitu_sum_rdata; diff --git a/src/particles/plasma/PlasmaParticleContainer.cpp b/src/particles/plasma/PlasmaParticleContainer.cpp index f7cb0e7838..39c0b3d9e2 100644 --- a/src/particles/plasma/PlasmaParticleContainer.cpp +++ b/src/particles/plasma/PlasmaParticleContainer.cpp @@ -13,6 +13,7 @@ #include "utils/DeprecatedInput.H" #include "utils/GPUUtil.H" #include "utils/InsituUtil.H" +#include "utils/TemplateUtil.H" #ifdef HIPACE_USE_OPENPMD # include #endif @@ -459,7 +460,7 @@ PlasmaParticleContainer::InSituComputeDiags (int islice) AMREX_ALWAYS_ASSERT(m_insitu_rdata.size()>0 && m_insitu_idata.size()>0 && m_insitu_sum_rdata.size()>0 && m_insitu_sum_idata.size()>0); - const amrex::Real insitu_radius = m_insitu_radius; + const amrex::Real insitu_radius_sq = m_insitu_radius * m_insitu_radius; const PhysConst phys_const = get_phys_const(); const amrex::Real clight_inv = 1.0_rt/phys_const.c; @@ -471,95 +472,67 @@ PlasmaParticleContainer::InSituComputeDiags (int islice) amrex::Long const num_particles = pti.numParticles(); - // Tuple contains: - // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, - // sum(w), , , , , , , , , , , , - // 12, 13, 14 - // , <(ga-1)*(1-vz)>, np - amrex::ReduceOps reduce_op; - amrex::ReduceData reduce_data(reduce_op); + TypeMultiplier reduce_op; + TypeMultiplier reduce_data(reduce_op); using ReduceTuple = typename decltype(reduce_data)::Type; reduce_op.eval( num_particles, reduce_data, [=] AMREX_GPU_DEVICE (int ip) -> ReduceTuple { - const amrex::Real xp = ptd.pos(0, ip); - const amrex::Real yp = ptd.pos(1, ip); - const amrex::Real uxp = ptd.rdata(PlasmaIdx::ux )[ip] * clight_inv; // proper velocity to u - const amrex::Real uyp = ptd.rdata(PlasmaIdx::uy )[ip] * clight_inv; - const amrex::Real psip = ptd.rdata(PlasmaIdx::psi)[ip]; - - if (ptd.id(ip) < 0 || xp*xp + yp*yp > insitu_radius*insitu_radius) { - return{0._rt, 0._rt, 0._rt, 0._rt, 0._rt, 0._rt, 0._rt, - 0._rt, 0._rt, 0._rt, 0._rt, 0._rt, 0._rt, 0._rt, 0}; + const amrex::Real x = ptd.pos(0, ip); + const amrex::Real y = ptd.pos(1, ip); + const amrex::Real ux = ptd.rdata(PlasmaIdx::ux)[ip] * clight_inv; // proper velocity to u + const amrex::Real uy = ptd.rdata(PlasmaIdx::uy)[ip] * clight_inv; + const amrex::Real psi = ptd.rdata(PlasmaIdx::psi)[ip]; + + if (ptd.id(ip) < 0 || x*x + y*y > insitu_radius_sq) { + return MakeZeroTuple(ReduceTuple{}); } - // particle's Lorentz factor - const amrex::Real gamma = (1.0_rt + uxp*uxp + uyp*uyp + psip*psip)/(2.0_rt*psip); - amrex::Real uzp = (gamma - psip); // the *c from uz cancels with the /c from the proper velocity conversion - const amrex::Real wp = ptd.rdata(PlasmaIdx::w)[ip] * gamma/psip; // quasi-static weighting factor + const amrex::Real gamma = (1.0_rt + ux*ux + uy*uy + psi*psi)/(2.0_rt*psi); + // the *c from uz cancels with the /c from the proper velocity conversion + const amrex::Real uz = (gamma - psi); + // weight with quasi-static weighting factor + const amrex::Real w = ptd.rdata(PlasmaIdx::w)[ip] * gamma/psi; // no quasi-static weighting factor to calculate quasi-static energy const amrex::Real energy = ptd.rdata(PlasmaIdx::w)[ip] * (gamma - 1._rt); - - return {wp, - wp*xp, - wp*xp*xp, - wp*yp, - wp*yp*yp, - wp*uxp, - wp*uxp*uxp, - wp*uyp, - wp*uyp*uyp, - wp*uzp, - wp*uzp*uzp, - wp*gamma, - wp*gamma*gamma, - energy, - 1}; + return { // Tuple contains: + w, // 0 sum(w) + w*x, // 1 [x] + w*x*x, // 2 [x^2] + w*y, // 3 [y] + w*y*y, // 4 [y^2] + w*ux, // 5 [ux] + w*ux*ux, // 6 [ux^2] + w*uy, // 7 [uy] + w*uy*uy, // 8 [uy^2] + w*uz, // 9 [uz] + w*uz*uz, // 10 [uz^2] + w*gamma, // 11 [ga] + w*gamma*gamma, // 12 [ga^2] + energy, // 13 [(ga-1)*(1-vz)] + 1 // 14 Np + }; }); ReduceTuple a = reduce_data.value(); - const amrex::Real sum_w0 = amrex::get< 0>(a); - const amrex::Real sum_w_inv = sum_w0 <= 0._rt ? 0._rt : 1._rt/sum_w0; - - m_insitu_rdata[islice ] = sum_w0; - m_insitu_rdata[islice+ 1*m_nslices] = amrex::get< 1>(a)*sum_w_inv; - m_insitu_rdata[islice+ 2*m_nslices] = amrex::get< 2>(a)*sum_w_inv; - m_insitu_rdata[islice+ 3*m_nslices] = amrex::get< 3>(a)*sum_w_inv; - m_insitu_rdata[islice+ 4*m_nslices] = amrex::get< 4>(a)*sum_w_inv; - m_insitu_rdata[islice+ 5*m_nslices] = amrex::get< 5>(a)*sum_w_inv; - m_insitu_rdata[islice+ 6*m_nslices] = amrex::get< 6>(a)*sum_w_inv; - m_insitu_rdata[islice+ 7*m_nslices] = amrex::get< 7>(a)*sum_w_inv; - m_insitu_rdata[islice+ 8*m_nslices] = amrex::get< 8>(a)*sum_w_inv; - m_insitu_rdata[islice+ 9*m_nslices] = amrex::get< 9>(a)*sum_w_inv; - m_insitu_rdata[islice+10*m_nslices] = amrex::get<10>(a)*sum_w_inv; - m_insitu_rdata[islice+11*m_nslices] = amrex::get<11>(a)*sum_w_inv; - m_insitu_rdata[islice+12*m_nslices] = amrex::get<12>(a)*sum_w_inv; - m_insitu_rdata[islice+13*m_nslices] = amrex::get<13>(a); - m_insitu_idata[islice ] = amrex::get<14>(a); - - m_insitu_sum_rdata[ 0] += sum_w0; - m_insitu_sum_rdata[ 1] += amrex::get< 1>(a); - m_insitu_sum_rdata[ 2] += amrex::get< 2>(a); - m_insitu_sum_rdata[ 3] += amrex::get< 3>(a); - m_insitu_sum_rdata[ 4] += amrex::get< 4>(a); - m_insitu_sum_rdata[ 5] += amrex::get< 5>(a); - m_insitu_sum_rdata[ 6] += amrex::get< 6>(a); - m_insitu_sum_rdata[ 7] += amrex::get< 7>(a); - m_insitu_sum_rdata[ 8] += amrex::get< 8>(a); - m_insitu_sum_rdata[ 9] += amrex::get< 9>(a); - m_insitu_sum_rdata[10] += amrex::get<10>(a); - m_insitu_sum_rdata[11] += amrex::get<11>(a); - m_insitu_sum_rdata[12] += amrex::get<12>(a); - m_insitu_sum_rdata[13] += amrex::get<13>(a); - m_insitu_sum_idata[ 0] += amrex::get<14>(a); + const amrex::Real sum_w_inv = amrex::get<0>(a) <= 0._rt ? 0._rt : 1._rt / amrex::get<0>(a); + + amrex::constexpr_for<0, m_insitu_nrp>( + [&] (auto idx) { + m_insitu_rdata[islice + idx.value * m_nslices] = amrex::get(a) * + // sum(w) and [(ga-1)*(1-vz)] are not multiplied by sum_w_inv + ( idx.value == 0 || idx.value == (m_insitu_nrp-1) ? 1 : sum_w_inv ); + m_insitu_sum_rdata[idx.value] += amrex::get(a); + } + ); + + amrex::constexpr_for<0, m_insitu_nip>( + [&] (auto idx) { + m_insitu_idata[islice + idx.value * m_nslices] = amrex::get(a); + m_insitu_sum_idata[idx.value] += amrex::get(a); + } + ); } } diff --git a/src/utils/TemplateUtil.H b/src/utils/TemplateUtil.H new file mode 100644 index 0000000000..26e0ebefd9 --- /dev/null +++ b/src/utils/TemplateUtil.H @@ -0,0 +1,66 @@ +/* Copyright 2024 + * + * This file is part of HiPACE++. + * + * Authors: AlexanderSinn + * License: BSD-3-Clause-LBNL + */ +#ifndef HIPACE_TEMPLATEUTIL_H_ +#define HIPACE_TEMPLATEUTIL_H_ + +#include +#include + +#include + +// zero initialize a GpuTuple +template +constexpr auto MakeZeroTuple(amrex::GpuTuple) { + return amrex::GpuTuple{Args{0}...}; +} + +namespace detail { + + // return TypeList by using the fast power algorithm + template + constexpr auto SingleTypeMultiplier_imp() { + if constexpr (N == 0) { + return amrex::TypeList<>{}; + } else if constexpr (N == 1) { + return amrex::TypeList{}; + } else if constexpr (N % 2 == 0) { + return SingleTypeMultiplier_imp() + SingleTypeMultiplier_imp(); + } else { + return SingleTypeMultiplier_imp() + amrex::TypeList{}; + } + } + + // overload of SingleTypeMultiplier for multiple types: + // convert T[N] to T, T, T, T, ... (N times with N >= 1) + template + constexpr auto SingleTypeMultiplier(const T (&)[N]) { + return SingleTypeMultiplier_imp(); + } + + // overload of SingleTypeMultiplier for one regular type + template + constexpr auto SingleTypeMultiplier(T) { + return amrex::TypeList{}; + } + + // apply the types of the input TypeList as template arguments to TParam + template