From f9b2927131803620d9af3ac671006bbb2de6b83e Mon Sep 17 00:00:00 2001 From: huebleruwm <37674341+huebleruwm@users.noreply.github.com> Date: Mon, 10 Jul 2023 18:42:07 -0500 Subject: [PATCH 1/7] Openacc tweaks and cleanup (#1097) * Chaning acc declare statements to acc enter data statement * Making acc statements more consistent * Making lapack useable while using openacc. Lapack is still run on the CPU * Updating setup_clubb_core to now accept clubb_config_flags as an input, and adding warning in case clubb is running with lapack but was compiled with openacc. --- src/CLUBB_core/adg1_adg2_3d_luhar_pdf.F90 | 4 +- src/CLUBB_core/advance_clubb_core_module.F90 | 196 ++++++++++-------- src/CLUBB_core/advance_helper_module.F90 | 40 +++- .../advance_windm_edsclrm_module.F90 | 4 +- src/CLUBB_core/advance_wp2_wp3_module.F90 | 38 ++-- src/CLUBB_core/advance_xm_wpxp_module.F90 | 85 ++++---- src/CLUBB_core/advance_xp2_xpyp_module.F90 | 4 +- src/CLUBB_core/clip_explicit.F90 | 6 +- src/CLUBB_core/clubb_api_module.F90 | 140 +------------ src/CLUBB_core/diffusion.F90 | 24 +-- src/CLUBB_core/fill_holes.F90 | 12 +- src/CLUBB_core/grid_class.F90 | 8 +- src/CLUBB_core/interpolation.F90 | 2 +- src/CLUBB_core/matrix_solver_wrapper.F90 | 73 +++---- src/CLUBB_core/mean_adv.F90 | 4 +- src/CLUBB_core/mixing_length.F90 | 42 ++-- src/CLUBB_core/mono_flux_limiter.F90 | 41 ++-- src/CLUBB_core/sfc_varnce_module.F90 | 52 ++--- src/CLUBB_core/sigma_sqd_w_module.F90 | 4 +- src/CLUBB_core/turbulent_adv_pdf.F90 | 8 +- src/clubb_driver.F90 | 10 +- 21 files changed, 373 insertions(+), 424 deletions(-) diff --git a/src/CLUBB_core/adg1_adg2_3d_luhar_pdf.F90 b/src/CLUBB_core/adg1_adg2_3d_luhar_pdf.F90 index 222b26b32..09334a800 100644 --- a/src/CLUBB_core/adg1_adg2_3d_luhar_pdf.F90 +++ b/src/CLUBB_core/adg1_adg2_3d_luhar_pdf.F90 @@ -147,7 +147,7 @@ subroutine ADG1_pdf_driver( nz, ngrdcol, & ! In integer :: j ! Loop index - !$acc declare create( w_1_n, w_2_n ) + !$acc enter data create( w_1_n, w_2_n ) ! Calculate the mixture fraction and the PDF component means and variances ! of w. @@ -204,6 +204,8 @@ subroutine ADG1_pdf_driver( nz, ngrdcol, & ! In enddo ! i=1, sclr_dim endif ! l_scalar_calc + !$acc exit data delete( w_1_n, w_2_n ) + return end subroutine ADG1_pdf_driver diff --git a/src/CLUBB_core/advance_clubb_core_module.F90 b/src/CLUBB_core/advance_clubb_core_module.F90 index 3a8ba25bc..925448535 100644 --- a/src/CLUBB_core/advance_clubb_core_module.F90 +++ b/src/CLUBB_core/advance_clubb_core_module.F90 @@ -1136,20 +1136,22 @@ subroutine advance_clubb_core ( gr, nz, ngrdcol, & ! intent(in) ! only be used to compute the variance at the surface. -dschanen 8 Sept 2009 if ( .not. l_host_applies_sfc_fluxes ) then - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol wpthlp(i,1) = wpthlp_sfc(i) wprtp(i,1) = wprtp_sfc(i) upwp(i,1) = upwp_sfc(i) vpwp(i,1) = vpwp_sfc(i) end do + !$acc end parallel loop if ( clubb_config_flags%l_linearize_pbl_winds ) then - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol upwp_pert(i,1) = upwp_sfc_pert(i) vpwp_pert(i,1) = vpwp_sfc_pert(i) end do + !$acc end parallel loop endif ! l_linearize_pbl_winds ! Set fluxes for passive scalars (if enabled) @@ -1160,6 +1162,7 @@ subroutine advance_clubb_core ( gr, nz, ngrdcol, & ! 
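! --- Illustrative sketch, not part of the patch --------------------------------
! A minimal, self-contained example of the conversion pattern described in the
! commit message above: local device arrays move from "!$acc declare create" to
! an explicit "enter data" on entry with a matching "exit data" before return,
! and parallel loops gain explicit "gang vector" clauses and
! "!$acc end parallel loop" markers. The subroutine and array names here
! (sketch_enter_exit_data, work, field) are hypothetical, not CLUBB routines.
subroutine sketch_enter_exit_data( nz, ngrdcol, field )

  use clubb_precision, only: &
      core_rknd ! Variable(s)

  implicit none

  integer, intent(in) :: &
    nz,      & ! Number of vertical levels
    ngrdcol    ! Number of grid columns

  real( kind = core_rknd ), dimension(ngrdcol,nz), intent(inout) :: &
    field ! Field updated on the device

  real( kind = core_rknd ), dimension(ngrdcol,nz) :: &
    work ! Local scratch array

  integer :: i, k

  ! Old form was:  !$acc declare create( work )
  !$acc enter data copyin( field ) create( work )

  !$acc parallel loop gang vector collapse(2) default(present)
  do k = 1, nz
    do i = 1, ngrdcol
      work(i,k)  = max( field(i,k), 0.0_core_rknd )
      field(i,k) = work(i,k)
    end do
  end do
  !$acc end parallel loop

  !$acc exit data copyout( field ) delete( work )

  return

end subroutine sketch_enter_exit_data
! --- End sketch -----------------------------------------------------------------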
intent(in) wpsclrp(i,1,j) = wpsclrp_sfc(i,j) end do end do + !$acc end parallel loop end if if ( edsclr_dim > 0 ) then @@ -1169,17 +1172,19 @@ subroutine advance_clubb_core ( gr, nz, ngrdcol, & ! intent(in) wpedsclrp(i,1,j) = wpedsclrp_sfc(i,j) end do end do + !$acc end parallel loop end if else - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol wpthlp(i,1) = 0.0_core_rknd wprtp(i,1) = 0.0_core_rknd upwp(i,1) = 0.0_core_rknd vpwp(i,1) = 0.0_core_rknd end do + !$acc end parallel loop ! Set fluxes for passive scalars (if enabled) if ( sclr_dim > 0 ) then @@ -1189,6 +1194,7 @@ subroutine advance_clubb_core ( gr, nz, ngrdcol, & ! intent(in) wpsclrp(i,1,j) = 0.0_core_rknd end do end do + !$acc end parallel loop end if if ( edsclr_dim > 0 ) then @@ -1198,20 +1204,23 @@ subroutine advance_clubb_core ( gr, nz, ngrdcol, & ! intent(in) wpedsclrp(i,1,j) = 0.0_core_rknd end do end do + !$acc end parallel loop end if end if ! ~l_host_applies_sfc_fluxes #ifdef CLUBBND_CAM - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol newmu(i) = varmu(i) end do + !$acc end parallel loop #else - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol newmu(i) = mu end do + !$acc end parallel loop #endif if ( clubb_config_flags%ipdf_call_placement == ipdf_pre_advance_fields & @@ -1304,6 +1313,7 @@ subroutine advance_clubb_core ( gr, nz, ngrdcol, & ! intent(in) wp2_zt(i,k) = max( wp2_zt(i,k), w_tol_sqd ) end do end do + !$acc end parallel loop beta = clubb_params(ibeta) Skw_denom_coef = clubb_params(iSkw_denom_coef) @@ -1336,6 +1346,7 @@ subroutine advance_clubb_core ( gr, nz, ngrdcol, & ! intent(in) *exp( -(1.0_core_rknd/2.0_core_rknd) * (Skw_zm(i,k)/gamma_coefc)**2 ) end do end do + !$acc end parallel loop else !$acc parallel loop gang vector collapse(2) default(present) do k = 1, nz @@ -1343,6 +1354,7 @@ subroutine advance_clubb_core ( gr, nz, ngrdcol, & ! intent(in) gamma_Skw_fnc(i,k) = gamma_coef end do end do + !$acc end parallel loop endif ! Compute sigma_sqd_w (dimensionless PDF width parameter) @@ -1361,6 +1373,7 @@ subroutine advance_clubb_core ( gr, nz, ngrdcol, & ! intent(in) sigma_sqd_w(i,k) = max( zero_threshold, sigma_sqd_w(i,k) ) ! Pos. def. quantity end do end do + !$acc end parallel loop endif ! clubb_config_flags%ipdf_call_placement == ipdf_post_advance_fields @@ -1381,6 +1394,7 @@ subroutine advance_clubb_core ( gr, nz, ngrdcol, & ! intent(in) a3_coef(i,k) = -2._core_rknd * ( 1._core_rknd - sigma_sqd_w(i,k) )**2 + 3.0_core_rknd end do end do + !$acc end parallel loop ! We found we obtain fewer spikes in wp3 when we clip a3 to be no greater ! than -1.4 -dschanen 4 Jan 2011 @@ -1391,6 +1405,8 @@ subroutine advance_clubb_core ( gr, nz, ngrdcol, & ! intent(in) a3_coef(i,k) = max( a3_coef(i,k), a3_coef_min ) end do end do + !$acc end parallel loop + a3_coef_zt(:,:) = zm2zt( nz, ngrdcol, gr, a3_coef(:,:) ) ! Interpolate thlp2, rtp2, and rtpthlp to thermodynamic levels. @@ -1405,6 +1421,7 @@ subroutine advance_clubb_core ( gr, nz, ngrdcol, & ! intent(in) rtp2_zt(i,k) = max( rtp2_zt(i,k), rt_tol**2 ) end do end do + !$acc end parallel loop ! Compute wp3 / wp2 on zt levels. Always use the interpolated value in the ! denominator since it's less likely to create spikes @@ -1414,6 +1431,7 @@ subroutine advance_clubb_core ( gr, nz, ngrdcol, & ! 
intent(in) wp3_on_wp2_zt(i,k) = ( wp3(i,k) / max( wp2_zt(i,k), w_tol_sqd ) ) end do end do + !$acc end parallel loop ! Clip wp3_on_wp2_zt if it's too large !$acc parallel loop gang vector collapse(2) default(present) @@ -1426,6 +1444,7 @@ subroutine advance_clubb_core ( gr, nz, ngrdcol, & ! intent(in) end if end do end do + !$acc end parallel loop ! Compute wp3_on_wp2 by interpolating wp3_on_wp2_zt wp3_on_wp2(:,:) = zt2zm( nz, ngrdcol, gr, wp3_on_wp2_zt(:,:) ) @@ -1451,6 +1470,7 @@ subroutine advance_clubb_core ( gr, nz, ngrdcol, & ! intent(in) em(i,k) = three_halves * wp2(i,k) end do end do + !$acc end parallel loop else !$acc parallel loop gang vector collapse(2) default(present) do k = 1, nz @@ -1458,6 +1478,7 @@ subroutine advance_clubb_core ( gr, nz, ngrdcol, & ! intent(in) em(i,k) = 0.5_core_rknd * ( wp2(i,k) + vp2(i,k) + up2(i,k) ) end do end do + !$acc end parallel loop end if sqrt_em_zt(:,:) = zm2zt( nz, ngrdcol, gr, em(:,:) ) @@ -1468,6 +1489,7 @@ subroutine advance_clubb_core ( gr, nz, ngrdcol, & ! intent(in) sqrt_em_zt(i,k) = sqrt( max( em_min, sqrt_em_zt(i,k) ) ) end do end do + !$acc end parallel loop !---------------------------------------------------------------- ! Compute mixing length and dissipation time @@ -1503,6 +1525,7 @@ subroutine advance_clubb_core ( gr, nz, ngrdcol, & ! intent(in) tau_zt(i,k) = min( Lscale(i,k) / sqrt_em_zt(i,k), taumax ) end do end do + !$acc end parallel loop tau_zm(:,:) = zt2zm( nz, ngrdcol, gr, Lscale(:,:) ) @@ -1513,6 +1536,7 @@ subroutine advance_clubb_core ( gr, nz, ngrdcol, & ! intent(in) / sqrt( max( em_min, em(i,k) ) ) ), taumax ) end do end do + !$acc end parallel loop !$acc parallel loop gang vector collapse(2) default(present) do k = 1, nz @@ -1528,6 +1552,7 @@ subroutine advance_clubb_core ( gr, nz, ngrdcol, & ! intent(in) tau_max_zt(i,k) = taumax end do end do + !$acc end parallel loop ! End Vince Larson's replacement. @@ -1601,6 +1626,7 @@ subroutine advance_clubb_core ( gr, nz, ngrdcol, & ! intent(in) Kh_zt(i,k) = c_K * Lscale(i,k) * sqrt_em_zt(i,k) end do end do + !$acc end parallel loop Lscale_zm(:,:) = zt2zm( nz, ngrdcol, gr, Lscale(:,:) ) @@ -1611,6 +1637,7 @@ subroutine advance_clubb_core ( gr, nz, ngrdcol, & ! intent(in) * sqrt( max( em(i,k), em_min ) ) end do end do + !$acc end parallel loop ! calculate Brunt-Vaisala frequency used for splatting brunt_vaisala_freq_sqd_splat & @@ -1703,6 +1730,7 @@ subroutine advance_clubb_core ( gr, nz, ngrdcol, & ! intent(in) mixt_frac_zm(i,k) = pdf_params_zm%mixt_frac(i,k) end do end do + !$acc end parallel loop else w_1_zm(:,:) = zt2zm( nz, ngrdcol, gr, pdf_params%w_1(:,:) ) w_2_zm(:,:) = zt2zm( nz, ngrdcol, gr, pdf_params%w_2(:,:) ) @@ -1746,6 +1774,7 @@ subroutine advance_clubb_core ( gr, nz, ngrdcol, & ! intent(in) invrs_tau_C1_zm(i,k) = invrs_tau_N2_zm(i,k) end do end do + !$acc end parallel loop else !$acc parallel loop gang vector collapse(2) default(present) do k = 1, nz @@ -1755,6 +1784,7 @@ subroutine advance_clubb_core ( gr, nz, ngrdcol, & ! intent(in) invrs_tau_C1_zm(i,k) = invrs_tau_wp2_zm(i,k) end do end do + !$acc end parallel loop end if ! l_stability_correction ! Set invrs_tau variables for C4 and C14 @@ -1764,6 +1794,7 @@ subroutine advance_clubb_core ( gr, nz, ngrdcol, & ! intent(in) invrs_tau_C14_zm(i,k) = invrs_tau_wp2_zm(i,k) end do end do + !$acc end parallel loop if ( .not. l_use_invrs_tau_N2_iso ) then !$acc parallel loop gang vector collapse(2) default(present) @@ -1772,6 +1803,7 @@ subroutine advance_clubb_core ( gr, nz, ngrdcol, & ! 
intent(in) invrs_tau_C4_zm(i,k) = invrs_tau_wp2_zm(i,k) end do end do + !$acc end parallel loop else !$acc parallel loop gang vector collapse(2) default(present) do k = 1, nz @@ -1779,6 +1811,7 @@ subroutine advance_clubb_core ( gr, nz, ngrdcol, & ! intent(in) invrs_tau_C4_zm(i,k) = invrs_tau_N2_iso(i,k) end do end do + !$acc end parallel loop end if if ( l_stats_samp ) then @@ -1843,6 +1876,7 @@ subroutine advance_clubb_core ( gr, nz, ngrdcol, & ! intent(in) Cx_fnc_Richardson(i,k) = 0.0 end do end do + !$acc end parallel loop end if ! Loop over the 4 main advance subroutines -- advance_xm_wpxp, @@ -2160,6 +2194,7 @@ subroutine advance_clubb_core ( gr, nz, ngrdcol, & ! intent(in) Kmh_zm(i,k) = Kh_zm(i,k) * C_K10h ! Coefficient for thermo end do end do + !$acc end parallel loop if ( edsclr_dim > 1 .and. clubb_config_flags%l_do_expldiff_rtm_thlm ) then !$acc parallel loop gang vector collapse(2) default(present) @@ -2169,6 +2204,7 @@ subroutine advance_clubb_core ( gr, nz, ngrdcol, & ! intent(in) edsclrm(i,k,edsclr_dim) = rtm(i,k) end do end do + !$acc end parallel loop end if call advance_windm_edsclrm( nz, ngrdcol, gr, dt, & ! intent(in) @@ -2211,6 +2247,7 @@ subroutine advance_clubb_core ( gr, nz, ngrdcol, & ! intent(in) end if end do end do + !$acc end parallel loop end if @@ -2317,6 +2354,7 @@ subroutine advance_clubb_core ( gr, nz, ngrdcol, & ! intent(in) vp2_zt(i,k) = max( vp2_zt(i,k), w_tol_sqd ) end do end do + !$acc end parallel loop if ( clubb_config_flags%iiPDF_type == iiPDF_ADG1 ) then @@ -2330,6 +2368,7 @@ subroutine advance_clubb_core ( gr, nz, ngrdcol, & ! intent(in) sigma_sqd_w_zt(i,k) = max( sigma_sqd_w_zt(i,k), zero_threshold ) end do end do + !$acc end parallel loop call xp3_LG_2005_ansatz( nz, ngrdcol, Skw_zt, wpthlp_zt, wp2_zt, & thlp2_zt, sigma_sqd_w_zt, & @@ -2362,6 +2401,7 @@ subroutine advance_clubb_core ( gr, nz, ngrdcol, & ! intent(in) sclrp2_zt(i,k) = max( sclrp2_zt(i,k), sclr_tol(j)**2 ) end do end do + !$acc end parallel loop call xp3_LG_2005_ansatz( nz, ngrdcol, Skw_zt, wpsclrp_zt, wp2_zt, & sclrp2_zt, sigma_sqd_w_zt, & @@ -2532,6 +2572,7 @@ subroutine advance_clubb_core ( gr, nz, ngrdcol, & ! intent(in) qclvar(i,k) = rcp2_zt(i,k) end do end do + !$acc end parallel loop #endif @@ -3732,7 +3773,7 @@ subroutine pdf_closure_driver( gr, nz, ngrdcol, & ! Intent(in) ! Clip if extrap. causes sclrm_zm to be less than sclr_tol - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol sclrm_zm(i,nz,j) = max( sclrm_zm(i,nz,j), sclr_tol(j) ) end do @@ -3747,7 +3788,7 @@ subroutine pdf_closure_driver( gr, nz, ngrdcol, & ! Intent(in) ! p_sfc, which is p_in_Pa(1). Thus, p_in_Pa_zm(1) = p_in_Pa(1). p_in_Pa_zm(:,:) = zt2zm( nz, ngrdcol, gr, p_in_Pa(:,:) ) - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol p_in_Pa_zm(i,1) = p_in_Pa(i,1) @@ -3768,7 +3809,7 @@ subroutine pdf_closure_driver( gr, nz, ngrdcol, & ! Intent(in) rtm_zm(:,:) = zt2zm( nz, ngrdcol, gr, rtm(:,:) ) thlm_zm(:,:) = zt2zm( nz, ngrdcol, gr, thlm(:,:) ) - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol ! Clip if extrapolation at the top level causes rtm_zm to be < rt_tol rtm_zm(i,nz) = max( rtm_zm(i,nz), rt_tol ) @@ -3859,7 +3900,7 @@ subroutine pdf_closure_driver( gr, nz, ngrdcol, & ! Intent(in) end do !$acc end parallel loop - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol ! 
Since top momentum level is higher than top thermo level, ! set variables at top momentum level to 0. @@ -3886,7 +3927,7 @@ subroutine pdf_closure_driver( gr, nz, ngrdcol, & ! Intent(in) end do !$acc end parallel loop #ifndef CLUBB_CAM - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol rcp2(i,nz) = zero end do @@ -3906,7 +3947,7 @@ subroutine pdf_closure_driver( gr, nz, ngrdcol, & ! Intent(in) wp2up2(:,:) = zt2zm( nz, ngrdcol, gr, wp2up2_zt(:,:) ) wp2vp2(:,:) = zt2zm( nz, ngrdcol, gr, wp2vp2_zt(:,:) ) - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol wpthvp(i,nz) = 0.0_core_rknd thlpthvp(i,nz) = 0.0_core_rknd @@ -3940,7 +3981,7 @@ subroutine pdf_closure_driver( gr, nz, ngrdcol, & ! Intent(in) sclrpthvp(:,:,j) = zt2zm( nz, ngrdcol, gr, sclrpthvp_zt(:,:,j) ) sclrprcp(:,:,j) = zt2zm( nz, ngrdcol, gr, sclrprcp_zt(:,:,j) ) - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do k = 1, nz do i = 1, ngrdcol sclrpthvp(i,nz,j) = 0.0_core_rknd @@ -4105,15 +4146,7 @@ subroutine setup_clubb_core & #ifdef GFDL I_sat_sphum, & ! intent(in) h1g, 2010-06-16 #endif - iiPDF_type, & ! intent(in) - ipdf_call_placement, & ! intent(in) - l_predict_upwp_vpwp, & ! intent(in) - l_min_xp2_from_corr_wx, & ! intent(in) - l_prescribed_avg_deltaz, & ! intent(in) - l_damp_wp2_using_em, & ! intent(in) - l_stability_correct_tau_zm, & ! intent(in) - l_enable_relaxed_clipping, & ! intent(in) - l_diag_Lscale_from_tau, & ! intent(in) + clubb_config_flags, & ! intent(in) #ifdef GFDL cloud_frac_min, & ! intent(in) h1g, 2010-06-16 @@ -4174,6 +4207,7 @@ subroutine setup_clubb_core & iiPDF_TSDADG, & iiPDF_LY93, & iiPDF_new_hybrid, & + lapack, & l_explicit_turbulent_adv_wpxp use clubb_precision, only: & @@ -4215,35 +4249,9 @@ subroutine setup_clubb_core & logical, intent(in) :: & l_input_fields ! Flag for whether LES input fields are being used - integer, intent(in) :: & - iiPDF_type, & ! Selected option for the two-component normal - ! (double Gaussian) PDF type to use for the w, - ! rt, and theta-l (or w, chi, and eta) portion of - ! CLUBB's multivariate, two-component PDF. - ipdf_call_placement ! Selected option for the placement of the call to - ! CLUBB's PDF. - - logical, intent(in) :: & - l_predict_upwp_vpwp, & ! Flag to predict and along with and - ! alongside the advancement of , , , , - ! , and in subroutine advance_xm_wpxp. - ! Otherwise, and are still approximated by eddy - ! diffusivity when and are advanced in subroutine - ! advance_windm_edsclrm. - l_min_xp2_from_corr_wx, & ! Flag to base the threshold minimum value of xp2 (rtp2 and - ! thlp2) on keeping the overall correlation of w and x within - ! the limits of -max_mag_correlation_flux to - ! max_mag_correlation_flux. - l_prescribed_avg_deltaz, & ! used in adj_low_res_nu. If .true., avg_deltaz = deltaz - l_stability_correct_tau_zm, & ! Use tau_N2_zm instead of tau_zm in wpxp_pr1 stability - ! correction - l_damp_wp2_using_em, & ! In wp2 equation, use a dissipation formula of - ! -(2/3)*em/tau_zm, as in Bougeault (1981) - l_enable_relaxed_clipping, & ! Flag to relax clipping on wpxp in - ! xm_wpxp_clipping_and_stats - l_diag_Lscale_from_tau ! First diagnose dissipation time tau, and - ! then diagnose the mixing length scale as - ! Lscale = tau * tke + type(clubb_config_flags_type), intent(in) :: & + clubb_config_flags + #ifdef GFDL logical, intent(in) :: & ! 
h1g, 2010-06-16 begin mod @@ -4261,17 +4269,29 @@ subroutine setup_clubb_core & err_code_out = clubb_no_error ! Initialize to no error value call initialize_error_headers +#ifdef _OPENACC + if ( clubb_config_flags%penta_solve_method == lapack ) then + write(fstderr,*) "WARNING: The penta-diagonal lapack solver is not GPU accelerated" + write(fstderr,*) " Set penta_solve_method = 2, to use an accelerated penta-diagonal solver" + end if + + if ( clubb_config_flags%tridiag_solve_method == lapack ) then + write(fstderr,*) "WARNING: The tri-diagonal lapack solver is not GPU accelerated" + write(fstderr,*) " Set tridiag_solve_method = 2, to use an accelerated tri-diagonal solver" + end if +#endif + ! Sanity check if ( clubb_at_least_debug_level( 0 ) ) then - if ( l_damp_wp2_using_em .and. & + if ( clubb_config_flags%l_damp_wp2_using_em .and. & (abs(params(iC1) - params(iC14)) > abs(params(iC1) + params(iC14)) / 2 * eps .or. & - l_stability_correct_tau_zm) ) then + clubb_config_flags%l_stability_correct_tau_zm) ) then write(fstderr,*) "l_damp_wp2_using_em = T requires C1=C14 and" & // " l_stability_correct_tau_zm = F" write(fstderr,*) "C1 = ", params(iC1) write(fstderr,*) "C14 = ", params(iC14) - write(fstderr,*) "l_stability_correct_tau_zm = ", l_stability_correct_tau_zm + write(fstderr,*) "l_stability_correct_tau_zm = ", clubb_config_flags%l_stability_correct_tau_zm write(fstderr,*) "Fatal error in setup_clubb_core" err_code = clubb_fatal_error err_code_out = clubb_fatal_error @@ -4307,9 +4327,10 @@ subroutine setup_clubb_core & ! Check for the type of two component normal (double Gaussian) PDF being ! used for w, rt, and theta-l (or w, chi, and eta). - if ( iiPDF_type < iiPDF_ADG1 .or. iiPDF_type > iiPDF_new_hybrid ) then - write(fstderr,*) "Unknown type of double Gaussian PDF selected: ", iiPDF_type - write(fstderr,*) "iiPDF_type = ", iiPDF_type + if ( clubb_config_flags%iiPDF_type < iiPDF_ADG1 & + .or. clubb_config_flags%iiPDF_type > iiPDF_new_hybrid ) then + write(fstderr,*) "Unknown type of double Gaussian PDF selected: ", clubb_config_flags%iiPDF_type + write(fstderr,*) "iiPDF_type = ", clubb_config_flags%iiPDF_type write(fstderr,*) "Fatal error in setup_clubb_core" err_code = clubb_fatal_error err_code_out = clubb_fatal_error @@ -4317,11 +4338,11 @@ subroutine setup_clubb_core & endif ! iiPDF_type < iiPDF_ADG1 or iiPDF_type > iiPDF_lY93 ! The ADG2 and 3D Luhar PDFs can only be used as part of input fields. - if ( iiPDF_type == iiPDF_ADG2 ) then + if ( clubb_config_flags%iiPDF_type == iiPDF_ADG2 ) then if ( .not. l_input_fields ) then write(fstderr,*) "The ADG2 PDF can only be used with" & // " input fields (l_input_fields = .true.)." - write(fstderr,*) "iiPDF_type = ", iiPDF_type + write(fstderr,*) "iiPDF_type = ", clubb_config_flags%iiPDF_type write(fstderr,*) "l_input_fields = ", l_input_fields write(fstderr,*) "Fatal error in setup_clubb_core" err_code = clubb_fatal_error @@ -4330,11 +4351,11 @@ subroutine setup_clubb_core & endif ! .not. l_input_fields endif ! iiPDF_type == iiPDF_ADG2 - if ( iiPDF_type == iiPDF_3D_Luhar ) then + if ( clubb_config_flags%iiPDF_type == iiPDF_3D_Luhar ) then if ( .not. l_input_fields ) then write(fstderr,*) "The 3D Luhar PDF can only be used with" & // " input fields (l_input_fields = .true.)." 
- write(fstderr,*) "iiPDF_type = ", iiPDF_type + write(fstderr,*) "iiPDF_type = ", clubb_config_flags%iiPDF_type write(fstderr,*) "l_input_fields = ", l_input_fields write(fstderr,*) "Fatal error in setup_clubb_core" err_code = clubb_fatal_error @@ -4345,11 +4366,11 @@ subroutine setup_clubb_core & ! This also currently applies to the new PDF until it has been fully ! implemented. - if ( iiPDF_type == iiPDF_new ) then + if ( clubb_config_flags%iiPDF_type == iiPDF_new ) then if ( .not. l_input_fields ) then write(fstderr,*) "The new PDF can only be used with" & // " input fields (l_input_fields = .true.)." - write(fstderr,*) "iiPDF_type = ", iiPDF_type + write(fstderr,*) "iiPDF_type = ", clubb_config_flags%iiPDF_type write(fstderr,*) "l_input_fields = ", l_input_fields write(fstderr,*) "Fatal error in setup_clubb_core" err_code = clubb_fatal_error @@ -4360,11 +4381,11 @@ subroutine setup_clubb_core & ! This also currently applies to the TSDADG PDF until it has been fully ! implemented. - if ( iiPDF_type == iiPDF_TSDADG ) then + if ( clubb_config_flags%iiPDF_type == iiPDF_TSDADG ) then if ( .not. l_input_fields ) then write(fstderr,*) "The new TSDADG PDF can only be used with" & // " input fields (l_input_fields = .true.)." - write(fstderr,*) "iiPDF_type = ", iiPDF_type + write(fstderr,*) "iiPDF_type = ", clubb_config_flags%iiPDF_type write(fstderr,*) "l_input_fields = ", l_input_fields write(fstderr,*) "Fatal error in setup_clubb_core" err_code = clubb_fatal_error @@ -4374,11 +4395,11 @@ subroutine setup_clubb_core & endif ! iiPDF_type == iiPDF_TSDADG ! This also applies to Lewellen and Yoh (1993). - if ( iiPDF_type == iiPDF_LY93 ) then + if ( clubb_config_flags%iiPDF_type == iiPDF_LY93 ) then if ( .not. l_input_fields ) then write(fstderr,*) "The Lewellen and Yoh PDF can only be used with" & // " input fields (l_input_fields = .true.)." - write(fstderr,*) "iiPDF_type = ", iiPDF_type + write(fstderr,*) "iiPDF_type = ", clubb_config_flags%iiPDF_type write(fstderr,*) "l_input_fields = ", l_input_fields write(fstderr,*) "Fatal error in setup_clubb_core" err_code = clubb_fatal_error @@ -4388,9 +4409,10 @@ subroutine setup_clubb_core & endif ! iiPDF_type == iiPDF_LY93 ! Check the option for the placement of the call to CLUBB's PDF. - if ( ipdf_call_placement < ipdf_pre_advance_fields & - .or. ipdf_call_placement > ipdf_pre_post_advance_fields ) then - write(fstderr,*) "Invalid option selected for ipdf_call_placement: ", ipdf_call_placement + if ( clubb_config_flags%ipdf_call_placement < ipdf_pre_advance_fields & + .or. clubb_config_flags%ipdf_call_placement > ipdf_pre_post_advance_fields ) then + write(fstderr,*) "Invalid option selected for ipdf_call_placement: ", & + clubb_config_flags%ipdf_call_placement write(fstderr,*) "Fatal error in setup_clubb_core" err_code = clubb_fatal_error err_code_out = clubb_fatal_error @@ -4399,7 +4421,7 @@ subroutine setup_clubb_core & ! The l_predict_upwp_vpwp flag requires that the ADG1 PDF is used ! implicitly in subroutine advance_xm_wpxp. - if ( l_predict_upwp_vpwp ) then + if ( clubb_config_flags%l_predict_upwp_vpwp ) then ! When l_predict_upwp_vpwp is enabled, the ! l_explicit_turbulent_adv_wpxp flag must be turned off. @@ -4420,8 +4442,8 @@ subroutine setup_clubb_core & ! the ADG1 PDF or the new hybrid PDF. The other PDFs are not currently ! set up to calculate variables needed for implicit or semi-implicit ! turbulent advection, such as coef_wp2up_implicit, etc. - if ( ( iiPDF_type /= iiPDF_ADG1 ) & - .and. 
( iiPDF_type /= iiPDF_new_hybrid ) ) then + if ( ( clubb_config_flags%iiPDF_type /= iiPDF_ADG1 ) & + .and. ( clubb_config_flags%iiPDF_type /= iiPDF_new_hybrid ) ) then write(fstderr,*) "Currently, only the ADG1 PDF and the new hybrid" & // " PDF are set up for use with the" & // " l_predict_upwp_vpwp code." @@ -4435,8 +4457,8 @@ subroutine setup_clubb_core & ! The flags l_min_xp2_from_corr_wx and l_enable_relaxed_clipping must ! have opposite values. - if ( ( l_min_xp2_from_corr_wx ) & - .and. ( l_enable_relaxed_clipping ) ) then + if ( ( clubb_config_flags%l_min_xp2_from_corr_wx ) & + .and. ( clubb_config_flags%l_enable_relaxed_clipping ) ) then write(fstderr,*) "Invalid configuration: l_min_xp2_from_corr_wx = T " & // "and l_enable_relaxed_clipping = T" write(fstderr,*) "They must have opposite values" @@ -4444,8 +4466,8 @@ subroutine setup_clubb_core & err_code = clubb_fatal_error err_code_out = clubb_fatal_error return - elseif ( ( .not. l_min_xp2_from_corr_wx ) & - .and. ( .not. l_enable_relaxed_clipping ) ) then + elseif ( ( .not. clubb_config_flags%l_min_xp2_from_corr_wx ) & + .and. ( .not. clubb_config_flags%l_enable_relaxed_clipping ) ) then write(fstderr,*) "Invalid configuration: l_min_xp2_from_corr_wx = F " & // "and l_enable_relaxed_clipping = F" write(fstderr,*) "They must have opposite values" @@ -4555,7 +4577,7 @@ subroutine setup_clubb_core & ! Checking that when the l_diag_Lscale_from_tau is enabled, the ! relevant Cx tunable parameters are all set to a value of 1 (as ! you're supposed to tune the C_invrs_tau_ parameters instead). - if ( l_diag_Lscale_from_tau ) then + if ( clubb_config_flags%l_diag_Lscale_from_tau ) then ! Note: someday when we can successfully run with all these parameters ! having a value of 1, the "Warning" messages should be removed and the @@ -4864,7 +4886,7 @@ subroutine trapezoidal_rule_zt( nz, ngrdcol, gr, l_call_pdf_closure_twice, & ! ! Since top momentum level is higher than top thermo. level, ! set variables at top momentum level to 0. - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol wprtp2_zm(i,nz) = 0.0_core_rknd wpthlp2_zm(i,nz) = 0.0_core_rknd @@ -4881,7 +4903,7 @@ subroutine trapezoidal_rule_zt( nz, ngrdcol, gr, l_call_pdf_closure_twice, & ! wpsclrp2_zm(:,:,sclr) = zt2zm( nz, ngrdcol, gr, wpsclrp2(:,:,sclr) ) wpsclrpthlp_zm(:,:,sclr) = zt2zm( nz, ngrdcol, gr, wpsclrpthlp(:,:,sclr) ) - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol wpsclrprtp_zm(i,nz,sclr) = 0.0_core_rknd wpsclrp2_zm(i,nz,sclr) = 0.0_core_rknd @@ -5055,7 +5077,7 @@ subroutine calc_trapezoid_zt( nz, ngrdcol, gr, & ! ---------------- Begin Code ---------------- ! Boundary condition: trapezoidal rule not valid at zt level 1 - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol trapezoid_zt(i,1) = variable_zt(i,1) end do @@ -5118,7 +5140,7 @@ subroutine calc_trapezoid_zm( nz, ngrdcol, gr, variable_zm, variable_zt, & ! Boundary conditions: trapezoidal rule not valid at top zm level, nzmax. ! Trapezoidal rule also not used at zm level 1. 
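! --- Illustrative sketch, not part of the patch --------------------------------
! The setup_clubb_core hunks above replace nine separate flag arguments with a
! single clubb_config_flags argument of clubb_config_flags_type, whose
! components (iiPDF_type, ipdf_call_placement, l_damp_wp2_using_em, etc.) are
! read where the old scalar dummies were used. A minimal consumer of that
! convention might look like the hypothetical routine below; the routine name
! is made up, and the module holding fstderr is assumed.
subroutine sketch_use_config_flags( clubb_config_flags )

  use model_flags, only: &
      clubb_config_flags_type ! Type

  use constants_clubb, only: &
      fstderr ! Constant(s) (assumed location)

  implicit none

  type(clubb_config_flags_type), intent(in) :: &
    clubb_config_flags

  ! Options are read as components of the derived type rather than as
  ! individual dummy arguments.
  if ( clubb_config_flags%l_damp_wp2_using_em &
       .and. clubb_config_flags%l_stability_correct_tau_zm ) then
    write(fstderr,*) "l_damp_wp2_using_em = T requires" &
                     // " l_stability_correct_tau_zm = F"
  end if

  return

end subroutine sketch_use_config_flags
! --- End sketch -----------------------------------------------------------------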
- !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol trapezoid_zm(i,1) = variable_zm(i,1) trapezoid_zm(i,nz) = variable_zm(i,nz) @@ -5210,7 +5232,8 @@ subroutine compute_cloud_cover( gr, nz, ngrdcol, & !------------------------ Begin code ------------------------ - !$acc declare create( chi_mean, vert_cloud_frac_upper, vert_cloud_frac_lower, vert_cloud_frac ) + !$acc enter data create( chi_mean, vert_cloud_frac_upper, & + !$acc vert_cloud_frac_lower, vert_cloud_frac ) !$acc parallel loop gang vector collapse(2) default(present) do k = 1, nz @@ -5307,7 +5330,7 @@ subroutine compute_cloud_cover( gr, nz, ngrdcol, & end do ! k = 2, gr%nz-1, 1 !$acc end parallel loop - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol cloud_cover(i,1) = cloud_frac(i,1) cloud_cover(i,nz) = cloud_frac(i,nz) @@ -5334,6 +5357,9 @@ subroutine compute_cloud_cover( gr, nz, ngrdcol, & end if end if + !$acc exit data delete( chi_mean, vert_cloud_frac_upper, & + !$acc vert_cloud_frac_lower, vert_cloud_frac ) + return end subroutine compute_cloud_cover @@ -5471,13 +5497,13 @@ subroutine set_Lscale_max( ngrdcol, l_implemented, host_dx, host_dy, & ! Determine the maximum allowable value for Lscale (in meters). if ( l_implemented ) then - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol Lscale_max(i) = 0.25_core_rknd * min( host_dx(i), host_dy(i) ) end do !$acc end parallel loop else - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol Lscale_max(i) = 1.0e5_core_rknd end do diff --git a/src/CLUBB_core/advance_helper_module.F90 b/src/CLUBB_core/advance_helper_module.F90 index 6ab79056c..19602e375 100644 --- a/src/CLUBB_core/advance_helper_module.F90 +++ b/src/CLUBB_core/advance_helper_module.F90 @@ -293,9 +293,9 @@ subroutine calc_stability_correction( nz, ngrdcol, gr, & !------------ Begin Code -------------- - !$acc declare create( brunt_vaisala_freq_sqd, brunt_vaisala_freq_sqd_mixed, & - !$acc brunt_vaisala_freq_sqd_moist, brunt_vaisala_freq_sqd_dry, & - !$acc brunt_vaisala_freq_sqd_plus, lambda0_stability, Lscale_zm ) + !$acc enter data create( brunt_vaisala_freq_sqd, brunt_vaisala_freq_sqd_mixed, & + !$acc brunt_vaisala_freq_sqd_moist, brunt_vaisala_freq_sqd_dry, & + !$acc brunt_vaisala_freq_sqd_plus, lambda0_stability, Lscale_zm ) call calc_brunt_vaisala_freq_sqd( nz, ngrdcol, gr, thlm, & ! intent(in) exner, rtm, rcm, p_in_Pa, thvm, & ! 
intent(in) @@ -332,6 +332,10 @@ subroutine calc_stability_correction( nz, ngrdcol, gr, & end do !$acc end parallel loop + !$acc exit data delete( brunt_vaisala_freq_sqd, brunt_vaisala_freq_sqd_mixed, & + !$acc brunt_vaisala_freq_sqd_moist, brunt_vaisala_freq_sqd_dry, & + !$acc brunt_vaisala_freq_sqd_plus, lambda0_stability, Lscale_zm ) + return end subroutine calc_stability_correction @@ -724,11 +728,12 @@ subroutine compute_Cx_fnc_Richardson( nz, ngrdcol, gr, & !------------------------------ Begin Code ------------------------------ - !$acc declare create( brunt_vaisala_freq_sqd, brunt_vaisala_freq_sqd_mixed, brunt_vaisala_freq_sqd_dry, & - !$acc brunt_vaisala_freq_sqd_moist, brunt_vaisala_freq_sqd_plus, Richardson_num, & - !$acc Ri_zm, ddzt_um, ddzt_vm, shear_sqd, turb_freq_sqd, Lscale_zm, Cx_fnc_interp, & - !$acc Richardson_num_clipped, Cx_fnc_Richardson_avg, fnc_Richardson, & - !$acc fnc_Richardson_clipped, fnc_Richardson_smooth ) + !$acc enter data create( brunt_vaisala_freq_sqd, brunt_vaisala_freq_sqd_mixed, & + !$acc brunt_vaisala_freq_sqd_dry, brunt_vaisala_freq_sqd_moist, & + !$acc brunt_vaisala_freq_sqd_plus, Richardson_num, Cx_fnc_interp, & + !$acc Ri_zm, ddzt_um, ddzt_vm, shear_sqd, turb_freq_sqd, Lscale_zm, & + !$acc Richardson_num_clipped, Cx_fnc_Richardson_avg, fnc_Richardson, & + !$acc fnc_Richardson_clipped, fnc_Richardson_smooth ) if ( l_modify_limiters_for_cnvg_test .and. l_use_shear_turb_freq_sqd ) then error stop "ERROR: l_modify_limiters_for_cnvg_test .and. l_use_shear_turb_freq_sqd "// & @@ -910,6 +915,13 @@ subroutine compute_Cx_fnc_Richardson( nz, ngrdcol, gr, & end do end if + !$acc exit data delete( brunt_vaisala_freq_sqd, brunt_vaisala_freq_sqd_mixed, & + !$acc brunt_vaisala_freq_sqd_dry, brunt_vaisala_freq_sqd_moist, & + !$acc brunt_vaisala_freq_sqd_plus, Richardson_num, Cx_fnc_interp, & + !$acc Ri_zm, ddzt_um, ddzt_vm, shear_sqd, turb_freq_sqd, Lscale_zm, & + !$acc Richardson_num_clipped, Cx_fnc_Richardson_avg, fnc_Richardson, & + !$acc fnc_Richardson_clipped, fnc_Richardson_smooth ) + return end subroutine compute_Cx_fnc_Richardson @@ -979,7 +991,7 @@ function Lscale_width_vert_avg( nz, ngrdcol, gr, smth_type, & !-------------------------- Begin Code -------------------------- - !$acc declare create( one_half_avg_width, numer_terms, denom_terms ) + !$acc enter data create( one_half_avg_width, numer_terms, denom_terms ) if ( smth_type == 1 ) then !$acc parallel loop gang vector collapse(2) default(present) @@ -1079,6 +1091,8 @@ function Lscale_width_vert_avg( nz, ngrdcol, gr, smth_type, & end do end do + !$acc exit data delete( one_half_avg_width, numer_terms, denom_terms ) + return end function Lscale_width_vert_avg @@ -1133,7 +1147,7 @@ subroutine wp2_term_splat_lhs( nz, ngrdcol, gr, C_wp2_splat, & !----------------------------- Begin Code ----------------------------- - !$acc declare create( brunt_vaisala_freq_splat_clipped, brunt_vaisala_freq_splat_smooth ) + !$acc enter data create( brunt_vaisala_freq_splat_clipped, brunt_vaisala_freq_splat_smooth ) !$acc parallel loop gang vector collapse(2) default(present) do k = 1, nz @@ -1155,6 +1169,8 @@ subroutine wp2_term_splat_lhs( nz, ngrdcol, gr, C_wp2_splat, & end do !$acc end parallel loop + !$acc exit data delete( brunt_vaisala_freq_splat_clipped, brunt_vaisala_freq_splat_smooth ) + return end subroutine wp2_term_splat_lhs @@ -1211,7 +1227,7 @@ subroutine wp3_term_splat_lhs( nz, ngrdcol, gr, C_wp2_splat, & !----------------------------- Begin Code ----------------------------- - !$acc declare create( 
brunt_vaisala_freq_splat_clipped, brunt_vaisala_freq_splat_smooth ) + !$acc enter data create( brunt_vaisala_freq_splat_clipped, brunt_vaisala_freq_splat_smooth ) !$acc parallel loop gang vector collapse(2) default(present) do k = 1, nz @@ -1234,6 +1250,8 @@ subroutine wp3_term_splat_lhs( nz, ngrdcol, gr, C_wp2_splat, & end do !$acc end parallel loop + !$acc exit data delete( brunt_vaisala_freq_splat_clipped, brunt_vaisala_freq_splat_smooth ) + return end subroutine wp3_term_splat_lhs diff --git a/src/CLUBB_core/advance_windm_edsclrm_module.F90 b/src/CLUBB_core/advance_windm_edsclrm_module.F90 index ce47bcd70..37c639c24 100644 --- a/src/CLUBB_core/advance_windm_edsclrm_module.F90 +++ b/src/CLUBB_core/advance_windm_edsclrm_module.F90 @@ -2133,7 +2133,7 @@ subroutine compute_uv_tndcy( nz, ngrdcol, solve_type, & ! -------------------------- Begin Code -------------------------- - !$acc declare create( xm_gf, xm_cf ) + !$acc enter data create( xm_gf, xm_cf ) if ( .not. l_implemented ) then ! Only compute the Coriolis term if the model is running on it's own, @@ -2241,6 +2241,8 @@ subroutine compute_uv_tndcy( nz, ngrdcol, solve_type, & endif + !$acc exit data delete( xm_gf, xm_cf ) + return end subroutine compute_uv_tndcy diff --git a/src/CLUBB_core/advance_wp2_wp3_module.F90 b/src/CLUBB_core/advance_wp2_wp3_module.F90 index 3b14504fe..a79562f8f 100644 --- a/src/CLUBB_core/advance_wp2_wp3_module.F90 +++ b/src/CLUBB_core/advance_wp2_wp3_module.F90 @@ -413,17 +413,17 @@ subroutine advance_wp2_wp3( nz, ngrdcol, gr, dt, & ! i integer :: k, i, b - !$acc declare create( wp2_old, wp3_old, C1_Skw_fnc, C11_Skw_fnc, C16_fnc, & - !$acc wp3_term_ta_lhs_result, wp3_pr3_lhs, lhs_ta_wp2, & - !$acc lhs_tp_wp3, lhs_adv_tp_wp3, lhs_pr_tp_wp3, & - !$acc lhs_ta_wp3, lhs_dp1_wp2, rhs_dp1_wp2, lhs_pr1_wp2, & - !$acc rhs_pr1_wp2, lhs_pr1_wp3, rhs_pr1_wp3, rhs_bp_pr2_wp2, & - !$acc rhs_pr_dfsn_wp2, rhs_bp1_pr2_wp3, rhs_pr3_wp2, & - !$acc rhs_pr3_wp3, rhs_ta_wp3, rhs_pr_turb_wp3, rhs_pr_dfsn_wp3, & - !$acc lhs_diff_zm, lhs_diff_zt, lhs_diff_zm_crank, lhs_diff_zt_crank, & - !$acc lhs_ma_zm, lhs_ma_zt, lhs_ac_pr2_wp2, lhs_ac_pr2_wp3, & - !$acc coef_wp4_implicit_zt, coef_wp4_implicit, a1, a1_zt, & - !$acc dum_dz, dvm_dz, lhs, rhs, Kw1, Kw8, Kw1_zm, Kw8_zt ) + !$acc enter data create( wp2_old, wp3_old, C1_Skw_fnc, C11_Skw_fnc, C16_fnc, & + !$acc wp3_term_ta_lhs_result, wp3_pr3_lhs, lhs_ta_wp2, & + !$acc lhs_tp_wp3, lhs_adv_tp_wp3, lhs_pr_tp_wp3, & + !$acc lhs_ta_wp3, lhs_dp1_wp2, rhs_dp1_wp2, lhs_pr1_wp2, & + !$acc rhs_pr1_wp2, lhs_pr1_wp3, rhs_pr1_wp3, rhs_bp_pr2_wp2, & + !$acc rhs_pr_dfsn_wp2, rhs_bp1_pr2_wp3, rhs_pr3_wp2, & + !$acc rhs_pr3_wp3, rhs_ta_wp3, rhs_pr_turb_wp3, rhs_pr_dfsn_wp3, & + !$acc lhs_diff_zm, lhs_diff_zt, lhs_diff_zm_crank, lhs_diff_zt_crank, & + !$acc lhs_ma_zm, lhs_ma_zt, lhs_ac_pr2_wp2, lhs_ac_pr2_wp3, & + !$acc coef_wp4_implicit_zt, coef_wp4_implicit, a1, a1_zt, & + !$acc dum_dz, dvm_dz, lhs, rhs, Kw1, Kw8, Kw1_zm, Kw8_zt ) !----------------------------------------------------------------------- @@ -1160,6 +1160,18 @@ subroutine advance_wp2_wp3( nz, ngrdcol, gr, dt, & ! i end if ! 
fatal error end if + !$acc exit data delete( wp2_old, wp3_old, C1_Skw_fnc, C11_Skw_fnc, C16_fnc, & + !$acc wp3_term_ta_lhs_result, wp3_pr3_lhs, lhs_ta_wp2, & + !$acc lhs_tp_wp3, lhs_adv_tp_wp3, lhs_pr_tp_wp3, & + !$acc lhs_ta_wp3, lhs_dp1_wp2, rhs_dp1_wp2, lhs_pr1_wp2, & + !$acc rhs_pr1_wp2, lhs_pr1_wp3, rhs_pr1_wp3, rhs_bp_pr2_wp2, & + !$acc rhs_pr_dfsn_wp2, rhs_bp1_pr2_wp3, rhs_pr3_wp2, & + !$acc rhs_pr3_wp3, rhs_ta_wp3, rhs_pr_turb_wp3, rhs_pr_dfsn_wp3, & + !$acc lhs_diff_zm, lhs_diff_zt, lhs_diff_zm_crank, lhs_diff_zt_crank, & + !$acc lhs_ma_zm, lhs_ma_zt, lhs_ac_pr2_wp2, lhs_ac_pr2_wp3, & + !$acc coef_wp4_implicit_zt, coef_wp4_implicit, a1, a1_zt, & + !$acc dum_dz, dvm_dz, lhs, rhs, Kw1, Kw8, Kw1_zm, Kw8_zt ) + return end subroutine advance_wp2_wp3 @@ -1392,7 +1404,7 @@ subroutine wp23_solve( nz, ngrdcol, gr, dt, lhs, rhs, & !------------------------- Begin Code ------------------------- - !$acc declare create( rhs_save, solut, old_solut, rcond, threshold_array ) + !$acc enter data create( rhs_save, solut, old_solut, rcond, threshold_array ) ! Save the value of rhs, which will be overwritten with the solution as ! part of the solving routine. @@ -1815,6 +1827,8 @@ subroutine wp23_solve( nz, ngrdcol, gr, dt, lhs, rhs, & ! Compute wp3_zm for output purposes wp3_zm(:,:) = zt2zm( nz, ngrdcol, gr, wp3 ) + !$acc exit data delete( rhs_save, solut, old_solut, rcond, threshold_array ) + return end subroutine wp23_solve diff --git a/src/CLUBB_core/advance_xm_wpxp_module.F90 b/src/CLUBB_core/advance_xm_wpxp_module.F90 index 9a28e41e5..78648bbad 100644 --- a/src/CLUBB_core/advance_xm_wpxp_module.F90 +++ b/src/CLUBB_core/advance_xm_wpxp_module.F90 @@ -1111,17 +1111,17 @@ subroutine advance_xm_wpxp( nz, ngrdcol, gr, dt, sigma_sqd_w, wm_zm, wm_zt, wp2, end if ! l_predict_upwp_vpwp !$acc exit data delete( C6rt_Skw_fnc, C6thl_Skw_fnc, C7_Skw_fnc, C6_term, Kw6, & - !$acc low_lev_effect, high_lev_effect, rtm_old, wprtp_old, thlm_old, & - !$acc wpthlp_old, um_old, upwp_old, vm_old, & - !$acc vpwp_old, lhs_diff_zm, lhs_diff_zt, lhs_ma_zt, lhs_ma_zm, & - !$acc lhs_ta_wprtp, lhs_ta_wpthlp, lhs_ta_wpup, lhs_ta_wpvp, & - !$acc rhs_ta_wprtp, rhs_ta_wpthlp, rhs_ta_wpup, & - !$acc rhs_ta_wpvp, lhs_tp, lhs_ta_xm, lhs_ac_pr2, & - !$acc lhs_pr1_wprtp, lhs_pr1_wpthlp ) + !$acc low_lev_effect, high_lev_effect, rtm_old, wprtp_old, thlm_old, & + !$acc wpthlp_old, um_old, upwp_old, vm_old, & + !$acc vpwp_old, lhs_diff_zm, lhs_diff_zt, lhs_ma_zt, lhs_ma_zm, & + !$acc lhs_ta_wprtp, lhs_ta_wpthlp, lhs_ta_wpup, lhs_ta_wpvp, & + !$acc rhs_ta_wprtp, rhs_ta_wpthlp, rhs_ta_wpup, & + !$acc rhs_ta_wpvp, lhs_tp, lhs_ta_xm, lhs_ac_pr2, & + !$acc lhs_pr1_wprtp, lhs_pr1_wpthlp ) !$acc exit data if( sclr_dim > 0 ) & - !$acc delete( sclrm_old, wpsclrp_old, lhs_ta_wpsclrp, & - !$acc rhs_ta_wpsclrp, lhs_pr1_wpsclrp ) + !$acc delete( sclrm_old, wpsclrp_old, lhs_ta_wpsclrp, & + !$acc rhs_ta_wpsclrp, lhs_pr1_wpsclrp ) return @@ -1292,21 +1292,16 @@ subroutine xm_wpxp_lhs( nz, ngrdcol, l_iter, dt, wpxp, wm_zt, C7_Skw_fnc, & real (kind = core_rknd) :: & invrs_dt - - real( kind = core_rknd ), dimension(ngrdcol,nz) :: & - zero_vector ! Vector of 0s integer :: i !------------------- Begin Code ------------------- - - !$acc declare create( zero_vector ) ! Initializations/precalculations invrs_dt = 1.0_core_rknd / dt ! 
Lower boundary for xm, lhs(:,1) - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol lhs(1,i,1) = 0.0_core_rknd lhs(2,i,1) = 0.0_core_rknd @@ -1317,7 +1312,7 @@ subroutine xm_wpxp_lhs( nz, ngrdcol, l_iter, dt, wpxp, wm_zt, C7_Skw_fnc, & !$acc end parallel loop ! Lower boundary for w'x', lhs(:,2) - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol lhs(1,i,2) = 0.0_core_rknd lhs(2,i,2) = 0.0_core_rknd @@ -1368,7 +1363,7 @@ subroutine xm_wpxp_lhs( nz, ngrdcol, l_iter, dt, wpxp, wm_zt, C7_Skw_fnc, & ! Upper boundary for w'x', , lhs(:,2*gr%nz) ! These were set in the loop above for simplicity, so they must be set properly here - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol lhs(1,i,2*nz) = 0.0_core_rknd lhs(2,i,2*nz) = 0.0_core_rknd @@ -1562,7 +1557,7 @@ subroutine calc_xm_wpxp_lhs_terms( nz, ngrdcol, gr, Kh_zm, wm_zm, wm_zt, wp2, !------------------- Begin Code ------------------- - !$acc declare create( Kh_N2_zm, K_zm, K_zt, Kw6_zm, zeros_array ) + !$acc enter data create( Kh_N2_zm, K_zm, K_zt, Kw6_zm, zeros_array ) ! Initializations/precalculations constant_nu = 0.1_core_rknd @@ -1646,7 +1641,7 @@ subroutine calc_xm_wpxp_lhs_terms( nz, ngrdcol, gr, Kh_zm, wm_zm, wm_zt, wp2, end do !$acc end parallel loop - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol zeros_array(i) = zero end do @@ -1666,6 +1661,8 @@ subroutine calc_xm_wpxp_lhs_terms( nz, ngrdcol, gr, Kh_zm, wm_zm, wm_zt, wp2, lhs_ma_zt ) ! Intent(out) end if + !$acc exit data delete( Kh_N2_zm, K_zm, K_zt, Kw6_zm, zeros_array ) + return end subroutine calc_xm_wpxp_lhs_terms @@ -1828,7 +1825,7 @@ subroutine xm_wpxp_rhs( nz, ngrdcol, solve_type, l_iter, dt, xm, wpxp, & ! In !------------------- Begin Code ------------------- - !$acc declare create( rhs_bp_pr3 ) + !$acc enter data create( rhs_bp_pr3 ) ! Initialize output array and precalculate the reciprocal of dt invrs_dt = 1.0_core_rknd / dt @@ -1837,7 +1834,7 @@ subroutine xm_wpxp_rhs( nz, ngrdcol, solve_type, l_iter, dt, xm, wpxp, & ! In call wpxp_terms_bp_pr3_rhs( nz, ngrdcol, C7_Skw_fnc, thv_ds_zm, xpthvp, & ! intent(in) rhs_bp_pr3 ) ! intent(out) - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol ! Set lower boundary for xm rhs(i,1) = xm(i,1) @@ -1873,7 +1870,7 @@ subroutine xm_wpxp_rhs( nz, ngrdcol, solve_type, l_iter, dt, xm, wpxp, & ! In end do !$acc end parallel loop - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol ! Upper boundary for xm rhs(i,2*nz-1) = xm(i,nz) * invrs_dt + xm_forcing(i,nz) @@ -2027,6 +2024,8 @@ subroutine xm_wpxp_rhs( nz, ngrdcol, solve_type, l_iter, dt, xm, wpxp, & ! In endif ! l_stats_samp + !$acc exit data delete( rhs_bp_pr3 ) + return end subroutine xm_wpxp_rhs @@ -4565,7 +4564,7 @@ subroutine xm_wpxp_clipping_and_stats( & ! --------------------------- Begin code --------------------------- - !$acc declare create( xm_old, wpxp_pd, xm_pd, wpxp_chnge, xp2_relaxed ) + !$acc enter data create( xm_old, wpxp_pd, xm_pd, wpxp_chnge, xp2_relaxed ) select case ( solve_type ) @@ -4678,7 +4677,7 @@ subroutine xm_wpxp_clipping_and_stats( & !$acc end parallel loop ! 
Lower boundary condition on xm - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol xm(i,1) = xm(i,2) end do @@ -4911,7 +4910,7 @@ subroutine xm_wpxp_clipping_and_stats( & ! Hole filling does not affect the below ground level, perform a blunt clipping ! here on that level to prevent small values of xm(1) - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol if ( any( xm(i,:) < xm_threshold) ) then xm(i,1) = max( xm(i,1), xm_tol ) @@ -5044,7 +5043,9 @@ subroutine xm_wpxp_clipping_and_stats( & wpxp_chnge, gr%invrs_dzt, & ! intent(in) stats_zt, & ! intent(inout) xm ) ! intent(inout) - end if + end if + + !$acc exit data delete( xm_old, wpxp_pd, xm_pd, wpxp_chnge, xp2_relaxed ) return @@ -5135,7 +5136,7 @@ pure subroutine xm_term_ta_lhs( nz, ngrdcol, gr, & integer :: i, k ! Vertical level index ! Set lower boundary condition to 0 - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol lhs_ta_xm(k_mdiag,i,1) = zero lhs_ta_xm(km1_mdiag,i,1) = zero @@ -5241,7 +5242,7 @@ pure subroutine wpxp_term_tp_lhs( nz, ngrdcol, gr, wp2, & integer :: i, k ! Vertical level index ! Set lower boundary to 0 - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol lhs_tp(1,i,1) = zero lhs_tp(2,i,1) = zero @@ -5264,7 +5265,7 @@ pure subroutine wpxp_term_tp_lhs( nz, ngrdcol, gr, wp2, & !$acc end parallel loop ! Set upper boundary to 0 - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol lhs_tp(1,i,nz) = 0.0_core_rknd lhs_tp(2,i,nz) = 0.0_core_rknd @@ -5360,7 +5361,7 @@ pure subroutine wpxp_terms_ac_pr2_lhs( nz, ngrdcol, C7_Skw_fnc, & !$acc copyout( lhs_ac_pr2 ) ! Set lower boundary to 0 - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol lhs_ac_pr2(i,1) = zero end do @@ -5378,7 +5379,7 @@ pure subroutine wpxp_terms_ac_pr2_lhs( nz, ngrdcol, C7_Skw_fnc, & !$acc end parallel loop ! Set upper boundary to 0 - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol lhs_ac_pr2(i,nz) = zero end do @@ -5471,7 +5472,7 @@ pure subroutine wpxp_term_pr1_lhs( nz, ngrdcol, C6rt_Skw_fnc, C6thl_Skw_fnc, C7_ end do !$acc end parallel loop - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol ! Set lower boundary to 0 @@ -5502,7 +5503,7 @@ pure subroutine wpxp_term_pr1_lhs( nz, ngrdcol, C6rt_Skw_fnc, C6thl_Skw_fnc, C7_ end do !$acc end parallel loop - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol ! Set lower boundary to 0 @@ -5578,7 +5579,7 @@ pure subroutine wpxp_terms_bp_pr3_rhs( nz, ngrdcol, C7_Skw_fnc, thv_ds_zm, xpthv !$acc copyout( rhs_bp_pr3 ) ! Set lower boundary to 0 - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol rhs_bp_pr3(i,1) = zero end do @@ -5592,7 +5593,7 @@ pure subroutine wpxp_terms_bp_pr3_rhs( nz, ngrdcol, C7_Skw_fnc, thv_ds_zm, xpthv end do ! k = 2, nz-1 ! 
Set upper boundary to 0 - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol rhs_bp_pr3(i,nz) = zero end do @@ -5775,7 +5776,7 @@ subroutine xm_correction_wpxp_cl( nz, ngrdcol, solve_type, dt, & !---------------------------- Begin Code ---------------------------- - !$acc declare create( xm_tndcy_wpxp_cl, l_clipping_needed, l_any_clipping_needed ) + !$acc enter data create( xm_tndcy_wpxp_cl, l_clipping_needed, l_any_clipping_needed ) l_any_clipping_needed = .false. @@ -5831,6 +5832,8 @@ subroutine xm_correction_wpxp_cl( nz, ngrdcol, solve_type, dt, & end do endif + !$acc exit data delete( xm_tndcy_wpxp_cl, l_clipping_needed, l_any_clipping_needed ) + return end subroutine xm_correction_wpxp_cl @@ -5953,7 +5956,7 @@ pure subroutine diagnose_upxp( nz, ngrdcol, gr, ypwp, xm, wpxp, ym, & !----------------------------- Begin Code ------------------------------ - !$acc declare create( ddzt_xm, ddzt_ym ) + !$acc enter data create( ddzt_xm, ddzt_ym ) ddzt_xm = ddzt( nz, ngrdcol, gr, xm ) ddzt_ym = ddzt( nz, ngrdcol, gr, ym ) @@ -5964,9 +5967,11 @@ pure subroutine diagnose_upxp( nz, ngrdcol, gr, ypwp, xm, wpxp, ym, & ypxp(i,k) = ( tau_C6_zm(i,k) / C6x_Skw_fnc(i,k) ) & * ( - ypwp(i,k) * ddzt_xm(i,k) - (one - C7_Skw_fnc(i,k) ) & * ( wpxp(i,k) * ddzt_ym(i,k) ) ) - end do - end do - !$acc end parallel loop + end do + end do + !$acc end parallel loop + + !$acc exit data delete( ddzt_xm, ddzt_ym ) return diff --git a/src/CLUBB_core/advance_xp2_xpyp_module.F90 b/src/CLUBB_core/advance_xp2_xpyp_module.F90 index 7ac3d31ac..3c91399a2 100644 --- a/src/CLUBB_core/advance_xp2_xpyp_module.F90 +++ b/src/CLUBB_core/advance_xp2_xpyp_module.F90 @@ -3053,7 +3053,7 @@ subroutine xp2_xpyp_uv_rhs( nz, ngrdcol, gr, solve_type, dt, & ! In !----------------------------- Begin Code ---------------------------------- - !$acc declare create( rhs_pr2 ) + !$acc enter data create( rhs_pr2 ) select case ( solve_type ) case ( xp2_xpyp_vp2 ) @@ -3228,6 +3228,8 @@ subroutine xp2_xpyp_uv_rhs( nz, ngrdcol, gr, solve_type, dt, & ! In rhs(i,nz) = w_tol_sqd end do !$acc end parallel loop + + !$acc exit data delete( rhs_pr2 ) return diff --git a/src/CLUBB_core/clip_explicit.F90 b/src/CLUBB_core/clip_explicit.F90 index 2f6801fad..40825cad1 100644 --- a/src/CLUBB_core/clip_explicit.F90 +++ b/src/CLUBB_core/clip_explicit.F90 @@ -619,7 +619,7 @@ subroutine clip_covar( nz, ngrdcol, gr, solve_type, l_first_clip_ts, & ! Since there is no covariance clipping at the upper or lower boundaries, ! the change in x'y' due to covariance clipping at those levels is 0. - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol xpyp_chnge(i,1) = 0.0_core_rknd xpyp_chnge(i,nz) = 0.0_core_rknd @@ -1180,7 +1180,7 @@ subroutine clip_skewness_core( nz, ngrdcol, gr, sfc_elevation, & !----------------------- Begin Code----------------------- - !$acc declare create( wp2_zt_cubed, wp3_lim_sqd, zagl_thresh, H_zagl ) + !$acc enter data create( wp2_zt_cubed, wp3_lim_sqd, zagl_thresh, H_zagl ) ! Compute the upper and lower limits of w'^3 at every level, ! 
based on the skewness of w, Sk_w, such that: @@ -1277,6 +1277,8 @@ subroutine clip_skewness_core( nz, ngrdcol, gr, sfc_elevation, & end do !$acc end parallel + !$acc exit data delete( wp2_zt_cubed, wp3_lim_sqd, zagl_thresh, H_zagl ) + end subroutine clip_skewness_core !=============================================================================== diff --git a/src/CLUBB_core/clubb_api_module.F90 b/src/CLUBB_core/clubb_api_module.F90 index 1008ea0b4..4a54552dc 100644 --- a/src/CLUBB_core/clubb_api_module.F90 +++ b/src/CLUBB_core/clubb_api_module.F90 @@ -252,6 +252,9 @@ module clubb_api_module ! Calculates liquid water potential temperature from absolute temperature T_in_K2thlm_api => T_in_K2thlm + use advance_clubb_core_module, only: & + setup_clubb_core_api => setup_clubb_core + use stats_type, only: stats ! Type implicit none @@ -1697,143 +1700,6 @@ subroutine advance_clubb_core_api_multi_col( gr, nz, ngrdcol, & end subroutine advance_clubb_core_api_multi_col - !================================================================================================ - ! setup_clubb_core - Sets up the model for execution. - !================================================================================================ - - subroutine setup_clubb_core_api( & - nzmax, T0_in, ts_nudge_in, & ! intent(in) - hydromet_dim_in, sclr_dim_in, & ! intent(in) - sclr_tol_in, edsclr_dim_in, params, & ! intent(in) - l_host_applies_sfc_fluxes, & ! intent(in) - saturation_formula, & ! intent(in) - l_input_fields, & ! intent(in) -#ifdef GFDL - I_sat_sphum, & ! intent(in) h1g, 2010-06-16 -#endif - iiPDF_type, & ! intent(in) - ipdf_call_placement, & ! intent(in) - l_predict_upwp_vpwp, & ! intent(in) - l_min_xp2_from_corr_wx, & ! intent(in) - l_prescribed_avg_deltaz, & ! intent(in) - l_damp_wp2_using_em, & ! intent(in) - l_stability_correct_tau_zm, & ! intent(in) - l_enable_relaxed_clipping, & ! intent(in) - l_diag_Lscale_from_tau, & ! intent(in) -#ifdef GFDL - cloud_frac_min , & ! intent(in) h1g, 2010-06-16 -#endif - err_code_api ) ! intent(out) - - use advance_clubb_core_module, only : setup_clubb_core - - use parameter_indices, only: & - nparams ! Variable(s) - - use model_flags, only: & - clubb_config_flags_type ! Type - -! TODO: This should be called from the api, but all the host models appear to call -! it directly or not at all. -! use model_flags, only: & -! setup_model_flags ! Subroutine - - implicit none - - ! Input Variables - - integer, intent(in) :: nzmax ! Vertical grid levels [#] - - ! Model parameters - real( kind = core_rknd ), intent(in) :: & - T0_in, ts_nudge_in - - integer, intent(in) :: & - hydromet_dim_in, & ! Number of hydrometeor species - sclr_dim_in, & ! Number of passive scalars - edsclr_dim_in ! Number of eddy-diff. passive scalars - - real( kind = core_rknd ), intent(in), dimension(sclr_dim_in) :: & - sclr_tol_in ! Thresholds for passive scalars - - real( kind = core_rknd ), intent(in), dimension(nparams) :: & - params ! Including C1, nu1, nu2, etc. - - ! Flags - logical, intent(in) :: & - l_host_applies_sfc_fluxes ! Whether to apply for the surface flux - - character(len=*), intent(in) :: & - saturation_formula ! Approximation for saturation vapor pressure - - logical, intent(in) :: & - l_input_fields ! Flag for whether LES input fields are used - - integer, intent(in) :: & - iiPDF_type, & ! Selected option for the two-component normal - ! (double Gaussian) PDF type to use for the w, - ! rt, and theta-l (or w, chi, and eta) portion of - ! CLUBB's multivariate, two-component PDF. 
- ipdf_call_placement ! Selected option for the placement of the call to - ! CLUBB's PDF. - - logical, intent(in) :: & - l_predict_upwp_vpwp, & ! Flag to predict and along with and - ! alongside the advancement of , , , , - ! , and in subroutine advance_xm_wpxp. - ! Otherwise, and are still approximated by eddy - ! diffusivity when and are advanced in subroutine - ! advance_windm_edsclrm. - l_min_xp2_from_corr_wx, & ! Flag to base the threshold minimum value of xp2 (rtp2 and - ! thlp2) on keeping the overall correlation of w and x within - ! the limits of -max_mag_correlation_flux to - ! max_mag_correlation_flux. - l_prescribed_avg_deltaz, & ! used in adj_low_res_nu. If .true., avg_deltaz = deltaz - l_damp_wp2_using_em, & - l_stability_correct_tau_zm, & - l_enable_relaxed_clipping, & ! Flag to relax clipping on wpxp in - ! xm_wpxp_clipping_and_stats - l_diag_Lscale_from_tau ! First diagnose dissipation time tau, and - ! then diagnose the mixing length scale as - ! Lscale = tau * tke - -#ifdef GFDL - logical, intent(in) :: & ! h1g, 2010-06-16 begin mod - I_sat_sphum - - real( kind = core_rknd ), intent(in) :: & - cloud_frac_min ! h1g, 2010-06-16 end mod -#endif - - integer, intent(out) :: & - err_code_api ! Diagnostic for a problem with the setup - - call setup_clubb_core( & - nzmax, T0_in, ts_nudge_in, & ! intent(in) - hydromet_dim_in, sclr_dim_in, & ! intent(in) - sclr_tol_in, edsclr_dim_in, params, & ! intent(in) - l_host_applies_sfc_fluxes, & ! intent(in) - saturation_formula, & ! intent(in) - l_input_fields, & ! intent(in) -#ifdef GFDL - I_sat_sphum, & ! intent(in) h1g, 2010-06-16 -#endif - iiPDF_type, & ! intent(in) - ipdf_call_placement, & ! intent(in) - l_predict_upwp_vpwp, & ! intent(in) - l_min_xp2_from_corr_wx, & ! intent(in) - l_prescribed_avg_deltaz, & ! intent(in) - l_damp_wp2_using_em, & ! intent(in) - l_stability_correct_tau_zm, & ! intent(in) - l_enable_relaxed_clipping, & ! intent(in) - l_diag_Lscale_from_tau, & ! intent(in) -#ifdef GFDL - cloud_frac_min, & ! intent(in) h1g, 2010-06-16 -#endif - err_code_api ) ! intent(out) - - end subroutine setup_clubb_core_api - !================================================================================================ ! cleanup_clubb_core_api - Frees memory used by the model. !================================================================================================ diff --git a/src/CLUBB_core/diffusion.F90 b/src/CLUBB_core/diffusion.F90 index 3903c9a0e..f0ce296e6 100644 --- a/src/CLUBB_core/diffusion.F90 +++ b/src/CLUBB_core/diffusion.F90 @@ -340,7 +340,7 @@ pure subroutine diffusion_zt_lhs( nz, ngrdcol, gr, K_zm, K_zt, nu, & ! In ! extra terms with upwind scheme ! k = 1 (bottom level); lowere boundary level - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol lhs_upwind(kp1_tdiag,i,1) = + min( drhoKdz_zt(i,1) , zero ) * gr%invrs_dzm(i,1) lhs_upwind(k_tdiag,i,1) = - min( drhoKdz_zt(i,1) , zero ) * gr%invrs_dzm(i,1) @@ -369,7 +369,7 @@ pure subroutine diffusion_zt_lhs( nz, ngrdcol, gr, K_zm, K_zt, nu, & ! In ! k = nz (top level); upper boundary level. ! Only relevant if zero-flux boundary conditions are used. - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol lhs_upwind(kp1_tdiag,i,nz) = zero lhs_upwind(k_tdiag,i,nz) = + max( drhoKdz_zt(i,nz) , zero ) * gr%invrs_dzm(i,nz-1) @@ -387,7 +387,7 @@ pure subroutine diffusion_zt_lhs( nz, ngrdcol, gr, K_zm, K_zt, nu, & ! In if ( .not. 
l_upwind_Kh_dp_term ) then - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol ! Thermodynamic superdiagonal: [ x var_zt(k+1,) ] lhs(kp1_tdiag,i,1) = - gr%invrs_dzt(i,1) * invrs_rho_ds_zt(i,1) & @@ -404,7 +404,7 @@ pure subroutine diffusion_zt_lhs( nz, ngrdcol, gr, K_zm, K_zt, nu, & ! In else - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol ! Thermodynamic superdiagonal: [ x var_zt(k+1,) ] lhs(kp1_tdiag,i,1) = - gr%invrs_dzt(i,1) * ( K_zt(i,1) + nu(i) ) * gr%invrs_dzm(i,1) & @@ -477,7 +477,7 @@ pure subroutine diffusion_zt_lhs( nz, ngrdcol, gr, K_zm, K_zt, nu, & ! In if ( .not. l_upwind_Kh_dp_term ) then - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol ! Thermodynamic superdiagonal: [ x var_zt(k+1,) ] lhs(kp1_tdiag,i,nz) = zero @@ -496,7 +496,7 @@ pure subroutine diffusion_zt_lhs( nz, ngrdcol, gr, K_zm, K_zt, nu, & ! In else - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol ! Thermodynamic superdiagonal: [ x var_zt(k+1,) ] lhs(kp1_tdiag,i,nz) = zero + lhs_upwind(kp1_tdiag,i,nz) @@ -990,7 +990,7 @@ pure subroutine diffusion_zm_lhs( nz, ngrdcol, gr, K_zt, K_zm, nu, & ! In ! extra terms with upwind scheme ! k = 1 (bottom level); lowere boundary level - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol lhs_upwind(kp1_mdiag,i,1) = + min( drhoKdz_zm(i,1) , zero ) * gr%invrs_dzt(i,2) lhs_upwind(k_mdiag,i,1) = - min( drhoKdz_zm(i,1) , zero ) * gr%invrs_dzt(i,2) @@ -1015,7 +1015,7 @@ pure subroutine diffusion_zm_lhs( nz, ngrdcol, gr, K_zt, K_zm, nu, & ! In ! k = nz (top level); upper boundary level. ! Only relevant if zero-flux boundary conditions are used. - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol lhs_upwind(kp1_mdiag,i,nz) = zero lhs_upwind(k_mdiag,i,nz) = + max( drhoKdz_zm(i,nz) , zero ) * gr%invrs_dzt(i,nz) @@ -1031,7 +1031,7 @@ pure subroutine diffusion_zm_lhs( nz, ngrdcol, gr, K_zt, K_zm, nu, & ! In if ( .not. l_upwind_Kh_dp_term ) then - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol ! Momentum superdiagonal: [ x var_zm(k+1,) ] lhs(kp1_mdiag,i,1) = - gr%invrs_dzm(i,1) * invrs_rho_ds_zm(i,1) & @@ -1048,7 +1048,7 @@ pure subroutine diffusion_zm_lhs( nz, ngrdcol, gr, K_zt, K_zm, nu, & ! In else - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol ! Momentum superdiagonal: [ x var_zm(k+1,) ] lhs(kp1_mdiag,i,1) = - gr%invrs_dzm(i,1) * ( K_zm(i,1) + nu(i) ) * gr%invrs_dzt(i,2) & @@ -1122,7 +1122,7 @@ pure subroutine diffusion_zm_lhs( nz, ngrdcol, gr, K_zt, K_zm, nu, & ! In if ( .not. l_upwind_Kh_dp_term ) then - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol ! Momentum superdiagonal: [ x var_zm(k+1,) ] lhs(kp1_mdiag,i,nz) = zero @@ -1141,7 +1141,7 @@ pure subroutine diffusion_zm_lhs( nz, ngrdcol, gr, K_zt, K_zm, nu, & ! In else - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol ! 
Momentum superdiagonal: [ x var_zm(k+1,) ] lhs(kp1_mdiag,i,nz) = zero & diff --git a/src/CLUBB_core/fill_holes.F90 b/src/CLUBB_core/fill_holes.F90 index b3f92135a..81b49d48a 100644 --- a/src/CLUBB_core/fill_holes.F90 +++ b/src/CLUBB_core/fill_holes.F90 @@ -143,7 +143,7 @@ subroutine fill_holes_vertical( nz, ngrdcol, num_draw_pts, threshold, upper_hf_l return end if - !$acc parallel loop collapse(2) default(present) + !$acc parallel loop gang vector collapse(2) default(present) do k = 1, nz do i = 1, ngrdcol rho_ds_dz(i,k) = rho_ds(i,k) * dz(i,k) @@ -246,14 +246,14 @@ subroutine fill_holes_vertical( nz, ngrdcol, num_draw_pts, threshold, upper_hf_l ! if any holes need filling before the final step of updating the field. ! Compute the numerator and denominator integrals - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol numer_integral_global(i) = 0.0_core_rknd denom_integral_global(i) = 0.0_core_rknd end do !$acc end parallel loop - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol do k = 2, upper_hf_level numer_integral_global(i) = numer_integral_global(i) + rho_ds_dz(i,k) * field(i,k) @@ -264,7 +264,7 @@ subroutine fill_holes_vertical( nz, ngrdcol, num_draw_pts, threshold, upper_hf_l !$acc end parallel loop - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol ! Find the vertical average of field, using the precomputed numerator and denominator, @@ -285,7 +285,7 @@ subroutine fill_holes_vertical( nz, ngrdcol, num_draw_pts, threshold, upper_hf_l !$acc end parallel loop ! To compute the clipped field's vertical integral we only need to recompute the numerator - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol numer_integral_global(i) = 0.0_core_rknd do k = 2, upper_hf_level @@ -294,7 +294,7 @@ subroutine fill_holes_vertical( nz, ngrdcol, num_draw_pts, threshold, upper_hf_l end do !$acc end parallel loop - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol ! Do not complete calculations or update field values for this diff --git a/src/CLUBB_core/grid_class.F90 b/src/CLUBB_core/grid_class.F90 index 7381f7100..a94de605c 100644 --- a/src/CLUBB_core/grid_class.F90 +++ b/src/CLUBB_core/grid_class.F90 @@ -1505,7 +1505,7 @@ pure subroutine linear_interpolated_azm_2D( nz, ngrdcol, gr, azt, & ! Use a linear extension based on the values of azt at levels gr%nz and ! gr%nz-1 to find the value of azm at level gr%nz (the uppermost level ! in the model). - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol linear_interpolated_azm(i,nz) & = ( ( azt(i,nz) - azt(i,nz-1) ) / ( gr%zt(i,nz) - gr%zt(i,nz-1) ) ) & @@ -1964,7 +1964,7 @@ pure subroutine linear_interpolated_azt_2D( nz, ngrdcol, gr, azm, & ! thermodynamic levels is azt. ! Use a linear extension based on the values of azm at levels 1 and 2 to ! find the value of azt at level 1 (the lowermost level in the model). 
- !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol linear_interpolated_azt(i,1) & = ( ( azm(i,2) - azm(i,1) ) / ( gr%zm(i,2) - gr%zm(i,1) ) ) & @@ -2317,7 +2317,7 @@ pure function gradzm_2D( nz, ngrdcol, gr, azm ) !$acc data copyin( gr, gr%invrs_dzt, azm ) & !$acc copyout( gradzm_2D ) - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol gradzm_2D(i,1) = ( azm(i,2) - azm(i,1) ) * gr%invrs_dzt(i,2) end do @@ -2411,7 +2411,7 @@ pure function gradzt_2D( nz, ngrdcol, gr, azt ) !$acc data copyin( gr, gr%invrs_dzm, azt ) & !$acc copyout( gradzt_2D ) - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol gradzt_2D(i,nz) = ( azt(i,nz) - azt(i,nz-1) ) * gr%invrs_dzm(i,nz-1) end do diff --git a/src/CLUBB_core/interpolation.F90 b/src/CLUBB_core/interpolation.F90 index 8ee884f35..680cc28b0 100644 --- a/src/CLUBB_core/interpolation.F90 +++ b/src/CLUBB_core/interpolation.F90 @@ -541,7 +541,7 @@ subroutine pvertinterp( nz, ngrdcol, & ! If we've fallen through the k=1,nz-1 loop, we cannot interpolate and ! must extrapolate from the bottom or top data level for at least some ! of the longitude points. - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol if ( p_out >= p_mid(i,1) ) then diff --git a/src/CLUBB_core/matrix_solver_wrapper.F90 b/src/CLUBB_core/matrix_solver_wrapper.F90 index 46aa831cf..f22be91d9 100644 --- a/src/CLUBB_core/matrix_solver_wrapper.F90 +++ b/src/CLUBB_core/matrix_solver_wrapper.F90 @@ -107,16 +107,10 @@ subroutine band_solve_single_rhs_multiple_lhs( & ! ----------------------- Begin Code ----------------------- -#ifdef _OPENACC - if ( penta_solve_method /= penta_lu ) then - write(fstderr,*) "Running on GPUs requires penta_solve_method = penta_lu = 2" - err_code = clubb_no_error - return - end if -#endif - if ( present(rcond) ) then + !$acc update host( lhs, rhs ) + ! Lapack overwrites lhs and rhs, so we'll give it copies of them. lhs_copy = lhs rhs_copy = rhs @@ -128,17 +122,23 @@ subroutine band_solve_single_rhs_multiple_lhs( & lhs_copy, rhs_copy, & ! Intent(inout) dummy_soln, rcond ) ! Intent(out) + !$acc update device( rcond ) + end if if ( penta_solve_method == lapack ) then + !$acc update host( lhs, rhs ) + ! Perform LU decomp and solve system (LAPACK) call lapack_band_solve( "xm_wpxp", nsup, nsub, & ! Intent(in) ndim, 1, ngrdcol, & ! Intent(in) lhs, rhs, & ! Intent(inout) soln ) ! Intent(out) + !$acc update device( soln ) + else if ( penta_solve_method == penta_lu ) then ! Solve the system with a penta-diagonal specific LU decomp @@ -225,16 +225,10 @@ subroutine band_solve_multiple_rhs_lhs( & ! ----------------------- Begin Code ----------------------- -#ifdef _OPENACC - if ( penta_solve_method /= penta_lu ) then - write(fstderr,*) "Running on GPUs requires penta_solve_method = penta_lu = 2" - err_code = clubb_no_error - return - end if -#endif - if ( present(rcond) ) then + !$acc update host( lhs, rhs ) + ! Lapack overwrites lhs and rhs, so we'll give it copies of them. lhs_copy = lhs rhs_copy = rhs @@ -246,17 +240,23 @@ subroutine band_solve_multiple_rhs_lhs( & lhs_copy, rhs_copy, & ! Intent(inout) dummy_soln, rcond ) ! Intent(out) + !$acc update device( rcond ) + end if if ( penta_solve_method == lapack ) then + !$acc update host( lhs, rhs ) + ! Perform LU decomp and solve system (LAPACK) call lapack_band_solve( "xm_wpxp", nsup, nsub, & ! 
Intent(in) ndim, nrhs, ngrdcol, & ! Intent(in) lhs, rhs, & ! Intent(inout) soln ) ! Intent(out) + !$acc update device( soln ) + else if ( penta_solve_method == penta_lu ) then ! Solve the system with a penta-diagonal specific LU decomp @@ -339,14 +339,6 @@ subroutine tridiag_solve_single_rhs_lhs( & ! ----------------------- Begin Code ----------------------- -#ifdef _OPENACC - if ( tridiag_solve_method /= tridiag_lu ) then - write(fstderr,*) "Running on GPUs requires tridiag_solve_method = tridiag_lu = 2" - err_code = clubb_no_error - return - end if -#endif - if ( present(rcond) ) then ! Lapack overwrites lhs and rhs, so we'll give it copies of them. @@ -357,7 +349,6 @@ subroutine tridiag_solve_single_rhs_lhs( & call lapack_tridiag_solvex( solve_name, ndim, 1, 1, & ! Intent(in) lhs_copy, rhs_copy, & ! Intent(inout) dummy_soln, rcond ) ! Intent(out) - end if @@ -448,16 +439,10 @@ subroutine tridiag_solve_single_rhs_multiple_lhs( & ! ----------------------- Begin Code ----------------------- -#ifdef _OPENACC - if ( tridiag_solve_method /= tridiag_lu ) then - write(fstderr,*) "Running on GPUs requires tridiag_solve_method = tridiag_lu = 2" - err_code = clubb_no_error - return - end if -#endif - if ( present(rcond) ) then + !$acc update host( lhs, rhs ) + ! Lapack overwrites lhs and rhs, so we'll give it copies of them. lhs_copy = lhs rhs_copy = rhs @@ -466,17 +451,23 @@ subroutine tridiag_solve_single_rhs_multiple_lhs( & call lapack_tridiag_solvex( solve_name, ndim, 1, ngrdcol, & ! Intent(in) lhs_copy, rhs_copy, & ! Intent(inout) dummy_soln, rcond ) ! Intent(out) + + !$acc update device( rcond ) end if if ( tridiag_solve_method == lapack ) then + !$acc update host( lhs, rhs ) + ! Perform LU decomp and solve system (LAPACK) call lapack_tridiag_solve( solve_name, ndim, 1, ngrdcol, & ! Intent(in) lhs, rhs, & ! Intent(inout) soln ) ! Intent(out) + !$acc update device( soln ) + else if ( tridiag_solve_method == tridiag_lu ) then ! Solve the system with a tri-diagonal specific LU decomp @@ -555,17 +546,11 @@ subroutine tridiag_solve_multiple_rhs_lhs( & integer :: i ! ----------------------- Begin Code ----------------------- - -#ifdef _OPENACC - if ( tridiag_solve_method /= tridiag_lu ) then - write(fstderr,*) "Running on GPUs requires tridiag_solve_method = tridiag_lu = 2" - err_code = clubb_no_error - return - end if -#endif if ( present(rcond) ) then + !$acc update host( lhs, rhs ) + ! Lapack overwrites lhs and rhs, so we'll give it copies of them. lhs_copy = lhs rhs_copy = rhs @@ -574,17 +559,23 @@ subroutine tridiag_solve_multiple_rhs_lhs( & call lapack_tridiag_solvex( solve_name, ndim, nrhs, ngrdcol, & ! Intent(in) lhs_copy, rhs_copy, & ! Intent(inout) dummy_soln, rcond ) ! Intent(out) + + !$acc update device( rcond ) end if if ( tridiag_solve_method == lapack ) then + !$acc update host( lhs, rhs ) + ! Perform LU decomp and solve system (LAPACK) call lapack_tridiag_solve( solve_name, ndim, nrhs, ngrdcol, & ! Intent(in) lhs, rhs, & ! Intent(inout) soln ) ! Intent(out) + !$acc update device( soln ) + else if ( tridiag_solve_method == tridiag_lu ) then ! Solve the system with a tri-diagonal specific LU decomp diff --git a/src/CLUBB_core/mean_adv.F90 b/src/CLUBB_core/mean_adv.F90 index 3ff37ca26..f111495f1 100644 --- a/src/CLUBB_core/mean_adv.F90 +++ b/src/CLUBB_core/mean_adv.F90 @@ -266,7 +266,7 @@ pure subroutine term_ma_zt_lhs( nz, ngrdcol, wm_zt, weights_zt2zm, & ! Intent(in ! derivative d(var_zt)/dz over the model top is set to 0, in order ! 
to stay consistent with the zero-flux boundary condition option ! in the eddy diffusion code. - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol ! Thermodynamic superdiagonal: [ x var_zt(k+1,) ] @@ -317,7 +317,7 @@ pure subroutine term_ma_zt_lhs( nz, ngrdcol, wm_zt, weights_zt2zm, & ! Intent(in !$acc end parallel loop ! Upper Boundary - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol if ( wm_zt(i,nz) >= zero ) then ! Mean wind is in upward direction diff --git a/src/CLUBB_core/mixing_length.F90 b/src/CLUBB_core/mixing_length.F90 index f5917a9ab..b324e3fc3 100644 --- a/src/CLUBB_core/mixing_length.F90 +++ b/src/CLUBB_core/mixing_length.F90 @@ -232,12 +232,12 @@ subroutine compute_mixing_length( nz, ngrdcol, gr, thvm, thlm, & ! --------------------------------- Begin Code --------------------------------- - !$acc declare create( exp_mu_dzm, invrs_dzm_on_mu, grav_on_thvm, Lv_coef, & - !$acc entrain_coef, thl_par_j_precalc, rt_par_j_precalc, & - !$acc tl_par_1, rt_par_1, rsatl_par_1, thl_par_1, dCAPE_dz_1, & - !$acc s_par_1, rc_par_1, CAPE_incr_1, thv_par_1, tke_i ) + !$acc enter data create( exp_mu_dzm, invrs_dzm_on_mu, grav_on_thvm, Lv_coef, & + !$acc entrain_coef, thl_par_j_precalc, rt_par_j_precalc, & + !$acc tl_par_1, rt_par_1, rsatl_par_1, thl_par_1, dCAPE_dz_1, & + !$acc s_par_1, rc_par_1, CAPE_incr_1, thv_par_1, tke_i ) - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol if ( abs(mu(i)) < eps ) then err_code = clubb_fatal_error @@ -855,6 +855,11 @@ subroutine compute_mixing_length( nz, ngrdcol, gr, thvm, thlm, & end if + !$acc exit data delete( exp_mu_dzm, invrs_dzm_on_mu, grav_on_thvm, Lv_coef, & + !$acc entrain_coef, thl_par_j_precalc, rt_par_j_precalc, & + !$acc tl_par_1, rt_par_1, rsatl_par_1, thl_par_1, dCAPE_dz_1, & + !$acc s_par_1, rc_par_1, CAPE_incr_1, thv_par_1, tke_i ) + return end subroutine compute_mixing_length @@ -995,10 +1000,11 @@ subroutine calc_Lscale_directly ( ngrdcol, nz, gr, & !--------------------------------- Begin Code --------------------------------- - !$acc declare create( sign_rtpthlp, Lscale_pert_1, Lscale_pert_2, & - !$acc thlm_pert_1, thlm_pert_2, rtm_pert_1, rtm_pert_2, & - !$acc thlm_pert_pos_rt, thlm_pert_neg_rt, rtm_pert_pos_rt, rtm_pert_neg_rt, & - !$acc mu_pert_1, mu_pert_2, mu_pert_pos_rt, mu_pert_neg_rt ) + !$acc enter data create( sign_rtpthlp, Lscale_pert_1, Lscale_pert_2, & + !$acc thlm_pert_1, thlm_pert_2, rtm_pert_1, rtm_pert_2, & + !$acc thlm_pert_pos_rt, thlm_pert_neg_rt, rtm_pert_pos_rt, & + !$acc rtm_pert_neg_rt, & + !$acc mu_pert_1, mu_pert_2, mu_pert_pos_rt, mu_pert_neg_rt ) Lscale_mu_coef = clubb_params(iLscale_mu_coef) Lscale_pert_coef = clubb_params(iLscale_pert_coef) @@ -1044,7 +1050,7 @@ subroutine calc_Lscale_directly ( ngrdcol, nz, gr, & end do !$acc end parallel loop - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol mu_pert_1(i) = newmu(i) / Lscale_mu_coef end do @@ -1073,7 +1079,7 @@ subroutine calc_Lscale_directly ( ngrdcol, nz, gr, & end do !$acc end parallel loop - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol mu_pert_2(i) = newmu(i) * Lscale_mu_coef end do @@ -1138,7 +1144,7 @@ subroutine calc_Lscale_directly ( ngrdcol, nz, gr, & end do !$acc end parallel loop - !$acc parallel loop default(present) + !$acc parallel loop gang vector 
default(present) do i = 1, ngrdcol mu_pert_pos_rt(i) = newmu(i) / Lscale_mu_coef mu_pert_neg_rt(i) = newmu(i) * Lscale_mu_coef @@ -1219,6 +1225,12 @@ subroutine calc_Lscale_directly ( ngrdcol, nz, gr, & end if end if + !$acc exit data delete( sign_rtpthlp, Lscale_pert_1, Lscale_pert_2, & + !$acc thlm_pert_1, thlm_pert_2, rtm_pert_1, rtm_pert_2, & + !$acc thlm_pert_pos_rt, thlm_pert_neg_rt, rtm_pert_pos_rt, & + !$acc rtm_pert_neg_rt, & + !$acc mu_pert_1, mu_pert_2, mu_pert_pos_rt, mu_pert_neg_rt ) + return end subroutine calc_Lscale_directly @@ -1456,7 +1468,7 @@ subroutine diagnose_Lscale_from_tau( nz, ngrdcol, gr, & !$acc tau_zm_unclipped, tau_zt_unclipped, sqrt_Ri_zm_smooth, em_clipped, & !$acc tmp_calc, tmp_calc_max, tmp_calc_min_max ) - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol if ( gr%zm(i,1) - sfc_elevation(i) + z_displace < eps ) then err_code = clubb_fatal_error @@ -1500,7 +1512,7 @@ subroutine diagnose_Lscale_from_tau( nz, ngrdcol, gr, & if ( l_smooth_min_max ) then - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol ustar(i) = smooth_max( ( upwp_sfc(i)**2 + vpwp_sfc(i)**2 )**one_fourth, ufmin, min_max_smth_mag ) end do @@ -1508,7 +1520,7 @@ subroutine diagnose_Lscale_from_tau( nz, ngrdcol, gr, & else - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol ustar(i) = max( ( upwp_sfc(i)**2 + vpwp_sfc(i)**2 )**one_fourth, ufmin ) end do diff --git a/src/CLUBB_core/mono_flux_limiter.F90 b/src/CLUBB_core/mono_flux_limiter.F90 index 108854b03..4b95fce9a 100644 --- a/src/CLUBB_core/mono_flux_limiter.F90 +++ b/src/CLUBB_core/mono_flux_limiter.F90 @@ -464,10 +464,10 @@ subroutine monotonic_turbulent_flux_limit( nz, ngrdcol, gr, solve_type, dt, xm_o !---------------------------- Begin Code ---------------------------- - !$acc declare create( xp2_zt, xm_enter_mfl, xm_without_ta, wpxp_net_adjust, & - !$acc min_x_allowable_lev, max_x_allowable_lev, min_x_allowable, & - !$acc max_x_allowable, wpxp_mfl_max, wpxp_mfl_min, lhs_mfl_xm, & - !$acc rhs_mfl_xm, l_adjustment_needed, xm_mfl ) + !$acc enter data create( xp2_zt, xm_enter_mfl, xm_without_ta, wpxp_net_adjust, & + !$acc min_x_allowable_lev, max_x_allowable_lev, min_x_allowable, & + !$acc max_x_allowable, wpxp_mfl_max, wpxp_mfl_min, lhs_mfl_xm, & + !$acc rhs_mfl_xm, l_adjustment_needed, xm_mfl ) select case( solve_type ) case ( mono_flux_rtm ) ! rtm/wprtp @@ -616,7 +616,7 @@ subroutine monotonic_turbulent_flux_limit( nz, ngrdcol, gr, solve_type, dt, xm_o !$acc end parallel loop ! Boundary condition on xm_without_ta - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol xm_without_ta(i,1) = xm(i,1) min_x_allowable_lev(i,1) = min_x_allowable_lev(i,2) @@ -788,7 +788,7 @@ subroutine monotonic_turbulent_flux_limit( nz, ngrdcol, gr, solve_type, dt, xm_o !$acc end parallel loop ! Boundary conditions - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol min_x_allowable(i,1) = 0._core_rknd max_x_allowable(i,1) = 0._core_rknd @@ -927,7 +927,7 @@ subroutine monotonic_turbulent_flux_limit( nz, ngrdcol, gr, solve_type, dt, xm_o !$acc end parallel loop ! 
Boundary condition on xm - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol xm(i,1) = xm(i,2) end do @@ -941,7 +941,7 @@ subroutine monotonic_turbulent_flux_limit( nz, ngrdcol, gr, solve_type, dt, xm_o !enddo !Ensure there are no spikes at the top of the domain - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol if (abs( xm(i,nz) - xm_enter_mfl(i,nz) ) > 10._core_rknd * xm_tol) then @@ -1024,6 +1024,11 @@ subroutine monotonic_turbulent_flux_limit( nz, ngrdcol, gr, solve_type, dt, xm_o end do endif + !$acc exit data delete( xp2_zt, xm_enter_mfl, xm_without_ta, wpxp_net_adjust, & + !$acc min_x_allowable_lev, max_x_allowable_lev, min_x_allowable, & + !$acc max_x_allowable, wpxp_mfl_max, wpxp_mfl_min, lhs_mfl_xm, & + !$acc rhs_mfl_xm, l_adjustment_needed, xm_mfl ) + return end subroutine monotonic_turbulent_flux_limit @@ -1235,7 +1240,7 @@ subroutine mfl_xm_rhs( nz, ngrdcol, dt, xm_old, wpxp, xm_forcing, & ! The value of xm at the lower boundary will remain the same. However, the ! value of xm at the lower boundary gets overwritten after the matrix is ! solved for the next timestep, such that xm(1) = xm(2). - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol rhs(i,1) = xm_old(i,1) end do @@ -1333,7 +1338,7 @@ subroutine mfl_xm_solve( nz, ngrdcol, solve_type, tridiag_solve_method, & end if ! Boundary condition on xm - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol xm(i,1) = xm(i,2) end do @@ -1435,7 +1440,7 @@ subroutine calc_turb_adv_range( nz, ngrdcol, gr, dt, & !------------------------- Begin Code ------------------------- - !$acc declare create( vert_vel_up, vert_vel_down, w_min ) + !$acc enter data create( vert_vel_up, vert_vel_down, w_min ) if ( l_constant_thickness ) then ! thickness is a constant value. @@ -1681,7 +1686,7 @@ subroutine calc_turb_adv_range( nz, ngrdcol, gr, dt, & ! Information for levels 1, 2, gr%nz-1, and gr%nz is not needed. ! However, set the values at these levels for purposes of not having odd ! values in the arrays. - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol low_lev_effect(i,1) = 1 high_lev_effect(i,1) = 1 @@ -1694,6 +1699,8 @@ subroutine calc_turb_adv_range( nz, ngrdcol, gr, dt, & end do !$acc end parallel loop + !$acc exit data delete( vert_vel_up, vert_vel_down, w_min ) + return end subroutine calc_turb_adv_range @@ -1949,7 +1956,7 @@ subroutine mean_vert_vel_up_down( nz, ngrdcol, & !------------------------- Begin Code ------------------------- - !$acc declare create( mean_w_down_1st, mean_w_down_2nd, mean_w_up_1st, mean_w_up_2nd ) + !$acc enter data create( mean_w_down_1st, mean_w_down_2nd, mean_w_up_1st, mean_w_up_2nd ) call calc_mean_w_up_down_component( nz, ngrdcol, & ! intent(in) w_1_zm, varnce_w_1_zm, & ! intent(in) @@ -1992,6 +1999,8 @@ subroutine mean_vert_vel_up_down( nz, ngrdcol, & end do end if ! 
l_stats_samp + !$acc exit data delete( mean_w_down_1st, mean_w_down_2nd, mean_w_up_1st, mean_w_up_2nd ) + return end subroutine mean_vert_vel_up_down @@ -2071,7 +2080,7 @@ subroutine calc_mean_w_up_down_component( nz, ngrdcol, & !------------------------- Begin Code ------------------------- - !$acc declare create( erf_cache, exp_cache ) + !$acc enter data create( erf_cache, exp_cache ) invrs_sqrt_2pi = one / sqrt_2pi @@ -2124,7 +2133,7 @@ subroutine calc_mean_w_up_down_component( nz, ngrdcol, & !$acc end parallel loop ! Upper and lower levels are not used, set to 0 to besafe and avoid NaN problems - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol mean_w_down_i(i,1) = 0.0_core_rknd mean_w_up_i(i,1) = 0.0_core_rknd @@ -2134,6 +2143,8 @@ subroutine calc_mean_w_up_down_component( nz, ngrdcol, & end do !$acc end parallel loop + !$acc exit data delete( erf_cache, exp_cache ) + return end subroutine calc_mean_w_up_down_component diff --git a/src/CLUBB_core/sfc_varnce_module.F90 b/src/CLUBB_core/sfc_varnce_module.F90 index b0364d4e3..a73885ed0 100644 --- a/src/CLUBB_core/sfc_varnce_module.F90 +++ b/src/CLUBB_core/sfc_varnce_module.F90 @@ -204,9 +204,9 @@ subroutine calc_sfc_varnce( nz, ngrdcol, gr, dt, sfc_elevation, & !-------------------------- Begin Code -------------------------- - !$acc declare create( uf, depth_pos_wpthlp, min_wp2_sfc_val, & - !$acc um_sfc_sqd, vm_sfc_sqd, usp2_sfc, vsp2_sfc, & - !$acc ustar, zeta, wp2_splat_sfc_correction ) + !$acc enter data create( uf, depth_pos_wpthlp, min_wp2_sfc_val, & + !$acc um_sfc_sqd, vm_sfc_sqd, usp2_sfc, vsp2_sfc, & + !$acc ustar, zeta, wp2_splat_sfc_correction ) up2_sfc_coef = clubb_params(iup2_sfc_coef) @@ -242,7 +242,7 @@ subroutine calc_sfc_varnce( nz, ngrdcol, gr, dt, sfc_elevation, & end do end if - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol ! Find thickness of layer near surface with positive heat flux. ! This is used when l_vary_convect_depth=.true. in order to determine wp2. @@ -270,7 +270,7 @@ subroutine calc_sfc_varnce( nz, ngrdcol, gr, dt, sfc_elevation, & if ( l_andre_1978 ) then ! Calculate ^2 and ^2. - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol um_sfc_sqd(i) = um(i,2)**2 vm_sfc_sqd(i) = vm(i,2)**2 @@ -278,13 +278,13 @@ subroutine calc_sfc_varnce( nz, ngrdcol, gr, dt, sfc_elevation, & !$acc end parallel loop ! Calculate surface friction velocity, u*. - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol ustar(i) = max( ( upwp_sfc(i)**2 + vpwp_sfc(i)**2 )**(one_fourth), ufmin ) end do !$acc end parallel loop - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol if ( abs(wpthlp(i,1)) > eps) then @@ -317,7 +317,7 @@ subroutine calc_sfc_varnce( nz, ngrdcol, gr, dt, sfc_elevation, & ! zeta <= -0.212. ! 3) The surface correlation of rt & thl is 1. ! Brian Griffin; February 2, 2008. 
- !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol if ( zeta(i) < zero ) then @@ -353,7 +353,7 @@ subroutine calc_sfc_varnce( nz, ngrdcol, gr, dt, sfc_elevation, & end do !$acc end parallel loop - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol thlp2(i,1) = max( thl_tol**2, thlp2(i,1) ) rtp2(i,1) = max( rt_tol**2, rtp2(i,1) ) @@ -365,7 +365,7 @@ subroutine calc_sfc_varnce( nz, ngrdcol, gr, dt, sfc_elevation, & ! where z_i is the height of the mixed layer. The value of CLUBB's ! upward component of mixing length, Lscale_up, at the surface will be ! used as z_i. - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol wstar = ( (one/T0) * grav * wpthlp(i,1) * Lscale_up(i,2) )**(one_third) @@ -399,7 +399,7 @@ subroutine calc_sfc_varnce( nz, ngrdcol, gr, dt, sfc_elevation, & ! of (w,thl) or (w,rt) outside (-1,1). ! Perhaps in the future we should also ensure that the correlations ! of (w,u) and (w,v) are not outside (-1,1). - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol min_wp2_sfc_val(i) & = max( w_tol_sqd, & @@ -408,7 +408,7 @@ subroutine calc_sfc_varnce( nz, ngrdcol, gr, dt, sfc_elevation, & end do !$acc end parallel loop - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol if ( wp2(i,1) - tau_zm(i,1) * lhs_splat_wp2(i,1) * wp2(i,1) & < min_wp2_sfc_val(i) ) then @@ -424,7 +424,7 @@ subroutine calc_sfc_varnce( nz, ngrdcol, gr, dt, sfc_elevation, & end do !$acc end parallel loop - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol usp2_sfc(i) = usp2_sfc(i) - 0.5_core_rknd * wp2_splat_sfc_correction(i) vsp2_sfc(i) = vsp2_sfc(i) - 0.5_core_rknd * wp2_splat_sfc_correction(i) @@ -436,7 +436,7 @@ subroutine calc_sfc_varnce( nz, ngrdcol, gr, dt, sfc_elevation, & ! |_sfc = * [ ^2 / ( ^2 + ^2 ) ] ! + * [ ^2 / ( ^2 + ^2 ) ]; ! where ^2 + ^2 /= 0. - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol up2(i,1) = usp2_sfc(i) * ( um_sfc_sqd(i) / max( um_sfc_sqd(i) + vm_sfc_sqd(i) , eps ) ) & + vsp2_sfc(i) * ( vm_sfc_sqd(i) / max( um_sfc_sqd(i) + vm_sfc_sqd(i) , eps ) ) @@ -448,7 +448,7 @@ subroutine calc_sfc_varnce( nz, ngrdcol, gr, dt, sfc_elevation, & ! |_sfc = * [ ^2 / ( ^2 + ^2 ) ] ! + * [ ^2 / ( ^2 + ^2 ) ]; ! where ^2 + ^2 /= 0. - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol vp2(i,1) = vsp2_sfc(i) * ( um_sfc_sqd(i) / max( um_sfc_sqd(i) + vm_sfc_sqd(i) , eps ) ) & + usp2_sfc(i) * ( vm_sfc_sqd(i) / max( um_sfc_sqd(i) + vm_sfc_sqd(i) , eps ) ) @@ -516,7 +516,7 @@ subroutine calc_sfc_varnce( nz, ngrdcol, gr, dt, sfc_elevation, & else ! Previous code. ! Compute ustar^2 - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol ustar2 = sqrt( upwp_sfc(i) * upwp_sfc(i) + vpwp_sfc(i) * vpwp_sfc(i) ) @@ -543,7 +543,7 @@ subroutine calc_sfc_varnce( nz, ngrdcol, gr, dt, sfc_elevation, & !$acc end parallel loop ! Compute estimate for surface second order moments - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol wp2(i,1) = a_const * uf(i)**2 up2(i,1) = up2_sfc_coef * a_const * uf(i)**2 ! From Andre, et al. 
1978 @@ -557,7 +557,7 @@ subroutine calc_sfc_varnce( nz, ngrdcol, gr, dt, sfc_elevation, & ! Brian Griffin; February 2, 2008. if ( .not. l_vary_convect_depth ) then - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol thlp2(i,1) = 0.4_core_rknd * a_const * ( wpthlp(i,1) / uf(i) )**2 rtp2(i,1) = 0.4_core_rknd * a_const * ( wprtp_sfc(i) / uf(i) )**2 @@ -566,7 +566,7 @@ subroutine calc_sfc_varnce( nz, ngrdcol, gr, dt, sfc_elevation, & end do !$acc end parallel loop else - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol thlp2(i,1) = ( wpthlp(i,1) / uf(i) )**2 / ( max_mag_correlation_flux**2 * a_const ) rtp2(i,1) = ( wprtp_sfc(i) / uf(i) )**2 / ( max_mag_correlation_flux**2 * a_const ) @@ -575,7 +575,7 @@ subroutine calc_sfc_varnce( nz, ngrdcol, gr, dt, sfc_elevation, & !$acc end parallel loop end if - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol thlp2(i,1) = max( thl_tol**2, thlp2(i,1) ) rtp2(i,1) = max( rt_tol**2, rtp2(i,1) ) @@ -587,7 +587,7 @@ subroutine calc_sfc_varnce( nz, ngrdcol, gr, dt, sfc_elevation, & ! of (w,thl) or (w,rt) outside (-1,1). ! Perhaps in the future we should also ensure that the correlations ! of (w,u) and (w,v) are not outside (-1,1). - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol min_wp2_sfc_val(i) & = max( w_tol_sqd, & @@ -596,7 +596,7 @@ subroutine calc_sfc_varnce( nz, ngrdcol, gr, dt, sfc_elevation, & end do !$acc end parallel loop - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol if ( wp2(i,1) - tau_zm(i,1) * lhs_splat_wp2(i,1) * wp2(i,1) & < min_wp2_sfc_val(i) ) then @@ -674,13 +674,13 @@ subroutine calc_sfc_varnce( nz, ngrdcol, gr, dt, sfc_elevation, & endif ! l_andre_1978 ! Clip wp2 at wp2_max, same as in advance_wp2_wp3 - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol wp2(i,1) = min( wp2(i,1), wp2_max ) end do !$acc end parallel loop - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol if ( abs(gr%zm(i,1)-sfc_elevation(i)) > abs(gr%zm(i,1)+sfc_elevation(i))*eps/2 ) then @@ -790,6 +790,10 @@ subroutine calc_sfc_varnce( nz, ngrdcol, gr, dt, sfc_elevation, & endif ! clubb_at_least_debug_level ( 2 )! Update surface stats + !$acc exit data delete( uf, depth_pos_wpthlp, min_wp2_sfc_val, & + !$acc um_sfc_sqd, vm_sfc_sqd, usp2_sfc, vsp2_sfc, & + !$acc ustar, zeta, wp2_splat_sfc_correction ) + return end subroutine calc_sfc_varnce diff --git a/src/CLUBB_core/sigma_sqd_w_module.F90 b/src/CLUBB_core/sigma_sqd_w_module.F90 index 28027daed..25470cf27 100644 --- a/src/CLUBB_core/sigma_sqd_w_module.F90 +++ b/src/CLUBB_core/sigma_sqd_w_module.F90 @@ -94,7 +94,7 @@ subroutine compute_sigma_sqd_w( nz, ngrdcol, & ! ---- Begin Code ---- - !$acc declare create( max_corr_w_x_sqd ) + !$acc enter data create( max_corr_w_x_sqd ) !---------------------------------------------------------------- ! 
Compute sigma_sqd_w with new formula from Vince @@ -139,6 +139,8 @@ subroutine compute_sigma_sqd_w( nz, ngrdcol, & end do !$acc end parallel loop + !$acc exit data delete( max_corr_w_x_sqd ) + return end subroutine compute_sigma_sqd_w diff --git a/src/CLUBB_core/turbulent_adv_pdf.F90 b/src/CLUBB_core/turbulent_adv_pdf.F90 index a494a79af..46eb4241c 100644 --- a/src/CLUBB_core/turbulent_adv_pdf.F90 +++ b/src/CLUBB_core/turbulent_adv_pdf.F90 @@ -892,7 +892,7 @@ pure subroutine xpyp_term_ta_pdf_rhs( nz, ngrdcol, gr, term_wpxpyp_explicit, & ! Set lower boundary value to 0 - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol rhs_ta(i,1) = zero end do @@ -947,7 +947,7 @@ pure subroutine xpyp_term_ta_pdf_rhs( nz, ngrdcol, gr, term_wpxpyp_explicit, & end if ! Set upper boundary value to 0 - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol rhs_ta(i,nz) = zero end do @@ -1018,7 +1018,7 @@ pure subroutine xpyp_term_ta_pdf_rhs_godunov( nz, ngrdcol, gr, & ! Intent(in) !$acc copyout( rhs_ta ) ! Set lower boundary value to 0 - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol rhs_ta(i,1) = 0.0_core_rknd end do @@ -1042,7 +1042,7 @@ pure subroutine xpyp_term_ta_pdf_rhs_godunov( nz, ngrdcol, gr, & ! Intent(in) !$acc end parallel loop ! Set upper boundary value to 0 - !$acc parallel loop default(present) + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol rhs_ta(i,nz) = 0.0_core_rknd end do diff --git a/src/clubb_driver.F90 b/src/clubb_driver.F90 index 08dec564f..a8d8b6195 100644 --- a/src/clubb_driver.F90 +++ b/src/clubb_driver.F90 @@ -1513,15 +1513,7 @@ subroutine run_clubb & l_host_applies_sfc_fluxes, & ! Intent(in) saturation_formula, & ! Intent(in) l_input_fields, & ! Intent(in) - iiPDF_type, & ! intent(in) - ipdf_call_placement, & ! intent(in) - l_predict_upwp_vpwp, & ! intent(in) - l_min_xp2_from_corr_wx, & ! intent(in) - l_prescribed_avg_deltaz, & ! intent(in) - l_damp_wp2_using_em, & ! intent(in) - l_stability_correct_tau_zm, & ! intent(in) - l_enable_relaxed_clipping, & ! intent(in) - l_diag_Lscale_from_tau, & ! intent(in) + clubb_config_flags, & ! intent(in) err_code_dummy ) ! Intent(out) ! Setup grid From 398b8a728ce9f39a6ebc4be4e3cabba7c4bccc36 Mon Sep 17 00:00:00 2001 From: huebler Date: Tue, 11 Jul 2023 13:08:17 -0500 Subject: [PATCH 2/7] Improving run_bin_diff.py to accept an argument to print the differences, and an argument to define a threshold to ignore differences. 
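In outline, the new per-file comparison works variable by variable: it sums the absolute differences between the two datasets, skips the variable when that sum falls below the reporting threshold, and otherwise records the maximum, total, and average absolute differences plus a symmetric percent difference, 200*(a-b)/(a+b), computed on fields clipped away from zero. A minimal sketch of that logic follows (this is not the patch itself; the helper name and defaults are illustrative, and it imports netCDF4 explicitly, which the added comparison code relies on):

import netCDF4
import numpy as np

FIELD_CLIP = 1.0e-7   # ignore near-zero field values when forming the % diff

def summarize_nc_diffs(file1, file2, tot_abs_diff_thresh=0.0):
    # Open the two CLUBB output files to be compared.
    dset1 = netCDF4.Dataset(file1)
    dset2 = netCDF4.Dataset(file2)
    rows = []
    for var in dset1.variables:
        # Only multi-dimensional fields are of interest here.
        if dset1[var].ndim <= 1:
            continue
        abs_diff = np.abs(dset1[var][:] - dset2[var][:])
        # Ignore variables whose total absolute difference is below threshold.
        if np.sum(abs_diff) < tot_abs_diff_thresh:
            continue
        # Clip both fields away from zero, then form the symmetric percent
        # difference 200*(a-b)/(a+b).
        a = np.clip(dset1[var][:], a_min=FIELD_CLIP, a_max=None)
        b = np.clip(dset2[var][:], a_min=FIELD_CLIP, a_max=None)
        percent_diff = 200.0 * (a - b) / (a + b)
        rows.append((var, np.max(abs_diff), np.max(percent_diff),
                     np.sum(abs_diff), np.average(abs_diff)))
    dset1.close()
    dset2.close()
    return rows

Run once per case and per output form (_zm.nc, _zt.nc, _sfc.nc), this mirrors the gist of the run_py_diff changes below; the actual script additionally formats the rows with tabulate and honors the -o and -d flags.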
--- run_scripts/run_bindiff_all.py | 153 ++++++++++++++++++++++++++------- 1 file changed, 124 insertions(+), 29 deletions(-) mode change 100644 => 100755 run_scripts/run_bindiff_all.py diff --git a/run_scripts/run_bindiff_all.py b/run_scripts/run_bindiff_all.py old mode 100644 new mode 100755 index 816868333..9357322aa --- a/run_scripts/run_bindiff_all.py +++ b/run_scripts/run_bindiff_all.py @@ -5,52 +5,76 @@ import sys import os import subprocess +import numpy as np +import tabulate scriptPath = os.path.realpath(__file__)[0:-18] #print(scriptPath) +# Threshold used to ignore field values for calculating % diff +field_threshold = 1.0e-7 + def main(): parser = argparse.ArgumentParser() parser.add_argument("-o", "--output", action="store_true", help="add this option if you want to get output for each file compared put into a file made in the cwd.") + parser.add_argument("-d", "--diffs", action="store_true", help="print the differences found in variables") + parser.add_argument("-t", "--threshold", dest="threshold", type=float, action="store", help="print the differences found in variables") parser.add_argument("dirs", nargs=2, help="need 2 clubb output directories to diff. Usage: python run_bindiff_all.py dir_path1 dir_path2") args = parser.parse_args() paths_exist = os.path.exists(args.dirs[0]) and os.path.exists(args.dirs[1]) - if(paths_exist): + if( paths_exist ): + print("Directory 1 is", args.dirs[0]) - print("Directory 2 is", args.dirs[1] + "\n") + print("Directory 2 is", args.dirs[1]) + dir1_files = os.listdir(args.dirs[0]) dir2_files = os.listdir(args.dirs[1]) + #print(dir1_files, dir2_files) + cases = get_cases(dir1_files, dir2_files) - diffs = run_diff(cases, args.dirs[0], args.dirs[1]) - - diff_in_files = run_py_diff(diffs, args.dirs[0], args.dirs[1],args.output) - if(args.output and diff_in_files): + if ( args.threshold is not None ): + tot_abs_diff_thresh = args.threshold + else: + tot_abs_diff_thresh = 0.0 + + print( "Using reporting threshold: ", tot_abs_diff_thresh, "\n" ) + + diff_in_files = run_diff(cases, args.dirs[0], args.dirs[1], args.output, args.diffs, tot_abs_diff_thresh) + + if( args.output and diff_in_files ): print("\nOutput files made and placed in " + os.getcwd()) - if(diff_in_files): + if( diff_in_files ): print("\nThere were differences detected in netCDF (*.nc) files.") sys.exit(1) - if(args.output): - print("\nNo differences detected, so no files were made!") else: - print("\nNo differences detected!") + if( args.output ): + print("\nNo differences detected, so no files were made!") + else: + print("\nNo differences detected!") else: print("Chosen directories do not exist. Please input valid directories") sys.exit(1) + def get_cases(dir1_files, dir2_files): + cases = [] + with open(scriptPath + "RUN_CASES") as file: for line in file: #print(line.strip()) case = line.strip() - #If statement goes through every case in the RUN_CASES file and checks if the case is able to be run, and if that case has the files in the directories to diff it. If it passes all these conditions, the case is added to the runnable cases. + + #If statement goes through every case in the RUN_CASES file and checks if the case is able to be run, + # and if that case has the files in the directories to diff it. If it passes all these conditions, + # the case is added to the runnable cases. if(line[0] != "!" 
and len(line) > 1 and case + "_zt.nc" in dir1_files and case + "_zt.nc" in dir2_files and case + "_zm.nc" in dir1_files and case + "_zm.nc" in dir2_files and case + "_sfc.nc" in dir1_files and case + "_sfc.nc" in dir2_files): @@ -59,32 +83,103 @@ def get_cases(dir1_files, dir2_files): return cases -def run_diff(cases, dir1_path, dir2_path): - diff_list = [] +def run_diff(cases, dir1_path, dir2_path, l_output, l_diffs, thresh): + + diff_in_files = False + nc_data_formats = ["_zm.nc", "_zt.nc", "_sfc.nc"] -#This for loop runs through all the cases you have the files to check, and each netcdf format. It runs the linux diff on the bonary netcdf files to see which files are needed to be sent through the diff_netcdf_outputs.py script. + + # This for loop runs through all the cases you have the files to check, + # and each netcdf format. It runs the linux diff on the binary netcdf files + # to see which files should be looked at more closely for case in cases: + print("DIFFING " + case + " netCDF (*.nc) files") + for out_form in nc_data_formats: - if(len(subprocess.getoutput("diff " + dir1_path + "/" + case + out_form + " " + dir2_path + "/" + case + out_form)) > 0): - diff_list.append(case + out_form) - return diff_list + # Call linux diff command, if it shows a diff, check the netcdf values to confirm + if( len(subprocess.getoutput("diff " + dir1_path + "/" + case + out_form + " " + dir2_path + "/" + case + out_form)) > 0 ): + diff_in_files = run_py_diff(case+out_form, dir1_path, dir2_path, l_output, l_diffs, thresh) or diff_in_files + + return diff_in_files + + +def run_py_diff( test_file, dir1_path, dir2_path, l_save_output, l_print_diffs, tot_abs_diff_thresh ): -def run_py_diff(diff_list, dir1_path, dir2_path, make_out_files): diff_in_files = False - for test_file in diff_list: - #print("DIFFING " + test_file + "netCDF (*.nc) files") - #print(dir1_path + test_file) - #print(dir2_path + test_file) - output = subprocess.getoutput("python " + scriptPath + "diff_netcdf_outputs.py " + dir1_path + test_file + " " + dir2_path + test_file) - if(len(output) > 0): + any_diff_above_thresh = False + + #print("DIFFING " + test_file + "netCDF (*.nc) files") + #print(dir1_path + test_file) + #print(dir2_path + test_file) + + # Create datasets from nc files + dset1 = netCDF4.Dataset(dir1_path+"/"+test_file) + dset2 = netCDF4.Dataset(dir2_path+"/"+test_file) + + # Define table header, these are the values we're going to output + table = [[ 'Var', + 'Max Abs Diff', + 'Max % Diff', + 'Total Abs Diff', + 'Avg Abs Diff' ]] + + for var in dset1.variables: + + # The CLUBB netcdf variables we are interested in are all + # 4 dimensional (time, altitude, latitude, longitude), + # but currently we don't actually have latitude or longitude in clubb, so those + # dimensions are hardcoded to be 1. If in the future we remove those useless + # dimensions (unlikely), then the variables of interested would be 2D. So + # for futureproofing, we will just check all variables with more than 1 dimension. + if( dset1[var].ndim > 1 ): + diff_in_files = True - print("*** Differences detected in " + test_file + "! 
***") - if(make_out_files): - f = open(test_file+"-diff_out", "w") - f.write(output) - f.close() + + abs_diff = abs( dset1[var][:,:,:] - dset2[var][:,:,:] ) + + # If the sum of all absolute differences is less than the threshold, then ignore this var + if ( np.sum(abs_diff) < tot_abs_diff_thresh ): + continue + else: + any_diff_above_thresh = True + + # Clip fields to ignore tiny values for the % diff + field_1_clipped = np.clip( dset1[var][:,:,:], a_min = field_threshold, a_max = 9999999.0 ) + field_2_clipped = np.clip( dset2[var][:,:,:], a_min = field_threshold, a_max = 9999999.0 ) + + # Calculate the percent difference, 100 * (a-b) / ((a+b)/2) + percent_diff = 200.0 * ( field_1_clipped-field_2_clipped ) \ + / ( field_1_clipped+field_2_clipped ) + + # Append the table array with the values we want to print + table.append( [ var, + np.max(abs_diff), + np.max(percent_diff), + np.sum(abs_diff), + np.average(abs_diff) ] ) + + # Print results + header = "************** Differences detected in " + test_file + "! **************" + print( header ) + + if( any_diff_above_thresh ): + output = tabulate.tabulate(table, headers='firstrow') + else: + output = "Total absolute value of all differences are below threshold: " + str(tot_abs_diff_thresh) + + if( l_print_diffs ): + # Print a very pretty table of the values + print(output+"\n") + + if( l_save_output ): + # Save all the output to a file + f = open(test_file + "-diff_out", "w") + f.write(header) + f.write(output) + f.close() + return diff_in_files From 0b9f422518f7159d7c2d040cc3b2b818a71e9143 Mon Sep 17 00:00:00 2001 From: Tyler Cernik <80285381+cernikt@users.noreply.github.com> Date: Thu, 13 Jul 2023 21:23:09 -0500 Subject: [PATCH 3/7] Update requirements.txt updated Pillow version to resolve related dependabot alerts --- postprocessing/pyplotgen/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/postprocessing/pyplotgen/requirements.txt b/postprocessing/pyplotgen/requirements.txt index d91465a59..bc82ad7e9 100755 --- a/postprocessing/pyplotgen/requirements.txt +++ b/postprocessing/pyplotgen/requirements.txt @@ -1,4 +1,4 @@ -Pillow~=8.2.0 +Pillow~=10.0.0 netCDF4~=1.5.3 matplotlib~=3.2.1 numpy~=1.18.2 From 4c8e71aad6664c648a84ba517dada0994b796fe5 Mon Sep 17 00:00:00 2001 From: Tyler Cernik <80285381+cernikt@users.noreply.github.com> Date: Thu, 13 Jul 2023 21:37:32 -0500 Subject: [PATCH 4/7] Update requirements.txt Also updating numpy version to 1.25.1 --- postprocessing/pyplotgen/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/postprocessing/pyplotgen/requirements.txt b/postprocessing/pyplotgen/requirements.txt index bc82ad7e9..b2599ffd5 100755 --- a/postprocessing/pyplotgen/requirements.txt +++ b/postprocessing/pyplotgen/requirements.txt @@ -1,7 +1,7 @@ Pillow~=10.0.0 netCDF4~=1.5.3 matplotlib~=3.2.1 -numpy~=1.18.2 +numpy~=1.25.1 cycler~=0.10.0 fpdf~=1.7.2 opencv-python~=4.4.0 From 13de9b8e3a8d758a44b8408fb64cc2ac5ffd2c23 Mon Sep 17 00:00:00 2001 From: Vincent Larson Date: Sat, 15 Jul 2023 09:20:04 -0500 Subject: [PATCH 5/7] Loop to `nz-1` instead of `nz` because upper boundary condition is set in the lines immediately below. --- src/CLUBB_core/mean_adv.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/CLUBB_core/mean_adv.F90 b/src/CLUBB_core/mean_adv.F90 index f111495f1..69a5e2f20 100644 --- a/src/CLUBB_core/mean_adv.F90 +++ b/src/CLUBB_core/mean_adv.F90 @@ -244,7 +244,7 @@ pure subroutine term_ma_zt_lhs( nz, ngrdcol, wm_zt, weights_zt2zm, & ! 
Intent(in ! Most of the interior model; normal conditions. !$acc parallel loop gang vector collapse(2) default(present) - do k = 2, nz, 1 + do k = 2, nz-1, 1 do i = 1, ngrdcol ! Thermodynamic superdiagonal: [ x var_zt(k+1,) ] From 29ad556fe9b55c4071e4fc4b6145b79d471f7a1a Mon Sep 17 00:00:00 2001 From: domkesteffen <42725900+domkesteffen@users.noreply.github.com> Date: Tue, 18 Jul 2023 17:49:07 -0500 Subject: [PATCH 6/7] Added code to subroutine run_clubb in src/clubb_driver.F90 which prints git log and git diff information to the _setup.txt output file. (#1101) CLUBB ticket #1098 --- src/clubb_driver.F90 | 51 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 49 insertions(+), 2 deletions(-) diff --git a/src/clubb_driver.F90 b/src/clubb_driver.F90 index a8d8b6195..e82baa4c4 100644 --- a/src/clubb_driver.F90 +++ b/src/clubb_driver.F90 @@ -1148,6 +1148,39 @@ subroutine run_clubb & ! Print the date and time call write_date( l_write_to_file, iunit ) + call write_text( "", l_write_to_file, iunit ) + call write_text( "--------------------------------------------------", & + l_write_to_file, iunit ) + call write_text( "Latest git log entry", l_write_to_file, iunit ) + call write_text( "--------------------------------------------------", & + l_write_to_file, iunit ) + call write_text( "", l_write_to_file, iunit ) + if ( l_write_to_file ) close( unit=iunit ) + + if ( l_write_to_file ) then + call execute_command_line( 'git log -n 1' ) + call execute_command_line( 'echo "Branch name:" >> '//case_info_file ) + call execute_command_line( 'git rev-parse --abbrev-ref HEAD >> '//case_info_file ) + call execute_command_line( 'echo "(If no branch name is shown HEAD may be detached)"'// & + ' >> '//case_info_file ) + call execute_command_line( 'echo "" >> '//case_info_file ) + call execute_command_line( 'git log -n 1 >> '//case_info_file ) + end if + + if ( l_write_to_file ) then + open(unit=iunit, file=case_info_file, status='OLD', action='write', position='APPEND') + end if + + call write_text( NEW_LINE('A')//"A detailed git diff can be found "// & + "at the end of this file"//NEW_LINE('A'), & + l_write_to_file, iunit ) + + call write_text( "--------------------------------------------------", & + l_write_to_file, iunit ) + call write_text( "Tunable Parameters:", l_write_to_file, iunit) + call write_text( "--------------------------------------------------", & + l_write_to_file, iunit ) + ! Print the list of parameters that are being used before the run. call write_text( "Parameter Value", l_write_to_file, iunit, '(4x,A24)') call write_text( "--------- -----", l_write_to_file, iunit, '(4x,A24)') @@ -1497,9 +1530,23 @@ subroutine run_clubb & call write_text( "--------------------------------------------------", & l_write_to_file, iunit ) - call print_clubb_config_flags( iunit, clubb_config_flags ) ! Intent(in) + call print_clubb_config_flags( fstdout, clubb_config_flags ) ! Intent(in) + call print_clubb_config_flags( iunit, clubb_config_flags ) ! 
Intent(in) - if ( l_write_to_file ) close(unit=iunit) + call write_text( "--------------------------------------------------", & + l_write_to_file, iunit ) + call write_text( "git diff src/", l_write_to_file, iunit ) + call write_text( "--------------------------------------------------", & + l_write_to_file, iunit ) + + if ( l_write_to_file ) then + write(fstdout, *) "See *setup.txt file in output folder"//NEW_LINE('A') + close(unit=iunit) + end if + + if ( l_write_to_file ) then + call execute_command_line( 'git --no-pager diff >> '//case_info_file ) + end if end if ! clubb_at_least_debug_level( 1 ) From 1f4d0453a4d5c225be028a20ba8cfe44f3238336 Mon Sep 17 00:00:00 2001 From: huebleruwm <37674341+huebleruwm@users.noreply.github.com> Date: Tue, 18 Jul 2023 20:27:07 -0500 Subject: [PATCH 7/7] Openacc tweaks and cleanup 2 (#1099) * Making all end parallel directives specify end loop * Replacing last acc declare in a procedure with acc enter/exit data commands * Removing pure declarations, turns out they dont really improve performance and openmp isn't allowed within them. * Adding explicit directives to penta_lu and tridiag_lu, as opposed to the previously used kernels directive. Also splitting up a loop to improve GPU performance. * Splitting up a couple loops for performance reasons. * Missed one somehow --- src/CLUBB_core/adg1_adg2_3d_luhar_pdf.F90 | 2 +- src/CLUBB_core/advance_clubb_core_module.F90 | 4 +- src/CLUBB_core/advance_helper_module.F90 | 2 +- .../advance_windm_edsclrm_module.F90 | 108 +++++++++--------- src/CLUBB_core/advance_wp2_wp3_module.F90 | 57 +++++---- src/CLUBB_core/advance_xm_wpxp_module.F90 | 12 +- src/CLUBB_core/advance_xp2_xpyp_module.F90 | 8 +- src/CLUBB_core/advance_xp3_module.F90 | 4 +- src/CLUBB_core/calc_roots.F90 | 6 +- src/CLUBB_core/clip_explicit.F90 | 14 +-- src/CLUBB_core/clip_semi_implicit.F90 | 2 +- src/CLUBB_core/clubb_api_module.F90 | 2 +- src/CLUBB_core/corr_varnce_module.F90 | 2 +- src/CLUBB_core/diffusion.F90 | 6 +- src/CLUBB_core/fill_holes.F90 | 18 ++- src/CLUBB_core/grid_class.F90 | 18 +-- src/CLUBB_core/mean_adv.F90 | 4 +- src/CLUBB_core/mono_flux_limiter.F90 | 2 +- src/CLUBB_core/numerical_check.F90 | 2 +- src/CLUBB_core/penta_lu_solver.F90 | 102 ++++++++++------- src/CLUBB_core/tridiag_lu_solver.F90 | 48 +++++--- src/CLUBB_core/turbulent_adv_pdf.F90 | 8 +- 22 files changed, 245 insertions(+), 186 deletions(-) diff --git a/src/CLUBB_core/adg1_adg2_3d_luhar_pdf.F90 b/src/CLUBB_core/adg1_adg2_3d_luhar_pdf.F90 index 09334a800..acf9fe634 100644 --- a/src/CLUBB_core/adg1_adg2_3d_luhar_pdf.F90 +++ b/src/CLUBB_core/adg1_adg2_3d_luhar_pdf.F90 @@ -1360,7 +1360,7 @@ subroutine backsolve_Luhar_params( Sk_max, Skx, & ! In end subroutine backsolve_Luhar_params !============================================================================= - pure function max_cubic_root( a_coef, b_coef, c_coef, d_coef ) & + function max_cubic_root( a_coef, b_coef, c_coef, d_coef ) & result( max_root ) ! 
Description: diff --git a/src/CLUBB_core/advance_clubb_core_module.F90 b/src/CLUBB_core/advance_clubb_core_module.F90 index 925448535..4ac548303 100644 --- a/src/CLUBB_core/advance_clubb_core_module.F90 +++ b/src/CLUBB_core/advance_clubb_core_module.F90 @@ -5157,7 +5157,7 @@ subroutine calc_trapezoid_zm( nz, ngrdcol, gr, variable_zm, variable_zt, & * ( gr%zm(i,k) - gr%zt(i,k) ) * gr%invrs_dzm(i,k) end do end do - !$acc end parallel + !$acc end parallel loop return end subroutine calc_trapezoid_zm @@ -5514,7 +5514,7 @@ subroutine set_Lscale_max( ngrdcol, l_implemented, host_dx, host_dy, & end subroutine set_Lscale_max !=============================================================================== - pure subroutine calculate_thlp2_rad & + subroutine calculate_thlp2_rad & ( nz, rcm_zm, thlprcp, radht_zm, & ! Intent(in) clubb_params, & ! Intent(in) thlp2_forcing ) ! Intent(inout) diff --git a/src/CLUBB_core/advance_helper_module.F90 b/src/CLUBB_core/advance_helper_module.F90 index 19602e375..5fb4c88b2 100644 --- a/src/CLUBB_core/advance_helper_module.F90 +++ b/src/CLUBB_core/advance_helper_module.F90 @@ -2023,7 +2023,7 @@ function vertical_avg( total_idx, rho_ds, field, dz ) end function vertical_avg !============================================================================= - pure function vertical_integral( total_idx, rho_ds, & + function vertical_integral( total_idx, rho_ds, & field, dz ) ! Description: diff --git a/src/CLUBB_core/advance_windm_edsclrm_module.F90 b/src/CLUBB_core/advance_windm_edsclrm_module.F90 index 37c639c24..3e502d531 100644 --- a/src/CLUBB_core/advance_windm_edsclrm_module.F90 +++ b/src/CLUBB_core/advance_windm_edsclrm_module.F90 @@ -294,7 +294,7 @@ subroutine advance_windm_edsclrm( nz, ngrdcol, gr, dt, & do i = 1, ngrdcol nu_zero(i) = zero end do - !$acc end parallel + !$acc end parallel loop !$acc parallel loop gang vector collapse(2) default(present) do k = 1, nz @@ -302,7 +302,7 @@ subroutine advance_windm_edsclrm( nz, ngrdcol, gr, dt, & Km_zm_p_nu10(i,k) = Km_zm(i,k) + nu_vert_res_dep%nu10(i) end do end do - !$acc end parallel + !$acc end parallel loop l_perturbed_wind = ( .not. l_predict_upwp_vpwp ) .and. l_linearize_pbl_winds @@ -320,7 +320,7 @@ subroutine advance_windm_edsclrm( nz, ngrdcol, gr, dt, & end do end do end do - !$acc end parallel + !$acc end parallel loop end if if ( .not. l_predict_upwp_vpwp ) then @@ -333,7 +333,7 @@ subroutine advance_windm_edsclrm( nz, ngrdcol, gr, dt, & Km_zt(i,k) = max( Km_zt(i,k), zero ) end do end do - !$acc end parallel + !$acc end parallel loop ! Calculate diffusion terms call diffusion_zt_lhs( nz, ngrdcol, gr, Km_zm, Km_zt, nu_vert_res_dep%nu10, & ! In @@ -357,7 +357,7 @@ subroutine advance_windm_edsclrm( nz, ngrdcol, gr, dt, & ! Thermodynamic subdiagonal: [ x xm(k-1,) ] lhs_diff(km1_tdiag,i,2) = zero end do - !$acc end parallel + !$acc end parallel loop else !$acc parallel loop gang vector default(present) do i = 1, ngrdcol @@ -374,7 +374,7 @@ subroutine advance_windm_edsclrm( nz, ngrdcol, gr, dt, & ! Thermodynamic subdiagonal: [ x xm(k-1,) ] lhs_diff(km1_tdiag,i,2) = zero end do - !$acc end parallel + !$acc end parallel loop end if if ( l_lmm_stepping ) then @@ -385,7 +385,7 @@ subroutine advance_windm_edsclrm( nz, ngrdcol, gr, dt, & vm_old(i,k) = vm(i,k) end do end do - !$acc end parallel + !$acc end parallel loop end if ! 
l_lmm_stepping !---------------------------------------------------------------- @@ -419,14 +419,14 @@ subroutine advance_windm_edsclrm( nz, ngrdcol, gr, dt, & wind_speed(i,k) = max( sqrt( um(i,k)**2 + vm(i,k)**2 ), eps ) end do end do - !$acc end parallel + !$acc end parallel loop ! Compute u_star_sqd according to the definition of u_star. !$acc parallel loop gang vector default(present) do i = 1, ngrdcol u_star_sqd(i) = sqrt( upwp(i,1)**2 + vpwp(i,1)**2 ) end do - !$acc end parallel + !$acc end parallel loop ! Compute the explicit portion of the um equation. ! Build the right-hand side vector. @@ -464,7 +464,7 @@ subroutine advance_windm_edsclrm( nz, ngrdcol, gr, dt, & upwp(i,k) = -one_half * xpwp(i,k) end do end do - !$acc end parallel + !$acc end parallel loop call calc_xpwp( nz, ngrdcol, gr, & Km_zm_p_nu10, vm, & @@ -476,7 +476,7 @@ subroutine advance_windm_edsclrm( nz, ngrdcol, gr, dt, & vpwp(i,k) = -one_half * xpwp(i,k) end do end do - !$acc end parallel + !$acc end parallel loop ! A zero-flux boundary condition at the top of the model, d(xm)/dz = 0, ! means that x'w' at the top model level is 0, @@ -486,7 +486,7 @@ subroutine advance_windm_edsclrm( nz, ngrdcol, gr, dt, & upwp(i,nz) = zero vpwp(i,nz) = zero end do - !$acc end parallel + !$acc end parallel loop ! Compute the implicit portion of the um and vm equations. ! Build the left-hand side matrix. @@ -522,7 +522,7 @@ subroutine advance_windm_edsclrm( nz, ngrdcol, gr, dt, & um(i,k) = solution(i,k,windm_edsclrm_um) end do end do - !$acc end parallel + !$acc end parallel loop !---------------------------------------------------------------- ! Update meridional (south-to-north) component of mean wind, vm @@ -533,7 +533,7 @@ subroutine advance_windm_edsclrm( nz, ngrdcol, gr, dt, & vm(i,k) = solution(i,k,windm_edsclrm_vm) end do end do - !$acc end parallel + !$acc end parallel loop if ( l_stats_samp ) then @@ -571,7 +571,7 @@ subroutine advance_windm_edsclrm( nz, ngrdcol, gr, dt, & um(i,1) = um(i,2) vm(i,1) = vm(i,2) end do - !$acc end parallel + !$acc end parallel loop if ( l_lmm_stepping ) then !$acc parallel loop gang vector collapse(2) default(present) @@ -581,7 +581,7 @@ subroutine advance_windm_edsclrm( nz, ngrdcol, gr, dt, & vm(i,k) = one_half * ( vm_old(i,k) + vm(i,k) ) end do end do - !$acc end parallel + !$acc end parallel loop endif ! l_lmm_stepping ) then if ( uv_sponge_damp_settings%l_sponge_damping ) then @@ -645,7 +645,7 @@ subroutine advance_windm_edsclrm( nz, ngrdcol, gr, dt, & upwp(i,k) = upwp(i,k) - one_half * xpwp(i,k) end do end do - !$acc end parallel + !$acc end parallel loop call calc_xpwp( nz, ngrdcol, gr, & Km_zm_p_nu10, vm, & @@ -657,7 +657,7 @@ subroutine advance_windm_edsclrm( nz, ngrdcol, gr, dt, & vpwp(i,k) = vpwp(i,k) - one_half * xpwp(i,k) end do end do - !$acc end parallel + !$acc end parallel loop ! Adjust um and vm if nudging is turned on. if ( l_uv_nudge ) then @@ -680,7 +680,7 @@ subroutine advance_windm_edsclrm( nz, ngrdcol, gr, dt, & vm(i,k) = vm(i,k) - ( ( vm(i,k) - vm_ref(i,k) ) * (dt/ts_nudge) ) end do end do - !$acc end parallel + !$acc end parallel loop if ( l_stats_samp ) then !$acc update host( um, vm ) @@ -800,14 +800,14 @@ subroutine advance_windm_edsclrm( nz, ngrdcol, gr, dt, & wind_speed_pert(i,k) = max( sqrt( (um_pert(i,k))**2 + (vm_pert(i,k))**2 ), eps ) end do end do - !$acc end parallel + !$acc end parallel loop ! Compute u_star_sqd according to the definition of u_star. 
!$acc parallel loop gang vector default(present) do i = 1, ngrdcol u_star_sqd_pert(i) = sqrt( upwp_pert(i,1)**2 + vpwp_pert(i,1)**2 ) end do - !$acc end parallel + !$acc end parallel loop ! Compute the explicit portion of the um equation. ! Build the right-hand side vector. @@ -845,7 +845,7 @@ subroutine advance_windm_edsclrm( nz, ngrdcol, gr, dt, & upwp_pert(i,k) = -one_half * xpwp(i,k) end do end do - !$acc end parallel + !$acc end parallel loop call calc_xpwp( nz, ngrdcol, gr, & Km_zm_p_nu10, vm_pert, & @@ -857,7 +857,7 @@ subroutine advance_windm_edsclrm( nz, ngrdcol, gr, dt, & vpwp_pert(i,k) = -one_half * xpwp(i,k) end do end do - !$acc end parallel + !$acc end parallel loop ! A zero-flux boundary condition at the top of the model, d(xm)/dz = 0, ! means that x'w' at the top model level is 0, @@ -867,7 +867,7 @@ subroutine advance_windm_edsclrm( nz, ngrdcol, gr, dt, & upwp_pert(i,nz) = zero vpwp_pert(i,nz) = zero end do - !$acc end parallel + !$acc end parallel loop ! Compute the implicit portion of the um and vm equations. ! Build the left-hand side matrix. @@ -903,7 +903,7 @@ subroutine advance_windm_edsclrm( nz, ngrdcol, gr, dt, & um_pert(i,k) = solution(i,k,windm_edsclrm_um) end do end do - !$acc end parallel + !$acc end parallel loop !---------------------------------------------------------------- ! Update meridional (south-to-north) component of mean wind, vm @@ -914,7 +914,7 @@ subroutine advance_windm_edsclrm( nz, ngrdcol, gr, dt, & vm_pert(i,k) = solution(i,k,windm_edsclrm_vm) end do end do - !$acc end parallel + !$acc end parallel loop ! The values of um(1) and vm(1) are located below the model surface and ! do not affect the rest of the model. The values of um(1) or vm(1) are @@ -927,7 +927,7 @@ subroutine advance_windm_edsclrm( nz, ngrdcol, gr, dt, & um_pert(i,1) = um_pert(i,2) vm_pert(i,1) = vm_pert(i,2) end do - !$acc end parallel + !$acc end parallel loop ! Second part of momentum (implicit component) @@ -943,7 +943,7 @@ subroutine advance_windm_edsclrm( nz, ngrdcol, gr, dt, & upwp_pert(i,k) = upwp_pert(i,k) - one_half * xpwp(i,k) end do end do - !$acc end parallel + !$acc end parallel loop call calc_xpwp( nz, ngrdcol, gr, & Km_zm_p_nu10, vm_pert, & @@ -955,7 +955,7 @@ subroutine advance_windm_edsclrm( nz, ngrdcol, gr, dt, & vpwp_pert(i,k) = vpwp_pert(i,k) - one_half * xpwp(i,k) end do end do - !$acc end parallel + !$acc end parallel loop if ( l_tke_aniso ) then @@ -1034,7 +1034,7 @@ subroutine advance_windm_edsclrm( nz, ngrdcol, gr, dt, & Kmh_zt(i,k) = max( Kmh_zt(i,k), zero ) end do end do - !$acc end parallel + !$acc end parallel loop ! Calculate diffusion terms call diffusion_zt_lhs( nz, ngrdcol, gr, Kmh_zm, Kmh_zt, nu_zero, & ! intent(in) @@ -1059,7 +1059,7 @@ subroutine advance_windm_edsclrm( nz, ngrdcol, gr, dt, & ! Thermodynamic subdiagonal: [ x xm(k-1,) ] lhs_diff(km1_tdiag,i,2) = zero end do - !$acc end parallel + !$acc end parallel loop else @@ -1076,7 +1076,7 @@ subroutine advance_windm_edsclrm( nz, ngrdcol, gr, dt, & ! Thermodynamic subdiagonal: [ x xm(k-1,) ] lhs_diff(km1_tdiag,i,2) = zero end do - !$acc end parallel + !$acc end parallel loop end if @@ -1089,7 +1089,7 @@ subroutine advance_windm_edsclrm( nz, ngrdcol, gr, dt, & end do end do end do - !$acc end parallel + !$acc end parallel loop endif ! l_lmm_stepping ! 
Eddy-scalar surface fluxes, x'w'|_sfc, are applied through an explicit @@ -1130,7 +1130,7 @@ subroutine advance_windm_edsclrm( nz, ngrdcol, gr, dt, & wpedsclrp(i,k,edsclr) = -one_half * xpwp(i,k) end do end do - !$acc end parallel + !$acc end parallel loop end do ! A zero-flux boundary condition at the top of the model, d(xm)/dz = 0, @@ -1142,7 +1142,7 @@ subroutine advance_windm_edsclrm( nz, ngrdcol, gr, dt, & wpedsclrp(i,nz,j) = zero end do end do - !$acc end parallel + !$acc end parallel loop ! Compute the implicit portion of the xm (eddy-scalar) equations. ! Build the left-hand side matrix. @@ -1177,7 +1177,7 @@ subroutine advance_windm_edsclrm( nz, ngrdcol, gr, dt, & end do end do end do - !$acc end parallel + !$acc end parallel loop ! The value of edsclrm(1) is located below the model surface and does not ! effect the rest of the model. The value of edsclrm(1) is simply set to @@ -1188,7 +1188,7 @@ subroutine advance_windm_edsclrm( nz, ngrdcol, gr, dt, & edsclrm(i,1,edsclr) = edsclrm(i,2,edsclr) end do end do - !$acc end parallel + !$acc end parallel loop if ( l_lmm_stepping ) then !$acc parallel loop gang vector collapse(3) default(present) @@ -1199,7 +1199,7 @@ subroutine advance_windm_edsclrm( nz, ngrdcol, gr, dt, & end do end do end do - !$acc end parallel + !$acc end parallel loop endif ! l_lmm_stepping ! Second part of momentum (implicit component) @@ -1218,7 +1218,7 @@ subroutine advance_windm_edsclrm( nz, ngrdcol, gr, dt, & wpedsclrp(i,k,edsclr) = -one_half * xpwp(i,k) end do end do - !$acc end parallel + !$acc end parallel loop end do ! Note that the w'edsclr' terms are not clipped, since we don't compute @@ -2153,7 +2153,7 @@ subroutine compute_uv_tndcy( nz, ngrdcol, solve_type, & xm_gf(i,k) = - fcor(i) * perp_wind_g(i,k) end do end do - !$acc end parallel + !$acc end parallel loop !$acc parallel loop gang vector collapse(2) default(present) do k = 1, nz @@ -2161,7 +2161,7 @@ subroutine compute_uv_tndcy( nz, ngrdcol, solve_type, & xm_cf(i,k) = fcor(i) * perp_wind_m(i,k) end do end do - !$acc end parallel + !$acc end parallel loop case ( windm_edsclrm_vm ) @@ -2175,7 +2175,7 @@ subroutine compute_uv_tndcy( nz, ngrdcol, solve_type, & xm_gf(i,k) = fcor(i) * perp_wind_g(i,k) end do end do - !$acc end parallel + !$acc end parallel loop !$acc parallel loop gang vector collapse(2) default(present) do k = 1, nz @@ -2183,7 +2183,7 @@ subroutine compute_uv_tndcy( nz, ngrdcol, solve_type, & xm_cf(i,k) = -fcor(i) * perp_wind_m(i,k) end do end do - !$acc end parallel + !$acc end parallel loop case default @@ -2198,7 +2198,7 @@ subroutine compute_uv_tndcy( nz, ngrdcol, solve_type, & xm_cf(i,k) = 0._core_rknd end do end do - !$acc end parallel + !$acc end parallel loop end select @@ -2208,7 +2208,7 @@ subroutine compute_uv_tndcy( nz, ngrdcol, solve_type, & xm_tndcy(i,k) = xm_gf(i,k) + xm_cf(i,k) + xm_forcing(i,k) end do end do - !$acc end parallel + !$acc end parallel loop if ( l_stats_samp ) then @@ -2237,7 +2237,7 @@ subroutine compute_uv_tndcy( nz, ngrdcol, solve_type, & xm_tndcy(i,k) = 0.0_core_rknd end do end do - !$acc end parallel + !$acc end parallel loop endif @@ -2329,7 +2329,7 @@ subroutine windm_edsclrm_lhs( nz, ngrdcol, gr, dt, & lhs(2,i,1) = 1.0_core_rknd lhs(3,i,1) = 0.0_core_rknd end do - !$acc end parallel + !$acc end parallel loop ! 
Add terms to lhs !$acc parallel loop gang vector collapse(2) default(present) @@ -2345,7 +2345,7 @@ subroutine windm_edsclrm_lhs( nz, ngrdcol, gr, dt, & lhs(3,i,k) = 0.5_core_rknd * lhs_diff(3,i,k) end do end do - !$acc end parallel + !$acc end parallel loop ! LHS mean advection term. if ( .not. l_implemented ) then @@ -2355,7 +2355,7 @@ subroutine windm_edsclrm_lhs( nz, ngrdcol, gr, dt, & lhs(1:3,i,k) = lhs(1:3,i,k) + lhs_ma_zt(:,i,k) end do end do - !$acc end parallel + !$acc end parallel loop endif @@ -2367,7 +2367,7 @@ subroutine windm_edsclrm_lhs( nz, ngrdcol, gr, dt, & lhs(2,i,2) = lhs(2,i,2) + invrs_rho_ds_zt(i,2) * gr%invrs_dzt(i,2) & * rho_ds_zm(i,1) * ( u_star_sqd(i) / wind_speed(i,2) ) end do - !$acc end parallel + !$acc end parallel loop end if ! l_imp_sfc_momentum_flux return @@ -2488,7 +2488,7 @@ subroutine windm_edsclrm_rhs( nz, ngrdcol, gr, solve_type, dt, & do i = 1, ngrdcol rhs(i,1) = 0.0_core_rknd end do - !$acc end parallel + !$acc end parallel loop ! Non-boundary rhs calculation, this is a highly vectorized loop !$acc parallel loop gang vector collapse(2) default(present) @@ -2502,7 +2502,7 @@ subroutine windm_edsclrm_rhs( nz, ngrdcol, gr, solve_type, dt, & + invrs_dt * xm(i,k) ! RHS time tendency end do end do - !$acc end parallel + !$acc end parallel loop ! Upper boundary calculation !$acc parallel loop gang vector default(present) @@ -2513,7 +2513,7 @@ subroutine windm_edsclrm_rhs( nz, ngrdcol, gr, solve_type, dt, & + xm_tndcy(i,nz) & ! RHS forcings + invrs_dt * xm(i,nz) ! RHS time tendency end do - !$acc end parallel + !$acc end parallel loop if ( l_stats_samp .and. ixm_ta > 0 ) then @@ -2555,7 +2555,7 @@ subroutine windm_edsclrm_rhs( nz, ngrdcol, gr, solve_type, dt, & * gr%invrs_dzt(i,2) & * rho_ds_zm(i,1) * xpwp_sfc(i) end do - !$acc end parallel + !$acc end parallel loop if ( l_stats_samp .and. ixm_ta > 0 ) then diff --git a/src/CLUBB_core/advance_wp2_wp3_module.F90 b/src/CLUBB_core/advance_wp2_wp3_module.F90 index a79562f8f..921e4309e 100644 --- a/src/CLUBB_core/advance_wp2_wp3_module.F90 +++ b/src/CLUBB_core/advance_wp2_wp3_module.F90 @@ -1985,7 +1985,6 @@ subroutine wp23_lhs( nz, ngrdcol, gr, dt, & do i = 1, ngrdcol k_wp3 = 2*k - 1 - k_wp2 = 2*k ! ------ w'3 ------ @@ -2018,8 +2017,16 @@ subroutine wp23_lhs( nz, ngrdcol, gr, dt, & ! LHS mean advection (ma) and diffusion (diff) terms lhs(5,i,k_wp3) = lhs(5,i,k_wp3) + lhs_ma_zt(3,i,k) + lhs_diff_zt(3,i,k) + end do + end do + !$acc end parallel loop + !$acc parallel loop gang vector collapse(2) default(present) + do k = 2, nz-1, 1 + do i = 1, ngrdcol + k_wp2 = 2*k + ! ------ w'2 ------ ! LHS mean advection (ma) and diffusion (diff) terms @@ -2473,7 +2480,6 @@ subroutine wp23_rhs( nz, ngrdcol, gr, dt, & do i = 1, ngrdcol k_wp3 = 2*k - 1 - k_wp2 = 2*k ! ------ Combine terms for 3rd moment of vertical velocity, ------ ! @@ -2494,8 +2500,17 @@ subroutine wp23_rhs( nz, ngrdcol, gr, dt, & ! RHS "over implicit" pressure term 1 (pr1). rhs(i,k_wp3) = rhs(i,k_wp3) + ( one - gamma_over_implicit_ts ) & * ( - lhs_pr1_wp3(i,k) * wp3(i,k) ) + end do + end do + !$acc end parallel loop + !$acc parallel loop gang vector collapse(2) default(present) + do k = 2, nz-1 + do i = 1, ngrdcol + + k_wp2 = 2*k + ! ------ Combine terms for 2nd moment of vertical velocity, ------ ! ! RHS time tendency. 
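      ! Illustrative note (a hedged reading of the indexing above, not part of this
      ! patch): wp3 and wp2 share a single interleaved band system, with
      !     k_wp3 = 2*k - 1   ! odd rows hold the w'^3 equation at level k
      !     k_wp2 = 2*k       ! even rows hold the w'^2 equation at level k
      ! Splitting the original combined loop into one collapse(2) parallel loop per
      ! variable lets each OpenACC kernel touch only one of the two row families,
      ! e.g. (sketch only; "rhs(i,2*k) = ..." stands in for the actual w'^2 terms):
      !     !$acc parallel loop gang vector collapse(2) default(present)
      !     do k = 2, nz-1
      !       do i = 1, ngrdcol
      !         rhs(i,2*k) = ...   ! w'^2 rows only
      !       end do
      !     end do
      !     !$acc end parallel loop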
@@ -2906,7 +2921,7 @@ subroutine wp23_rhs( nz, ngrdcol, gr, dt, & end subroutine wp23_rhs !============================================================================= - pure subroutine wp2_term_ta_lhs( nz, ngrdcol, gr, & + subroutine wp2_term_ta_lhs( nz, ngrdcol, gr, & rho_ds_zt, invrs_rho_ds_zm, & lhs_ta_wp2 ) @@ -3027,7 +3042,7 @@ pure subroutine wp2_term_ta_lhs( nz, ngrdcol, gr, & end subroutine wp2_term_ta_lhs !============================================================================= - pure subroutine wp2_terms_ac_pr2_lhs( nz, ngrdcol, gr, C_uu_shr, wm_zt, & + subroutine wp2_terms_ac_pr2_lhs( nz, ngrdcol, gr, C_uu_shr, wm_zt, & lhs_ac_pr2_wp2 ) ! Description: @@ -3147,7 +3162,7 @@ pure subroutine wp2_terms_ac_pr2_lhs( nz, ngrdcol, gr, C_uu_shr, wm_zt, & end subroutine wp2_terms_ac_pr2_lhs !============================================================================= - pure subroutine wp2_term_dp1_lhs( nz, ngrdcol, & + subroutine wp2_term_dp1_lhs( nz, ngrdcol, & C1_Skw_fnc, invrs_tau1m, & lhs_dp1_wp2 ) @@ -3241,7 +3256,7 @@ pure subroutine wp2_term_dp1_lhs( nz, ngrdcol, & end subroutine wp2_term_dp1_lhs !============================================================================= - pure subroutine wp2_term_pr1_lhs( nz, ngrdcol, C4, invrs_tau_C4_zm, & + subroutine wp2_term_pr1_lhs( nz, ngrdcol, C4, invrs_tau_C4_zm, & lhs_pr1_wp2 ) ! Description @@ -3342,7 +3357,7 @@ pure subroutine wp2_term_pr1_lhs( nz, ngrdcol, C4, invrs_tau_C4_zm, & end subroutine wp2_term_pr1_lhs !============================================================================= - pure subroutine wp2_terms_bp_pr2_rhs( nz, ngrdcol, C_uu_buoy, & + subroutine wp2_terms_bp_pr2_rhs( nz, ngrdcol, C_uu_buoy, & thv_ds_zm, wpthvp, & rhs_bp_pr2_wp2 ) @@ -3435,7 +3450,7 @@ pure subroutine wp2_terms_bp_pr2_rhs( nz, ngrdcol, C_uu_buoy, & end subroutine wp2_terms_bp_pr2_rhs !============================================================================= - pure subroutine wp2_term_dp1_rhs( nz, ngrdcol, C1_Skw_fnc, & + subroutine wp2_term_dp1_rhs( nz, ngrdcol, C1_Skw_fnc, & invrs_tau1m, threshold, up2, vp2, & l_damp_wp2_using_em, & rhs_dp1_wp2 ) @@ -3547,7 +3562,7 @@ pure subroutine wp2_term_dp1_rhs( nz, ngrdcol, C1_Skw_fnc, & end subroutine wp2_term_dp1_rhs !============================================================================= - pure subroutine wp2_term_pr3_rhs( nz, ngrdcol, gr, C_uu_shr, C_uu_buoy, & + subroutine wp2_term_pr3_rhs( nz, ngrdcol, gr, C_uu_shr, C_uu_buoy, & thv_ds_zm, wpthvp, upwp, & um, vpwp, vm, & rhs_pr3_wp2 ) @@ -3685,7 +3700,7 @@ pure subroutine wp2_term_pr3_rhs( nz, ngrdcol, gr, C_uu_shr, C_uu_buoy, & end subroutine wp2_term_pr3_rhs !============================================================================= - pure subroutine wp2_term_pr1_rhs( nz, ngrdcol, C4, & + subroutine wp2_term_pr1_rhs( nz, ngrdcol, C4, & up2, vp2, invrs_tau_C4_zm, & rhs_pr1_wp2 ) @@ -3778,7 +3793,7 @@ pure subroutine wp2_term_pr1_rhs( nz, ngrdcol, C4, & end subroutine wp2_term_pr1_rhs !============================================================================= - pure subroutine wp2_term_pr_dfsn_rhs( nz, ngrdcol, gr, C_wp2_pr_dfsn, & + subroutine wp2_term_pr_dfsn_rhs( nz, ngrdcol, gr, C_wp2_pr_dfsn, & rho_ds_zt, invrs_rho_ds_zm, & wpup2, wpvp2, wp3, & rhs_pr_dfsn_wp2 ) @@ -3887,7 +3902,7 @@ pure subroutine wp2_term_pr_dfsn_rhs( nz, ngrdcol, gr, C_wp2_pr_dfsn, & end subroutine wp2_term_pr_dfsn_rhs !============================================================================= - pure subroutine wp3_term_ta_new_pdf_lhs( nz, ngrdcol, 
gr, coef_wp4_implicit, & + subroutine wp3_term_ta_new_pdf_lhs( nz, ngrdcol, gr, coef_wp4_implicit, & wp2, rho_ds_zm, invrs_rho_ds_zt, & lhs_ta_wp3 ) @@ -4047,7 +4062,7 @@ pure subroutine wp3_term_ta_new_pdf_lhs( nz, ngrdcol, gr, coef_wp4_implicit, & end subroutine wp3_term_ta_new_pdf_lhs !============================================================================= - pure subroutine wp3_term_ta_ADG1_lhs( nz, ngrdcol, gr, & + subroutine wp3_term_ta_ADG1_lhs( nz, ngrdcol, gr, & wp2, a1, a1_zt, a3, a3_zt, & wp3_on_wp2, rho_ds_zm, & rho_ds_zt, invrs_rho_ds_zt, & @@ -4384,7 +4399,7 @@ pure subroutine wp3_term_ta_ADG1_lhs( nz, ngrdcol, gr, & end subroutine wp3_term_ta_ADG1_lhs !============================================================================= - pure subroutine wp3_term_tp_lhs( nz, ngrdcol, gr, coef_wp3_tp, & + subroutine wp3_term_tp_lhs( nz, ngrdcol, gr, coef_wp3_tp, & wp2, rho_ds_zm, invrs_rho_ds_zt, & lhs_tp_wp3 ) @@ -4536,7 +4551,7 @@ pure subroutine wp3_term_tp_lhs( nz, ngrdcol, gr, coef_wp3_tp, & end subroutine wp3_term_tp_lhs !============================================================================= - pure subroutine wp3_terms_ac_pr2_lhs( nz, ngrdcol, gr, C11_Skw_fnc, wm_zm, & + subroutine wp3_terms_ac_pr2_lhs( nz, ngrdcol, gr, C11_Skw_fnc, wm_zm, & lhs_ac_pr2_wp3 ) ! Description: @@ -4655,7 +4670,7 @@ pure subroutine wp3_terms_ac_pr2_lhs( nz, ngrdcol, gr, C11_Skw_fnc, wm_zm, & end subroutine wp3_terms_ac_pr2_lhs !============================================================================= - pure subroutine wp3_term_pr1_lhs( nz, ngrdcol, C8, C8b, & + subroutine wp3_term_pr1_lhs( nz, ngrdcol, C8, C8b, & invrs_tau_wp3_zt, Skw_zt, & l_damp_wp3_Skw_squared, & lhs_pr1_wp3 ) @@ -4787,7 +4802,7 @@ pure subroutine wp3_term_pr1_lhs( nz, ngrdcol, C8, C8b, & end subroutine wp3_term_pr1_lhs !============================================================================= - pure subroutine wp3_term_ta_explicit_rhs( nz, ngrdcol, gr, & + subroutine wp3_term_ta_explicit_rhs( nz, ngrdcol, gr, & wp4, rho_ds_zm, invrs_rho_ds_zt, & rhs_ta_wp3 ) @@ -4898,7 +4913,7 @@ pure subroutine wp3_term_ta_explicit_rhs( nz, ngrdcol, gr, & end subroutine wp3_term_ta_explicit_rhs !============================================================================= - pure subroutine wp3_terms_bp1_pr2_rhs( nz, ngrdcol, C11_Skw_fnc, & + subroutine wp3_terms_bp1_pr2_rhs( nz, ngrdcol, C11_Skw_fnc, & thv_ds_zt, wp2thvp, & rhs_bp1_pr2_wp3 ) @@ -4986,7 +5001,7 @@ pure subroutine wp3_terms_bp1_pr2_rhs( nz, ngrdcol, C11_Skw_fnc, & end subroutine wp3_terms_bp1_pr2_rhs !============================================================================= - pure subroutine wp3_term_pr_turb_rhs( nz, ngrdcol, gr, C_wp3_pr_turb, Kh_zt, wpthvp, & + subroutine wp3_term_pr_turb_rhs( nz, ngrdcol, gr, C_wp3_pr_turb, Kh_zt, wpthvp, & dum_dz, dvm_dz, & upwp, vpwp, & thv_ds_zt, & @@ -5106,7 +5121,7 @@ pure subroutine wp3_term_pr_turb_rhs( nz, ngrdcol, gr, C_wp3_pr_turb, Kh_zt, wpt end subroutine wp3_term_pr_turb_rhs !============================================================================= - pure subroutine wp3_term_pr_dfsn_rhs( nz, ngrdcol, gr, C_wp3_pr_dfsn, & + subroutine wp3_term_pr_dfsn_rhs( nz, ngrdcol, gr, C_wp3_pr_dfsn, & rho_ds_zm, invrs_rho_ds_zt, & wp2up2, wp2vp2, wp4, & up2, vp2, wp2, & @@ -5220,7 +5235,7 @@ pure subroutine wp3_term_pr_dfsn_rhs( nz, ngrdcol, gr, C_wp3_pr_dfsn, & end subroutine wp3_term_pr_dfsn_rhs !============================================================================= - pure subroutine wp3_term_pr1_rhs( nz, 
ngrdcol, gr, C8, C8b, & + subroutine wp3_term_pr1_rhs( nz, ngrdcol, gr, C8, C8b, & invrs_tau_wp3_zt, Skw_zt, wp3, & l_damp_wp3_Skw_squared, & rhs_pr1_wp3 ) diff --git a/src/CLUBB_core/advance_xm_wpxp_module.F90 b/src/CLUBB_core/advance_xm_wpxp_module.F90 index 78648bbad..ca7d671c7 100644 --- a/src/CLUBB_core/advance_xm_wpxp_module.F90 +++ b/src/CLUBB_core/advance_xm_wpxp_module.F90 @@ -5052,7 +5052,7 @@ subroutine xm_wpxp_clipping_and_stats( & end subroutine xm_wpxp_clipping_and_stats !============================================================================= - pure subroutine xm_term_ta_lhs( nz, ngrdcol, gr, & + subroutine xm_term_ta_lhs( nz, ngrdcol, gr, & rho_ds_zm, invrs_rho_ds_zt, & lhs_ta_xm ) @@ -5164,7 +5164,7 @@ pure subroutine xm_term_ta_lhs( nz, ngrdcol, gr, & end subroutine xm_term_ta_lhs !============================================================================= - pure subroutine wpxp_term_tp_lhs( nz, ngrdcol, gr, wp2, & + subroutine wpxp_term_tp_lhs( nz, ngrdcol, gr, wp2, & lhs_tp ) ! Description: @@ -5277,7 +5277,7 @@ pure subroutine wpxp_term_tp_lhs( nz, ngrdcol, gr, wp2, & end subroutine wpxp_term_tp_lhs !============================================================================= - pure subroutine wpxp_terms_ac_pr2_lhs( nz, ngrdcol, C7_Skw_fnc, & + subroutine wpxp_terms_ac_pr2_lhs( nz, ngrdcol, C7_Skw_fnc, & wm_zt, invrs_dzm, & lhs_ac_pr2 ) @@ -5392,7 +5392,7 @@ pure subroutine wpxp_terms_ac_pr2_lhs( nz, ngrdcol, C7_Skw_fnc, & end subroutine wpxp_terms_ac_pr2_lhs !============================================================================= - pure subroutine wpxp_term_pr1_lhs( nz, ngrdcol, C6rt_Skw_fnc, C6thl_Skw_fnc, C7_Skw_fnc, & + subroutine wpxp_term_pr1_lhs( nz, ngrdcol, C6rt_Skw_fnc, C6thl_Skw_fnc, C7_Skw_fnc, & invrs_tau_C6_zm, l_scalar_calc, & lhs_pr1_wprtp, lhs_pr1_wpthlp, & lhs_pr1_wpsclrp ) @@ -5522,7 +5522,7 @@ pure subroutine wpxp_term_pr1_lhs( nz, ngrdcol, C6rt_Skw_fnc, C6thl_Skw_fnc, C7_ end subroutine wpxp_term_pr1_lhs !============================================================================= - pure subroutine wpxp_terms_bp_pr3_rhs( nz, ngrdcol, C7_Skw_fnc, thv_ds_zm, xpthvp, & + subroutine wpxp_terms_bp_pr3_rhs( nz, ngrdcol, C7_Skw_fnc, thv_ds_zm, xpthvp, & rhs_bp_pr3 ) ! Description: @@ -5902,7 +5902,7 @@ end subroutine damp_coefficient !----------------------------------------------------------------------- !===================================================================================== - pure subroutine diagnose_upxp( nz, ngrdcol, gr, ypwp, xm, wpxp, ym, & + subroutine diagnose_upxp( nz, ngrdcol, gr, ypwp, xm, wpxp, ym, & C6x_Skw_fnc, tau_C6_zm, C7_Skw_fnc, & ypxp ) ! 
Description: diff --git a/src/CLUBB_core/advance_xp2_xpyp_module.F90 b/src/CLUBB_core/advance_xp2_xpyp_module.F90 index 3c91399a2..b6a09be50 100644 --- a/src/CLUBB_core/advance_xp2_xpyp_module.F90 +++ b/src/CLUBB_core/advance_xp2_xpyp_module.F90 @@ -5193,7 +5193,7 @@ subroutine calc_xp2_xpyp_ta_terms( nz, ngrdcol, gr, wprtp, wprtp2, wpthlp, wpthl end subroutine calc_xp2_xpyp_ta_terms !============================================================================= - pure function term_tp( xamp1, xam, xbmp1, xbm, & + function term_tp( xamp1, xam, xbmp1, xbm, & wpxbp, wpxap, invrs_dzm ) & result( rhs ) !$acc routine seq @@ -5255,7 +5255,7 @@ pure function term_tp( xamp1, xam, xbmp1, xbm, & end function term_tp !============================================================================= - pure function term_dp1_lhs( Cn, invrs_tau_zm ) & + function term_dp1_lhs( Cn, invrs_tau_zm ) & result( lhs ) !$acc routine seq @@ -5327,7 +5327,7 @@ pure function term_dp1_lhs( Cn, invrs_tau_zm ) & end function term_dp1_lhs !============================================================================= - pure function term_dp1_rhs( Cn, invrs_tau_zm, threshold ) & + function term_dp1_rhs( Cn, invrs_tau_zm, threshold ) & result( rhs ) !$acc routine seq @@ -5390,7 +5390,7 @@ pure function term_dp1_rhs( Cn, invrs_tau_zm, threshold ) & end function term_dp1_rhs !============================================================================= - pure function term_pr1( C4, C14, xbp2, wp2, invrs_tau_C4_zm, invrs_tau_C14_zm ) & + function term_pr1( C4, C14, xbp2, wp2, invrs_tau_C4_zm, invrs_tau_C14_zm ) & result( rhs ) !$acc routine seq diff --git a/src/CLUBB_core/advance_xp3_module.F90 b/src/CLUBB_core/advance_xp3_module.F90 index 1ab734e79..a239c3391 100644 --- a/src/CLUBB_core/advance_xp3_module.F90 +++ b/src/CLUBB_core/advance_xp3_module.F90 @@ -464,7 +464,7 @@ subroutine advance_xp3_simplified( nz, ngrdcol, gr, solve_type, dt, & ! Intent(i end subroutine advance_xp3_simplified !============================================================================= - pure function term_tp_rhs( xp2_zt, wpxp, wpxpm1, & + function term_tp_rhs( xp2_zt, wpxp, wpxpm1, & rho_ds_zm, rho_ds_zmm1, & invrs_rho_ds_zt, & invrs_dzt ) & @@ -541,7 +541,7 @@ pure function term_tp_rhs( xp2_zt, wpxp, wpxpm1, & end function term_tp_rhs !============================================================================= - pure function term_ac_rhs( xm_zm, xm_zmm1, wpxp2, & + function term_ac_rhs( xm_zm, xm_zmm1, wpxp2, & invrs_dzt ) & result( term_ac ) diff --git a/src/CLUBB_core/calc_roots.F90 b/src/CLUBB_core/calc_roots.F90 index 9c2f663b4..15eff0c5f 100644 --- a/src/CLUBB_core/calc_roots.F90 +++ b/src/CLUBB_core/calc_roots.F90 @@ -14,7 +14,7 @@ module calc_roots contains !============================================================================= - pure function cubic_solve( nz, a_coef, b_coef, c_coef, d_coef ) & + function cubic_solve( nz, a_coef, b_coef, c_coef, d_coef ) & result( roots ) ! Description: @@ -172,7 +172,7 @@ pure function cubic_solve( nz, a_coef, b_coef, c_coef, d_coef ) & end function cubic_solve !============================================================================= - pure function quadratic_solve( nz, a_coef, b_coef, c_coef ) & + function quadratic_solve( nz, a_coef, b_coef, c_coef ) & result( roots ) ! 
Description: @@ -261,7 +261,7 @@ pure function quadratic_solve( nz, a_coef, b_coef, c_coef ) & end function quadratic_solve !============================================================================= - pure function cube_root( x ) + function cube_root( x ) ! Description: ! Calculates the cube root of x. diff --git a/src/CLUBB_core/clip_explicit.F90 b/src/CLUBB_core/clip_explicit.F90 index 40825cad1..22e3842a1 100644 --- a/src/CLUBB_core/clip_explicit.F90 +++ b/src/CLUBB_core/clip_explicit.F90 @@ -970,7 +970,7 @@ subroutine clip_variance( nz, ngrdcol, gr, solve_type, dt, threshold, & end if end do end do - !$acc end parallel + !$acc end parallel loop if ( l_stats_samp ) then !$acc update host( xp2 ) @@ -1205,7 +1205,7 @@ subroutine clip_skewness_core( nz, ngrdcol, gr, sfc_elevation, & wp2_zt_cubed(i,k) = wp2_zt(i,k)**3 end do end do - !$acc end parallel + !$acc end parallel loop if ( l_use_wp3_lim_with_smth_Heaviside ) then @@ -1217,7 +1217,7 @@ subroutine clip_skewness_core( nz, ngrdcol, gr, sfc_elevation, & zagl_thresh(i,k) = zagl_thresh(i,k) - 1.0_core_rknd end do end do - !$acc end parallel + !$acc end parallel loop H_zagl(:,:) = smooth_heaviside_peskin(nz, ngrdcol, zagl_thresh(:,:), 0.6_core_rknd) @@ -1230,7 +1230,7 @@ subroutine clip_skewness_core( nz, ngrdcol, gr, sfc_elevation, & * 0.0021_core_rknd *Skw_max_mag**2 ) end do end do - !$acc end parallel + !$acc end parallel loop else ! default method @@ -1248,7 +1248,7 @@ subroutine clip_skewness_core( nz, ngrdcol, gr, sfc_elevation, & endif end do end do - !$acc end parallel + !$acc end parallel loop end if @@ -1263,7 +1263,7 @@ subroutine clip_skewness_core( nz, ngrdcol, gr, sfc_elevation, & end if end do end do - !$acc end parallel + !$acc end parallel loop ! Clipping abs(wp3) to 100. This keeps wp3 from growing too large in some ! deep convective cases, which helps prevent these cases from blowing up. @@ -1275,7 +1275,7 @@ subroutine clip_skewness_core( nz, ngrdcol, gr, sfc_elevation, & end if end do end do - !$acc end parallel + !$acc end parallel loop !$acc exit data delete( wp2_zt_cubed, wp3_lim_sqd, zagl_thresh, H_zagl ) diff --git a/src/CLUBB_core/clip_semi_implicit.F90 b/src/CLUBB_core/clip_semi_implicit.F90 index ae1f53cad..65d7947df 100644 --- a/src/CLUBB_core/clip_semi_implicit.F90 +++ b/src/CLUBB_core/clip_semi_implicit.F90 @@ -366,7 +366,7 @@ function clip_semi_imp_lhs( dt, f_unclipped, & end function clip_semi_imp_lhs !============================================================================= - pure function compute_clip_lhs( dt_clip, B_fnc ) & + function compute_clip_lhs( dt_clip, B_fnc ) & result( lhs_contribution ) ! Description: diff --git a/src/CLUBB_core/clubb_api_module.F90 b/src/CLUBB_core/clubb_api_module.F90 index 4a54552dc..afd3038a2 100644 --- a/src/CLUBB_core/clubb_api_module.F90 +++ b/src/CLUBB_core/clubb_api_module.F90 @@ -3877,7 +3877,7 @@ end function calculate_spurious_source_api !================================================================================================ ! calculate_thlp2_rad - Computes the contribution of radiative cooling to thlp2 !================================================================================================ - pure subroutine calculate_thlp2_rad_api & + subroutine calculate_thlp2_rad_api & ( nz, rcm_zm, thlprcp, radht_zm, & ! Intent(in) clubb_params, & ! Intent(in) thlp2_forcing ) ! 
Intent(inout) diff --git a/src/CLUBB_core/corr_varnce_module.F90 b/src/CLUBB_core/corr_varnce_module.F90 index cb1269c96..ea254b639 100644 --- a/src/CLUBB_core/corr_varnce_module.F90 +++ b/src/CLUBB_core/corr_varnce_module.F90 @@ -224,7 +224,7 @@ subroutine init_default_corr_arrays( ) end subroutine init_default_corr_arrays !----------------------------------------------------------------------------- - pure function def_corr_idx( iiPDF_x ) result(ii_def_corr) + function def_corr_idx( iiPDF_x ) result(ii_def_corr) ! Description: ! Map from a iiPDF index to the corresponding index in the default diff --git a/src/CLUBB_core/diffusion.F90 b/src/CLUBB_core/diffusion.F90 index f0ce296e6..3ab8793f1 100644 --- a/src/CLUBB_core/diffusion.F90 +++ b/src/CLUBB_core/diffusion.F90 @@ -31,7 +31,7 @@ module diffusion contains !============================================================================= - pure subroutine diffusion_zt_lhs( nz, ngrdcol, gr, K_zm, K_zt, nu, & ! In + subroutine diffusion_zt_lhs( nz, ngrdcol, gr, K_zm, K_zt, nu, & ! In invrs_rho_ds_zt, rho_ds_zm, & ! In lhs ) ! Out @@ -522,7 +522,7 @@ pure subroutine diffusion_zt_lhs( nz, ngrdcol, gr, K_zm, K_zt, nu, & ! In end subroutine diffusion_zt_lhs !============================================================================= - pure function diffusion_cloud_frac_zt_lhs & + function diffusion_cloud_frac_zt_lhs & ( gr, K_zm, K_zmm1, cloud_frac_zt, cloud_frac_ztm1, & cloud_frac_ztp1, cloud_frac_zm, & cloud_frac_zmm1, & @@ -693,7 +693,7 @@ pure function diffusion_cloud_frac_zt_lhs & end function diffusion_cloud_frac_zt_lhs !============================================================================= - pure subroutine diffusion_zm_lhs( nz, ngrdcol, gr, K_zt, K_zm, nu, & ! In + subroutine diffusion_zm_lhs( nz, ngrdcol, gr, K_zt, K_zm, nu, & ! In invrs_rho_ds_zm, rho_ds_zt, & ! In lhs ) ! Out diff --git a/src/CLUBB_core/fill_holes.F90 b/src/CLUBB_core/fill_holes.F90 index 81b49d48a..96f6496ae 100644 --- a/src/CLUBB_core/fill_holes.F90 +++ b/src/CLUBB_core/fill_holes.F90 @@ -120,10 +120,8 @@ subroutine fill_holes_vertical( nz, ngrdcol, num_draw_pts, threshold, upper_hf_l ! --------------------- Begin Code --------------------- - !$acc declare copyin( dz, rho_ds ) & - !$acc copy( field ) & - !$acc create( invrs_denom_integral, field_clipped, denom_integral_global, rho_ds_dz, & - !$acc numer_integral_global, field_avg_global, mass_fraction_global ) + !$acc enter data create( invrs_denom_integral, field_clipped, denom_integral_global, rho_ds_dz, & + !$acc numer_integral_global, field_avg_global, mass_fraction_global ) l_field_below_threshold = .false. @@ -140,6 +138,8 @@ subroutine fill_holes_vertical( nz, ngrdcol, num_draw_pts, threshold, upper_hf_l ! If all field values are above the specified threshold, no hole filling is required if ( .not. l_field_below_threshold ) then + !$acc exit data delete( invrs_denom_integral, field_clipped, denom_integral_global, rho_ds_dz, & + !$acc numer_integral_global, field_avg_global, mass_fraction_global ) return end if @@ -236,6 +236,8 @@ subroutine fill_holes_vertical( nz, ngrdcol, num_draw_pts, threshold, upper_hf_l ! If all field values are above the threshold, no further hole filling is required if ( .not. 
l_field_below_threshold ) then + !$acc exit data delete( invrs_denom_integral, field_clipped, denom_integral_global, rho_ds_dz, & + !$acc numer_integral_global, field_avg_global, mass_fraction_global ) return end if @@ -322,6 +324,9 @@ subroutine fill_holes_vertical( nz, ngrdcol, num_draw_pts, threshold, upper_hf_l end do !$acc end parallel loop + + !$acc exit data delete( invrs_denom_integral, field_clipped, denom_integral_global, rho_ds_dz, & + !$acc numer_integral_global, field_avg_global, mass_fraction_global ) return @@ -822,12 +827,17 @@ subroutine fill_holes_driver( gr, nz, dt, hydromet_dim, & ! Intent(in) if ( hydromet_name(1:1) == "r" .and. l_hole_fill ) then + !$acc data copyin( gr, gr%dzt, rho_ds_zt ) & + !$acc copy( hydromet(:,i) ) + ! Apply the hole filling algorithm ! upper_hf_level = nz since we are filling the zt levels call fill_holes_vertical( gr%nz, 1, num_hf_draw_points, zero_threshold, gr%nz, & ! In gr%dzt, rho_ds_zt, & ! In hydromet(:,i) ) ! InOut + !$acc end data + endif ! Variable is a mixing ratio and l_hole_fill is true endif ! hydromet(:,i) < 0 diff --git a/src/CLUBB_core/grid_class.F90 b/src/CLUBB_core/grid_class.F90 index a94de605c..7bdb7f905 100644 --- a/src/CLUBB_core/grid_class.F90 +++ b/src/CLUBB_core/grid_class.F90 @@ -1447,7 +1447,7 @@ function redirect_interpolated_azt_2D( nz, ngrdcol, gr, azm ) end function redirect_interpolated_azt_2D !============================================================================= - pure subroutine linear_interpolated_azm_2D( nz, ngrdcol, gr, azt, & + subroutine linear_interpolated_azm_2D( nz, ngrdcol, gr, azt, & linear_interpolated_azm ) ! Description: @@ -1708,7 +1708,7 @@ function cubic_interpolated_azm_2D( nz, ngrdcol, gr, azt ) end function cubic_interpolated_azm_2D !============================================================================= - pure subroutine calc_zt2zm_weights( nz, ngrdcol, & + subroutine calc_zt2zm_weights( nz, ngrdcol, & gr ) ! Description: @@ -1918,7 +1918,7 @@ pure subroutine calc_zt2zm_weights( nz, ngrdcol, & end subroutine calc_zt2zm_weights !============================================================================= - pure subroutine linear_interpolated_azt_2D( nz, ngrdcol, gr, azm, & + subroutine linear_interpolated_azt_2D( nz, ngrdcol, gr, azm, & linear_interpolated_azt ) ! Description: @@ -2073,7 +2073,7 @@ function cubic_interpolated_azt_2D( nz, ngrdcol, gr, azm ) end function cubic_interpolated_azt_2D !============================================================================= - pure subroutine calc_zm2zt_weights( nz, ngrdcol, & + subroutine calc_zm2zt_weights( nz, ngrdcol, & gr ) ! Description: @@ -2286,7 +2286,7 @@ end subroutine calc_zm2zt_weights !============================================================================= ! Wrapped in interface ddzm - pure function gradzm_2D( nz, ngrdcol, gr, azm ) + function gradzm_2D( nz, ngrdcol, gr, azm ) ! Description: ! 2D version of gradzm @@ -2339,7 +2339,7 @@ end function gradzm_2D !============================================================================= ! Wrapped in interface ddzm - pure function gradzm_1D( gr, azm ) + function gradzm_1D( gr, azm ) ! Description: ! 2D version of gradzm @@ -2380,7 +2380,7 @@ end function gradzm_1D !============================================================================= ! Wrapped in interface ddzt - pure function gradzt_2D( nz, ngrdcol, gr, azt ) + function gradzt_2D( nz, ngrdcol, gr, azt ) ! Description: ! 
2D version of gradzt @@ -2433,7 +2433,7 @@ end function gradzt_2D !============================================================================= ! Wrapped in interface ddzt - pure function gradzt_1D( gr, azt ) + function gradzt_1D( gr, azt ) ! Description: ! 2D version of gradzt @@ -2474,7 +2474,7 @@ pure function gradzt_1D( gr, azt ) end function gradzt_1D !============================================================================= - pure function flip( x, xdim ) + function flip( x, xdim ) ! Description: ! Flips a single dimension array (i.e. a vector), so the first element diff --git a/src/CLUBB_core/mean_adv.F90 b/src/CLUBB_core/mean_adv.F90 index 69a5e2f20..8535cab6a 100644 --- a/src/CLUBB_core/mean_adv.F90 +++ b/src/CLUBB_core/mean_adv.F90 @@ -30,7 +30,7 @@ module mean_adv contains !============================================================================= - pure subroutine term_ma_zt_lhs( nz, ngrdcol, wm_zt, weights_zt2zm, & ! Intent(in) + subroutine term_ma_zt_lhs( nz, ngrdcol, wm_zt, weights_zt2zm, & ! Intent(in) invrs_dzt, invrs_dzm, & ! Intent(in) l_upwind_xm_ma, & ! Intent(in) lhs_ma ) ! Intent(out) @@ -354,7 +354,7 @@ pure subroutine term_ma_zt_lhs( nz, ngrdcol, wm_zt, weights_zt2zm, & ! Intent(in end subroutine term_ma_zt_lhs !============================================================================= - pure subroutine term_ma_zm_lhs( nz, ngrdcol, wm_zm, & + subroutine term_ma_zm_lhs( nz, ngrdcol, wm_zm, & invrs_dzm, weights_zm2zt, & lhs_ma ) diff --git a/src/CLUBB_core/mono_flux_limiter.F90 b/src/CLUBB_core/mono_flux_limiter.F90 index 4b95fce9a..dcf64118e 100644 --- a/src/CLUBB_core/mono_flux_limiter.F90 +++ b/src/CLUBB_core/mono_flux_limiter.F90 @@ -1342,7 +1342,7 @@ subroutine mfl_xm_solve( nz, ngrdcol, solve_type, tridiag_solve_method, & do i = 1, ngrdcol xm(i,1) = xm(i,2) end do - !$acc end parallel + !$acc end parallel loop return end subroutine mfl_xm_solve diff --git a/src/CLUBB_core/numerical_check.F90 b/src/CLUBB_core/numerical_check.F90 index d749a4ca1..a1569eb95 100644 --- a/src/CLUBB_core/numerical_check.F90 +++ b/src/CLUBB_core/numerical_check.F90 @@ -1035,7 +1035,7 @@ end subroutine check_nan_sclr !------------------------------------------------------------------------- !----------------------------------------------------------------------- - pure function calculate_spurious_source( integral_after, integral_before, & + function calculate_spurious_source( integral_after, integral_before, & flux_top, flux_sfc, & integral_forcing, dt ) & result( spurious_source ) diff --git a/src/CLUBB_core/penta_lu_solver.F90 b/src/CLUBB_core/penta_lu_solver.F90 index f7f503d8d..2bb2aad8b 100644 --- a/src/CLUBB_core/penta_lu_solver.F90 +++ b/src/CLUBB_core/penta_lu_solver.F90 @@ -125,18 +125,18 @@ subroutine penta_lu_solve_single_rhs_multiple_lhs( ndim, ngrdcol, lhs, rhs, & upper_1, & ! First U band upper_2, & ! Second U band lower_diag_invrs, & ! Inverse of the diagonal of L - lower_1 ! First L band + lower_1, & ! First L band + lower_2 ! Second L band integer :: i, k, j ! Loop variables ! 
----------------------- Begin Code ----------------------- - !$acc data create( upper_1, upper_2, lower_1, lower_diag_invrs ) & + !$acc data create( upper_1, upper_2, lower_1, lower_2, lower_diag_invrs ) & !$acc copyin( rhs, lhs ) & !$acc copyout( soln ) - - !$acc kernels + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol lower_diag_invrs(i,1) = 1.0_core_rknd / lhs(0,i,1) upper_1(i,1) = lower_diag_invrs(i,1) * lhs(-1,i,1) @@ -147,38 +147,45 @@ subroutine penta_lu_solve_single_rhs_multiple_lhs( ndim, ngrdcol, lhs, rhs, & upper_1(i,2) = lower_diag_invrs(i,2) * ( lhs(-1,i,2) - lower_1(i,2) * upper_2(i,1) ) upper_2(i,2) = lower_diag_invrs(i,2) * lhs(-2,i,2) end do + !$acc end parallel loop + !$acc parallel loop gang vector default(present) + do i = 1, ngrdcol + do k = 3, ndim-2 + lower_2(i,k) = lhs(2,i,k) + lower_1(i,k) = lhs(1,i,k) - lower_2(i,k) * upper_1(i,k-2) - do k = 3, ndim-2 - do i = 1, ngrdcol - lower_1(i,k) = lhs(1,i,k) - lhs(2,i,k) * upper_1(i,k-2) - - lower_diag_invrs(i,k) = 1.0_core_rknd / ( lhs(0,i,k) - lhs(2,i,k) * upper_2(i,k-2) & + lower_diag_invrs(i,k) = 1.0_core_rknd / ( lhs(0,i,k) - lower_2(i,k) * upper_2(i,k-2) & - lower_1(i,k) * upper_1(i,k-1) ) - upper_1(i,k) = lower_diag_invrs(i,k) * ( lhs(-1,i,k) - lower_1(i,k) * upper_2(i,k-1) ) - upper_2(i,k) = lower_diag_invrs(i,k) * lhs(-2,i,k) + upper_1(i,k) = lower_diag_invrs(i,k) * ( lhs(-1,i,k) - lower_1(i,k) * upper_2(i,k-1) ) + upper_2(i,k) = lower_diag_invrs(i,k) * lhs(-2,i,k) end do end do + !$acc end parallel loop + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol - lower_1(i,ndim-1) = lhs(1,i,ndim-1) - lhs(2,i,ndim-1) * upper_1(i,ndim-3) + lower_2(i,ndim-1) = lhs(2,i,ndim-1) + lower_1(i,ndim-1) = lhs(1,i,ndim-1) - lower_2(i,ndim-1) * upper_1(i,ndim-3) lower_diag_invrs(i,ndim-1) = 1.0_core_rknd & - / ( lhs(0,i,ndim-1) - lhs(2,i,ndim-1) * upper_2(i,ndim-3) & + / ( lhs(0,i,ndim-1) - lower_2(i,ndim-1) * upper_2(i,ndim-3) & - lower_1(i,ndim-1) * upper_1(i,ndim-2) ) upper_1(i,ndim-1) = lower_diag_invrs(i,ndim-1) * ( lhs(-1,i,ndim-1) - lower_1(i,ndim-1) & - * upper_2(i,ndim-2) ) + * upper_2(i,ndim-2) ) - lower_1(i,ndim) = lhs(1,i,ndim) - lhs(2,i,ndim) * upper_1(i,ndim-2) + lower_2(i,ndim) = lhs(2,i,ndim) + lower_1(i,ndim) = lhs(1,i,ndim) - lower_2(i,ndim) * upper_1(i,ndim-2) lower_diag_invrs(i,ndim) = 1.0_core_rknd & - / ( lhs(0,i,ndim-1) - lhs(2,i,ndim) * upper_2(i,ndim-2) & + / ( lhs(0,i,ndim-1) - lower_2(i,ndim) * upper_2(i,ndim-2) & - lower_1(i,ndim) * upper_1(i,ndim-1) ) end do - + !$acc end parallel loop + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol soln(i,1) = lower_diag_invrs(i,1) * rhs(i,1) @@ -186,10 +193,14 @@ subroutine penta_lu_solve_single_rhs_multiple_lhs( ndim, ngrdcol, lhs, rhs, & soln(i,2) = lower_diag_invrs(i,2) * ( rhs(i,2) - lower_1(i,2) * soln(i,1) ) do k = 3, ndim - soln(i,k) = lower_diag_invrs(i,k) * ( rhs(i,k) - lhs(2,i,k) * soln(i,k-2) & + soln(i,k) = lower_diag_invrs(i,k) * ( rhs(i,k) - lower_2(i,k) * soln(i,k-2) & - lower_1(i,k) * soln(i,k-1) ) end do + end do + !$acc end parallel loop + !$acc parallel loop gang vector default(present) + do i = 1, ngrdcol soln(i,ndim-1) = soln(i,ndim-1) - upper_1(i,ndim-1) * soln(i,ndim) do k = ndim-2, 1, -1 @@ -197,8 +208,7 @@ subroutine penta_lu_solve_single_rhs_multiple_lhs( ndim, ngrdcol, lhs, rhs, & end do end do - - !$acc end kernels + !$acc end parallel loop !$acc end data @@ -237,18 +247,18 @@ subroutine penta_lu_solve_multiple_rhs_lhs( ndim, nrhs, ngrdcol, lhs, rhs, & upper_1, & ! 
First U band upper_2, & ! Second U band lower_diag_invrs, & ! Inverse of the diagonal of L - lower_1 ! First L band + lower_1, & ! First L band + lower_2 ! Second L band integer :: i, k, j ! Loop variables ! ----------------------- Begin Code ----------------------- - !$acc data create( upper_1, upper_2, lower_1, lower_diag_invrs ) & + !$acc data create( upper_1, upper_2, lower_1, lower_2, lower_diag_invrs ) & !$acc copyin( rhs, lhs ) & !$acc copyout( soln ) - !$acc kernels - + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol lower_diag_invrs(i,1) = 1.0_core_rknd / lhs(0,i,1) upper_1(i,1) = lower_diag_invrs(i,1) * lhs(-1,i,1) @@ -259,38 +269,45 @@ subroutine penta_lu_solve_multiple_rhs_lhs( ndim, nrhs, ngrdcol, lhs, rhs, & upper_1(i,2) = lower_diag_invrs(i,2) * ( lhs(-1,i,2) - lower_1(i,2) * upper_2(i,1) ) upper_2(i,2) = lower_diag_invrs(i,2) * lhs(-2,i,2) end do + !$acc end parallel loop + !$acc parallel loop gang vector default(present) + do i = 1, ngrdcol + do k = 3, ndim-2 + lower_2(i,k) = lhs(2,i,k) + lower_1(i,k) = lhs(1,i,k) - lower_2(i,k) * upper_1(i,k-2) - do k = 3, ndim-2 - do i = 1, ngrdcol - lower_1(i,k) = lhs(1,i,k) - lhs(2,i,k) * upper_1(i,k-2) - - lower_diag_invrs(i,k) = 1.0_core_rknd / ( lhs(0,i,k) - lhs(2,i,k) * upper_2(i,k-2) & + lower_diag_invrs(i,k) = 1.0_core_rknd / ( lhs(0,i,k) - lower_2(i,k) * upper_2(i,k-2) & - lower_1(i,k) * upper_1(i,k-1) ) - upper_1(i,k) = lower_diag_invrs(i,k) * ( lhs(-1,i,k) - lower_1(i,k) * upper_2(i,k-1) ) - upper_2(i,k) = lower_diag_invrs(i,k) * lhs(-2,i,k) + upper_1(i,k) = lower_diag_invrs(i,k) * ( lhs(-1,i,k) - lower_1(i,k) * upper_2(i,k-1) ) + upper_2(i,k) = lower_diag_invrs(i,k) * lhs(-2,i,k) end do end do + !$acc end parallel loop + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol - lower_1(i,ndim-1) = lhs(1,i,ndim-1) - lhs(2,i,ndim-1) * upper_1(i,ndim-3) + lower_2(i,ndim-1) = lhs(2,i,ndim-1) + lower_1(i,ndim-1) = lhs(1,i,ndim-1) - lower_2(i,ndim-1) * upper_1(i,ndim-3) lower_diag_invrs(i,ndim-1) = 1.0_core_rknd & - / ( lhs(0,i,ndim-1) - lhs(2,i,ndim-1) * upper_2(i,ndim-3) & + / ( lhs(0,i,ndim-1) - lower_2(i,ndim-1) * upper_2(i,ndim-3) & - lower_1(i,ndim-1) * upper_1(i,ndim-2) ) upper_1(i,ndim-1) = lower_diag_invrs(i,ndim-1) * ( lhs(-1,i,ndim-1) - lower_1(i,ndim-1) & - * upper_2(i,ndim-2) ) + * upper_2(i,ndim-2) ) - lower_1(i,ndim) = lhs(1,i,ndim) - lhs(2,i,ndim) * upper_1(i,ndim-2) + lower_2(i,ndim) = lhs(2,i,ndim) + lower_1(i,ndim) = lhs(1,i,ndim) - lower_2(i,ndim) * upper_1(i,ndim-2) lower_diag_invrs(i,ndim) = 1.0_core_rknd & - / ( lhs(0,i,ndim-1) - lhs(2,i,ndim) * upper_2(i,ndim-2) & + / ( lhs(0,i,ndim-1) - lower_2(i,ndim) * upper_2(i,ndim-2) & - lower_1( i,ndim) * upper_1(i,ndim-1) ) end do + !$acc end parallel loop - + !$acc parallel loop gang vector collapse(2) default(present) do j = 1, nrhs do i = 1, ngrdcol @@ -299,10 +316,16 @@ subroutine penta_lu_solve_multiple_rhs_lhs( ndim, nrhs, ngrdcol, lhs, rhs, & soln(i,2,j) = lower_diag_invrs(i,2) * ( rhs(i,2,j) - lower_1(i,2) * soln(i,1,j) ) do k = 3, ndim - soln(i,k,j) = lower_diag_invrs(i,k) * ( rhs(i,k,j) - lhs(2,i,k) * soln(i,k-2,j) & + soln(i,k,j) = lower_diag_invrs(i,k) * ( rhs(i,k,j) - lower_2(i,k) * soln(i,k-2,j) & - lower_1(i,k) * soln(i,k-1,j) ) end do + end do + end do + !$acc end parallel loop + !$acc parallel loop gang vector collapse(2) default(present) + do j = 1, nrhs + do i = 1, ngrdcol soln(i,ndim-1,j) = soln(i,ndim-1,j) - upper_1(i,ndim-1) * soln(i,ndim,j) do k = ndim-2, 1, -1 @@ -311,8 +334,7 @@ subroutine 
penta_lu_solve_multiple_rhs_lhs( ndim, nrhs, ngrdcol, lhs, rhs, & end do end do - - !$acc end kernels + !$acc end parallel loop !$acc end data diff --git a/src/CLUBB_core/tridiag_lu_solver.F90 b/src/CLUBB_core/tridiag_lu_solver.F90 index 3aefdfaf3..6bcd094b2 100644 --- a/src/CLUBB_core/tridiag_lu_solver.F90 +++ b/src/CLUBB_core/tridiag_lu_solver.F90 @@ -123,8 +123,8 @@ subroutine tridiag_lu_solve_single_rhs_lhs( ndim, lhs, rhs, & upper(1) = lower_diag_invrs(1) * lhs(-1,1) do k = 2, ndim-1 - lower_diag_invrs(k) = 1.0_core_rknd / ( lhs(0,k) - lhs(1,k) * upper(k-1) ) - upper(k) = lower_diag_invrs(k) * lhs(-1,k) + lower_diag_invrs(k) = 1.0_core_rknd / ( lhs(0,k) - lhs(1,k) * upper(k-1) ) + upper(k) = lower_diag_invrs(k) * lhs(-1,k) end do lower_diag_invrs(ndim) = 1.0_core_rknd / ( lhs(0,ndim) - lhs(1,ndim) * upper(ndim-1) ) @@ -139,7 +139,6 @@ subroutine tridiag_lu_solve_single_rhs_lhs( ndim, lhs, rhs, & soln(k) = soln(k) - upper(k) * soln(k+1) end do - !$acc end kernels !$acc end data @@ -185,26 +184,29 @@ subroutine tridiag_lu_solve_single_rhs_multiple_lhs( ndim, ngrdcol, lhs, rhs, & !$acc copyin( rhs, lhs ) & !$acc copyout( soln ) - !$acc kernels - + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol lower_diag_invrs(i,1) = 1.0_core_rknd / lhs(0,i,1) upper(i,1) = lower_diag_invrs(i,1) * lhs(-1,i,1) end do + !$acc end parallel loop - + !$acc parallel loop gang vector default(present) do k = 2, ndim-1 do i = 1, ngrdcol lower_diag_invrs(i,k) = 1.0_core_rknd / ( lhs(0,i,k) - lhs(1,i,k) * upper(i,k-1) ) upper(i,k) = lower_diag_invrs(i,k) * lhs(-1,i,k) end do end do + !$acc end parallel loop + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol lower_diag_invrs(i,ndim) = 1.0_core_rknd / ( lhs(0,i,ndim) - lhs(1,i,ndim) * upper(i,ndim-1) ) end do + !$acc end parallel loop - + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol soln(i,1) = lower_diag_invrs(i,1) * rhs(i,1) @@ -212,14 +214,17 @@ subroutine tridiag_lu_solve_single_rhs_multiple_lhs( ndim, ngrdcol, lhs, rhs, & do k = 2, ndim soln(i,k) = lower_diag_invrs(i,k) * ( rhs(i,k) - lhs(1,i,k) * soln(i,k-1) ) end do + end do + !$acc end parallel loop + !$acc parallel loop gang vector default(present) + do i = 1, ngrdcol do k = ndim-1, 1, -1 soln(i,k) = soln(i,k) - upper(i,k) * soln(i,k+1) end do end do - - !$acc end kernels + !$acc end parallel loop !$acc end data @@ -266,26 +271,29 @@ subroutine tridiag_lu_solve_multiple_rhs_lhs( ndim, nrhs, ngrdcol, lhs, rhs, & !$acc copyin( rhs, lhs ) & !$acc copyout( soln ) - !$acc kernels - + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol lower_diag_invrs(i,1) = 1.0_core_rknd / lhs(0,i,1) upper(i,1) = lower_diag_invrs(i,1) * lhs(-1,i,1) end do + !$acc end parallel loop - - do k = 2, ndim-1 - do i = 1, ngrdcol + !$acc parallel loop gang vector default(present) + do i = 1, ngrdcol + do k = 2, ndim-1 lower_diag_invrs(i,k) = 1.0_core_rknd / ( lhs(0,i,k) - lhs(1,i,k) * upper(i,k-1) ) upper(i,k) = lower_diag_invrs(i,k) * lhs(-1,i,k) end do end do + !$acc end parallel loop + !$acc parallel loop gang vector default(present) do i = 1, ngrdcol lower_diag_invrs(i,ndim) = 1.0_core_rknd / ( lhs(0,i,ndim) - lhs(1,i,ndim) * upper(i,ndim-1) ) end do + !$acc end parallel loop - + !$acc parallel loop gang vector collapse(2) default(present) do j = 1, nrhs do i = 1, ngrdcol @@ -294,15 +302,19 @@ subroutine tridiag_lu_solve_multiple_rhs_lhs( ndim, nrhs, ngrdcol, lhs, rhs, & do k = 2, ndim soln(i,k,j) = lower_diag_invrs(i,k) * ( rhs(i,k,j) - lhs(1,i,k) * 
soln(i,k-1,j) ) end do + end do + end do + !$acc end parallel loop + !$acc parallel loop gang vector collapse(2) default(present) + do j = 1, nrhs + do i = 1, ngrdcol do k = ndim-1, 1, -1 soln(i,k,j) = soln(i,k,j) - upper(i,k) * soln(i,k+1,j) end do - end do end do - - !$acc end kernels + !$acc end parallel loop !$acc end data diff --git a/src/CLUBB_core/turbulent_adv_pdf.F90 b/src/CLUBB_core/turbulent_adv_pdf.F90 index 46eb4241c..2d340b82d 100644 --- a/src/CLUBB_core/turbulent_adv_pdf.F90 +++ b/src/CLUBB_core/turbulent_adv_pdf.F90 @@ -30,7 +30,7 @@ module turbulent_adv_pdf contains !============================================================================= - pure subroutine xpyp_term_ta_pdf_lhs( nz, ngrdcol, gr, coef_wpxpyp_implicit, & ! In + subroutine xpyp_term_ta_pdf_lhs( nz, ngrdcol, gr, coef_wpxpyp_implicit, & ! In rho_ds_zt, rho_ds_zm, & ! In invrs_rho_ds_zm, & ! In l_upwind_xpyp_turbulent_adv, & ! In @@ -478,7 +478,7 @@ pure subroutine xpyp_term_ta_pdf_lhs( nz, ngrdcol, gr, coef_wpxpyp_implicit, & ! end subroutine xpyp_term_ta_pdf_lhs !============================================================================================= - pure subroutine xpyp_term_ta_pdf_lhs_godunov( nz, ngrdcol, gr, & ! Intent(in) + subroutine xpyp_term_ta_pdf_lhs_godunov( nz, ngrdcol, gr, & ! Intent(in) coef_wpxpyp_implicit, & ! Intent(in) invrs_rho_ds_zm, rho_ds_zm, & ! Intent(in) lhs_ta ) @@ -589,7 +589,7 @@ pure subroutine xpyp_term_ta_pdf_lhs_godunov( nz, ngrdcol, gr, & ! Intent(in) end subroutine xpyp_term_ta_pdf_lhs_godunov !============================================================================= - pure subroutine xpyp_term_ta_pdf_rhs( nz, ngrdcol, gr, term_wpxpyp_explicit, & ! In + subroutine xpyp_term_ta_pdf_rhs( nz, ngrdcol, gr, term_wpxpyp_explicit, & ! In rho_ds_zt, rho_ds_zm, & ! In invrs_rho_ds_zm, & ! In l_upwind_xpyp_turbulent_adv, & ! In @@ -960,7 +960,7 @@ pure subroutine xpyp_term_ta_pdf_rhs( nz, ngrdcol, gr, term_wpxpyp_explicit, & end subroutine xpyp_term_ta_pdf_rhs !============================================================================= - pure subroutine xpyp_term_ta_pdf_rhs_godunov( nz, ngrdcol, gr, & ! Intent(in) + subroutine xpyp_term_ta_pdf_rhs_godunov( nz, ngrdcol, gr, & ! Intent(in) term_wpxpyp_explicit_zm, & ! Intent(in) invrs_rho_ds_zm, & ! Intent(in) sgn_turbulent_vel, & ! Intent(in)