From 55eae6f16ef1590bc31895a3cb9dc430cb11fec7 Mon Sep 17 00:00:00 2001 From: mlee03 Date: Tue, 14 Nov 2023 12:46:51 -0500 Subject: [PATCH 1/9] first step --- tools/libfrencutils/create_xgrid_acc.c | 125 +++++++++++++++++++++++++ tools/libfrencutils/create_xgrid_acc.h | 5 + 2 files changed, 130 insertions(+) diff --git a/tools/libfrencutils/create_xgrid_acc.c b/tools/libfrencutils/create_xgrid_acc.c index dcf44397..ef2f8316 100644 --- a/tools/libfrencutils/create_xgrid_acc.c +++ b/tools/libfrencutils/create_xgrid_acc.c @@ -33,6 +33,131 @@ #define EPSLN30 (1.0e-30) #define EPSLN10 (1.0e-10) + +/******************************************************************************* + prepare_create_xgrid_2dx2d_order2_ac +*******************************************************************************/ +int prepare_create_xgrid_2dx2d_order2_acc(const int *nlon_in, const int *nlat_in, const int *nlon_out, const int *nlat_out, + const double *lon_in, const double *lat_in, const double *lon_out, const double *lat_out, + Minmaxavg_lists *out_minmaxavg_lists, const double *mask_in, + int *counts_per_ij1, int *ij2_start, int *ij2_end, int nxgrid) + +{ + +#define MAX_V 8 + int nx1, nx2, ny1, ny2, nx1p, nx2p; + double *area_in, *area_out; + int n=0; + + nx1 = *nlon_in; + ny1 = *nlat_in; + nx2 = *nlon_out; + ny2 = *nlat_out; + nx1p = nx1 + 1; + nx2p = nx2 + 1; + + nxgrid = 0; + +#pragma acc data present(lon_out[0:(nx2+1)*(ny2+1)], lat_out[0:(nx2+1)*(ny2+1)]) +#pragma acc data present(lon_in[0:(nx1+1)*(ny1+1)], lat_in[0:(nx1+1)*(ny1+1)], mask_in[0:nx1*ny1]) +#pragma acc data present(out_minmaxavg_lists->lon_list[0:MAX_V*nx2*ny2], out_minmaxavg_lists->lat_list[0:MAX_V*nx2*ny2]) +#pragma acc data present(out_minmaxavg_lists->n_list[0:nx2*ny2], out_minmaxavg_lists->lon_avg[0:nx2*ny2]) +#pragma acc data present(out_minmaxavg_lists->lat_min_list[0:nx2*ny2], out_minmaxavg_lists->lat_max_list[0:nx2*ny2]) +#pragma acc data present(out_minmaxavg_lists->lon_min_list[0:nx2*ny2], out_minmaxavg_lists->lon_max_list[0:nx2*ny2]) +#pragma acc data present(counts_per_ij1[nx1*ny1], ij2_start[nx1*ny1], ij2_end[nx1*ny1]) +#pragma acc data copy(nxgrid) +#pragma acc parallel +{ +#pragma acc loop independent reduction(+:nxgrid) + for( int ij1=0 ; ij1 < nx1*ny1 ; ij1++ ) { + + int icount=0; + int ij2_max=0, ij2_min=nx2*ny2+1; + + if( mask_in[ij1] > MASK_THRESH ) { + int i1, j1; + int n0, n1, n2, n3, n1_in; + double lat_in_min,lat_in_max,lon_in_min,lon_in_max,lon_in_avg; + double x1_in[MV], y1_in[MV]; + + counts_per_ij1[ij1]=0; + + i1=ij1%nx1; + j1=ij1/nx1; + + n0 = j1*nx1p+i1; n1 = j1*nx1p+i1+1; + n2 = (j1+1)*nx1p+i1+1; n3 = (j1+1)*nx1p+i1; + x1_in[0] = lon_in[n0]; y1_in[0] = lat_in[n0]; + x1_in[1] = lon_in[n1]; y1_in[1] = lat_in[n1]; + x1_in[2] = lon_in[n2]; y1_in[2] = lat_in[n2]; + x1_in[3] = lon_in[n3]; y1_in[3] = lat_in[n3]; + lat_in_min = minval_double(4, y1_in); + lat_in_max = maxval_double(4, y1_in); + n1_in = fix_lon(x1_in, y1_in, 4, M_PI); + lon_in_min = minval_double(n1_in, x1_in); + lon_in_max = maxval_double(n1_in, x1_in); + lon_in_avg = avgval_double(n1_in, x1_in); + +#pragma acc loop independent reduction(+:nxgrid) reduction(+:icount) reduction(max:ij2_max) reduction(min:ij2_min) + for(int ij2=0; ij2lat_min_list[ij2] >= lat_in_max || out_minmaxavg_lists->lat_max_list[ij2] <= lat_in_min ) continue; + + /* adjust x2_in according to lon_in_avg*/ + n2_in = out_minmaxavg_lists->n_list[ij2]; +#pragma acc loop seq + for(l=0; llon_list[ij2*MAX_V+l]; + y2_in[l] = out_minmaxavg_lists->lat_list[ij2*MAX_V+l]; + } + lon_out_min = out_minmaxavg_lists->lon_min_list[ij2]; + lon_out_max = out_minmaxavg_lists->lon_max_list[ij2]; + dx = out_minmaxavg_lists->lon_avg[ij2] - lon_in_avg; + if(dx < -M_PI ) { + lon_out_min += TPI; + lon_out_max += TPI; +#pragma acc loop seq + for (l=0; l M_PI) { + lon_out_min -= TPI; + lon_out_max -= TPI; +#pragma acc loop seq + for (l=0; l= lon_in_max || lon_out_max <= lon_in_min ) continue; + + nxgrid++; + icount++; + ij2_max=max(ij2_max,ij2); + ij2_min=min(ij2_min,ij2); + + } //ij2 + + counts_per_ij1[ij1]=icount; + ij2_end[ij1]=ij2_max; + ij2_start[ij1]=ij2_min; + + } //if + } // ij1 + } + + return nxgrid; + +} /* prepare_create_xgrid_2dx2d_order2 + + /******************************************************************************* create_xgrid_2dx2d_order2 OPENACC version *******************************************************************************/ diff --git a/tools/libfrencutils/create_xgrid_acc.h b/tools/libfrencutils/create_xgrid_acc.h index 0913f0ff..7bc98b6d 100644 --- a/tools/libfrencutils/create_xgrid_acc.h +++ b/tools/libfrencutils/create_xgrid_acc.h @@ -22,6 +22,11 @@ #include "globals.h" +int prepare_create_xgrid_2dx2d_order2_acc(const int *nlon_in, const int *nlat_in, const int *nlon_out, const int *nlat_out, + const double *lon_in, const double *lat_in, const double *lon_out, const double *lat_out, + Minmaxavg_lists *out_minmaxavg_lists, const double *mask_in, + int *counts_per_ij2, int *ij2_start, int *ij2_end, int nxgrid) ; + int create_xgrid_2dx2d_order2_acc(const int *nlon_in, const int *nlat_in, const int *nlon_out, const int *nlat_out, const double *lon_in, const double *lat_in, const double *lon_out, const double *lat_out, Minmaxavg_lists *out_minmaxavg_lists, const double *mask_in, From 90e8606ead64ec363de76d6fea3f796693af4d1b Mon Sep 17 00:00:00 2001 From: mlee03 Date: Thu, 16 Nov 2023 10:41:38 -0500 Subject: [PATCH 2/9] fix prepare_create --- tools/fregrid/conserve_interp.c | 34 +++-- tools/libfrencutils/create_xgrid.c | 170 ------------------------- tools/libfrencutils/create_xgrid_acc.c | 138 ++++++++++---------- tools/libfrencutils/create_xgrid_acc.h | 5 +- 4 files changed, 96 insertions(+), 251 deletions(-) diff --git a/tools/fregrid/conserve_interp.c b/tools/fregrid/conserve_interp.c index c05b5035..a8e5d86e 100644 --- a/tools/fregrid/conserve_interp.c +++ b/tools/fregrid/conserve_interp.c @@ -871,13 +871,15 @@ void do_create_xgrid_order2( const int n, const int m, const Grid_config *grid_i double *xgrid_area=NULL, *xgrid_clon=NULL, *xgrid_clat=NULL ; double *mask; - int mxxgrid, nxgrid; + int mxxgrid, nxgrid, approx_nxgrid; int nx_out, ny_out, nx_in, ny_in ; int jstart, jend, ny_now; int zero=0; clock_t time_start, time_end, time_nxgrid; + int *counts_per_ij1, *ij2_start, *ij2_end; + time_start = clock(); nx_out = grid_out[n].nxc; @@ -885,29 +887,43 @@ void do_create_xgrid_order2( const int n, const int m, const Grid_config *grid_i nx_in = grid_in[m].nx; ny_in = grid_in[m].ny; + get_jstart_jend( nx_out, ny_out, nx_in, ny_in, + grid_out[n].latc, grid_in[m].latc, &jstart, &jend, &ny_now); + mask = (double *)malloc(nx_in*ny_in*sizeof(double)); for(int i=0; i MAX_V) error_handler("create_xgrid.c: n2_in is greater than MAX_V"); - lon_out_min_list[n] = minval_double(n2_in, x2_in); - lon_out_max_list[n] = maxval_double(n2_in, x2_in); - lon_out_avg[n] = avgval_double(n2_in, x2_in); - n2_list[n] = n2_in; -#pragma acc loop independent - for(l=0; l MASK_THRESH ) { - int n0, n1, n2, n3, n1_in; - double lat_in_min,lat_in_max,lon_in_min,lon_in_max,lon_in_avg; - double x1_in[MV], y1_in[MV]; - n0 = j1*nx1p+i1; n1 = j1*nx1p+i1+1; - n2 = (j1+1)*nx1p+i1+1; n3 = (j1+1)*nx1p+i1; - x1_in[0] = lon_in[n0]; y1_in[0] = lat_in[n0]; - x1_in[1] = lon_in[n1]; y1_in[1] = lat_in[n1]; - x1_in[2] = lon_in[n2]; y1_in[2] = lat_in[n2]; - x1_in[3] = lon_in[n3]; y1_in[3] = lat_in[n3]; - lat_in_min = minval_double(4, y1_in); - lat_in_max = maxval_double(4, y1_in); - n1_in = fix_lon(x1_in, y1_in, 4, M_PI); - lon_in_min = minval_double(n1_in, x1_in); - lon_in_max = maxval_double(n1_in, x1_in); - lon_in_avg = avgval_double(n1_in, x1_in); -#pragma acc loop independent //reduction(+:nxgrid) - for(ij=0; ij= lat_in_max || lat_out_max_list[ij] <= lat_in_min ) continue; - /* adjust x2_in according to lon_in_avg*/ - n2_in = n2_list[ij]; -#pragma acc loop seq - for(l=0; l M_PI) { - lon_out_min -= TPI; - lon_out_max -= TPI; -#pragma acc loop seq - for (l=0; l= lon_in_max || lon_out_max <= lon_in_min ) continue; - n_out = 1; - if ( (n_out = clip_2dx2d( x1_in, y1_in, n1_in, x2_in, y2_in, n2_in, x_out, y_out )) > 0) { - double min_area; - xarea = poly_area (x_out, y_out, n_out ) * mask_in[j1*nx1+i1]; - min_area = min(area_in[j1*nx1+i1], area_out[j2*nx2+i2]); - if( xarea/min_area > AREA_RATIO_THRESH ) { - xgrid_area[nxgrid] = xarea; - xgrid_clon[nxgrid] = poly_ctrlon(x_out, y_out, n_out, lon_in_avg); - xgrid_clat[nxgrid] = poly_ctrlat (x_out, y_out, n_out ); - i_in[nxgrid] = i1; - j_in[nxgrid] = j1; - i_out[nxgrid] = i2; - j_out[nxgrid] = j2; -#pragma atomic update - nxgrid++; - } - } - } - } - } - - - free(area_in); - free(area_out); - free(lon_out_min_list); - free(lon_out_max_list); - free(lat_out_min_list); - free(lat_out_max_list); - free(lon_out_avg); - free(n2_list); - free(lon_out_list); - free(lat_out_list); - - return nxgrid; - -};/* get_xgrid_2Dx2D_order2 */ -#else int create_xgrid_2dx2d_order2(const int *nlon_in, const int *nlat_in, const int *nlon_out, const int *nlat_out, const double *lon_in, const double *lat_in, const double *lon_out, const double *lat_out, const double *mask_in, int *i_in, int *j_in, int *i_out, int *j_out, @@ -1157,7 +988,6 @@ nxgrid = 0; return nxgrid; };/* get_xgrid_2Dx2D_order2 */ -#endif #ifndef __AIX int create_xgrid_great_circle_(const int *nlon_in, const int *nlat_in, const int *nlon_out, const int *nlat_out, diff --git a/tools/libfrencutils/create_xgrid_acc.c b/tools/libfrencutils/create_xgrid_acc.c index ef2f8316..dd3120ca 100644 --- a/tools/libfrencutils/create_xgrid_acc.c +++ b/tools/libfrencutils/create_xgrid_acc.c @@ -32,22 +32,20 @@ #define EPSLN8 (1.e-8) #define EPSLN30 (1.0e-30) #define EPSLN10 (1.0e-10) - +#define MAX_V 8 /******************************************************************************* - prepare_create_xgrid_2dx2d_order2_ac + prepare_create_xgrid_2dx2d_order2_acc *******************************************************************************/ int prepare_create_xgrid_2dx2d_order2_acc(const int *nlon_in, const int *nlat_in, const int *nlon_out, const int *nlat_out, const double *lon_in, const double *lat_in, const double *lon_out, const double *lat_out, Minmaxavg_lists *out_minmaxavg_lists, const double *mask_in, - int *counts_per_ij1, int *ij2_start, int *ij2_end, int nxgrid) - + int *counts_per_ij1, int *ij2_start, int *ij2_end) { #define MAX_V 8 int nx1, nx2, ny1, ny2, nx1p, nx2p; - double *area_in, *area_out; - int n=0; + size_t nxgrid; nx1 = *nlon_in; ny1 = *nlat_in; @@ -64,27 +62,28 @@ int prepare_create_xgrid_2dx2d_order2_acc(const int *nlon_in, const int *nlat_in #pragma acc data present(out_minmaxavg_lists->n_list[0:nx2*ny2], out_minmaxavg_lists->lon_avg[0:nx2*ny2]) #pragma acc data present(out_minmaxavg_lists->lat_min_list[0:nx2*ny2], out_minmaxavg_lists->lat_max_list[0:nx2*ny2]) #pragma acc data present(out_minmaxavg_lists->lon_min_list[0:nx2*ny2], out_minmaxavg_lists->lon_max_list[0:nx2*ny2]) -#pragma acc data present(counts_per_ij1[nx1*ny1], ij2_start[nx1*ny1], ij2_end[nx1*ny1]) +#pragma acc data present(counts_per_ij1[0:nx1*ny1], ij2_start[0:nx1*ny1], ij2_end[0:nx1*ny1]) #pragma acc data copy(nxgrid) -#pragma acc parallel +#pragma acc kernels { #pragma acc loop independent reduction(+:nxgrid) - for( int ij1=0 ; ij1 < nx1*ny1 ; ij1++ ) { + for( int ij1=0 ; ij1 < nx1*ny1 ; ij1++) { + + int i1, j1; + int icount=0 ; + int ij2_max=0 , ij2_min=nx2*ny2+1; - int icount=0; - int ij2_max=0, ij2_min=nx2*ny2+1; + i1 = ij1%nx1; + j1 = ij1/nx1; + + counts_per_ij1[ij1]=0; if( mask_in[ij1] > MASK_THRESH ) { - int i1, j1; + int n0, n1, n2, n3, n1_in; - double lat_in_min,lat_in_max,lon_in_min,lon_in_max,lon_in_avg; + double lat_in_min, lat_in_max, lon_in_min, lon_in_max, lon_in_avg; double x1_in[MV], y1_in[MV]; - counts_per_ij1[ij1]=0; - - i1=ij1%nx1; - j1=ij1/nx1; - n0 = j1*nx1p+i1; n1 = j1*nx1p+i1+1; n2 = (j1+1)*nx1p+i1+1; n3 = (j1+1)*nx1p+i1; x1_in[0] = lon_in[n0]; y1_in[0] = lat_in[n0]; @@ -98,11 +97,11 @@ int prepare_create_xgrid_2dx2d_order2_acc(const int *nlon_in, const int *nlat_in lon_in_max = maxval_double(n1_in, x1_in); lon_in_avg = avgval_double(n1_in, x1_in); -#pragma acc loop independent reduction(+:nxgrid) reduction(+:icount) reduction(max:ij2_max) reduction(min:ij2_min) +#pragma acc loop independent reduction(+:nxgrid) reduction(+:icount) reduction(min:ij2_min) reduction(max:ij2_max) for(int ij2=0; ij2lat_min_list[ij2] >= lat_in_max || out_minmaxavg_lists->lat_max_list[ij2] <= lat_in_min ) continue; /* adjust x2_in according to lon_in_avg*/ - n2_in = out_minmaxavg_lists->n_list[ij2]; -#pragma acc loop seq - for(l=0; llon_list[ij2*MAX_V+l]; - y2_in[l] = out_minmaxavg_lists->lat_list[ij2*MAX_V+l]; - } lon_out_min = out_minmaxavg_lists->lon_min_list[ij2]; lon_out_max = out_minmaxavg_lists->lon_max_list[ij2]; dx = out_minmaxavg_lists->lon_avg[ij2] - lon_in_avg; + if(dx < -M_PI ) { lon_out_min += TPI; lon_out_max += TPI; -#pragma acc loop seq - for (l=0; l M_PI) { lon_out_min -= TPI; lon_out_max -= TPI; -#pragma acc loop seq - for (l=0; l= lon_in_max || lon_out_max <= lon_in_min ) continue; nxgrid++; icount++; - ij2_max=max(ij2_max,ij2); - ij2_min=min(ij2_min,ij2); + ij2_min = min(ij2_min, ij2); + ij2_max = max(ij2_max, ij2); } //ij2 - counts_per_ij1[ij1]=icount; - ij2_end[ij1]=ij2_max; - ij2_start[ij1]=ij2_min; + counts_per_ij1[ij1] = icount; + ij2_start[ij1] = ij2_min ; + ij2_end[ij1] = ij2_max; - } //if - } // ij1 - } + } // mask + } //ij1 +} //kernel return nxgrid; -} /* prepare_create_xgrid_2dx2d_order2 +} /******************************************************************************* @@ -163,17 +152,18 @@ int prepare_create_xgrid_2dx2d_order2_acc(const int *nlon_in, const int *nlat_in *******************************************************************************/ int create_xgrid_2dx2d_order2_acc(const int *nlon_in, const int *nlat_in, const int *nlon_out, const int *nlat_out, const double *lon_in, const double *lat_in, const double *lon_out, const double *lat_out, - Minmaxavg_lists *out_minmaxavg_lists, const double *mask_in, + Minmaxavg_lists *out_minmaxavg_lists, const double *mask_in, const int approx_nxgrid, + const int *counts_per_ij1, const int *ij2_start, const int *ij2_end, int *i_in, int *j_in, int *i_out, int *j_out, double *xgrid_area, double *xgrid_clon, double *xgrid_clat) { #define MAX_V 8 - int nx1, nx2, ny1, ny2, nx1p, nx2p, nxgrid; + int nx1, nx2, ny1, ny2, nx1p, nx2p; double *area_in, *area_out; - int ij, i1, j1; int mxxgrid; - int n=0; + + size_t nxgrid; nx1 = *nlon_in; ny1 = *nlat_in; @@ -202,11 +192,20 @@ int create_xgrid_2dx2d_order2_acc(const int *nlon_in, const int *nlat_in, const #pragma acc data copy(nxgrid) #pragma acc kernels { -#pragma acc loop independent collapse(2) //reduction(+:nxgrid) - for(j1=0; j1 MASK_THRESH ) { +#pragma acc loop independent //reduction(+:nxgrid) + for( int ij1=0 ; ij1 MASK_THRESH ) { + int n0, n1, n2, n3, n1_in; double lat_in_min,lat_in_max,lon_in_min,lon_in_max,lon_in_avg; double x1_in[MV], y1_in[MV]; + n0 = j1*nx1p+i1; n1 = j1*nx1p+i1+1; n2 = (j1+1)*nx1p+i1+1; n3 = (j1+1)*nx1p+i1; x1_in[0] = lon_in[n0]; y1_in[0] = lat_in[n0]; @@ -215,30 +214,31 @@ int create_xgrid_2dx2d_order2_acc(const int *nlon_in, const int *nlat_in, const x1_in[3] = lon_in[n3]; y1_in[3] = lat_in[n3]; lat_in_min = minval_double(4, y1_in); lat_in_max = maxval_double(4, y1_in); + n1_in = fix_lon(x1_in, y1_in, 4, M_PI); lon_in_min = minval_double(n1_in, x1_in); lon_in_max = maxval_double(n1_in, x1_in); lon_in_avg = avgval_double(n1_in, x1_in); + #pragma acc loop independent //reduction(+:nxgrid) - for(ij=0; ijlat_min_list[ij] >= lat_in_max || out_minmaxavg_lists->lat_max_list[ij] <= lat_in_min ) continue; /* adjust x2_in according to lon_in_avg*/ - n2_in = out_minmaxavg_lists->n_list[ij]; + n2_in = out_minmaxavg_lists->n_list[ij2]; #pragma acc loop seq for(l=0; llon_list[ij*MAX_V+l]; - y2_in[l] = out_minmaxavg_lists->lat_list[ij*MAX_V+l]; + x2_in[l] = out_minmaxavg_lists->lon_list[ij2*MAX_V+l]; + y2_in[l] = out_minmaxavg_lists->lat_list[ij2*MAX_V+l]; } - lon_out_min = out_minmaxavg_lists->lon_min_list[ij]; - lon_out_max = out_minmaxavg_lists->lon_max_list[ij]; - dx = out_minmaxavg_lists->lon_avg[ij] - lon_in_avg; + lon_out_min = out_minmaxavg_lists->lon_min_list[ij2]; + lon_out_max = out_minmaxavg_lists->lon_max_list[ij2]; + dx = out_minmaxavg_lists->lon_avg[ij2] - lon_in_avg; if(dx < -M_PI ) { lon_out_min += TPI; lon_out_max += TPI; @@ -252,15 +252,12 @@ int create_xgrid_2dx2d_order2_acc(const int *nlon_in, const int *nlat_in, const for (l=0; l= lon_in_max || lon_out_max <= lon_in_min ) continue; n_out = 1; if ( (n_out = clip_2dx2d( x1_in, y1_in, n1_in, x2_in, y2_in, n2_in, x_out, y_out )) > 0) { double min_area; - xarea = poly_area (x_out, y_out, n_out ) * mask_in[j1*nx1+i1]; - min_area = min(area_in[j1*nx1+i1], area_out[j2*nx2+i2]); + xarea = poly_area (x_out, y_out, n_out ) * mask_in[ij1]; + min_area = min(area_in[ij1], area_out[ij2]); + printf("HEREEND3 %d %d\n", ij1, ij2); if( xarea/min_area > AREA_RATIO_THRESH ) { xgrid_area[nxgrid] = xarea; xgrid_clon[nxgrid] = poly_ctrlon(x_out, y_out, n_out, lon_in_avg); @@ -271,11 +268,12 @@ int create_xgrid_2dx2d_order2_acc(const int *nlon_in, const int *nlat_in, const j_out[nxgrid] = j2; #pragma atomic update nxgrid++; - } - } - } - } - } + } //if + } //if + } //ij2 + } //mask + } //ij1 +} //kernel free(area_in); free(area_out); diff --git a/tools/libfrencutils/create_xgrid_acc.h b/tools/libfrencutils/create_xgrid_acc.h index 7bc98b6d..802ed44f 100644 --- a/tools/libfrencutils/create_xgrid_acc.h +++ b/tools/libfrencutils/create_xgrid_acc.h @@ -25,11 +25,12 @@ int prepare_create_xgrid_2dx2d_order2_acc(const int *nlon_in, const int *nlat_in, const int *nlon_out, const int *nlat_out, const double *lon_in, const double *lat_in, const double *lon_out, const double *lat_out, Minmaxavg_lists *out_minmaxavg_lists, const double *mask_in, - int *counts_per_ij2, int *ij2_start, int *ij2_end, int nxgrid) ; + int *counts_per_ij1, int *ij2_start, int *ij2_end) ; int create_xgrid_2dx2d_order2_acc(const int *nlon_in, const int *nlat_in, const int *nlon_out, const int *nlat_out, const double *lon_in, const double *lat_in, const double *lon_out, const double *lat_out, - Minmaxavg_lists *out_minmaxavg_lists, const double *mask_in, + Minmaxavg_lists *out_minmaxavg_lists, const double *mask_in, const int approx_nxgrid, + const int *counts_per_ij1, const int *ij2_start, const int *ij2_end, int *i_in, int *j_in, int *i_out, int *j_out, double *xgrid_area, double *xgrid_clon, double *xgrid_clat); #endif From d84e7757f64b4f04f0877673990bec6bb4eb0403 Mon Sep 17 00:00:00 2001 From: mlee03 Date: Thu, 16 Nov 2023 13:56:01 -0500 Subject: [PATCH 3/9] in progress --- tools/fregrid/conserve_interp.c | 12 +++-- tools/libfrencutils/create_xgrid_acc.c | 67 +++++++++++++++++--------- 2 files changed, 50 insertions(+), 29 deletions(-) diff --git a/tools/fregrid/conserve_interp.c b/tools/fregrid/conserve_interp.c index a8e5d86e..4c07dbd5 100644 --- a/tools/fregrid/conserve_interp.c +++ b/tools/fregrid/conserve_interp.c @@ -897,10 +897,13 @@ void do_create_xgrid_order2( const int n, const int m, const Grid_config *grid_i ij2_start = (int *)malloc( nx_in*ny_in*sizeof(int) ); ij2_end = (int *)malloc( nx_in*ny_in*sizeof(int) ); + //mxxgrid=get_maxxgrid(); + //malloc_xgrid_arrays(mxxgrid, &i_in, &j_in, &i_out, &j_out, &xgrid_area, &xgrid_clon, &xgrid_clat); + #pragma acc enter data copyin(grid_in[m].latc[0:(nx_in+1)*(ny_in+1)], \ grid_in[m].lonc[0:(nx_in+1)*(ny_in+1)], mask[0:nx_in*ny_in]) -#pragma acc enter data create(xgrid_area[0:mxxgrid], xgrid_clon[0:mxxgrid], xgrid_clat[0:mxxgrid], \ - i_in[0:mxxgrid], j_in[0:mxxgrid], i_out[0:mxxgrid],j_out[0:mxxgrid]) + //#pragma acc enter data create(xgrid_area[0:mxxgrid], xgrid_clon[0:mxxgrid], xgrid_clat[0:mxxgrid], \ + //i_in[0:mxxgrid], j_in[0:mxxgrid], i_out[0:mxxgrid],j_out[0:mxxgrid]) #pragma acc enter data create(counts_per_ij1[0:nx_in*ny_in], ij2_start[0:nx_in*ny_in], ij2_end[0:nx_in*ny_in]) #ifdef _OPENACC @@ -909,14 +912,13 @@ void do_create_xgrid_order2( const int n, const int m, const Grid_config *grid_i grid_in[m].latc+jstart*(nx_in+1), grid_out[n].lonc, grid_out[n].latc, out_minmaxavg_lists, mask, counts_per_ij1, ij2_start, ij2_end ); - printf("acc1 %d\n", approx_nxgrid); + printf("approx %d\n", approx_nxgrid); nxgrid = create_xgrid_2dx2d_order2_acc(&nx_in, &ny_now, &nx_out, &ny_out, grid_in[m].lonc+jstart*(nx_in+1), grid_in[m].latc+jstart*(nx_in+1), grid_out[n].lonc, grid_out[n].latc, out_minmaxavg_lists, mask, approx_nxgrid, counts_per_ij1, ij2_start, ij2_end, i_in, j_in, i_out, j_out, xgrid_area, xgrid_clon, xgrid_clat); - - printf("acc2 %d\n", nxgrid); + printf("%d\n", nxgrid); #else diff --git a/tools/libfrencutils/create_xgrid_acc.c b/tools/libfrencutils/create_xgrid_acc.c index dd3120ca..1f51abed 100644 --- a/tools/libfrencutils/create_xgrid_acc.c +++ b/tools/libfrencutils/create_xgrid_acc.c @@ -161,7 +161,9 @@ int create_xgrid_2dx2d_order2_acc(const int *nlon_in, const int *nlat_in, const #define MAX_V 8 int nx1, nx2, ny1, ny2, nx1p, nx2p; double *area_in, *area_out; - int mxxgrid; + + int *ij_in_tmp, *ij_out_tmp; + double *xgrid_area_tmp, *xgrid_clon_tmp, *xgrid_clat_tmp; size_t nxgrid; @@ -171,41 +173,48 @@ int create_xgrid_2dx2d_order2_acc(const int *nlon_in, const int *nlat_in, const ny2 = *nlat_out; nx1p = nx1 + 1; nx2p = nx2 + 1; - mxxgrid = get_maxxgrid(); area_in = (double *)malloc(nx1*ny1*sizeof(double)); area_out = (double *)malloc(nx2*ny2*sizeof(double)); get_grid_area(nlon_in, nlat_in, lon_in, lat_in, area_in); get_grid_area(nlon_out, nlat_out, lon_out, lat_out, area_out); + ij_in_tmp = (int *)malloc(approx_nxgrid*sizeof(int)); + ij_out_tmp = (int *)malloc(approx_nxgrid*sizeof(int)); + xgrid_area_tmp = (double *)malloc(approx_nxgrid*sizeof(double)); + xgrid_clon_tmp = (double *)malloc(approx_nxgrid*sizeof(double)); + xgrid_clat_tmp = (double *)malloc(approx_nxgrid*sizeof(double)); + nxgrid = 0; + for(int i=0 ; ilon_list[0:MAX_V*nx2*ny2], out_minmaxavg_lists->lat_list[0:MAX_V*nx2*ny2]) #pragma acc data present(out_minmaxavg_lists->n_list[0:nx2*ny2], out_minmaxavg_lists->lon_avg[0:nx2*ny2]) #pragma acc data present(out_minmaxavg_lists->lat_min_list[0:nx2*ny2], out_minmaxavg_lists->lat_max_list[0:nx2*ny2]) #pragma acc data present(out_minmaxavg_lists->lon_min_list[0:nx2*ny2], out_minmaxavg_lists->lon_max_list[0:nx2*ny2]) -#pragma acc data present(xgrid_area[0:mxxgrid], xgrid_clon[0:mxxgrid], xgrid_clat[0:mxxgrid], \ - i_in[0:mxxgrid], j_in[0:mxxgrid], i_out[0:mxxgrid],j_out[0:mxxgrid]) +#pragma acc data copyout(xgrid_area_tmp[0:approx_nxgrid], xgrid_clon_tmp[0:approx_nxgrid], xgrid_clat_tmp[0:approx_nxgrid], \ + ij_out_tmp[0:approx_nxgrid]) +#pragma acc data copy(ij_in_tmp[0:approx_nxgrid]) #pragma acc data copyin(area_in[0:nx1*ny1], area_out[0:nx2*ny2]) #pragma acc data copy(nxgrid) -#pragma acc kernels +#pragma acc parallel { -#pragma acc loop independent //reduction(+:nxgrid) +#pragma acc loop independent reduction(+:nxgrid) for( int ij1=0 ; ij1 MASK_THRESH ) { int n0, n1, n2, n3, n1_in; - double lat_in_min,lat_in_max,lon_in_min,lon_in_max,lon_in_avg; + int i1, j1; + int ij1_start, ixgrid; + double lat_in_min, lat_in_max, lon_in_min, lon_in_max, lon_in_avg; double x1_in[MV], y1_in[MV]; + i1 = ij1%nx1; + j1 = ij1/nx1; + n0 = j1*nx1p+i1; n1 = j1*nx1p+i1+1; n2 = (j1+1)*nx1p+i1+1; n3 = (j1+1)*nx1p+i1; x1_in[0] = lon_in[n0]; y1_in[0] = lat_in[n0]; @@ -220,12 +229,22 @@ int create_xgrid_2dx2d_order2_acc(const int *nlon_in, const int *nlat_in, const lon_in_max = maxval_double(n1_in, x1_in); lon_in_avg = avgval_double(n1_in, x1_in); -#pragma acc loop independent //reduction(+:nxgrid) - for(int ij2=0; ij20) { +#pragma acc loop seq + for(int i=0 ; ilat_min_list[ij2] >= lat_in_max || out_minmaxavg_lists->lat_max_list[ij2] <= lat_in_min ) continue; + i2 = ij2%nx2; j2 = ij2/nx2; @@ -257,16 +276,13 @@ int create_xgrid_2dx2d_order2_acc(const int *nlon_in, const int *nlat_in, const double min_area; xarea = poly_area (x_out, y_out, n_out ) * mask_in[ij1]; min_area = min(area_in[ij1], area_out[ij2]); - printf("HEREEND3 %d %d\n", ij1, ij2); if( xarea/min_area > AREA_RATIO_THRESH ) { - xgrid_area[nxgrid] = xarea; - xgrid_clon[nxgrid] = poly_ctrlon(x_out, y_out, n_out, lon_in_avg); - xgrid_clat[nxgrid] = poly_ctrlat (x_out, y_out, n_out ); - i_in[nxgrid] = i1; - j_in[nxgrid] = j1; - i_out[nxgrid] = i2; - j_out[nxgrid] = j2; -#pragma atomic update + xgrid_area_tmp[ij1_start+ixgrid] = xarea; + xgrid_clon_tmp[ij1_start+ixgrid] = poly_ctrlon(x_out, y_out, n_out, lon_in_avg); + xgrid_clat_tmp[ij1_start+ixgrid] = poly_ctrlat (x_out, y_out, n_out ); + ij_in_tmp[ij1_start+ixgrid] = ij1; + ij_out_tmp[ij1_start+ixgrid] = ij2; + ixgrid++; nxgrid++; } //if } //if @@ -275,6 +291,9 @@ int create_xgrid_2dx2d_order2_acc(const int *nlon_in, const int *nlat_in, const } //ij1 } //kernel + +//mkl here, transfer over data to the nxgrid arrays + free(area_in); free(area_out); From bd8d065e273764cfb453e1eeaa7c2b84525add56 Mon Sep 17 00:00:00 2001 From: mlee03 Date: Thu, 16 Nov 2023 14:26:34 -0500 Subject: [PATCH 4/9] debugging --- tools/fregrid/conserve_interp.c | 12 ++++----- tools/libfrencutils/create_xgrid_acc.c | 36 ++++++++++---------------- 2 files changed, 18 insertions(+), 30 deletions(-) diff --git a/tools/fregrid/conserve_interp.c b/tools/fregrid/conserve_interp.c index 4c07dbd5..db288331 100644 --- a/tools/fregrid/conserve_interp.c +++ b/tools/fregrid/conserve_interp.c @@ -897,13 +897,8 @@ void do_create_xgrid_order2( const int n, const int m, const Grid_config *grid_i ij2_start = (int *)malloc( nx_in*ny_in*sizeof(int) ); ij2_end = (int *)malloc( nx_in*ny_in*sizeof(int) ); - //mxxgrid=get_maxxgrid(); - //malloc_xgrid_arrays(mxxgrid, &i_in, &j_in, &i_out, &j_out, &xgrid_area, &xgrid_clon, &xgrid_clat); - #pragma acc enter data copyin(grid_in[m].latc[0:(nx_in+1)*(ny_in+1)], \ grid_in[m].lonc[0:(nx_in+1)*(ny_in+1)], mask[0:nx_in*ny_in]) - //#pragma acc enter data create(xgrid_area[0:mxxgrid], xgrid_clon[0:mxxgrid], xgrid_clat[0:mxxgrid], \ - //i_in[0:mxxgrid], j_in[0:mxxgrid], i_out[0:mxxgrid],j_out[0:mxxgrid]) #pragma acc enter data create(counts_per_ij1[0:nx_in*ny_in], ij2_start[0:nx_in*ny_in], ij2_end[0:nx_in*ny_in]) #ifdef _OPENACC @@ -911,14 +906,17 @@ void do_create_xgrid_order2( const int n, const int m, const Grid_config *grid_i approx_nxgrid = prepare_create_xgrid_2dx2d_order2_acc(&nx_in, &ny_now, &nx_out, &ny_out, grid_in[m].lonc+jstart*(nx_in+1), grid_in[m].latc+jstart*(nx_in+1), grid_out[n].lonc, grid_out[n].latc, out_minmaxavg_lists, mask, counts_per_ij1, ij2_start, ij2_end ); + malloc_xgrid_arrays(approx_nxgrid, &i_in, &j_in, &i_out, &j_out, &xgrid_area, &xgrid_clon, &xgrid_clat); +#pragma acc enter data create(xgrid_area[0:approx_nxgrid], xgrid_clon[0:approx_nxgrid], xgrid_clat[0:approx_nxgrid], \ + i_in[0:approx_nxgrid], j_in[0:approx_nxgrid], i_out[0:approx_nxgrid],j_out[0:approx_nxgrid]) - printf("approx %d\n", approx_nxgrid); + printf("approx_nxgrid %d\n", approx_nxgrid); nxgrid = create_xgrid_2dx2d_order2_acc(&nx_in, &ny_now, &nx_out, &ny_out, grid_in[m].lonc+jstart*(nx_in+1), grid_in[m].latc+jstart*(nx_in+1), grid_out[n].lonc, grid_out[n].latc, out_minmaxavg_lists, mask, approx_nxgrid, counts_per_ij1, ij2_start, ij2_end, i_in, j_in, i_out, j_out, xgrid_area, xgrid_clon, xgrid_clat); - printf("%d\n", nxgrid); + printf("nxgrid %d\n", nxgrid); #else diff --git a/tools/libfrencutils/create_xgrid_acc.c b/tools/libfrencutils/create_xgrid_acc.c index 1f51abed..d4821fd6 100644 --- a/tools/libfrencutils/create_xgrid_acc.c +++ b/tools/libfrencutils/create_xgrid_acc.c @@ -162,9 +162,6 @@ int create_xgrid_2dx2d_order2_acc(const int *nlon_in, const int *nlat_in, const int nx1, nx2, ny1, ny2, nx1p, nx2p; double *area_in, *area_out; - int *ij_in_tmp, *ij_out_tmp; - double *xgrid_area_tmp, *xgrid_clon_tmp, *xgrid_clat_tmp; - size_t nxgrid; nx1 = *nlon_in; @@ -179,25 +176,17 @@ int create_xgrid_2dx2d_order2_acc(const int *nlon_in, const int *nlat_in, const get_grid_area(nlon_in, nlat_in, lon_in, lat_in, area_in); get_grid_area(nlon_out, nlat_out, lon_out, lat_out, area_out); - ij_in_tmp = (int *)malloc(approx_nxgrid*sizeof(int)); - ij_out_tmp = (int *)malloc(approx_nxgrid*sizeof(int)); - xgrid_area_tmp = (double *)malloc(approx_nxgrid*sizeof(double)); - xgrid_clon_tmp = (double *)malloc(approx_nxgrid*sizeof(double)); - xgrid_clat_tmp = (double *)malloc(approx_nxgrid*sizeof(double)); - nxgrid = 0; - for(int i=0 ; ilon_list[0:MAX_V*nx2*ny2], out_minmaxavg_lists->lat_list[0:MAX_V*nx2*ny2]) #pragma acc data present(out_minmaxavg_lists->n_list[0:nx2*ny2], out_minmaxavg_lists->lon_avg[0:nx2*ny2]) #pragma acc data present(out_minmaxavg_lists->lat_min_list[0:nx2*ny2], out_minmaxavg_lists->lat_max_list[0:nx2*ny2]) #pragma acc data present(out_minmaxavg_lists->lon_min_list[0:nx2*ny2], out_minmaxavg_lists->lon_max_list[0:nx2*ny2]) -#pragma acc data copyout(xgrid_area_tmp[0:approx_nxgrid], xgrid_clon_tmp[0:approx_nxgrid], xgrid_clat_tmp[0:approx_nxgrid], \ - ij_out_tmp[0:approx_nxgrid]) -#pragma acc data copy(ij_in_tmp[0:approx_nxgrid]) +#pragma acc data present(xgrid_area[0:approx_nxgrid], xgrid_clon[0:approx_nxgrid], xgrid_clat[0:approx_nxgrid], \ + i_in[0:approx_nxgrid], j_in[0:approx_nxgrid], j_out[0:approx_nxgrid], i_out[0:approx_nxgrid]) +#pragma acc data present(counts_per_ij1[0:nx1*ny1], ij2_start[0:nx1*ny1], ij2_end[0:nx1*ny1]) #pragma acc data copyin(area_in[0:nx1*ny1], area_out[0:nx2*ny2]) #pragma acc data copy(nxgrid) #pragma acc parallel @@ -233,7 +222,7 @@ int create_xgrid_2dx2d_order2_acc(const int *nlon_in, const int *nlat_in, const ij1_start=0; if(ij1>0) { #pragma acc loop seq - for(int i=0 ; i AREA_RATIO_THRESH ) { - xgrid_area_tmp[ij1_start+ixgrid] = xarea; - xgrid_clon_tmp[ij1_start+ixgrid] = poly_ctrlon(x_out, y_out, n_out, lon_in_avg); - xgrid_clat_tmp[ij1_start+ixgrid] = poly_ctrlat (x_out, y_out, n_out ); - ij_in_tmp[ij1_start+ixgrid] = ij1; - ij_out_tmp[ij1_start+ixgrid] = ij2; + xgrid_area[ij1_start+ixgrid] = xarea; + xgrid_clon[ij1_start+ixgrid] = poly_ctrlon(x_out, y_out, n_out, lon_in_avg); + xgrid_clat[ij1_start+ixgrid] = poly_ctrlat (x_out, y_out, n_out ); + i_in[ij1_start+ixgrid] = i1; + j_in[ij1_start+ixgrid] = j1; + i_out[ij1_start+ixgrid] = i2; + j_out[ij1_start+ixgrid] = j2; ixgrid++; nxgrid++; } //if @@ -291,12 +282,11 @@ int create_xgrid_2dx2d_order2_acc(const int *nlon_in, const int *nlat_in, const } //ij1 } //kernel - -//mkl here, transfer over data to the nxgrid arrays - free(area_in); free(area_out); +#pragma acc exit data delete( area_in, area_out ) + return nxgrid; };/* get_xgrid_2Dx2D_order2 */ From c6d31bbc269b12cc7a4c9d875ecb800a9b654596 Mon Sep 17 00:00:00 2001 From: mlee03 Date: Fri, 17 Nov 2023 12:47:30 -0500 Subject: [PATCH 5/9] working prepare_create --- tools/fregrid/conserve_interp.c | 20 +++----- tools/libfrencutils/create_xgrid_acc.c | 69 +++++++++++++++++++------- tools/libfrencutils/create_xgrid_acc.h | 4 +- 3 files changed, 60 insertions(+), 33 deletions(-) diff --git a/tools/fregrid/conserve_interp.c b/tools/fregrid/conserve_interp.c index db288331..543dd93a 100644 --- a/tools/fregrid/conserve_interp.c +++ b/tools/fregrid/conserve_interp.c @@ -906,18 +906,11 @@ void do_create_xgrid_order2( const int n, const int m, const Grid_config *grid_i approx_nxgrid = prepare_create_xgrid_2dx2d_order2_acc(&nx_in, &ny_now, &nx_out, &ny_out, grid_in[m].lonc+jstart*(nx_in+1), grid_in[m].latc+jstart*(nx_in+1), grid_out[n].lonc, grid_out[n].latc, out_minmaxavg_lists, mask, counts_per_ij1, ij2_start, ij2_end ); - malloc_xgrid_arrays(approx_nxgrid, &i_in, &j_in, &i_out, &j_out, &xgrid_area, &xgrid_clon, &xgrid_clat); -#pragma acc enter data create(xgrid_area[0:approx_nxgrid], xgrid_clon[0:approx_nxgrid], xgrid_clat[0:approx_nxgrid], \ - i_in[0:approx_nxgrid], j_in[0:approx_nxgrid], i_out[0:approx_nxgrid],j_out[0:approx_nxgrid]) - - printf("approx_nxgrid %d\n", approx_nxgrid); nxgrid = create_xgrid_2dx2d_order2_acc(&nx_in, &ny_now, &nx_out, &ny_out, grid_in[m].lonc+jstart*(nx_in+1), grid_in[m].latc+jstart*(nx_in+1), grid_out[n].lonc, grid_out[n].latc, out_minmaxavg_lists, mask, approx_nxgrid, counts_per_ij1, ij2_start, ij2_end, - i_in, j_in, i_out, j_out, xgrid_area, xgrid_clon, xgrid_clat); - printf("nxgrid %d\n", nxgrid); - + &i_in, &j_in, &i_out, &j_out, &xgrid_area, &xgrid_clon, &xgrid_clat); #else nxgrid = create_xgrid_2dx2d_order2(&nx_in, &ny_now, &nx_out, &ny_out, grid_in[m].lonc+jstart*(nx_in+1), @@ -929,11 +922,6 @@ void do_create_xgrid_order2( const int n, const int m, const Grid_config *grid_i time_end = clock(); time_nxgrid += 1.0 * (time_end - time_start)/CLOCKS_PER_SEC; - free(mask); -#pragma acc exit data delete(mask) -#pragma acc exit data copyout(xgrid_area[0:mxxgrid], xgrid_clon[0:mxxgrid], xgrid_clat[0:mxxgrid], \ - i_in[0:mxxgrid], j_in[0:mxxgrid], i_out[0:mxxgrid],j_out[0:mxxgrid]) - for(int i=0; ilon_min_list[0:nx2*ny2], out_minmaxavg_lists->lon_max_list[0:nx2*ny2]) #pragma acc data present(counts_per_ij1[0:nx1*ny1], ij2_start[0:nx1*ny1], ij2_end[0:nx1*ny1]) #pragma acc data copy(nxgrid) -#pragma acc kernels +#pragma acc parallel { #pragma acc loop independent reduction(+:nxgrid) for( int ij1=0 ; ij1 < nx1*ny1 ; ij1++) { @@ -154,14 +154,18 @@ int create_xgrid_2dx2d_order2_acc(const int *nlon_in, const int *nlat_in, const const double *lon_in, const double *lat_in, const double *lon_out, const double *lat_out, Minmaxavg_lists *out_minmaxavg_lists, const double *mask_in, const int approx_nxgrid, const int *counts_per_ij1, const int *ij2_start, const int *ij2_end, - int *i_in, int *j_in, int *i_out, int *j_out, - double *xgrid_area, double *xgrid_clon, double *xgrid_clat) + int **i_in, int **j_in, int **i_out, int **j_out, + double **xgrid_area, double **xgrid_clon, double **xgrid_clat) { #define MAX_V 8 int nx1, nx2, ny1, ny2, nx1p, nx2p; double *area_in, *area_out; + int ixgrid2; + int *i_in2, *j_in2, *i_out2, *j_out2 ; + double *xgrid_area2, *xgrid_clon2, *xgrid_clat2; + size_t nxgrid; nx1 = *nlon_in; @@ -171,6 +175,15 @@ int create_xgrid_2dx2d_order2_acc(const int *nlon_in, const int *nlat_in, const nx1p = nx1 + 1; nx2p = nx2 + 1; + i_in2 = (int *)malloc(approx_nxgrid*sizeof(int)); + j_in2 = (int *)malloc(approx_nxgrid*sizeof(int)); + i_out2 = (int *)malloc(approx_nxgrid*sizeof(int)); + j_out2 = (int *)malloc(approx_nxgrid*sizeof(int)); + xgrid_area2 = (double *)malloc(approx_nxgrid*sizeof(double)); + xgrid_clon2 = (double *)malloc(approx_nxgrid*sizeof(double)); + xgrid_clat2 = (double *)malloc(approx_nxgrid*sizeof(double)); + for(int i=0 ; in_list[0:nx2*ny2], out_minmaxavg_lists->lon_avg[0:nx2*ny2]) #pragma acc data present(out_minmaxavg_lists->lat_min_list[0:nx2*ny2], out_minmaxavg_lists->lat_max_list[0:nx2*ny2]) #pragma acc data present(out_minmaxavg_lists->lon_min_list[0:nx2*ny2], out_minmaxavg_lists->lon_max_list[0:nx2*ny2]) -#pragma acc data present(xgrid_area[0:approx_nxgrid], xgrid_clon[0:approx_nxgrid], xgrid_clat[0:approx_nxgrid], \ - i_in[0:approx_nxgrid], j_in[0:approx_nxgrid], j_out[0:approx_nxgrid], i_out[0:approx_nxgrid]) #pragma acc data present(counts_per_ij1[0:nx1*ny1], ij2_start[0:nx1*ny1], ij2_end[0:nx1*ny1]) +#pragma acc data copyout(xgrid_area2[0:approx_nxgrid], xgrid_clon2[0:approx_nxgrid], xgrid_clat2[0:approx_nxgrid], \ + j_in2[0:approx_nxgrid], j_out2[0:approx_nxgrid], i_out2[0:approx_nxgrid]) #pragma acc data copyin(area_in[0:nx1*ny1], area_out[0:nx2*ny2]) -#pragma acc data copy(nxgrid) +#pragma acc data copy(nxgrid, i_in2[0:approx_nxgrid]) #pragma acc parallel { #pragma acc loop independent reduction(+:nxgrid) @@ -226,7 +239,7 @@ int create_xgrid_2dx2d_order2_acc(const int *nlon_in, const int *nlat_in, const } #pragma acc loop seq reduction(+:nxgrid) - for(int ij2=ij2_start[ij1]; ij2 AREA_RATIO_THRESH ) { - xgrid_area[ij1_start+ixgrid] = xarea; - xgrid_clon[ij1_start+ixgrid] = poly_ctrlon(x_out, y_out, n_out, lon_in_avg); - xgrid_clat[ij1_start+ixgrid] = poly_ctrlat (x_out, y_out, n_out ); - i_in[ij1_start+ixgrid] = i1; - j_in[ij1_start+ixgrid] = j1; - i_out[ij1_start+ixgrid] = i2; - j_out[ij1_start+ixgrid] = j2; + xgrid_area2[ij1_start+ixgrid] = xarea; + xgrid_clon2[ij1_start+ixgrid] = poly_ctrlon(x_out, y_out, n_out, lon_in_avg); + xgrid_clat2[ij1_start+ixgrid] = poly_ctrlat (x_out, y_out, n_out ); + i_in2[ij1_start+ixgrid] = i1; + j_in2[ij1_start+ixgrid] = j1; + i_out2[ij1_start+ixgrid] = i2; + j_out2[ij1_start+ixgrid] = j2; ixgrid++; nxgrid++; } //if @@ -282,10 +295,30 @@ int create_xgrid_2dx2d_order2_acc(const int *nlon_in, const int *nlat_in, const } //ij1 } //kernel - free(area_in); - free(area_out); - -#pragma acc exit data delete( area_in, area_out ) + free(area_in); + free(area_out); + + *i_in=(int *)malloc(nxgrid*sizeof(int)); + *j_in=(int *)malloc(nxgrid*sizeof(int)); + *i_out=(int *)malloc(nxgrid*sizeof(int)); + *j_out=(int *)malloc(nxgrid*sizeof(int)); + *xgrid_area=(double *)malloc(nxgrid*sizeof(double)); + *xgrid_clon=(double *)malloc(nxgrid*sizeof(double)); + *xgrid_clat=(double *)malloc(nxgrid*sizeof(double)); + + ixgrid2=0; + for(int i=0 ; i Date: Fri, 17 Nov 2023 13:07:13 -0500 Subject: [PATCH 6/9] cleanup --- tools/fregrid/conserve_interp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/fregrid/conserve_interp.c b/tools/fregrid/conserve_interp.c index 543dd93a..9dd508f2 100644 --- a/tools/fregrid/conserve_interp.c +++ b/tools/fregrid/conserve_interp.c @@ -871,14 +871,14 @@ void do_create_xgrid_order2( const int n, const int m, const Grid_config *grid_i double *xgrid_area=NULL, *xgrid_clon=NULL, *xgrid_clat=NULL ; double *mask; - int mxxgrid, nxgrid, approx_nxgrid; + int nxgrid, approx_nxgrid; int nx_out, ny_out, nx_in, ny_in ; int jstart, jend, ny_now; int zero=0; clock_t time_start, time_end, time_nxgrid; - int *counts_per_ij1, *ij2_start, *ij2_end; + int *counts_per_ij1=NULL, *ij2_start=NULL, *ij2_end=NULL; time_start = clock(); From fab4ac50741aa53e82825929ef0e6075a2aa9089 Mon Sep 17 00:00:00 2001 From: mlee03 Date: Fri, 17 Nov 2023 13:19:27 -0500 Subject: [PATCH 7/9] add malloc back in --- tools/fregrid/conserve_interp.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/tools/fregrid/conserve_interp.c b/tools/fregrid/conserve_interp.c index 9dd508f2..847503a8 100644 --- a/tools/fregrid/conserve_interp.c +++ b/tools/fregrid/conserve_interp.c @@ -893,16 +893,17 @@ void do_create_xgrid_order2( const int n, const int m, const Grid_config *grid_i mask = (double *)malloc(nx_in*ny_in*sizeof(double)); for(int i=0; i Date: Fri, 17 Nov 2023 13:30:58 -0500 Subject: [PATCH 8/9] cleanup --- tools/libfrencutils/create_xgrid_acc.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/libfrencutils/create_xgrid_acc.c b/tools/libfrencutils/create_xgrid_acc.c index 5137b9d1..a42606f2 100644 --- a/tools/libfrencutils/create_xgrid_acc.c +++ b/tools/libfrencutils/create_xgrid_acc.c @@ -45,7 +45,7 @@ int prepare_create_xgrid_2dx2d_order2_acc(const int *nlon_in, const int *nlat_in #define MAX_V 8 int nx1, nx2, ny1, ny2, nx1p, nx2p; - size_t nxgrid; + size_t approx_nxgrid; nx1 = *nlon_in; ny1 = *nlat_in; @@ -54,7 +54,7 @@ int prepare_create_xgrid_2dx2d_order2_acc(const int *nlon_in, const int *nlat_in nx1p = nx1 + 1; nx2p = nx2 + 1; - nxgrid = 0; + approx_nxgrid = 0; #pragma acc data present(lon_out[0:(nx2+1)*(ny2+1)], lat_out[0:(nx2+1)*(ny2+1)]) #pragma acc data present(lon_in[0:(nx1+1)*(ny1+1)], lat_in[0:(nx1+1)*(ny1+1)], mask_in[0:nx1*ny1]) @@ -63,10 +63,10 @@ int prepare_create_xgrid_2dx2d_order2_acc(const int *nlon_in, const int *nlat_in #pragma acc data present(out_minmaxavg_lists->lat_min_list[0:nx2*ny2], out_minmaxavg_lists->lat_max_list[0:nx2*ny2]) #pragma acc data present(out_minmaxavg_lists->lon_min_list[0:nx2*ny2], out_minmaxavg_lists->lon_max_list[0:nx2*ny2]) #pragma acc data present(counts_per_ij1[0:nx1*ny1], ij2_start[0:nx1*ny1], ij2_end[0:nx1*ny1]) -#pragma acc data copy(nxgrid) +#pragma acc data copy(approx_nxgrid) #pragma acc parallel { -#pragma acc loop independent reduction(+:nxgrid) +#pragma acc loop independent reduction(+:approx_nxgrid) for( int ij1=0 ; ij1 < nx1*ny1 ; ij1++) { int i1, j1; @@ -97,7 +97,7 @@ int prepare_create_xgrid_2dx2d_order2_acc(const int *nlon_in, const int *nlat_in lon_in_max = maxval_double(n1_in, x1_in); lon_in_avg = avgval_double(n1_in, x1_in); -#pragma acc loop independent reduction(+:nxgrid) reduction(+:icount) reduction(min:ij2_min) reduction(max:ij2_max) +#pragma acc loop independent reduction(+:approx_nxgrid) reduction(+:icount) reduction(min:ij2_min) reduction(max:ij2_max) for(int ij2=0; ij2= lon_in_max || lon_out_max <= lon_in_min ) continue; - nxgrid++; + approx_nxgrid++; icount++; ij2_min = min(ij2_min, ij2); ij2_max = max(ij2_max, ij2); @@ -142,7 +142,7 @@ int prepare_create_xgrid_2dx2d_order2_acc(const int *nlon_in, const int *nlat_in } //ij1 } //kernel - return nxgrid; + return approx_nxgrid; } From b487e7d7c5e0a8b39f21f2bb2770491854234508 Mon Sep 17 00:00:00 2001 From: mlee03 Date: Fri, 17 Nov 2023 13:45:40 -0500 Subject: [PATCH 9/9] add comments --- tools/libfrencutils/create_xgrid_acc.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tools/libfrencutils/create_xgrid_acc.c b/tools/libfrencutils/create_xgrid_acc.c index a42606f2..7801dc50 100644 --- a/tools/libfrencutils/create_xgrid_acc.c +++ b/tools/libfrencutils/create_xgrid_acc.c @@ -127,6 +127,9 @@ int prepare_create_xgrid_2dx2d_order2_acc(const int *nlon_in, const int *nlat_in /* x2_in should in the same range as x1_in after lon_fix, so no need to consider cyclic condition */ if(lon_out_min >= lon_in_max || lon_out_max <= lon_in_min ) continue; + + //Note, the check for AREA_RATIO_THRESH has been removed + //Thus, the computed value of approx_nxgrid will be equal to or greater than nxgrid approx_nxgrid++; icount++; ij2_min = min(ij2_min, ij2); @@ -175,6 +178,9 @@ int create_xgrid_2dx2d_order2_acc(const int *nlon_in, const int *nlat_in, const nx1p = nx1 + 1; nx2p = nx2 + 1; + //Temporarily holds information about exchange grid cells. + //Not all elements of these arrays will be filled in because + //approx_nxgrid >= nxgrid i_in2 = (int *)malloc(approx_nxgrid*sizeof(int)); j_in2 = (int *)malloc(approx_nxgrid*sizeof(int)); i_out2 = (int *)malloc(approx_nxgrid*sizeof(int)); @@ -232,6 +238,8 @@ int create_xgrid_2dx2d_order2_acc(const int *nlon_in, const int *nlat_in, const lon_in_avg = avgval_double(n1_in, x1_in); ixgrid=0; + // ij1_start, the total number of exchange grid cells computed for input cell ij1 + // is an approximation. ij1_start=0; if(ij1>0) { #pragma acc loop seq @@ -298,6 +306,7 @@ int create_xgrid_2dx2d_order2_acc(const int *nlon_in, const int *nlat_in, const free(area_in); free(area_out); + // record data *i_in=(int *)malloc(nxgrid*sizeof(int)); *j_in=(int *)malloc(nxgrid*sizeof(int)); *i_out=(int *)malloc(nxgrid*sizeof(int));