![]() |
MIDAPACK - MIcrowave Data Analysis PACKage 1.0beta
Parallel software tools for high performance CMB DA analysis
|
00001 /* 00002 ** file: toeplitz_seq.c, version 1.0, May 2012 00003 ** 00004 ** Project: Midapack library, ANR MIDAS'09 00005 ** Purpose: Provide Toeplitz algebra tools suitable for Cosmic Microwave Background (CMB) 00006 ** data analysis. 00007 ** 00008 ** Authors: Pierre Cargemel, Frederic Dauvergne, Maude Le Jeune, Antoine Rogier, 00009 ** Radek Stompor (APC, Paris) 00010 ** 00011 ** 00012 ** Copyright (c) 2010-2012 APC CNRS Université Paris Diderot 00013 ** 00014 ** This program is free software; you can redistribute it and/or modify 00015 ** it under the terms of the GNU General Public License as published by 00016 ** the Free Software Foundation; either version 3 of the License, or 00017 ** (at your option) any later version. 00018 ** This program is distributed in the hope that it will be useful, 00019 ** but WITHOUT ANY WARRANTY; without even the implied warranty of 00020 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00021 ** GNU General Public License for more details. 00022 ** 00023 ** You should have received a copy of the GNU General Public License 00024 ** along with this program; if not, see http://www.gnu.org/licenses/gpl.html 00025 ** 00026 ** For more information about ANR MIDAS'09 project see 00027 ** http://www.apc.univ-paris7.fr/APC_CS/Recherche/Adamis/MIDAS09/index.html 00028 ** 00029 ** ACKNOWLEDGMENT: This work has been supported in part by the French National 00030 ** Research Agency (ANR) through COSINUS program (project MIDAS no. ANR-09-COSI-009). 00031 ** 00032 ** 00033 ** Revision 1.0 2012/05/01? fd@APC 00034 ** Official release 1.0, including only the Toeplitz algebra module. 00035 ** 00036 */ 00037 00038 00039 #include "toeplitz.h" 00040 //#include "toeplitz_seq.h" 00041 extern int NFFT; 00042 extern int FFTW_FLAG; 00043 00044 00045 //========================================================================= 00046 00048 00071 int stbmm(double **V, int *n, int m, int nrow, double *T, int nb_blocks_local, int nb_blocks_all, int *lambda, int *idv, int idp, int local_V_size) 00072 { 00073 00074 //MPI parameters 00075 int rank=-1; //sequential use 00076 00077 FILE *file; 00078 file = stdout; 00079 00080 int i,j,k; //some indexes 00081 00082 00083 //Define the indices for each process 00084 int idv0, idvn; //indice of the first and the last block of V for each processes 00085 00086 int *nnew; 00087 nnew = (int*) calloc( nb_blocks_local, sizeof(int)); 00088 int idpnew; 00089 int local_V_size_new; 00090 00091 00092 int status_params = get_overlapping_blocks_params( nb_blocks_local, idv, n, local_V_size, nrow, idp, &idpnew, &local_V_size_new, nnew, &idv0, &idvn); 00093 00094 if (VERBOSE==1) 00095 printf("status_params=%d\n", status_params); 00096 00097 if( status_params == 0) { 00098 free( nnew); 00099 return(0); //no work to be done 00100 } 00101 00102 00103 if (VERBOSE==1) { //print on screen news parameters definition if VERBOSE 00104 fprintf(file, "news parameters definition:\n"); 00105 fprintf(file, "[%d] idp=%d ; idpnew=%d", idp, idpnew); 00106 fprintf(file, "[%d] local_V_size=%d ; local_V_size_new=%d\n", rank, local_V_size, local_V_size_new); 00107 for(i=0;i<nb_blocks_local;i++) 00108 fprintf(file, "[%d] n[%d]=%d ; nnew[%d]=%d\n", rank, i, n[i], i, nnew[i]); 00109 } 00110 00111 00112 int vShft=idpnew-idp; //new first element of relevance in V 00113 00114 //Define the column indices: 00115 //index of the first and the last column of V for the current process 00116 int idvm0 = idpnew/nrow; 00117 int idvmn = (idpnew+local_V_size_new-1)/nrow; 00118 //number of columns of V for the current process 00119 int ncol_rank = idvmn-idvm0+1; 00120 //number of blocks for the current process with possibly repetitions 00121 int nb_blocs_rank; 00122 00123 if(ncol_rank == 1) // Empty process not allowed 00124 nb_blocs_rank = idvn - idv0 + 1; 00125 else 00126 nb_blocs_rank = (ncol_rank-2)*nb_blocks_local + (nb_blocks_local-idv0) + (idvn+1); //in this case nb_blocks_local = nblocs_all 00127 00128 if (VERBOSE==1) { //print on screen 00129 fprintf(file, "[%d] nb_blocs_rank=%d, nb_blocks_local=%d\n", rank, nb_blocs_rank, nb_blocks_local); 00130 } 00131 00132 //Define the indices for the first and the last element in each blocks 00133 int idvp0 = idpnew%nrow-idv[idv0]; //index of the first element of the process in the first block 00134 int idvpn; //reverse index of the last element of the process in the last block 00135 //It's the number of remaining elements needed to fully complete the last block 00136 idvpn = idv[idvn]+nnew[idvn]-1 - (idpnew+local_V_size_new-1)%nrow ; 00137 00138 00139 //Define the offsets for the first and last blocks of the process for V1 00140 int v1_size = local_V_size_new; //length of the new vector V1 00141 int offset0=0; 00142 int offsetn=0; 00143 00144 00145 //--------------------------------------- 00146 //initialization for the blocks loop 00147 00148 int idv1=0; //index of *V1 00149 00150 int mid; //local number of column for the current block 00151 //index of the first element of the process inside the first block 00152 int offset_id0; 00153 offset_id0 = idvp0; 00154 00155 //fftw variables 00156 fftw_complex *V_fft, *T_fft; 00157 double *V_rfft; 00158 fftw_plan plan_f, plan_b; 00159 //init local block vector 00160 double *V1block; 00161 int lambdaShft; 00162 00163 //-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= 00164 //loop on the blocks inside the process 00165 int nfft, blocksize; 00166 int iblock; //index for the loop on the blocks 00167 // int loopindex; 00168 int id; //indice of the current block 00169 00170 int vblock_size; 00171 int id0block; 00172 00173 for(iblock=idv0;iblock<idv0+nb_blocs_rank;iblock++) { 00174 id = iblock%nb_blocks_local; //index of current block 00175 00176 00177 if(nnew[id]>0) { //the block is ok 00178 00179 mid=1; //always one! 00180 00181 for( lambdaShft=k=0; k<id; k++) 00182 lambdaShft += lambda[k]; 00183 00184 //-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= 00185 //Generic case : Generic block of the process 00186 if(VERBOSE) 00187 fprintf(file, "[%d] generic block...\n"); 00188 00189 vblock_size=nnew[id]; 00190 id0block=0; 00191 00192 V1block = (double *) calloc(vblock_size, sizeof(double)); 00193 00194 idv1 = idv[id]-idp%nrow - vShft + offset0 +nrow*( (iblock/nb_blocks_local) ); 00195 00196 #pragma omp parallel for 00197 for (j=0;j<vblock_size;j++) 00198 V1block[j] = (*V)[j+idv1-offset0+vShft]; 00199 00200 //init Toeplitz arrays 00201 tpltz_init(nnew[id], lambda[id], &nfft, &blocksize, &T_fft, (T+lambdaShft), &V_fft, &V_rfft, &plan_f, &plan_b); 00202 //Toeplitz computation 00203 if(VERBOSE) 00204 fprintf(file, "[%d] Before middle-level call : nfft = %d, blocksize = %d\n", rank, nfft, blocksize); 00205 stmm(&V1block, nnew[id], mid, id0block, vblock_size, T_fft, lambda[id], V_fft, V_rfft, plan_f, plan_b, blocksize, nfft); 00206 tpltz_cleanup(&T_fft, &V_fft, &V_rfft, &plan_f, &plan_b); 00207 00208 #pragma omp parallel for 00209 for (j=0;j<vblock_size;j++) 00210 (*V)[j+idv[id] - idp%nrow + nrow*( (iblock/nb_blocks_local) )] = V1block[j]; 00211 00212 00213 free(V1block); 00214 00215 //-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= 00216 }//end of if(nnew[id]>0) 00217 }//end of loop over the blocks 00218 00219 00220 return 0; 00221 } 00222 00223 00224 //==================================================================== 00225 00227 00253 int gstbmm(double **V, int *n, int m, int nrow, double *T, int nb_blocks_local, int nb_blocks_all, int *lambda, int *idv, int id0p, int local_V_size, int *id0gap, int *lgap, int ngap) 00254 { 00255 00256 int rank=-1; //sequential use 00257 00258 int i,j,k; //some indexes 00259 00260 int flag_skip_build_gappy_blocks=0; 00261 00262 FILE *file; 00263 file = stdout; 00264 00265 //put zeros at the gaps location 00266 reset_gaps( V, id0p, local_V_size, m, nrow, id0gap, lgap, ngap); 00267 00268 //allocation for the gappy structure of the diagonal block Toeplitz matrix 00269 int nb_blocks_gappy; 00270 double *Tgappy; 00271 int *idvgappy; 00272 int *ngappy; 00273 int *lambdagappy; 00274 00275 int nb_blockgappy_max; 00276 int Tgappysize_max; 00277 00278 //some computation usefull to determine the max size possible for the gappy variables 00279 int Tsize=0; 00280 int lambdamax=0; 00281 00282 if (VERBOSE) 00283 fprintf(file, "[%d] flag_skip_build_gappy_blocks=%d\n", rank, flag_skip_build_gappy_blocks); 00284 00285 if (flag_skip_build_gappy_blocks==1) { //no build gappy blocks strategy, just put zeros at gaps location 00286 //compute the product using only the input Toeplitz blocks structure with zeros at the gaps location 00287 stbmm(V, n, m, nrow, T, nb_blocks_local, nb_blocks_all, lambda, idv, id0p, local_V_size); 00288 00289 } 00290 else { //build gappy blocks strategy 00291 00292 for(Tsize=i=0;i<nb_blocks_local;i++) 00293 Tsize += lambda[i]; 00294 00295 for(i=0;i<nb_blocks_local;i++) { 00296 if (lambda[i]>lambdamax) 00297 lambdamax = lambda[i]; 00298 } 00299 00300 //compute max size possible for the gappy variables 00301 nb_blockgappy_max = nb_blocks_local+ngap; 00302 Tgappysize_max = Tsize + lambdamax*ngap; 00303 00304 //allocation of the gappy variables with max size possible 00305 Tgappy = (double *) calloc(Tgappysize_max,sizeof(double)); 00306 ngappy = (int *) calloc(nb_blockgappy_max, sizeof(int)); 00307 lambdagappy = (int *) calloc(nb_blockgappy_max, sizeof(int)); 00308 idvgappy = (int *) calloc(nb_blockgappy_max, sizeof(int)); 00309 00310 00311 //build gappy Toeplitz block structure considering significant gaps locations, meaning we skip 00312 //the gaps lower than the minimum correlation distance. You can also use the flag_param_distmin_fixed 00313 //parameter which allows you to skip the gap lower than these value. Indeed, sometimes it's 00314 //better to just put somes zeros than to consider two separates blocks. 00315 //ps: This criteria could be dependant of the local lambda in futur impovements. 00316 int flag_param_distmin_fixed=0; 00317 build_gappy_blocks(n, m, nrow, T, nb_blocks_local, nb_blocks_all, lambda, idv, id0gap, lgap, ngap, &nb_blocks_gappy, Tgappy, idvgappy, ngappy, lambdagappy, flag_param_distmin_fixed); 00318 00319 if (VERBOSE) { 00320 fprintf(file, "[%d] nb_blocks_gappy=%d\n", rank, nb_blocks_gappy); 00321 fprintf(file, "[%d] idvgappy[%d]=%d \n", rank, i, idvgappy[i]); 00322 for(i=0;i<nb_blocks_gappy;i++) 00323 fprintf(file, "[%d] ngappy[%d]=%d \n", rank, i, ngappy[i]); 00324 } 00325 //ps: we could reallocate the gappy variables to their real size. Not sure it's worth it. 00326 00327 //compute the product using the freshly created gappy Toeplitz blocks structure 00328 00329 //compute the product using the freshly created gappy Toeplitz blocks structure 00330 stbmm( V, ngappy, m, nrow, Tgappy, nb_blocks_gappy, nb_blocks_gappy, lambdagappy, idvgappy, id0p, local_V_size); 00331 00332 } //end flag_skip_build_gappy_blocks==1 00333 00334 00335 //put zeros on V for the gaps location again. Indeed, some gaps are just replaced by zeros numbers in input, 00336 //it's created some fakes results we need to clear after the computation 00337 reset_gaps( V, id0p, local_V_size, m, nrow, id0gap, lgap, ngap); 00338 00339 00340 return 0; 00341 } 00342 00343