MIDAPACK - MIcrowave Data Analysis PACKage 1.0beta
Parallel software tools for high performance CMB DA analysis
toeplitz_seq.c
00001 /*
00002 ** file: toeplitz_seq.c, version 1.0, May 2012  
00003 **
00004 ** Project:  Midapack library, ANR MIDAS'09
00005 ** Purpose:  Provide Toeplitz algebra tools suitable for Cosmic Microwave Background (CMB)
00006 **           data analysis.
00007 **
00008 ** Authors:  Pierre Cargemel, Frederic Dauvergne, Maude Le Jeune, Antoine Rogier, 
00009 **           Radek Stompor (APC, Paris)
00010 **
00011 ** 
00012 ** Copyright (c) 2010-2012 APC CNRS Université Paris Diderot
00013 ** 
00014 ** This program is free software; you can redistribute it and/or modify
00015 ** it under the terms of the GNU General Public License as published by
00016 ** the Free Software Foundation; either version 3 of the License, or
00017 ** (at your option) any later version.
00018 ** This program is distributed in the hope that it will be useful,
00019 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
00020 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
00021 ** GNU General Public License for more details.
00022 ** 
00023 ** You should have received a copy of the GNU General Public License
00024 ** along with this program; if not, see http://www.gnu.org/licenses/gpl.html
00025 **
00026 ** For more information about ANR MIDAS'09 project see
00027 ** http://www.apc.univ-paris7.fr/APC_CS/Recherche/Adamis/MIDAS09/index.html
00028 ** 
00029 ** ACKNOWLEDGMENT: This work has been supported in part by the French National 
00030 ** Research Agency (ANR) through COSINUS program (project MIDAS no. ANR-09-COSI-009).
00031 **
00032 **
00033 ** Revision 1.0  2012/05/01?  fd@APC
00034 ** Official release 1.0, including only the Toeplitz algebra module.
00035 **
00036 */
00037 
00038 
00039 #include "toeplitz.h"
00040 //#include "toeplitz_seq.h"
00041 extern int NFFT;
00042 extern int FFTW_FLAG;
00043 
00044 
00045 //=========================================================================
00046 
00048 
00071 int stbmm(double **V, int *n, int m, int nrow, double *T, int nb_blocks_local, int nb_blocks_all, int *lambda, int *idv, int idp, int local_V_size)
00072 {
00073 
00074   //MPI parameters 
00075   int rank=-1;   //sequential use 
00076 
00077   FILE *file;
00078   file = stdout;
00079 
00080   int i,j,k;  //some indexes 
00081 
00082 
00083   //Define the indices for each process
00084   int idv0, idvn;  //indice of the first and the last block of V for each processes
00085 
00086   int *nnew;
00087   nnew = (int*) calloc( nb_blocks_local, sizeof(int));
00088   int idpnew;
00089   int local_V_size_new;  
00090 
00091 
00092   int status_params = get_overlapping_blocks_params( nb_blocks_local, idv, n, local_V_size, nrow, idp, &idpnew, &local_V_size_new, nnew, &idv0, &idvn);
00093 
00094   if (VERBOSE==1)
00095     printf("status_params=%d\n", status_params);
00096 
00097   if( status_params == 0) {
00098   free( nnew);
00099     return(0); //no work to be done
00100   }
00101 
00102 
00103   if (VERBOSE==1) {  //print on screen news parameters definition if VERBOSE
00104     fprintf(file, "news parameters definition:\n");
00105     fprintf(file, "[%d] idp=%d ; idpnew=%d", idp, idpnew);
00106     fprintf(file, "[%d] local_V_size=%d ; local_V_size_new=%d\n", rank, local_V_size, local_V_size_new);
00107     for(i=0;i<nb_blocks_local;i++)
00108       fprintf(file, "[%d] n[%d]=%d ; nnew[%d]=%d\n", rank, i, n[i], i, nnew[i]);
00109   }
00110 
00111 
00112   int vShft=idpnew-idp;   //new first element of relevance in V
00113 
00114   //Define the column indices:
00115   //index of the first and the last column of V for the current process
00116   int idvm0 = idpnew/nrow;
00117   int idvmn = (idpnew+local_V_size_new-1)/nrow;
00118   //number of columns of V for the current process
00119   int ncol_rank = idvmn-idvm0+1;
00120   //number of blocks for the current process with possibly repetitions 
00121   int nb_blocs_rank;
00122 
00123   if(ncol_rank == 1) // Empty process not allowed 
00124     nb_blocs_rank = idvn - idv0 + 1;
00125   else
00126     nb_blocs_rank = (ncol_rank-2)*nb_blocks_local + (nb_blocks_local-idv0) + (idvn+1);  //in this case nb_blocks_local = nblocs_all
00127 
00128   if (VERBOSE==1) {  //print on screen 
00129   fprintf(file, "[%d] nb_blocs_rank=%d, nb_blocks_local=%d\n", rank, nb_blocs_rank, nb_blocks_local);
00130   }
00131 
00132   //Define the indices for the first and the last element in each blocks
00133   int idvp0 = idpnew%nrow-idv[idv0];  //index of the first element of the process in the first block
00134   int idvpn;  //reverse index of the last element of the process in the last block
00135               //It's the number of remaining elements needed to fully complete the last block
00136   idvpn = idv[idvn]+nnew[idvn]-1 - (idpnew+local_V_size_new-1)%nrow ;
00137 
00138 
00139   //Define the offsets for the first and last blocks of the process for V1
00140   int v1_size = local_V_size_new;  //length of the new vector V1
00141   int offset0=0;
00142   int offsetn=0;
00143 
00144 
00145 //---------------------------------------
00146 //initialization for the blocks loop
00147 
00148   int idv1=0;     //index of *V1
00149 
00150   int mid;  //local number of column for the current block
00151   //index of the first element of the process inside the first block
00152   int offset_id0;
00153   offset_id0 = idvp0;
00154 
00155 //fftw variables
00156   fftw_complex *V_fft, *T_fft;
00157   double *V_rfft;
00158   fftw_plan plan_f, plan_b;
00159 //init local block vector
00160   double *V1block;
00161   int lambdaShft;
00162 
00163 //-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
00164 //loop on the blocks inside the process
00165   int nfft, blocksize;
00166   int iblock;  //index for the loop on the blocks
00167 //  int loopindex;
00168   int id; //indice of the current block
00169 
00170   int vblock_size;
00171   int id0block;
00172 
00173   for(iblock=idv0;iblock<idv0+nb_blocs_rank;iblock++) {
00174     id = iblock%nb_blocks_local;  //index of current block
00175 
00176 
00177   if(nnew[id]>0) { //the block is ok
00178 
00179   mid=1;  //always one!
00180 
00181   for( lambdaShft=k=0; k<id; k++)
00182     lambdaShft += lambda[k];
00183 
00184 //-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
00185 //Generic case : Generic block of the process
00186   if(VERBOSE)
00187     fprintf(file, "[%d] generic block...\n");
00188 
00189   vblock_size=nnew[id];
00190   id0block=0;
00191 
00192   V1block = (double *) calloc(vblock_size, sizeof(double));
00193 
00194   idv1 = idv[id]-idp%nrow - vShft + offset0 +nrow*( (iblock/nb_blocks_local) );
00195 
00196 #pragma omp parallel for                        
00197   for (j=0;j<vblock_size;j++)
00198     V1block[j] = (*V)[j+idv1-offset0+vShft];
00199 
00200   //init Toeplitz arrays
00201   tpltz_init(nnew[id], lambda[id], &nfft, &blocksize, &T_fft, (T+lambdaShft), &V_fft, &V_rfft, &plan_f, &plan_b);
00202   //Toeplitz computation
00203   if(VERBOSE)
00204     fprintf(file, "[%d] Before middle-level call : nfft = %d, blocksize = %d\n", rank, nfft, blocksize);
00205   stmm(&V1block, nnew[id], mid, id0block, vblock_size, T_fft, lambda[id], V_fft, V_rfft, plan_f, plan_b, blocksize, nfft);
00206   tpltz_cleanup(&T_fft, &V_fft, &V_rfft, &plan_f, &plan_b);
00207 
00208 #pragma omp parallel for                        
00209   for (j=0;j<vblock_size;j++)
00210     (*V)[j+idv[id] - idp%nrow + nrow*( (iblock/nb_blocks_local) )] = V1block[j];
00211 
00212 
00213   free(V1block);
00214 
00215 //-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
00216   }//end of if(nnew[id]>0)
00217   }//end of loop over the blocks
00218 
00219 
00220   return 0;
00221 }
00222 
00223 
00224 //====================================================================
00225 
00227 
00253 int gstbmm(double **V, int *n, int m, int nrow, double *T, int nb_blocks_local, int nb_blocks_all, int *lambda, int *idv, int id0p, int local_V_size, int *id0gap, int *lgap, int ngap)
00254 {
00255 
00256   int rank=-1; //sequential use  
00257 
00258   int i,j,k;   //some indexes
00259 
00260   int flag_skip_build_gappy_blocks=0;
00261 
00262   FILE *file;
00263   file = stdout;
00264 
00265 //put zeros at the gaps location
00266   reset_gaps( V, id0p, local_V_size, m, nrow, id0gap, lgap, ngap);
00267 
00268 //allocation for the gappy structure of the diagonal block Toeplitz matrix
00269   int nb_blocks_gappy;
00270   double *Tgappy;
00271   int *idvgappy;
00272   int *ngappy;
00273   int *lambdagappy;
00274 
00275   int nb_blockgappy_max; 
00276   int Tgappysize_max;
00277 
00278 //some computation usefull to determine the max size possible for the gappy variables
00279   int Tsize=0;
00280   int lambdamax=0;
00281 
00282   if (VERBOSE)
00283     fprintf(file, "[%d] flag_skip_build_gappy_blocks=%d\n", rank, flag_skip_build_gappy_blocks);
00284 
00285   if (flag_skip_build_gappy_blocks==1) {  //no build gappy blocks strategy, just put zeros at gaps location
00286   //compute the product using only the input Toeplitz blocks structure with zeros at the gaps location
00287   stbmm(V, n, m, nrow, T, nb_blocks_local, nb_blocks_all, lambda, idv, id0p, local_V_size);
00288 
00289   }
00290   else { //build gappy blocks strategy
00291 
00292   for(Tsize=i=0;i<nb_blocks_local;i++)
00293     Tsize += lambda[i];
00294 
00295   for(i=0;i<nb_blocks_local;i++) {
00296     if (lambda[i]>lambdamax)
00297       lambdamax = lambda[i];
00298   }
00299 
00300 //compute max size possible for the gappy variables
00301   nb_blockgappy_max = nb_blocks_local+ngap;
00302   Tgappysize_max = Tsize + lambdamax*ngap;
00303 
00304 //allocation of the gappy variables with max size possible
00305   Tgappy = (double *) calloc(Tgappysize_max,sizeof(double));
00306   ngappy = (int *) calloc(nb_blockgappy_max, sizeof(int));
00307   lambdagappy = (int *) calloc(nb_blockgappy_max, sizeof(int));
00308   idvgappy = (int *) calloc(nb_blockgappy_max, sizeof(int));
00309 
00310 
00311 //build gappy Toeplitz block structure considering significant gaps locations, meaning we skip
00312 //the gaps lower than the minimum correlation distance. You can also use the flag_param_distmin_fixed
00313 //parameter which allows you to skip the gap lower than these value. Indeed, sometimes it's
00314 //better to just put somes zeros than to consider two separates blocks.
00315 //ps: This criteria could be dependant of the local lambda in futur impovements.
00316   int flag_param_distmin_fixed=0;
00317   build_gappy_blocks(n, m, nrow, T, nb_blocks_local, nb_blocks_all, lambda, idv, id0gap, lgap, ngap, &nb_blocks_gappy, Tgappy, idvgappy, ngappy, lambdagappy, flag_param_distmin_fixed);
00318 
00319   if (VERBOSE) {
00320     fprintf(file, "[%d] nb_blocks_gappy=%d\n", rank, nb_blocks_gappy);
00321       fprintf(file, "[%d] idvgappy[%d]=%d \n", rank, i, idvgappy[i]);
00322     for(i=0;i<nb_blocks_gappy;i++)
00323       fprintf(file, "[%d] ngappy[%d]=%d \n", rank, i, ngappy[i]);
00324   }
00325 //ps: we could reallocate the gappy variables to their real size. Not sure it's worth it.
00326 
00327 //compute the product using the freshly created gappy Toeplitz blocks structure
00328 
00329 //compute the product using the freshly created gappy Toeplitz blocks structure
00330   stbmm( V, ngappy, m, nrow, Tgappy, nb_blocks_gappy, nb_blocks_gappy, lambdagappy, idvgappy, id0p, local_V_size);
00331 
00332   } //end flag_skip_build_gappy_blocks==1
00333 
00334 
00335 //put zeros on V for the gaps location again. Indeed, some gaps are just replaced by zeros numbers in input,
00336 //it's created some fakes results we need to clear after the computation
00337   reset_gaps( V, id0p, local_V_size, m, nrow, id0gap, lgap, ngap);
00338 
00339 
00340   return 0;
00341 }
00342 
00343