// max(a, b): evaluates to the larger of its two arguments.
// Both arguments and the whole expansion are parenthesised, so the macro can be
// used inside a larger expression without precedence surprises.
// WARNING: the selected argument is evaluated twice (once in the comparison,
// once as the result), so never pass an expression with side effects such as
// max(i++, j).
// The leading listing line number that preceded '#' was removed: a '#' that is
// not the first token on its line does not start a preprocessing directive.
#define max(a,b) (((a) > (b)) ? (a) : (b))
// min(a, b): evaluates to the smaller of its two arguments.
// Both arguments and the whole expansion are parenthesised, so the macro can be
// used inside a larger expression without precedence surprises.
// WARNING: the selected argument is evaluated twice (once in the comparison,
// once as the result), so never pass an expression with side effects such as
// min(i++, j).
// The leading listing line number that preceded '#' was removed: a '#' that is
// not the first token on its line does not start a preprocessing directive.
#define min(a,b) (((a) < (b)) ? (a) : (b))
// Compile-time configuration constants.
// NOTE(review): this listing shows no #endif for the #ifndef lines below; the
// closing directives are presumably on lines the listing dropped - confirm
// against the real header.

// Fixed tag value; presumably used as the message tag of the library's MPI
// calls - confirm at the call sites.
89 #define MPI_USER_TAG 123
// Define fftw_MULTITHREADING (with an empty value) unless it is already defined.
94 #ifndef fftw_MULTITHREADING
95 #define fftw_MULTITHREADING
// Default value for the number of simultaneous FFTs (nfft) - presumably used
// when no other strategy selects one; confirm where it is read.
102 #define NFFT_DEFAULT 1
// Fall back to FFTW_ESTIMATE as the automatic FFTW planner flag unless
// FFTW_FLAG_AUTO was defined beforehand.
111 #ifndef FFTW_FLAG_AUTO
112 #define FFTW_FLAG_AUTO FFTW_ESTIMATE
// Default values of the strategy flags; the accepted values are listed next to each one.
120 #define FLAG_BS 0 //0:auto 1:fixed 2:zero 3:3lambda 4:4lambda 5:formula2
121 #define FLAG_NFFT 0 //0:auto 1:fixed 2:numthreads 3:fftwthreads
122 #define FLAG_FFTW FFTW_FLAG_AUTO //ESTIMATE, MEASURE, PATIENT, EXHAUSTIVE. NOTE(review): this comment used to say "Default is MEASURE", but FFTW_FLAG_AUTO falls back to FFTW_ESTIMATE above - confirm which is intended
123 #define FLAG_NO_RSHP 0 //0:auto 1:yes ?:no - NOTE(review): the original comment gave 1 for both "yes" and "no"; "no" is presumably 2 - confirm against the code reading this flag
124 #define FLAG_NOFFT 0 //0:auto 1:yes ?:no - NOTE(review): the original comment gave 1 for both "yes" and "no"; "no" is presumably 2 - confirm against the code reading this flag
125 #define FLAG_BLOCKINGCOMM 0 //0:auto 1:noblocking 2:blocking
126 #define FIXED_NFFT 0 //fixed init value for nfft
127 #define FIXED_BS 0 //fixed init value for blockside
// The accepted values of the next three flags are not documented in this listing.
128 #define FLAG_VERBOSE 0
129 #define FLAG_SKIP_BUILD_GAPPY_BLOCKS 0
130 #define FLAG_PARAM_DISTMIN_FIXED 0
131 #define FLAG_PRECOMPUTE_LVL 0 //0: no precompute 1: precompute plans 2: precomputes Toeplitz and plans
// tpltz_init - prototype only; the definition is not part of this listing.
// Judging by its name and parameters it prepares the FFT state used by the
// Toeplitz product routines - confirm against the implementation.
//   n, lambda       : sizes passed by value (presumably the Toeplitz dimension
//                     and its band half-width - TODO confirm).
//   nfft, blocksize : passed by address, so presumably chosen/adjusted here.
//   T               : Toeplitz coefficients (assumed input - confirm).
//   T_fft, V_fft, V_rfft, plan_f, plan_b :
//                     passed by address; presumably allocated/created here and
//                     later handed to tpltz_cleanup, which takes the same five
//                     handles.
//   flag_stgy       : strategy flags, passed by value.
// Returns an int whose meaning is not visible in this listing.
256 int tpltz_init(
int n,
int lambda,
int *nfft,
int *blocksize, fftw_complex **T_fft,
double *T, fftw_complex **V_fft,
double **V_rfft, fftw_plan *plan_f, fftw_plan *plan_b,
Flag flag_stgy);
// tpltz_cleanup - prototype only; the definition is not part of this listing.
// Takes, by address, the same five FFT handles that tpltz_init receives
// (T_fft, V_fft, V_rfft, plan_f, plan_b); presumably releases them - confirm
// against the implementation.
// Returns an int whose meaning is not visible in this listing.
258 int tpltz_cleanup(fftw_complex **T_fft, fftw_complex **V_fft,
double **V_rfft,fftw_plan *plan_f, fftw_plan *plan_b);
// stmm_core - prototype only; the definition is not part of this listing.
// Presumably the inner Toeplitz-times-matrix kernel ("stmm" likely stands for
// symmetric Toeplitz matrix-matrix product - TODO confirm).
//   V                      : data matrix, passed as double** (presumably
//                            updated in place - confirm).
//   n, m                   : sizes of V (assumed rows and columns - confirm).
//   T, T_fft               : Toeplitz coefficients and an FFT-domain buffer for them.
//   blocksize, lambda, nfft: algorithm sizes, passed by value.
//   V_fft, V_rfft          : caller-provided FFT work buffers.
//   plan_f, plan_b         : FFTW plans (presumably forward and backward).
//   flag_offset, flag_nofft: plain int switches; note that stmm_main takes a
//                            Flag structure instead.
// Returns an int whose meaning is not visible in this listing.
260 int stmm_core(
double **V,
int n,
int m,
double *T, fftw_complex *T_fft,
int blocksize,
int lambda, fftw_complex *V_fft,
double *V_rfft,
int nfft, fftw_plan plan_f, fftw_plan plan_b,
int flag_offset,
int flag_nofft);
// stmm_main - prototype only; the definition is not part of this listing.
// Takes the same data, Toeplitz and FFT arguments as stmm_core, plus id0 and l
// (presumably the first index and length of the range to process - TODO
// confirm) and a Flag strategy structure in place of stmm_core's int switches.
//   V              : data matrix, passed as double**.
//   n, m           : sizes of V (assumed rows and columns - confirm).
//   T, T_fft       : Toeplitz coefficients and their FFT-domain buffer.
//   V_fft, V_rfft  : caller-provided FFT work buffers.
//   plan_f, plan_b : FFTW plans (presumably forward and backward).
//   blocksize, nfft, lambda : algorithm sizes, passed by value.
// Returns an int whose meaning is not visible in this listing.
262 int stmm_main(
double **V,
int n,
int m,
int id0,
int l,
double *T, fftw_complex *T_fft,
int lambda, fftw_complex *V_fft,
double *V_rfft, fftw_plan plan_f, fftw_plan plan_b,
int blocksize,
int nfft,
Flag flag_stgy);
// stmm - prototype only; the definition is not part of this listing.
// Smallest signature of the stmm family: it takes none of the FFT buffers or
// plans that stmm_core and stmm_main require, so it presumably sets them up
// internally - confirm against the implementation.
//   V         : data matrix, passed as double**.
//   n, m      : sizes of V (assumed rows and columns - confirm).
//   T, lambda : Toeplitz coefficients and their count/band width (assumed).
//   flag_stgy : strategy flags, passed by value.
// Returns an int whose meaning is not visible in this listing.
264 int stmm(
double **V,
int n,
int m,
double *T,
int lambda,
Flag flag_stgy);
// stbmm - prototype only; the definition is not part of this listing.
// Presumably the block variant of stmm, operating on an array of Toeplitz
// blocks (TODO confirm).
//   V            : data matrix, passed as double**.
//   nrow         : row count, an int here (mpi_stbmm declares it as int64_t).
//   m_cw, m_rw   : two column counts (presumably column-wise and row-wise
//                  storage - confirm).
//   tpltzblocks, nb_blocks : array of Block descriptors and its length.
//   idp          : 64-bit index (presumably global offset of the local data - confirm).
//   local_V_size : size of the local part of V (assumed - confirm).
//   flag_stgy    : strategy flags, passed by value.
// Returns an int whose meaning is not visible in this listing.
270 int stbmm(
double **V,
int nrow,
int m_cw,
int m_rw,
Block *tpltzblocks,
int nb_blocks, int64_t idp,
int local_V_size,
Flag flag_stgy);
// gstbmm - prototype only; the definition is not part of this listing.
// Same parameter list as stbmm followed by a gap description
// (id0gap, lgap, ngap) ahead of flag_stgy; presumably the gap-aware variant of
// stbmm - TODO confirm.
//   id0gap : 64-bit values, presumably the first index of each gap.
//   lgap   : int values, presumably the length of each gap.
//   ngap   : presumably the number of entries in id0gap and lgap.
// Returns an int whose meaning is not visible in this listing.
272 int gstbmm(
double **V,
int nrow,
int m_cw,
int m_rw,
Block *tpltzblocks,
int nb_blocks, int64_t idp,
int local_V_size, int64_t *id0gap,
int *lgap,
int ngap,
Flag flag_stgy);
// reset_gaps - prototype only; the definition is not part of this listing.
// Presumably overwrites the entries of V that fall inside the listed gaps
// (the name suggests resetting them - TODO confirm what value is written).
//   V                  : data matrix, passed as double**.
//   id0, local_V_size  : presumably first index and size of the local data.
//   m, nrow, m_rowwise : matrix dimensions (exact meaning not visible here).
//   id0gap, lgap, ngap : gap description (first indices, lengths, count - assumed).
// NOTE(review): id0 and nrow are plain int here while id0gap holds int64_t
// values - check that the index ranges are compatible.
// Returns an int whose meaning is not visible in this listing.
275 int reset_gaps(
double **V,
int id0,
int local_V_size,
int m,
int nrow,
int m_rowwise, int64_t *id0gap,
int *lgap,
int ngap);
// mpi_stmm - prototype only; the definition is not part of this listing.
// Takes the data/range/Toeplitz arguments of stmm_main without its FFT buffers
// and plans, plus an MPI communicator; presumably the distributed-memory
// version of stmm - TODO confirm.
//   V         : data matrix, passed as double**.
//   n, m      : sizes of V (assumed rows and columns - confirm).
//   id0, l    : presumably first index and length of the local range.
//   T, lambda : Toeplitz coefficients and their count/band width (assumed).
//   flag_stgy : strategy flags, passed by value.
//   comm      : MPI communicator to use.
// Returns an int whose meaning is not visible in this listing.
280 int mpi_stmm(
double **V,
int n,
int m,
int id0,
int l,
double *T,
int lambda,
Flag flag_stgy, MPI_Comm comm);
// mpi_stbmm - prototype only; the definition is not part of this listing.
// Presumably the MPI version of the block Toeplitz product (TODO confirm).
//   V               : data matrix, passed as double**.
//   nrow            : row count, int64_t here.
//   m, m_rowwise    : column counts (exact meaning not visible here).
//   tpltzblocks     : array of Block descriptors.
//   nb_blocks_local, nb_blocks_all :
//                     block counts (presumably for this process and overall).
//   idp             : 64-bit index (presumably global offset of the local data).
//   local_V_size    : size of the local part of V (assumed).
//   flag_stgy, comm : strategy flags and MPI communicator.
// NOTE(review): nrow is int64_t here but int in stbmm, gstbmm and mpi_gstbmm.
// Returns an int whose meaning is not visible in this listing.
282 int mpi_stbmm(
double **V, int64_t nrow,
int m,
int m_rowwise,
Block *tpltzblocks,
int nb_blocks_local,
int nb_blocks_all, int64_t idp,
int local_V_size,
Flag flag_stgy, MPI_Comm comm);
// mpi_gstbmm - prototype only; the definition is not part of this listing.
// Parameter list mirrors mpi_stbmm with a gap description
// (id0gap, lgap, ngap) inserted before flag_stgy; presumably the gap-aware MPI
// block Toeplitz product - TODO confirm.
// NOTE(review): the offset parameter is "int id0p" here but "int64_t idp" in
// mpi_stbmm, and nrow is int here but int64_t there. The prototype is left
// untouched because it must match the definition, which is out of view; check
// whether the narrower types are intended.
// Returns an int whose meaning is not visible in this listing.
284 int mpi_gstbmm(
double **V,
int nrow,
int m,
int m_rowwise,
Block *tpltzblocks,
int nb_blocks_local,
int nb_blocks_all,
int id0p,
int local_V_size, int64_t *id0gap,
int *lgap,
int ngap,
Flag flag_stgy, MPI_Comm comm);
// define_nfft - prototype only; the definition is not part of this listing.
// Presumably selects the number of simultaneous FFTs (nfft) - TODO confirm.
//   n_thread   : a thread count (source not visible here).
//   flag_nfft  : selection mode; presumably one of the values listed next to
//                FLAG_NFFT (0:auto 1:fixed 2:numthreads 3:fftwthreads).
//   fixed_nfft : presumably the value used in "fixed" mode (cf. FIXED_NFFT).
// Returns an int; presumably the chosen nfft - confirm.
303 int define_nfft(
int n_thread,
int flag_nfft,
int fixed_nfft);
// rhs_init_fftw - prototype only; the definition is not part of this listing.
// Presumably allocates the FFT work buffers for the right-hand-side data and
// creates the two FFTW plans (TODO confirm).
//   nfft          : passed by address (read and/or updated - not visible here).
//   fft_size      : FFT length (assumed).
//   V_fft, V_rfft : buffer pointers passed by address, presumably set here.
//   plan_f, plan_b: plan handles passed by address, presumably set here.
//   fftw_flag     : presumably an FFTW planner flag (cf. FLAG_FFTW).
// Returns an int whose meaning is not visible in this listing.
307 int rhs_init_fftw(
int *nfft,
int fft_size, fftw_complex **V_fft,
double **V_rfft, fftw_plan *plan_f, fftw_plan *plan_b,
int fftw_flag);
// circ_init_fftw - prototype only; the definition is not part of this listing.
// Presumably builds the FFT-domain representation of a circulant matrix
// derived from the Toeplitz coefficients T (TODO confirm).
//   T, lambda : coefficients and their count/band width (assumed).
//   fft_size  : FFT length (assumed).
//   T_fft     : buffer pointer passed by address, presumably set here.
// Returns an int whose meaning is not visible in this listing.
309 int circ_init_fftw(
double *T,
int fft_size,
int lambda, fftw_complex **T_fft);
// scmm_direct - prototype only; the definition is not part of this listing.
// Presumably a circulant-matrix product carried out on data already laid out
// in the FFT work buffer ("scmm" likely stands for symmetric circulant
// matrix-matrix product - TODO confirm).
//   fft_size, nfft     : FFT length and number of simultaneous FFTs (assumed).
//   C_fft              : FFT-domain circulant coefficients (assumed).
//   ncol               : number of columns to process (assumed).
//   V_rfft, V_fft      : real and complex FFT work buffers.
//   CV                 : passed as double**; presumably receives the product.
//   plan_f_V, plan_b_CV: FFTW plans (presumably forward on V, backward to CV).
// Returns an int whose meaning is not visible in this listing.
311 int scmm_direct(
int fft_size,
int nfft, fftw_complex *C_fft,
int ncol,
double *V_rfft,
double **CV, fftw_complex *V_fft, fftw_plan plan_f_V, fftw_plan plan_b_CV);
// scmm_basic - prototype only; the definition is not part of this listing.
// Unlike scmm_direct it takes the data matrix V itself together with
// blocksize and m; presumably it copies V into the FFT buffers before the
// circulant product - TODO confirm.
//   V, blocksize, m    : input data and its sizes (assumed).
//   C_fft              : FFT-domain circulant coefficients (assumed).
//   CV                 : passed as double**; presumably receives the product.
//   V_fft, V_rfft, nfft: FFT work buffers and number of simultaneous FFTs.
//   plan_f_V, plan_b_CV: FFTW plans (presumably forward on V, backward to CV).
// Returns an int whose meaning is not visible in this listing.
313 int scmm_basic(
double **V,
int blocksize,
int m, fftw_complex *C_fft,
double **CV, fftw_complex *V_fft,
double *V_rfft,
int nfft, fftw_plan plan_f_V, fftw_plan plan_b_CV);
// stmm_simple_basic - prototype only; the definition is not part of this listing.
// Takes no FFTW types at all, so it is presumably a direct (non-FFT)
// Toeplitz-times-matrix product - TODO confirm.
//   V, n, m   : input matrix and its sizes (assumed rows and columns).
//   T, lambda : Toeplitz coefficients and their count/band width (assumed).
//   TV        : passed as double**; presumably receives the product.
// Returns an int whose meaning is not visible in this listing.
315 int stmm_simple_basic(
double **V,
int n,
int m,
double *T,
int lambda,
double **TV);
// build_gappy_blocks - prototype only; the definition is not part of this listing.
// Presumably derives a new list of Toeplitz blocks that accounts for the gaps
// (TODO confirm).
//   nrow, m                : matrix dimensions (exact meaning not visible here).
//   tpltzblocks            : existing Block descriptors.
//   nb_blocks_local, nb_blocks_all :
//                            block counts (presumably for this process and overall).
//   id0gap, lgap, ngap     : gap description (first indices, lengths, count - assumed).
//   tpltzblocks_gappy      : Block array, presumably filled by this function.
//   nb_blocks_gappy_final  : passed by address; presumably receives the
//                            resulting block count.
//   flag_param_distmin_fixed : int switch (cf. FLAG_PARAM_DISTMIN_FIXED); its
//                            effect is not visible here.
// Returns an int whose meaning is not visible in this listing.
317 int build_gappy_blocks(
int nrow,
int m,
Block *tpltzblocks,
int nb_blocks_local,
int nb_blocks_all, int64_t *id0gap,
int *lgap,
int ngap,
Block *tpltzblocks_gappy,
int *nb_blocks_gappy_final,
int flag_param_distmin_fixed);
// copy_block - prototype only; the definition is not part of this listing.
// Presumably copies a rectangular sub-block from Vin to Vout (TODO confirm;
// the storage order of both arrays is not visible here).
//   ninrow, nincol, Vin    : dimensions of the source array and its data.
//   noutrow, noutcol, Vout : dimensions of the destination array and its data.
//   inrow, incol           : presumably the block's top-left position in Vin.
//   nblockrow, nblockcol   : presumably the block's size.
//   outrow, outcol         : presumably the block's top-left position in Vout.
//   norm                   : a double, presumably a scale factor applied
//                            during the copy - confirm.
//   set_zero_flag          : int switch; presumably controls zero-filling of
//                            Vout - confirm.
// Returns an int whose meaning is not visible in this listing.
323 int copy_block(
int ninrow,
int nincol,
double *Vin,
int noutrow,
int noutcol,
double *Vout,
int inrow,
int incol,
int nblockrow,
int nblockcol,
int outrow,
int outcol,
double norm,
int set_zero_flag);
// vect2nfftblock - prototype only; the definition is not part of this listing.
// Counterpart of nfftblock2vect, which takes the same six parameters with the
// two arrays swapped. Presumably rearranges the vector V1 (v1_size elements)
// into V2 as nfft blocks of length fft_size - TODO confirm, including how
// lambda enters the layout.
// Returns an int whose meaning is not visible in this listing.
325 int vect2nfftblock(
double *V1,
int v1_size,
double *V2,
int fft_size,
int nfft,
int lambda);
// nfftblock2vect - prototype only; the definition is not part of this listing.
// Counterpart of vect2nfftblock, which takes the same six parameters with the
// two arrays swapped. Presumably gathers nfft blocks of length fft_size from
// V2 back into the vector V1 (v1_size elements) - TODO confirm, including how
// lambda enters the layout.
// Returns an int whose meaning is not visible in this listing.
327 int nfftblock2vect(
double *V2,
int fft_size,
int nfft,
int lambda,
double *V1,
int v1_size);
// get_overlapping_blocks_params - prototype only; the definition is not part
// of this listing.
// Presumably works out which Toeplitz blocks overlap the local data range and
// reports adjusted range parameters (TODO confirm).
//   nbloc, tpltzblocks : number of Block descriptors and the array itself.
//   local_V_size       : size of the local data (assumed).
//   nrow, idp          : 64-bit row count and offset.
//   idpnew, local_V_size_new, nnew, ifirstBlock, ilastBlock :
//                        passed by address; presumably outputs (new offset,
//                        new local size, new length, first and last
//                        overlapping block indices).
// Returns an int whose meaning is not visible in this listing.
329 int get_overlapping_blocks_params(
int nbloc,
Block *tpltzblocks,
int local_V_size, int64_t nrow, int64_t idp, int64_t *idpnew,
int *local_V_size_new,
int *nnew,
int *ifirstBlock,
int *ilastBlock);