/*
 * ring_init: exchange index lists with the other ranks of `comm`, one peer
 * per step, and record the outcome of comparing the local list against each
 * received list.
 *
 *   indices, count : local index list and its length (sent to each peer)
 *   R, nR          : per-step outputs; nR[p] is set from card_and() and R[p]
 *                    is allocated with nR[p] ints, then passed to map_and()
 *   S, nS          : per-step arrays; S[steps-p] is allocated with
 *                    nS[steps-p] ints
 *   steps          : the loop runs for p = 1 .. steps-1
 *   comm           : communicator used for every MPI call
 *
 * NOTE(review): in this view of the file no line declares nbuf, buf, p or
 * tag, no line assigns sp or tag, and the end of the function (closing
 * braces, return, any free of buf) is not visible. Confirm against the full
 * source before relying on this description.
 */
32 int ring_init(
int *indices,
int count,
int **R,
int *nR,
int **S,
int *nS,
int steps, MPI_Comm comm){
34 int size, rank, sp, rp;
36 MPI_Request s_request, r_request;
38 MPI_Comm_size(comm, &size);
39 MPI_Comm_rank(comm, &rank);
/* nbuf = maximum `count` over all ranks, so buf can hold any peer's list. */
40 MPI_Allreduce( &count, &nbuf, 1, MPI_INT, MPI_MAX, comm);
/* NOTE(review): the malloc result is not checked in the visible code. */
41 buf = (
int* ) malloc(nbuf*
sizeof(
int));
43 for (p=1; p < steps; p++){
/* Receive partner: the rank p positions behind this one, wrapping modulo size. */
45 rp=(rank+size-p)%size;
/* First exchange the list lengths (fixed tag 0): send our count to sp and
 * receive the partner's count into nbuf, overwriting the global maximum.
 * NOTE(review): sp is not assigned in any visible line. */
46 MPI_Isend( &count, 1, MPI_INT, sp , 0, comm, &s_request);
47 MPI_Irecv( &nbuf, 1, MPI_INT, rp, 0, comm, &r_request);
/* The incoming length must be known before the receive of the list is posted. */
49 MPI_Wait(&r_request, MPI_STATUS_IGNORE);
50 MPI_Irecv( buf, nbuf, MPI_INT, rp, tag, comm, &r_request);
/* Complete the length send before s_request is reused for the list send. */
51 MPI_Wait(&s_request, MPI_STATUS_IGNORE);
52 MPI_Isend( indices, count, MPI_INT, sp, tag, comm, &s_request);
/* buf is only read after the receive of the partner's list has completed. */
55 MPI_Wait(&r_request, MPI_STATUS_IGNORE);
/* presumably the number of indices common to both lists (going by the
 * helper's name) - card_and is defined elsewhere; TODO confirm */
56 nR[p] =
card_and(indices, count, buf, nbuf);
58 R[p] = (
int* ) malloc(nR[p]*
sizeof(
int));
/* NOTE(review): no visible line sets nS[steps-p] before it is used as the
 * allocation size here - confirm it is assigned in the full source. */
59 S[steps-p] = (
int* ) malloc(nS[steps-p]*
sizeof(
int));
/* presumably fills R[p] with the nR[p] entries counted above; map_and is
 * defined elsewhere - TODO confirm */
60 map_and(indices, count, buf, nbuf, R[p]);
/*
 * ring_reduce: step-by-step exchange of double values with peers on `comm`.
 * For each p = 1 .. steps-1 the function posts a receive of nR[p] doubles
 * from rank (rank+size-p)%size, packs a send buffer with m2s(), sends nS[p]
 * doubles to sp, and after the receive completes hands the received data to
 * s2m_sum() together with res_val.
 *
 *   R, nR, nRmax : per-step receive index arrays and lengths; rbuf is
 *                  allocated once with nRmax doubles, so every nR[p] must be
 *                  <= nRmax
 *   S, nS, nSmax : per-step send index arrays and lengths; sbuf is allocated
 *                  once with nSmax doubles, so every nS[p] must be <= nSmax
 *   val          : passed to m2s() as the source of the values to send
 *   res_val      : passed to s2m_sum() along with the received values
 *   steps, comm  : number of loop steps (+1) and communicator
 *
 * NOTE(review): declarations of rbuf, sbuf, sp and rp, the assignments of sp
 * and tag, and the end of the function are not visible in this view of the
 * file.
 */
82 int ring_reduce(
int **R,
int *nR,
int nRmax,
int **S,
int *nS,
int nSmax,
double *val,
double *res_val,
int steps, MPI_Comm comm){
83 int tag, rank, size, p;
84 MPI_Request s_request, r_request;
88 MPI_Comm_size(comm, &size);
89 MPI_Comm_rank(comm, &rank);
/* One receive and one send buffer, reused at every step. */
92 rbuf = (
double *) malloc(nRmax *
sizeof(
double));
93 sbuf = (
double *) malloc(nSmax *
sizeof(
double));
95 for (p=1; p < steps; p++){
/* Receive partner: the rank p positions behind this one, wrapping modulo size. */
96 rp=(rank+size-p)%size;
/* The receive is posted before the send buffer is packed. */
97 MPI_Irecv(rbuf, nR[p], MPI_DOUBLE, rp, tag, comm, &r_request);
/* presumably gathers the nS[p] entries of val selected by S[p] into sbuf;
 * m2s is defined elsewhere - TODO confirm */
99 m2s(val, sbuf, S[p], nS[p]);
100 MPI_Isend(sbuf, nS[p], MPI_DOUBLE, sp, tag, comm, &s_request);
104 MPI_Wait(&r_request, MPI_STATUS_IGNORE);
/* presumably adds the nR[p] received values into res_val at positions R[p];
 * s2m_sum is defined elsewhere - TODO confirm */
105 s2m_sum(res_val, rbuf, R[p], nR[p]);
/* The send must finish before sbuf is overwritten by the next step. */
107 MPI_Wait(&s_request, MPI_STATUS_IGNORE);
/*
 * Body of a function whose signature is not visible in this view of the
 * file. Visible behavior: it allocates one request and one buffer per step
 * (steps-1 of each, for sends and for receives), packs every send buffer up
 * front, posts all receives and sends, waits for the receives, applies
 * s2m_sum() for every step, and finally waits for the sends.
 *
 * NOTE(review): the assignments of sp and tag, and any release of the
 * buffers and request arrays, are not visible here.
 */
127 int tag, rank, size, p;
128 MPI_Request *s_request, *r_request;
130 double **sbuf, **rbuf;
132 MPI_Comm_size(comm, &size);
133 MPI_Comm_rank(comm, &rank);
/* Request arrays hold steps-1 entries; step p uses slot p-1. */
136 s_request = (MPI_Request *) malloc((steps-1) *
sizeof(MPI_Request));
137 r_request = (MPI_Request *) malloc((steps-1) *
sizeof(MPI_Request));
139 rbuf = (
double **) malloc((steps-1) *
sizeof(
double *));
140 sbuf = (
double **) malloc((steps-1) *
sizeof(
double *));
/* Allocate a dedicated receive and send buffer per step and pack the send
 * buffers before any communication is started. */
142 for (p=1; p < steps; p++){
144 rbuf[p-1] = (
double *) malloc(nR[p] *
sizeof(
double));
145 sbuf[p-1] = (
double *) malloc(nS[p] *
sizeof(
double));
/* presumably gathers the nS[p] entries of val selected by S[p];
 * m2s is defined elsewhere - TODO confirm */
146 m2s(val, sbuf[p-1], S[p], nS[p]);
/* Post every receive and send without waiting in between. */
150 for (p=1; p < steps; p++){
/* Receive partner: the rank p positions behind this one, wrapping modulo size. */
153 rp=(rank+size-p)%size;
155 MPI_Irecv(rbuf[p-1], nR[p], MPI_DOUBLE, rp, tag, comm, &r_request[p-1]);
156 MPI_Isend(sbuf[p-1], nS[p], MPI_DOUBLE, sp, tag, comm, &s_request[p-1]);
/* NOTE(review): the count passed here is size-1, but r_request was allocated
 * with, and the loop above filled, steps-1 entries. Unless steps == size
 * this waits on the wrong number of requests - confirm, likely should be
 * steps-1. */
159 MPI_Waitall(size-1, r_request, MPI_STATUSES_IGNORE);
/* All receives are complete; hand each step's data to s2m_sum. */
161 for (p=1; p < steps; p++){
162 s2m_sum(res_val, rbuf[p-1], R[p], nR[p]);
/* NOTE(review): same size-1 versus steps-1 mismatch as for r_request above. */
164 MPI_Waitall(size-1, s_request, MPI_STATUSES_IGNORE);
/*
 * ring_noempty_reduce: exchange of double values with peers on `comm` using
 * one request and one buffer per message, with the request/buffer arrays
 * sized by nneS (sends) and nneR (receives) instead of by steps.
 *
 *   R, nR, nneR : per-step receive index arrays and lengths; nneR is the
 *                 number of receive requests/buffers allocated and waited on
 *   S, nS, nneS : per-step send index arrays and lengths; nneS is the number
 *                 of send requests/buffers allocated and waited on
 *   val         : passed to m2s() as the source of the values to send
 *   res_val     : passed to s2m_sum() along with the received values
 *   steps, comm : number of loop steps (+1) and communicator
 *
 * NOTE(review): going by the function name, nneS/nneR are presumably the
 * numbers of steps with non-empty messages, and the loops presumably skip
 * steps where nS[p] or nR[p] is zero. The guarding conditions, the
 * initialization and advancement of nesi/neri, the assignments of sp and
 * tag, and the end of the function are not visible in this view of the
 * file - confirm against the full source.
 */
185 int ring_noempty_reduce(
int **R,
int *nR,
int nneR,
int **S,
int *nS,
int nneS,
double *val,
double *res_val,
int steps, MPI_Comm comm){
186 int tag, rank, size, p;
187 MPI_Request *s_request, *r_request;
188 int sp, rp, nesi, neri;
189 double **sbuf, **rbuf;
191 MPI_Comm_size(comm, &size);
192 MPI_Comm_rank(comm, &rank);
/* One request slot and one buffer pointer per send (nneS) and per receive (nneR). */
195 s_request = (MPI_Request *) malloc(nneS *
sizeof(MPI_Request));
196 r_request = (MPI_Request *) malloc(nneR *
sizeof(MPI_Request));
198 rbuf = (
double **) malloc(nneR *
sizeof(
double *));
199 sbuf = (
double **) malloc(nneS *
sizeof(
double *));
/* Packing pass: allocate send buffer number nesi with nS[p] doubles and fill
 * it via m2s before any communication is started. */
202 for (p=1; p < steps; p++){
204 sbuf[nesi] = (
double *) malloc(nS[p] *
sizeof(
double));
205 m2s(val, sbuf[nesi], S[p], nS[p]);
/* Communication pass: post the receives and sends. */
213 for (p=1; p < steps; p++){
/* Receive partner: the rank p positions behind this one, wrapping modulo size. */
215 rp=(rank+size-p)%size;
/* The receive buffer is allocated right before its receive is posted. */
217 rbuf[neri] = (
double *) malloc(nR[p] *
sizeof(
double));
218 MPI_Irecv(rbuf[neri], nR[p], MPI_DOUBLE, rp, tag, comm, &r_request[neri]);
222 MPI_Isend(sbuf[nesi], nS[p], MPI_DOUBLE, sp, tag, comm, &s_request[nesi]);
/* Block until all nneR receives have completed. */
227 MPI_Waitall(nneR, r_request, MPI_STATUSES_IGNORE);
/* Hand each received buffer to s2m_sum together with res_val. */
230 for (p=1; p < steps; p++){
232 s2m_sum(res_val, rbuf[neri], R[p], nR[p]);
/* Sends are completed last, after the received data has been processed. */
236 MPI_Waitall(nneS, s_request, MPI_STATUSES_IGNORE);