/*
 * truebutterfly_init
 *
 * From the visible statements: for each of `steps` stages this routine
 * exchanges integer index lists with one partner rank on `comm`, fills the
 * per-stage send lists (S[k], nS[k]) and receive lists (R[k], nR[k]), then
 * merges all of them into a single index list handed back through
 * com_indices / com_count.
 *
 * NOTE(review): this listing looks like an extract with lines missing (the
 * embedded original line numbers skip). The declarations of I, nI, J, nJ, k,
 * p2k, p2k1 and tag, the branching between the "copy" and "merge" statement
 * groups inside the loops, the closing braces and the return statement are
 * not visible here. The comments below describe only what is visible.
 *
 * Parameters (as used in the visible lines):
 *   indices      int array copied into S[k] and J[k] by the memcpy calls.
 *   count        not referenced in the visible lines; presumably the length
 *                of indices - TODO confirm.
 *   R, nR        per-stage arrays; R[k] is malloc'ed here and filled by
 *                MPI_Irecv, nR[k] is the length received from the partner.
 *   S, nS        per-stage arrays; S[k] is malloc'ed here and sent with
 *                MPI_Isend, nS[k] is the length that is sent.
 *   com_indices  output: *com_indices is malloc'ed here and receives a copy
 *                of U[steps-1].
 *   com_count    output: set to nU[steps-1].
 *   steps        iteration count of every loop and element count of every
 *                per-stage array allocated here.
 *   comm         communicator used for the size/rank queries and for all
 *                point-to-point calls.
 *
 * Return value: not visible in this listing.
 */
37 int truebutterfly_init(
int *indices,
int count,
int **R,
int *nR,
int **S,
int *nS,
int **com_indices,
int *com_count,
int steps, MPI_Comm comm){
40 int rank, size, rk, sk;
42 MPI_Request s_request, r_request;
/* Query the number of ranks in comm and this process's rank. */
47 MPI_Comm_size(comm, &size);
48 MPI_Comm_rank(comm, &rank);
/* Per-stage work arrays: I[j] is a received index list, nI[j] its length. */
50 I = (
int **) malloc(steps *
sizeof(
int*));
51 nI = (
int *) malloc(steps *
sizeof(
int));
/* ---- First loop over the stages: builds S[k] and fills I / nI. ---- */
56 for(k=0; k<steps; k++){
/* Partner selection: ranks whose (rank mod p2k1) is below p2k pair with
   rank+p2k, the others with rank-p2k. The same partner is used for both
   sending (sk) and receiving (rk). */
58 if( rank%p2k1 < p2k) sk=rk=rank+p2k;
else sk=rk=rank-p2k;
/* S[k] becomes a copy of the first nS[k] entries of indices.
   NOTE(review): presumably this is the k==0 case and nS[k] is set just
   before it in a line not shown here - confirm. */
62 S[k] = (
int *) malloc(nS[k] *
sizeof(
int));
63 memcpy( S[k], indices, nS[k]*
sizeof(
int));
/* S[k] is produced by card_or / set_or from S[k-1] and I[steps-k].
   NOTE(review): card_or and set_or are defined elsewhere; presumably they
   give the size and the contents of a set union - confirm. This group reads
   index k-1, so presumably it is the k>0 case. */
66 nS[k] =
card_or(S[k-1], nS[k-1], I[steps-k], nI[steps-k]);
67 S[k] = (
int *) malloc(nS[k] *
sizeof(
int));
68 set_or(S[k-1], nS[k-1], I[steps-k], nI[steps-k], S[k]);
/* Exchange list lengths with the partner: receive into nI[steps-k-1],
   send nS[k]; both requests are completed before continuing. */
71 MPI_Irecv(&nI[steps-k-1], 1, MPI_INT, rk, tag, comm, &r_request);
72 MPI_Isend(&nS[k], 1, MPI_INT, sk, tag, comm, &s_request);
73 MPI_Wait(&r_request, MPI_STATUS_IGNORE);
74 MPI_Wait(&s_request, MPI_STATUS_IGNORE);
/* Allocate the receive buffer using the length just received. */
76 I[steps-k-1]= (
int *) malloc(nI[steps-k-1] *
sizeof(
int));
/* Exchange the index lists themselves: receive into I[steps-k-1],
   send S[k]. Note the reversed storage order (slot steps-k-1). */
79 MPI_Irecv(I[steps-k-1], nI[steps-k-1], MPI_INT, rk, tag, comm, &r_request);
80 MPI_Isend(S[k], nS[k], MPI_INT, sk, tag, comm, &s_request);
81 MPI_Wait(&r_request, MPI_STATUS_IGNORE);
82 MPI_Wait(&s_request, MPI_STATUS_IGNORE);
/* Per-stage work arrays for the second loop: J[k] and its length nJ[k]. */
89 J = (
int **) malloc(steps *
sizeof(
int*));
90 nJ = (
int *) malloc(steps *
sizeof(
int));
/* ---- Second loop over the stages: builds J[k] and fills R / nR. ---- */
95 for(k=0; k<steps; k++){
/* Same partner selection rule as in the first loop. */
98 if( rank%p2k1 < p2k) sk=rk=rank+p2k;
else sk=rk=rank-p2k;
/* J[k] becomes a copy of the first nJ[k] entries of indices.
   NOTE(review): presumably the k==0 case, with nJ[k] assigned in a line
   not shown here - confirm. */
102 J[k] = (
int *) malloc(nJ[k] *
sizeof(
int));
103 memcpy( J[k], indices, nJ[k]*
sizeof(
int));
/* J[k] is produced by card_or / set_or from J[k-1] and R[k-1]
   (presumably the k>0 case, since index k-1 is read). */
106 nJ[k] =
card_or(J[k-1], nJ[k-1], R[k-1], nR[k-1]);
107 J[k] = (
int *) malloc(nJ[k] *
sizeof(
int));
108 set_or(J[k-1], nJ[k-1], R[k-1], nR[k-1], J[k]);
/* Exchange list lengths: receive nR[k], send nJ[k]. */
112 MPI_Irecv(&nR[k], 1, MPI_INT, rk, tag, comm, &r_request);
113 MPI_Isend(&nJ[k], 1, MPI_INT, sk, tag, comm, &s_request);
114 MPI_Wait(&r_request, MPI_STATUS_IGNORE);
115 MPI_Wait(&s_request, MPI_STATUS_IGNORE);
/* Allocate R[k] with the received length, then exchange the lists:
   receive into R[k], send J[k]. */
117 R[k]= (
int *) malloc( nR[k] *
sizeof(
int));
120 MPI_Irecv(R[k], nR[k], MPI_INT, rk, tag, comm, &r_request);
121 MPI_Isend(J[k], nJ[k], MPI_INT, sk, tag, comm, &s_request);
122 MPI_Wait(&r_request, MPI_STATUS_IGNORE);
123 MPI_Wait(&s_request, MPI_STATUS_IGNORE);
/* ---- Third loop over the stages: recomputes S[k] from I[k] and J[k] and
   exchanges it to obtain the final R[k].
   NOTE(review): S[k] and R[k] are assigned fresh buffers below; whether the
   buffers from the earlier loops are freed first is not visible here. ---- */
134 for(k=0; k<steps; k++){
/* Same partner selection rule as in the previous loops. */
136 if( rank%p2k1 < p2k) sk=rk=rank+p2k;
else sk=rk=rank-p2k;
/* nS[k] comes from card_and and S[k] is filled by set_and, both applied to
   I[k] and J[k]. NOTE(review): card_and / set_and are defined elsewhere;
   presumably size and contents of a set intersection - confirm.
   NOTE(review): the buffer is sized with nJ[k], not nS[k], while nS[k]
   elements are sent below - confirm this is intentional. */
138 nS[k] =
card_and(I[k], nI[k], J[k], nJ[k]);
139 S[k] = (
int *) malloc(nJ[k] *
sizeof(
int));
140 set_and( I[k], nI[k], J[k], nJ[k], S[k]);
/* Exchange list lengths: receive nR[k], send nS[k]. */
145 MPI_Irecv(&nR[k],1, MPI_INT, rk, tag, comm, &r_request);
146 MPI_Isend(&nS[k], 1, MPI_INT, sk, tag, comm, &s_request);
147 MPI_Wait(&r_request, MPI_STATUS_IGNORE);
148 MPI_Wait(&s_request, MPI_STATUS_IGNORE);
/* Allocate R[k] with the received length, then exchange the lists:
   receive into R[k], send S[k]. */
150 R[k]= (
int *) malloc( nR[k] *
sizeof(
int));
153 MPI_Irecv(R[k], nR[k], MPI_INT, rk, tag, comm, &r_request);
154 MPI_Isend(S[k], nS[k], MPI_INT, sk, tag, comm, &s_request);
155 MPI_Wait(&r_request, MPI_STATUS_IGNORE);
156 MPI_Wait(&s_request, MPI_STATUS_IGNORE);
/* Work arrays for merging: USR[k] combines S[k] with R[k]; U[k] accumulates
   USR[0..k] (see the two loops below). nUSR / nU hold the lengths. */
164 int **USR, *nUSR, **U, *nU;
166 USR = (
int **) malloc(steps*
sizeof(
int *));
167 nUSR = (
int *) malloc(steps*
sizeof(
int));
168 U = (
int **) malloc(steps*
sizeof(
int *));
169 nU = (
int *) malloc(steps*
sizeof(
int));
/* For every stage, build USR[k] from S[k] and R[k] via card_or / set_or. */
171 for(k=0; k<steps; k++){
172 nUSR[k] =
card_or(S[k], nS[k], R[k], nR[k]);
173 USR[k] = (
int *) malloc(nUSR[k]*
sizeof(
int));
174 set_or(S[k], nS[k], R[k], nR[k], USR[k]);
/* Running accumulation over the stages. */
176 for(k=0; k<steps; k++){
/* U[k] becomes a copy of the first nU[k] entries of USR[k].
   NOTE(review): presumably the k==0 case, with nU[k] assigned in a line
   not shown here - confirm. */
179 U[k] = (
int *) malloc(nU[k] *
sizeof(
int));
180 memcpy( U[k], USR[k], nU[k]*
sizeof(
int));
/* U[k] is produced by card_or / set_or from U[k-1] and USR[k]
   (presumably the k>0 case, since index k-1 is read).
   NOTE(review): this allocation uses sizeof(int *) although U[k] is an
   int buffer; the other int buffers in this function use sizeof(int). */
183 nU[k] =
card_or(U[k-1], nU[k-1], USR[k], nUSR[k]);
184 U[k] = (
int *) malloc(nU[k]*
sizeof(
int *));
185 set_or(U[k-1], nU[k-1], USR[k], nUSR[k], U[k]);
/* Publish the last accumulated list: *com_count is its length and
   *com_indices is a freshly malloc'ed copy of U[steps-1]. */
188 *com_count=nU[steps-1];
189 *com_indices = (
int *) malloc(*com_count *
sizeof(
int));
190 memcpy(*com_indices, U[steps-1], *com_count *
sizeof(
int));
/* Pass every S[k] and R[k] through subset2map together with the combined
   list. NOTE(review): subset2map is defined elsewhere; presumably it rewrites
   each entry as its position inside *com_indices - confirm. */
193 for(k=0; k<steps; k++){
194 subset2map(*com_indices, *com_count, S[k], nS[k]);
195 subset2map(*com_indices, *com_count, R[k], nR[k]);
216 int truebutterfly_reduce(
int **R,
int *nR,
int nRmax,
int **S,
int *nS,
int nSmax,
double *val,
int steps, MPI_Comm comm){
219 int k, p2k, p2k1, tag;
220 int rank, size, rk, sk;
222 MPI_Request s_request, r_request;
225 MPI_Comm_size(comm, &size);
226 MPI_Comm_rank(comm, &rank);
228 sbuf = (
double *) malloc(nSmax *
sizeof(
double));
229 rbuf = (
double *) malloc(nRmax *
sizeof(
double));
234 for(k=0; k<steps; k++){
236 if( rank%p2k1 < p2k){
244 m2s(val, sbuf, S[k], nS[k]);
245 MPI_Isend(sbuf, nS[k], MPI_DOUBLE, sk, tag, comm, &s_request);
246 MPI_Irecv(rbuf, nR[k], MPI_DOUBLE, rk, tag, comm, &r_request);
248 MPI_Wait(&s_request, MPI_STATUS_IGNORE);
249 MPI_Wait(&r_request, MPI_STATUS_IGNORE);
250 s2m_sum(val, rbuf, R[k], nR[k]);
261 MPI_Irecv(rbuf, nR[k], MPI_DOUBLE, rk, tag, comm, &r_request);
262 m2s(val, sbuf, S[k], nS[k]);
263 MPI_Isend(sbuf, nS[k], MPI_DOUBLE, sk, tag, comm, &s_request);
265 MPI_Wait(&r_request, MPI_STATUS_IGNORE);
266 s2m_sum(val, rbuf, R[k], nR[k]);
268 MPI_Wait(&s_request, MPI_STATUS_IGNORE);