37 int butterfly_init(
int *indices,
int count,
int **R,
int *nR,
int **S,
int *nS,
int **com_indices,
int *com_count,
int steps, MPI_Comm comm){
40 int rank, size, rk, sk;
42 MPI_Request s_request, r_request;
47 MPI_Comm_size(comm, &size);
48 MPI_Comm_rank(comm, &rank);
50 I = (
int **) malloc(steps *
sizeof(
int*));
51 nI = (
int *) malloc(steps *
sizeof(
int));
55 for(k=0; k<steps; k++){
56 sk=(rank+size-p2k)%size;
61 S[k] = (
int *) malloc(nS[k] *
sizeof(
int));
62 memcpy( S[k], indices, nS[k]*
sizeof(
int));
65 nS[k] =
card_or(S[k-1], nS[k-1], I[steps-k], nI[steps-k]);
66 S[k] = (
int *) malloc(nS[k] *
sizeof(
int));
67 set_or(S[k-1], nS[k-1], I[steps-k], nI[steps-k], S[k]);
70 MPI_Irecv(&nI[steps-k-1], 1, MPI_INT, rk, tag, comm, &r_request);
71 MPI_Isend(&nS[k], 1, MPI_INT, sk, tag, comm, &s_request);
72 MPI_Wait(&r_request, MPI_STATUS_IGNORE);
73 MPI_Wait(&s_request, MPI_STATUS_IGNORE);
75 I[steps-k-1]= (
int *) malloc(nI[steps-k-1] *
sizeof(
int));
78 MPI_Irecv(I[steps-k-1], nI[steps-k-1], MPI_INT, rk, tag, comm, &r_request);
79 MPI_Isend(S[k], nS[k], MPI_INT, sk, tag, comm, &s_request);
80 MPI_Wait(&r_request, MPI_STATUS_IGNORE);
81 MPI_Wait(&s_request, MPI_STATUS_IGNORE);
87 J = (
int **) malloc(steps *
sizeof(
int*));
88 nJ = (
int *) malloc(steps *
sizeof(
int));
92 for(k=0; k<steps; k++){
95 rk=(rank+size-p2k)%size;
98 J[k] = (
int *) malloc(nJ[k] *
sizeof(
int));
99 memcpy( J[k], indices, nJ[k]*
sizeof(
int));
102 nJ[k] =
card_or(J[k-1], nJ[k-1], R[k-1], nR[k-1]);
103 J[k] = (
int *) malloc(nJ[k] *
sizeof(
int));
104 set_or(J[k-1], nJ[k-1], R[k-1], nR[k-1], J[k]);
108 MPI_Irecv(&nR[k], 1, MPI_INT, rk, tag, comm, &r_request);
109 MPI_Isend(&nJ[k], 1, MPI_INT, sk, tag, comm, &s_request);
110 MPI_Wait(&r_request, MPI_STATUS_IGNORE);
111 MPI_Wait(&s_request, MPI_STATUS_IGNORE);
113 R[k]= (
int *) malloc( nR[k] *
sizeof(
int));
116 MPI_Irecv(R[k], nR[k], MPI_INT, rk, tag, comm, &r_request);
117 MPI_Isend(J[k], nJ[k], MPI_INT, sk, tag, comm, &s_request);
118 MPI_Wait(&r_request, MPI_STATUS_IGNORE);
119 MPI_Wait(&s_request, MPI_STATUS_IGNORE);
128 for(k=0; k<steps; k++){
130 rk=(rank+size-p2k)%size;
132 nS[k] =
card_and(I[k], nI[k], J[k], nJ[k]);
133 S[k] = (
int *) malloc(nJ[k] *
sizeof(
int));
134 set_and( I[k], nI[k], J[k], nJ[k], S[k]);
139 MPI_Irecv(&nR[k],1, MPI_INT, rk, tag, comm, &r_request);
140 MPI_Isend(&nS[k], 1, MPI_INT, sk, tag, comm, &s_request);
141 MPI_Wait(&r_request, MPI_STATUS_IGNORE);
142 MPI_Wait(&s_request, MPI_STATUS_IGNORE);
144 R[k]= (
int *) malloc( nR[k] *
sizeof(
int));
147 MPI_Irecv(R[k], nR[k], MPI_INT, rk, tag, comm, &r_request);
148 MPI_Isend(S[k], nS[k], MPI_INT, sk, tag, comm, &s_request);
149 MPI_Wait(&r_request, MPI_STATUS_IGNORE);
150 MPI_Wait(&s_request, MPI_STATUS_IGNORE);
157 int **USR, *nUSR, **U, *nU;
159 USR = (
int **) malloc(steps*
sizeof(
int *));
160 nUSR = (
int *) malloc(steps*
sizeof(
int));
161 U = (
int **) malloc(steps*
sizeof(
int *));
162 nU = (
int *) malloc(steps*
sizeof(
int));
164 for(k=0; k<steps; k++){
165 nUSR[k] =
card_or(S[k], nS[k], R[k], nR[k]);
166 USR[k] = (
int *) malloc(nUSR[k]*
sizeof(
int));
167 set_or(S[k], nS[k], R[k], nR[k], USR[k]);
169 for(k=0; k<steps; k++){
172 U[k] = (
int *) malloc(nU[k] *
sizeof(
int));
173 memcpy( U[k], USR[k], nU[k]*
sizeof(
int));
176 nU[k] =
card_or(U[k-1], nU[k-1], USR[k], nUSR[k]);
177 U[k] = (
int *) malloc(nU[k]*
sizeof(
int *));
178 set_or(U[k-1], nU[k-1], USR[k], nUSR[k], U[k]);
181 *com_count=nU[steps-1];
182 *com_indices = (
int *) malloc(*com_count *
sizeof(
int));
183 memcpy(*com_indices, U[steps-1], *com_count *
sizeof(
int));
186 for(k=0; k<steps; k++){
187 subset2map(*com_indices, *com_count, S[k], nS[k]);
188 subset2map(*com_indices, *com_count, R[k], nR[k]);
209 int butterfly_reduce(
int **R,
int *nR,
int nRmax,
int **S,
int *nS,
int nSmax,
double *val,
int steps, MPI_Comm comm){
213 int rank, size, rk, sk;
214 MPI_Request s_request, r_request;
217 MPI_Comm_size(comm, &size);
218 MPI_Comm_rank(comm, &rank);
220 sbuf = (
double *) malloc(nSmax *
sizeof(
double));
221 rbuf = (
double *) malloc(nRmax *
sizeof(
double));
225 for(k=0; k<steps; k++){
227 rk=(rank+size-p2k)%size;
228 MPI_Irecv(rbuf, nR[k], MPI_DOUBLE, rk, tag, comm, &r_request);
230 m2s(val, sbuf, S[k], nS[k]);
231 MPI_Isend(sbuf, nS[k], MPI_DOUBLE, sk, tag, comm, &s_request);
232 MPI_Wait(&r_request, MPI_STATUS_IGNORE);
233 s2m_sum(val, rbuf, R[k], nR[k]);
236 MPI_Wait(&s_request, MPI_STATUS_IGNORE);