#ifndef ALL_STAGGERED_HEADER_INCLUDED
#define ALL_STAGGERED_HEADER_INCLUDED
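
// reconstructed includes: MPI, the standard-library facilities used below,
// and the library's own headers (file names assumed from the ALL sources)
#include <mpi.h>

#include <algorithm>
#include <iostream>
#include <type_traits>
#include <vector>

#include "ALL_CustomExceptions.hpp" // assumed: exception types used below
#include "ALL_LB.hpp" // assumed: LB<T, W> base class and Point<T>

// staggered-grid load-balancing scheme on a cartesian MPI process grid:
// borders are shifted per z plane, then per y column, then per x cell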
template <class T, class W> class Staggered_LB : public LB<T, W> {
public:
  /// constructor: d = dimension, w = initial local work, g = gamma factor
  Staggered_LB(int d, W w, T g) : LB<T, W>(d, g) {
    this->setWork(w);
    // one sub-communicator per dimension (created in setup())
    communicators.resize(d);
    for (int _d = 0; _d < d; ++_d)
      communicators.at(_d) = MPI_COMM_NULL;
    nNeighbors.resize(2 * d);
  }
  /// default destructor: frees the sub-communicators
  ~Staggered_LB() override;

  /// setup the internal data structures of the balancing method
  void setup() override;

  /// execute one balancing step
  void balance(int step) override;

  /// provide the list of neighbors found in the last balancing step
  virtual std::vector<int> &getNeighbors() override;
private:
  /// MPI data type matching the border data type T
  MPI_Datatype MPIDataTypeT;
  /// MPI data type matching the work data type W
  MPI_Datatype MPIDataTypeW;

  /// sub-communicators used to collect the work of each plane / column / cell
  std::vector<MPI_Comm> communicators;

  /// list of neighboring ranks
  std::vector<int> neighbors;
  /// number of neighbors on each of the 2 * d faces
  std::vector<int> nNeighbors;

  /// determine the neighbors of the local domain (more involved than on a
  /// tensor-product grid, since the borders are staggered)
  void find_neighbors();
};
template <class T, class W> Staggered_LB<T, W>::~Staggered_LB() {
  for (int i = 0; i < (int)communicators.size(); ++i) {
    // communicators.at(0) is MPI_COMM_SELF after setup() and must not be freed
    if (communicators.at(i) != MPI_COMM_NULL &&
        communicators.at(i) != MPI_COMM_SELF)
      MPI_Comm_free(&communicators.at(i));
  }
}
template <class T, class W> void Staggered_LB<T, W>::setup() {
  // the method requires a cartesian communicator
  int status;
  MPI_Topo_test(this->globalComm, &status);
  if (status != MPI_CART) {
    throw InternalErrorException(
        __FILE__, __func__, __LINE__,
        "Cartesian MPI communicator required, passed communicator is not "
        "cartesian");
  }

  // get the global grid dimensions, the periodicity, and the local
  // coordinates from the communicator
  MPI_Cart_get(this->globalComm, 3, this->global_dims.data(),
               this->periodicity.data(), this->local_coords.data());
  // free the sub-communicators of a previous setup before creating new ones
  if (communicators.at(1) != MPI_COMM_NULL)
    MPI_Comm_free(&communicators.at(1));
  if (communicators.at(2) != MPI_COMM_NULL)
    MPI_Comm_free(&communicators.at(2));

  // communicator of the local z plane
  MPI_Comm_split(this->globalComm, this->local_coords.at(2),
                 this->local_coords.at(0) +
                     this->local_coords.at(1) * this->global_dims.at(0),
                 &communicators.at(2));

  // communicator of the local y column within that plane
  MPI_Comm_split(communicators.at(2), this->local_coords.at(1),
                 this->local_coords.at(0), &communicators.at(1));

  // in x direction each process is balanced on its own
  communicators.at(0) = MPI_COMM_SELF;
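
  // this layering is what makes the scheme "staggered": z borders are shared
  // by a whole plane, y borders only by one column of that plane, and x
  // borders are private to each process; summing the work over
  // communicators[i] therefore gives exactly the work competing at the
  // border moved in dimension i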
  // select the MPI data type matching the template parameter T
  if (std::is_same<T, double>::value)
    MPIDataTypeT = MPI_DOUBLE;
  else if (std::is_same<T, float>::value)
    MPIDataTypeT = MPI_FLOAT;
  else if (std::is_same<T, int>::value)
    MPIDataTypeT = MPI_INT;
  else if (std::is_same<T, long>::value)
    MPIDataTypeT = MPI_LONG;
  else
    throw InvalidArgumentException(
        __FILE__, __func__, __LINE__,
        "Invalid data type for boundaries given (T)");
  // select the MPI data type matching the template parameter W
  if (std::is_same<W, double>::value)
    MPIDataTypeW = MPI_DOUBLE;
  else if (std::is_same<W, float>::value)
    MPIDataTypeW = MPI_FLOAT;
  else if (std::is_same<W, int>::value)
    MPIDataTypeW = MPI_INT;
  else if (std::is_same<W, long>::value)
    MPIDataTypeW = MPI_LONG;
  else
    throw InvalidArgumentException(__FILE__, __func__, __LINE__,
                                   "Invalid data type for work given (W)");
  // determine the ranks of the direct face neighbors in each dimension
  int rank_left, rank_right;
  neighbors.clear();
  for (int i = 0; i < 3; ++i) {
    MPI_Cart_shift(this->globalComm, i, 1, &rank_left, &rank_right);
    neighbors.push_back(rank_left);
    neighbors.push_back(rank_right);
  }
}
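
// one balancing step: collect the work of all domains that share the moving
// border, exchange work and extension with the neighboring plane, and shift
// the border toward the more loaded side, damped by the gamma factor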
template <class T, class W> void Staggered_LB<T, W>::balance(int step) {
  // each step relaxes one dimension of the grid: z, then y, then x
  int i = 2 - step % 3;

  std::vector<Point<T>> newVertices = this->vertices;
  this->prevVertices = this->vertices;
  // compute the total work of the local plane
  W work_local_plane;
#ifdef ALL_DEBUG_ENABLED
  MPI_Barrier(this->globalComm);
  if (this->localRank == 0)
    std::cout << "ALL::Staggered_LB::balance(): before work computation..."
              << std::endl;
#endif
  MPI_Allreduce(this->work.data(), &work_local_plane, 1, MPIDataTypeW,
                MPI_SUM, communicators[i]);
#ifdef ALL_DEBUG_ENABLED
  MPI_Barrier(this->globalComm);
  if (this->localRank == 0)
    std::cout << "ALL::Staggered_LB::balance(): before work distribution..."
              << std::endl;
#endif
  // determine the neighboring planes in the current dimension
  W remote_work;
  T local_size, remote_size;
  int rank_left, rank_right;
  MPI_Cart_shift(this->globalComm, i, 1, &rank_left, &rank_right);
  // collect the work of the right neighbor plane
  MPI_Request sreq, rreq;
  MPI_Status sstat, rstat;

  MPI_Irecv(&remote_work, 1, MPIDataTypeW, rank_right, 0, this->globalComm,
            &rreq);
  MPI_Isend(&work_local_plane, 1, MPIDataTypeW, rank_left, 0,
            this->globalComm, &sreq);
  MPI_Wait(&sreq, &sstat);
  MPI_Wait(&rreq, &rstat);
  // collect the extension (in dimension i) of the right neighbor plane
  local_size = this->prevVertices.at(1)[i] - this->prevVertices.at(0)[i];
  MPI_Irecv(&remote_size, 1, MPIDataTypeT, rank_right, 0, this->globalComm,
            &rreq);
  MPI_Isend(&local_size, 1, MPIDataTypeT, rank_left, 0, this->globalComm,
            &sreq);
  MPI_Wait(&sreq, &sstat);
  MPI_Wait(&rreq, &rstat);
  // automatic computation of the gamma damping factor (if requested)
  if (this->gammaAutoCalc)
    this->gamma =
        std::max(4.1, 2.0 * (1.0 + std::max(local_size, remote_size) /
                                       std::min(local_size, remote_size)));
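
  // the floor of 4.1 and the size-ratio term are a stability heuristic:
  // gamma damps the border shift, so the more the two plane extensions
  // differ, the more gently the shared border is relaxed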
#ifdef ALL_DEBUG_ENABLED
  MPI_Barrier(this->globalComm);
  if (this->localRank == 0)
    std::cout << "ALL::Staggered_LB::balance(): before shift calculation..."
              << std::endl;
#endif
  // compute the shift of the right (upper) domain border
  T shift = (T)0;
  if (rank_right != MPI_PROC_NULL &&
      this->local_coords[i] != this->global_dims[i] - 1)
    shift = borderShift1d(
        rank_right, this->local_coords.at(i), this->global_dims.at(i),
        work_local_plane, remote_work, local_size, remote_size, this->gamma,
        this->minSize.at(i)); // minSize: assumed member holding the minimal
                              // allowed domain extension per dimension
#ifdef ALL_DEBUG_ENABLED
  MPI_Barrier(this->globalComm);
  if (this->localRank == 0)
    std::cout << "ALL::Staggered_LB::balance(): before shift distribution..."
              << std::endl;
#endif
  // receive from the left neighbor the shift of the shared (lower) border
  T remote_shift = (T)0;
  MPI_Irecv(&remote_shift, 1, MPIDataTypeT, rank_left, 0, this->globalComm,
            &rreq);
  MPI_Isend(&shift, 1, MPIDataTypeT, rank_right, 0, this->globalComm, &sreq);
  MPI_Wait(&sreq, &sstat);
  MPI_Wait(&rreq, &rstat);
#ifdef ALL_DEBUG_ENABLED
  MPI_Barrier(this->globalComm);
  std::cout << this->localRank << ": shift = " << shift
            << " remoteShift = " << remote_shift
            << " vertices: " << this->vertices.at(0)[i] << ", "
            << this->vertices.at(1)[i] << std::endl;
#endif
  // shift the lower border, unless the domain sits at the lower end of a
  // non-periodic system
  if (rank_left != MPI_PROC_NULL && this->local_coords[i] != 0)
    newVertices.at(0)[i] = this->prevVertices.at(0)[i] + remote_shift;

  // shift the upper border, unless the domain sits at the upper end of a
  // non-periodic system
  if (rank_right != MPI_PROC_NULL &&
      this->local_coords[i] != this->global_dims[i] - 1)
    newVertices.at(1)[i] = this->prevVertices.at(1)[i] + shift;
  // consistency check: the borders must not cross
  if (newVertices.at(1)[i] < newVertices.at(0)[i]) {
    std::cout << "ERROR on process: " << this->localRank << std::endl;
    throw InternalErrorException(
        __FILE__, __func__, __LINE__,
        "Lower border of process larger than upper border of process!");
  }

  this->vertices = newVertices;
#ifdef ALL_DEBUG_ENABLED
  MPI_Barrier(this->globalComm);
  if (this->localRank == 0)
    std::cout << "ALL::Staggered_LB::balance(): before neighbor search..."
              << std::endl;
#endif
  // update the neighbor lists for the new borders
  find_neighbors();
}
template <class T, class W> void Staggered_LB<T, W>::find_neighbors() {
  auto work = this->getWork();
  auto vertices = this->prevVertices;
  auto shifted_vertices = this->vertices;
  neighbors.clear();
  MPI_Request sreq, rreq;
  MPI_Status sstat, rstat;

  // buffers for the local borders and for the borders of a whole remote
  // plane (four values per domain, two planes)
  T *vertices_loc = new T[4];
  T *vertices_rem = new T[8 * this->global_dims[0] * this->global_dims[1]];
  // rank and coordinates of the remote domains
  int rem_rank;
  int rem_coords[3];

  int rank_left, rank_right;

  // coordinates of the neighboring column / plane the borders come from
  int rank_offset;
  int offset_coords[3];

  // in x direction there is always exactly one neighbor per side
  nNeighbors.at(0) = nNeighbors.at(1) = 1;
  MPI_Cart_shift(this->globalComm, 0, 1, &rank_left, &rank_right);

  neighbors.push_back(rank_left);
  neighbors.push_back(rank_right);
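
  // neighbors holds two fixed entries for the x faces, followed by a
  // variable number of ranks for the y- and y+ faces and then the z- and
  // z+ faces; nNeighbors.at(f) records how many entries belong to face f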
  // y direction: gather the x borders of all domains of the local y column
  // and exchange them with the neighboring columns
  MPI_Cart_shift(this->globalComm, 1, 1, &rank_left, &rank_right);

  vertices_loc[0] = shifted_vertices.at(0)[0];
  vertices_loc[1] = shifted_vertices.at(1)[0];
  MPI_Allgather(vertices_loc, 2, MPIDataTypeT,
                vertices_rem + 2 * this->global_dims[0], 2, MPIDataTypeT,
                communicators[1]);

  // exchange the column borders with the lower y neighbor
  MPI_Irecv(vertices_rem, 2 * this->global_dims[0], MPIDataTypeT, rank_left, 0,
            this->globalComm, &rreq);
  MPI_Isend(vertices_rem + 2 * this->global_dims[0], 2 * this->global_dims[0],
            MPIDataTypeT, rank_right, 0, this->globalComm, &sreq);
  offset_coords[0] = 0;
  offset_coords[1] = this->local_coords[1] - 1;
  offset_coords[2] = this->local_coords[2];

  rem_coords[1] = offset_coords[1];
  rem_coords[2] = offset_coords[2];

  MPI_Cart_rank(this->globalComm, offset_coords, &rank_offset);

  MPI_Wait(&sreq, &sstat);
  MPI_Wait(&rreq, &rstat);
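
  // two x intervals overlap if the remote one contains the local lower
  // border, contains the local upper border, or lies completely inside the
  // local interval; the mixed <=/< comparisons keep domains that merely
  // touch at a border from being counted as neighbors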
  nNeighbors.at(2) = 0;
  for (int x = 0; x < this->global_dims[0]; ++x) {
    if ((vertices_rem[2 * x] <= vertices_loc[0] &&
         vertices_loc[0] < vertices_rem[2 * x + 1]) ||
        (vertices_rem[2 * x] < vertices_loc[1] &&
         vertices_loc[1] <= vertices_rem[2 * x + 1]) ||
        (vertices_rem[2 * x] >= vertices_loc[0] &&
         vertices_loc[0] < vertices_rem[2 * x + 1] &&
         vertices_loc[1] >= vertices_rem[2 * x + 1])) {
      nNeighbors.at(2)++;
      rem_coords[0] = x;
      MPI_Cart_rank(this->globalComm, rem_coords, &rem_rank);
      neighbors.push_back(rem_rank);
    }
  }
  // ensure all processes have finished reading the remote borders before
  // the buffer is reused for the next direction
  MPI_Barrier(this->globalComm);
  // exchange the column borders with the upper y neighbor
  MPI_Irecv(vertices_rem, 2 * this->global_dims[0], MPIDataTypeT, rank_right,
            0, this->globalComm, &rreq);
  MPI_Isend(vertices_rem + 2 * this->global_dims[0], 2 * this->global_dims[0],
            MPIDataTypeT, rank_left, 0, this->globalComm, &sreq);

  offset_coords[0] = 0;
  offset_coords[1] = this->local_coords[1] + 1;
  offset_coords[2] = this->local_coords[2];

  rem_coords[1] = offset_coords[1];
  rem_coords[2] = offset_coords[2];

  MPI_Cart_rank(this->globalComm, offset_coords, &rank_offset);

  MPI_Wait(&sreq, &sstat);
  MPI_Wait(&rreq, &rstat);
  nNeighbors.at(3) = 0;
  for (int x = 0; x < this->global_dims[0]; ++x) {
    if ((vertices_rem[2 * x] <= vertices_loc[0] &&
         vertices_loc[0] < vertices_rem[2 * x + 1]) ||
        (vertices_rem[2 * x] < vertices_loc[1] &&
         vertices_loc[1] <= vertices_rem[2 * x + 1]) ||
        (vertices_rem[2 * x] >= vertices_loc[0] &&
         vertices_loc[0] < vertices_rem[2 * x + 1] &&
         vertices_loc[1] >= vertices_rem[2 * x + 1])) {
      nNeighbors.at(3)++;
      rem_coords[0] = x;
      MPI_Cart_rank(this->globalComm, rem_coords, &rem_rank);
      neighbors.push_back(rem_rank);
    }
  }
  MPI_Barrier(this->globalComm);
  // z direction: gather the x and y borders of all domains of the local
  // z plane and exchange them with the neighboring planes
  MPI_Cart_shift(this->globalComm, 2, 1, &rank_left, &rank_right);

  vertices_loc[0] = shifted_vertices.at(0)[0];
  vertices_loc[1] = shifted_vertices.at(1)[0];
  vertices_loc[2] = shifted_vertices.at(0)[1];
  vertices_loc[3] = shifted_vertices.at(1)[1];

  MPI_Barrier(this->globalComm);

  MPI_Allgather(vertices_loc, 4, MPIDataTypeT,
                vertices_rem + 4 * this->global_dims[0] * this->global_dims[1],
                4, MPIDataTypeT, communicators[2]);
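
  // buffer layout for the z direction: four values per domain (lower and
  // upper x border, lower and upper y border); the borders received from
  // the neighboring plane start at offset 0, the gathered borders of the
  // local plane at offset 4 * global_dims[0] * global_dims[1]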
  MPI_Irecv(vertices_rem, 4 * this->global_dims[0] * this->global_dims[1],
            MPIDataTypeT, rank_left, 0, this->globalComm, &rreq);
  MPI_Isend(vertices_rem + 4 * this->global_dims[0] * this->global_dims[1],
            4 * this->global_dims[0] * this->global_dims[1], MPIDataTypeT,
            rank_right, 0, this->globalComm, &sreq);

  offset_coords[0] = 0;
  offset_coords[1] = 0;
  offset_coords[2] = this->local_coords[2] - 1;

  rem_coords[2] = offset_coords[2];

  MPI_Cart_rank(this->globalComm, offset_coords, &rank_offset);

  MPI_Wait(&sreq, &sstat);
  MPI_Wait(&rreq, &rstat);
  nNeighbors.at(4) = 0;
  for (int y = 0; y < this->global_dims[1]; ++y) {
    for (int x = 0; x < this->global_dims[0]; ++x) {
      // overlap of the y intervals ...
      if (((vertices_rem[4 * (x + y * this->global_dims[0]) + 2] <=
                vertices_loc[2] &&
            vertices_loc[2] <
                vertices_rem[4 * (x + y * this->global_dims[0]) + 3]) ||
           (vertices_rem[4 * (x + y * this->global_dims[0]) + 2] <
                vertices_loc[3] &&
            vertices_loc[3] <=
                vertices_rem[4 * (x + y * this->global_dims[0]) + 3]) ||
           (vertices_rem[4 * (x + y * this->global_dims[0]) + 2] >=
                vertices_loc[2] &&
            vertices_loc[3] >=
                vertices_rem[4 * (x + y * this->global_dims[0]) + 3] &&
            vertices_loc[2] <
                vertices_rem[4 * (x + y * this->global_dims[0]) + 3])))
        // ... and of the x intervals
        if (((vertices_rem[4 * (x + y * this->global_dims[0])] <=
                  vertices_loc[0] &&
              vertices_loc[0] <
                  vertices_rem[4 * (x + y * this->global_dims[0]) + 1]) ||
             (vertices_rem[4 * (x + y * this->global_dims[0])] <
                  vertices_loc[1] &&
              vertices_loc[1] <=
                  vertices_rem[4 * (x + y * this->global_dims[0]) + 1]) ||
             (vertices_rem[4 * (x + y * this->global_dims[0])] >=
                  vertices_loc[0] &&
              vertices_loc[1] >=
                  vertices_rem[4 * (x + y * this->global_dims[0]) + 1] &&
              vertices_loc[0] <
                  vertices_rem[4 * (x + y * this->global_dims[0]) + 1]))) {
          nNeighbors.at(4)++;
          rem_coords[0] = x;
          rem_coords[1] = y;
          MPI_Cart_rank(this->globalComm, rem_coords, &rem_rank);
          neighbors.push_back(rem_rank);
        }
    }
  }
  MPI_Barrier(this->globalComm);
  // exchange the plane borders with the upper z neighbor
  MPI_Irecv(vertices_rem, 4 * this->global_dims[0] * this->global_dims[1],
            MPIDataTypeT, rank_right, 0, this->globalComm, &rreq);
  MPI_Isend(vertices_rem + 4 * this->global_dims[0] * this->global_dims[1],
            4 * this->global_dims[0] * this->global_dims[1], MPIDataTypeT,
            rank_left, 0, this->globalComm, &sreq);

  offset_coords[0] = 0;
  offset_coords[1] = 0;
  offset_coords[2] = this->local_coords[2] + 1;

  rem_coords[2] = offset_coords[2];

  MPI_Cart_rank(this->globalComm, offset_coords, &rank_offset);

  MPI_Wait(&sreq, &sstat);
  MPI_Wait(&rreq, &rstat);
  nNeighbors.at(5) = 0;
  for (int y = 0; y < this->global_dims[1]; ++y) {
    for (int x = 0; x < this->global_dims[0]; ++x) {
      // overlap of the y intervals ...
      if (((vertices_rem[4 * (x + y * this->global_dims[0]) + 2] <=
                vertices_loc[2] &&
            vertices_loc[2] <
                vertices_rem[4 * (x + y * this->global_dims[0]) + 3]) ||
           (vertices_rem[4 * (x + y * this->global_dims[0]) + 2] <
                vertices_loc[3] &&
            vertices_loc[3] <=
                vertices_rem[4 * (x + y * this->global_dims[0]) + 3]) ||
           (vertices_rem[4 * (x + y * this->global_dims[0]) + 2] >=
                vertices_loc[2] &&
            vertices_loc[3] >=
                vertices_rem[4 * (x + y * this->global_dims[0]) + 3] &&
            vertices_loc[2] <
                vertices_rem[4 * (x + y * this->global_dims[0]) + 3])))
        // ... and of the x intervals
        if (((vertices_rem[4 * (x + y * this->global_dims[0])] <=
                  vertices_loc[0] &&
              vertices_loc[0] <
                  vertices_rem[4 * (x + y * this->global_dims[0]) + 1]) ||
             (vertices_rem[4 * (x + y * this->global_dims[0])] <
                  vertices_loc[1] &&
              vertices_loc[1] <=
                  vertices_rem[4 * (x + y * this->global_dims[0]) + 1]) ||
             (vertices_rem[4 * (x + y * this->global_dims[0])] >=
                  vertices_loc[0] &&
              vertices_loc[1] >=
                  vertices_rem[4 * (x + y * this->global_dims[0]) + 1] &&
              vertices_loc[0] <
                  vertices_rem[4 * (x + y * this->global_dims[0]) + 1]))) {
          nNeighbors.at(5)++;
          rem_coords[0] = x;
          rem_coords[1] = y;
          MPI_Cart_rank(this->globalComm, rem_coords, &rem_rank);
          neighbors.push_back(rem_rank);
        }
    }
  }
  MPI_Barrier(this->globalComm);
  // clean up the gather buffers
  delete[] vertices_loc;
  delete[] vertices_rem;
}
// provide the list of neighbors determined by the last call to
// find_neighbors()
template <class T, class W>
std::vector<int> &Staggered_LB<T, W>::getNeighbors() {
  return neighbors;
}
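
/*
  Minimal usage sketch (hypothetical driver code, not part of the library;
  setCommunicator(), setVertices(), and setWork() are assumed from the
  LB<T, W> base class, measureLocalWork() is a user-supplied function):

    MPI_Comm cart;
    int dims[3] = {0, 0, 0}, periods[3] = {1, 1, 1};
    MPI_Dims_create(nRanks, 3, dims);
    MPI_Cart_create(MPI_COMM_WORLD, 3, dims, periods, 0, &cart);

    Staggered_LB<double, double> lb(3, initialWork, 4.1);
    lb.setCommunicator(cart);
    lb.setVertices(localDomainCorners); // lower and upper corner
    lb.setup();

    for (int step = 0; step < nSteps; ++step) {
      lb.setWork(measureLocalWork());
      lb.balance(step);
      auto &nb = lb.getNeighbors();
      // ... migrate data according to the new borders and neighbors ...
    }
*/

#endif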