diff --git a/src/C-interface/CMakeLists.txt b/src/C-interface/CMakeLists.txt index 1af4f9b3..394f5644 100644 --- a/src/C-interface/CMakeLists.txt +++ b/src/C-interface/CMakeLists.txt @@ -11,7 +11,6 @@ set(HEADERS-C bml_convert.h bml_copy.h bml_diagonalize.h - bml_domain.h bml_elemental.h bml_export.h bml_getters.h @@ -49,7 +48,6 @@ set(SOURCES-C bml_convert.c bml_copy.c bml_diagonalize.c - bml_domain.c bml_elemental.c bml_export.c bml_getters.c diff --git a/src/C-interface/bml_allocate.c b/src/C-interface/bml_allocate.c index bf9bbff2..98921c18 100644 --- a/src/C-interface/bml_allocate.c +++ b/src/C-interface/bml_allocate.c @@ -199,6 +199,24 @@ bml_deallocate( } } +/** Deallocate a domain. + * + * \ingroup allocate_group_C + * + * \param D[in,out] The domain. + */ +void +bml_deallocate_domain( + bml_domain_t * D) +{ + bml_free_memory(D->localRowMin); + bml_free_memory(D->localRowMax); + bml_free_memory(D->localRowExtent); + bml_free_memory(D->localDispl); + bml_free_memory(D->localElements); + bml_free_memory(D); +} + /** Clear a matrix. * * \ingroup allocate_group_C @@ -569,6 +587,125 @@ bml_identity_matrix( return NULL; } +/** Allocate a default domain for a bml matrix. + * + * \ingroup allocate_group_C + * + * \param N The number of rows + * \param M The number of columns + * \param distrib_mode The distribution mode + * \return The domain + */ +bml_domain_t * +bml_default_domain( + int N, + int M, + bml_distribution_mode_t distrib_mode) +{ + int avgExtent, nleft; + int nRanks = bml_getNRanks(); + + bml_domain_t *domain = bml_allocate_memory(sizeof(bml_domain_t)); + + domain->localRowMin = bml_allocate_memory(nRanks * sizeof(int)); + domain->localRowMax = bml_allocate_memory(nRanks * sizeof(int)); + domain->localRowExtent = bml_allocate_memory(nRanks * sizeof(int)); + domain->localDispl = bml_allocate_memory(nRanks * sizeof(int)); + domain->localElements = bml_allocate_memory(nRanks * sizeof(int)); + + domain->totalProcs = nRanks; + domain->totalRows = N; + domain->totalCols = M; + + domain->globalRowMin = 0; + domain->globalRowMax = domain->totalRows; + domain->globalRowExtent = domain->globalRowMax - domain->globalRowMin; + + switch (distrib_mode) + { + case sequential: + { + // Default - each rank contains entire matrix, even when running distributed + for (int i = 0; i < nRanks; i++) + { + domain->localRowMin[i] = domain->globalRowMin; + domain->localRowMax[i] = domain->globalRowMax; + domain->localRowExtent[i] = + domain->localRowMax[i] - domain->localRowMin[i]; + domain->localElements[i] = + domain->localRowExtent[i] * domain->totalCols; + domain->localDispl[i] = 0; + } + + } + break; + + case distributed: + { + // For completely distributed + avgExtent = N / nRanks; + domain->maxLocalExtent = ceil((float) N / (float) nRanks); + domain->minLocalExtent = avgExtent; + + for (int i = 0; i < nRanks; i++) + { + domain->localRowExtent[i] = avgExtent; + } + nleft = N - nRanks * avgExtent; + if (nleft > 0) + { + for (int i = 0; i < nleft; i++) + { + domain->localRowExtent[i]++; + } + } + + /** For first rank */ + domain->localRowMin[0] = domain->globalRowMin; + domain->localRowMax[0] = domain->localRowExtent[0]; + + /** For middle ranks */ + for (int i = 1; i < (nRanks - 1); i++) + { + domain->localRowMin[i] = domain->localRowMax[i - 1]; + domain->localRowMax[i] = + domain->localRowMin[i] + domain->localRowExtent[i]; + } + + /** For last rank */ + if (nRanks > 1) + { + int last = nRanks - 1; + domain->localRowMin[last] = domain->localRowMax[last - 1]; + domain->localRowMax[last] = + domain->localRowMin[last] + domain->localRowExtent[last]; + } + + /** Number of elements and displacement per rank */ + for (int i = 0; i < nRanks; i++) + { + domain->localElements[i] = + domain->localRowExtent[i] * domain->totalCols; + domain->localDispl[i] = + (i == + 0) ? 0 : domain->localDispl[i - 1] + + domain->localElements[i - 1]; + } + } + break; + + case graph_distributed: + LOG_ERROR("graph_distibuted not available\n"); + break; + + default: + LOG_ERROR("unknown distribution method\n"); + break; + } + + return domain; +} + /** Update a domain for a bml matrix. * * \ingroup allocate_group_C @@ -579,7 +716,7 @@ bml_identity_matrix( * \param nnodesInPart Number of nodes in each part */ void -bml_update_domain_matrix( +bml_update_domain( bml_matrix_t * A, int *localPartMin, int *localPartMax, diff --git a/src/C-interface/bml_allocate.h b/src/C-interface/bml_allocate.h index bec470ff..e9191ad5 100644 --- a/src/C-interface/bml_allocate.h +++ b/src/C-interface/bml_allocate.h @@ -29,6 +29,9 @@ void bml_free_ptr( void bml_deallocate( bml_matrix_t ** A); +void bml_deallocate_domain( + bml_domain_t * D); + void bml_clear( bml_matrix_t * A); @@ -73,7 +76,12 @@ bml_matrix_t *bml_identity_matrix( int M, bml_distribution_mode_t distrib_mode); -void bml_update_domain_matrix( +bml_domain_t *bml_default_domain( + int N, + int M, + bml_distribution_mode_t distrib_mode); + +void bml_update_domain( bml_matrix_t * A, int *localPartMin, int *localPartMax, diff --git a/src/C-interface/bml_copy.c b/src/C-interface/bml_copy.c index afa986b0..d24ed310 100644 --- a/src/C-interface/bml_copy.c +++ b/src/C-interface/bml_copy.c @@ -142,6 +142,26 @@ bml_reorder( } } +/** Copy a domain. + * + * \param A Domain to copy + * \param B Copy of Domain A + */ +void +bml_copy_domain( + bml_domain_t * A, + bml_domain_t * B) +{ + int nRanks = bml_getNRanks(); + + memcpy(B->localRowMin, A->localRowMin, nRanks * sizeof(int)); + memcpy(B->localRowMax, A->localRowMax, nRanks * sizeof(int)); + memcpy(B->localRowExtent, A->localRowExtent, nRanks * sizeof(int)); + memcpy(B->localDispl, A->localDispl, nRanks * sizeof(int)); + memcpy(B->localElements, A->localElements, nRanks * sizeof(int)); +} + + /** Save current domain for bml matrix. * * \param A Matrix with domain diff --git a/src/C-interface/bml_copy.h b/src/C-interface/bml_copy.h index 15ad1751..cb8a9739 100644 --- a/src/C-interface/bml_copy.h +++ b/src/C-interface/bml_copy.h @@ -16,6 +16,10 @@ void bml_reorder( bml_matrix_t * A, int *perm); +void bml_copy_domain( + bml_domain_t * A, + bml_domain_t * B); + void bml_save_domain( bml_matrix_t * A); diff --git a/src/C-interface/bml_domain.c b/src/C-interface/bml_domain.c deleted file mode 100644 index 2a87a10c..00000000 --- a/src/C-interface/bml_domain.c +++ /dev/null @@ -1,212 +0,0 @@ -#include "bml_types.h" -#include "bml_logger.h" -#include "bml_allocate.h" -#include "bml_parallel.h" - -#include -#include - -/** Allocate a default domain for a bml matrix. - * - * \ingroup allocate_group_C - * - * \param N The number of rows - * \param M The number of columns - * \param distrib_mode The distribution mode - * \return The domain - */ -bml_domain_t * -bml_default_domain( - int N, - int M, - bml_distribution_mode_t distrib_mode) -{ - int avgExtent, nleft; - int nRanks = bml_getNRanks(); - - bml_domain_t *domain = bml_allocate_memory(sizeof(bml_domain_t)); - - domain->localRowMin = bml_allocate_memory(nRanks * sizeof(int)); - domain->localRowMax = bml_allocate_memory(nRanks * sizeof(int)); - domain->localRowExtent = bml_allocate_memory(nRanks * sizeof(int)); - domain->localDispl = bml_allocate_memory(nRanks * sizeof(int)); - domain->localElements = bml_allocate_memory(nRanks * sizeof(int)); - - domain->totalProcs = nRanks; - domain->totalRows = N; - domain->totalCols = M; - - domain->globalRowMin = 0; - domain->globalRowMax = domain->totalRows; - domain->globalRowExtent = domain->globalRowMax - domain->globalRowMin; - - switch (distrib_mode) - { - case sequential: - { - // Default - each rank contains entire matrix, even when running distributed - for (int i = 0; i < nRanks; i++) - { - domain->localRowMin[i] = domain->globalRowMin; - domain->localRowMax[i] = domain->globalRowMax; - domain->localRowExtent[i] = - domain->localRowMax[i] - domain->localRowMin[i]; - domain->localElements[i] = - domain->localRowExtent[i] * domain->totalCols; - domain->localDispl[i] = 0; - } - - } - break; - - case distributed: - { - // For completely distributed - avgExtent = N / nRanks; - domain->maxLocalExtent = ceil((float) N / (float) nRanks); - domain->minLocalExtent = avgExtent; - - for (int i = 0; i < nRanks; i++) - { - domain->localRowExtent[i] = avgExtent; - } - nleft = N - nRanks * avgExtent; - if (nleft > 0) - { - for (int i = 0; i < nleft; i++) - { - domain->localRowExtent[i]++; - } - } - - /** For first rank */ - domain->localRowMin[0] = domain->globalRowMin; - domain->localRowMax[0] = domain->localRowExtent[0]; - - /** For middle ranks */ - for (int i = 1; i < (nRanks - 1); i++) - { - domain->localRowMin[i] = domain->localRowMax[i - 1]; - domain->localRowMax[i] = - domain->localRowMin[i] + domain->localRowExtent[i]; - } - - /** For last rank */ - if (nRanks > 1) - { - int last = nRanks - 1; - domain->localRowMin[last] = domain->localRowMax[last - 1]; - domain->localRowMax[last] = - domain->localRowMin[last] + domain->localRowExtent[last]; - } - - /** Number of elements and displacement per rank */ - for (int i = 0; i < nRanks; i++) - { - domain->localElements[i] = - domain->localRowExtent[i] * domain->totalCols; - domain->localDispl[i] = - (i == - 0) ? 0 : domain->localDispl[i - 1] + - domain->localElements[i - 1]; - } - } - break; - - - default: - LOG_ERROR("unknown distribution method\n"); - break; - } - - return domain; -} - -/** Deallocate a domain. - * - * \ingroup allocate_group_C - * - * \param D[in,out] The domain. - */ -void -bml_deallocate_domain( - bml_domain_t * D) -{ - bml_free_memory(D->localRowMin); - bml_free_memory(D->localRowMax); - bml_free_memory(D->localRowExtent); - bml_free_memory(D->localDispl); - bml_free_memory(D->localElements); - bml_free_memory(D); -} - -/** Copy a domain. - * - * \param A Domain to copy - * \param B Copy of Domain A - */ -void -bml_copy_domain( - bml_domain_t * A, - bml_domain_t * B) -{ - int nRanks = bml_getNRanks(); - - memcpy(B->localRowMin, A->localRowMin, nRanks * sizeof(int)); - memcpy(B->localRowMax, A->localRowMax, nRanks * sizeof(int)); - memcpy(B->localRowExtent, A->localRowExtent, nRanks * sizeof(int)); - memcpy(B->localDispl, A->localDispl, nRanks * sizeof(int)); - memcpy(B->localElements, A->localElements, nRanks * sizeof(int)); -} - -void -bml_update_domain( - bml_domain_t * A_domain, - int *localPartMin, - int *localPartMax, - int *nnodesInPart) -{ - int myRank = bml_getMyRank(); - int nprocs = bml_getNRanks(); - - for (int i = 0; i < nprocs; i++) - { - int rtotal = 0; - for (int j = localPartMin[i] - 1; j <= localPartMax[i] - 1; j++) - { - rtotal += nnodesInPart[j]; -/* - if (bml_printRank() == 1) - printf("rank %d localPart %d %d part %d nnodesPerPart %d rtotal %d\n", - i, localPartMin[i], localPartMax[i], j, nnodesInPart[j-1], rtotal); -*/ - } - - if (i == 0) - A_domain->localRowMin[0] = A_domain->globalRowMin; - else - A_domain->localRowMin[i] = A_domain->localRowMax[i - 1]; - - A_domain->localRowMax[i] = A_domain->localRowMin[i] + rtotal; - A_domain->localRowExtent[i] = - A_domain->localRowMax[i] - A_domain->localRowMin[i]; - A_domain->localElements[i] = - A_domain->localRowExtent[i] * A_domain->totalCols; - - if (i == 0) - A_domain->localDispl[0] = 0; - else - A_domain->localDispl[i] = - A_domain->localDispl[i - 1] + A_domain->localElements[i - 1]; - } - - A_domain->minLocalExtent = A_domain->localRowExtent[0]; - A_domain->maxLocalExtent = A_domain->localRowExtent[0]; - for (int i = 1; i < nprocs; i++) - { - if (A_domain->localRowExtent[i] < A_domain->minLocalExtent) - A_domain->minLocalExtent = A_domain->localRowExtent[i]; - if (A_domain->localRowExtent[i] > A_domain->maxLocalExtent) - A_domain->maxLocalExtent = A_domain->localRowExtent[i]; - } -} diff --git a/src/C-interface/bml_domain.h b/src/C-interface/bml_domain.h deleted file mode 100644 index 5cc8a864..00000000 --- a/src/C-interface/bml_domain.h +++ /dev/null @@ -1,24 +0,0 @@ -#ifndef __BML_DOMAIN_H -#define __BML_DOMAIN_H - -#include "bml_types.h" - -bml_domain_t *bml_default_domain( - int N, - int M, - bml_distribution_mode_t distrib_mode); - -void bml_deallocate_domain( - bml_domain_t * D); - -void bml_copy_domain( - bml_domain_t * A, - bml_domain_t * B); - -void bml_update_domain( - bml_domain_t * A_domain, - int *localPartMin, - int *localPartMax, - int *nnodesInPart); - -#endif diff --git a/src/C-interface/csr/bml_allocate_csr.c b/src/C-interface/csr/bml_allocate_csr.c index d1037fa4..810cfa89 100644 --- a/src/C-interface/csr/bml_allocate_csr.c +++ b/src/C-interface/csr/bml_allocate_csr.c @@ -195,6 +195,7 @@ bml_deallocate_csr( bml_free_memory(A->data_); // bml_free_memory(A->lvarsgid_); // bml_deallocate_domain(A->domain); +// bml_deallocate_domain(A->domain2); bml_free_memory(A); } diff --git a/src/C-interface/csr/bml_allocate_csr_typed.c b/src/C-interface/csr/bml_allocate_csr_typed.c index fb6c8c79..de160320 100644 --- a/src/C-interface/csr/bml_allocate_csr_typed.c +++ b/src/C-interface/csr/bml_allocate_csr_typed.c @@ -129,6 +129,7 @@ bml_matrix_csr_t *TYPED_FUNC( /** end allocate hash table **/ /* A->domain = bml_default_domain(A->N_, A->NZMAX_, distrib_mode); + A->domain2 = bml_default_domain(A->N_, A->NZMAX_, distrib_mode); */ return A; } @@ -209,6 +210,7 @@ bml_matrix_csr_t *TYPED_FUNC( } /* A->domain = bml_default_domain(N, M, distrib_mode); + A->domain2 = bml_default_domain(N, M, distrib_mode); */ return A; } diff --git a/src/C-interface/csr/bml_copy_csr.c b/src/C-interface/csr/bml_copy_csr.c index 766b6002..478ac65f 100644 --- a/src/C-interface/csr/bml_copy_csr.c +++ b/src/C-interface/csr/bml_copy_csr.c @@ -2,7 +2,6 @@ #include "../bml_logger.h" #include "../bml_parallel.h" #include "../bml_types.h" -#include "../bml_domain.h" #include "bml_types_csr.h" #include "bml_copy_csr.h" @@ -126,7 +125,7 @@ void bml_save_domain_csr( bml_matrix_csr_t * A) { - LOG_ERROR("bml_save_domain_csr not implemented"); + bml_copy_domain(A->domain, A->domain2); } /** Restore the domain for a csr matrix. @@ -139,5 +138,5 @@ void bml_restore_domain_csr( bml_matrix_csr_t * A) { - LOG_ERROR("bml_restore_domain_csr not implemented"); + bml_copy_domain(A->domain2, A->domain); } diff --git a/src/C-interface/csr/bml_copy_csr_typed.c b/src/C-interface/csr/bml_copy_csr_typed.c index 6154bb94..cc08a446 100644 --- a/src/C-interface/csr/bml_copy_csr_typed.c +++ b/src/C-interface/csr/bml_copy_csr_typed.c @@ -71,6 +71,7 @@ bml_matrix_csr_t *TYPED_FUNC( // copy domain info // bml_copy_domain(A->domain, B->domain); +// bml_copy_domain(A->domain2, B->domain2); return B; } @@ -131,6 +132,7 @@ void TYPED_FUNC( if (A->distribution_mode == B->distribution_mode) { bml_copy_domain(A->domain, B->domain); + bml_copy_domain(A->domain2, B->domain2); } */ } diff --git a/src/C-interface/csr/bml_types_csr.h b/src/C-interface/csr/bml_types_csr.h index bb8395d6..d6006813 100644 --- a/src/C-interface/csr/bml_types_csr.h +++ b/src/C-interface/csr/bml_types_csr.h @@ -81,6 +81,8 @@ struct bml_matrix_csr_t /** The domain decomposition when running in parallel. */ bml_domain_t *domain; + /** A copy of the domain decomposition. */ + bml_domain_t *domain2; #ifdef BML_USE_MPI /** Buffer for communications */ void *buffer; diff --git a/src/C-interface/dense/bml_allocate_dense.c b/src/C-interface/dense/bml_allocate_dense.c index 6a8e0b0f..bd4a5320 100644 --- a/src/C-interface/dense/bml_allocate_dense.c +++ b/src/C-interface/dense/bml_allocate_dense.c @@ -2,7 +2,6 @@ #include "../bml_logger.h" #include "../bml_parallel.h" #include "../bml_types.h" -#include "../bml_domain.h" #include "bml_allocate_dense.h" #include "bml_types_dense.h" @@ -339,5 +338,41 @@ bml_update_domain_dense( { bml_domain_t *A_domain = A->domain; - bml_update_domain(A_domain, localPartMin, localPartMax, nnodesInPart); + int nprocs = bml_getNRanks(); + + for (int i = 0; i < nprocs; i++) + { + int rtotal = 0; + for (int j = localPartMin[i]; j <= localPartMax[i]; j++) + { + rtotal += nnodesInPart[j - 1]; + } + + if (i == 0) + A_domain->localRowMin[0] = A_domain->globalRowMin; + else + A_domain->localRowMin[i] = A_domain->localRowMax[i - 1]; + + A_domain->localRowMax[i] = A_domain->localRowMin[i] + rtotal; + A_domain->localRowExtent[i] = + A_domain->localRowMax[i] - A_domain->localRowMin[i]; + A_domain->localElements[i] = + A_domain->localRowExtent[i] * A_domain->totalCols; + + if (i == 0) + A_domain->localDispl[0] = 0; + else + A_domain->localDispl[i] = + A_domain->localDispl[i - 1] + A_domain->localElements[i - 1]; + } + + A_domain->minLocalExtent = A_domain->localRowExtent[0]; + A_domain->maxLocalExtent = A_domain->localRowExtent[0]; + for (int i = 1; i < nprocs; i++) + { + if (A_domain->localRowExtent[i] < A_domain->minLocalExtent) + A_domain->minLocalExtent = A_domain->localRowExtent[i]; + if (A_domain->localRowExtent[i] > A_domain->maxLocalExtent) + A_domain->maxLocalExtent = A_domain->localRowExtent[i]; + } } diff --git a/src/C-interface/dense/bml_allocate_dense_typed.c b/src/C-interface/dense/bml_allocate_dense_typed.c index 1bd9a603..5f0f6f15 100644 --- a/src/C-interface/dense/bml_allocate_dense_typed.c +++ b/src/C-interface/dense/bml_allocate_dense_typed.c @@ -3,7 +3,6 @@ #include "../bml_allocate.h" #include "../bml_logger.h" #include "../bml_types.h" -#include "../bml_domain.h" #include "bml_allocate_dense.h" #include "bml_types_dense.h" #include "bml_utilities_dense.h" @@ -37,6 +36,7 @@ void TYPED_FUNC( bml_matrix_dense_t * A) { bml_deallocate_domain(A->domain); + bml_deallocate_domain(A->domain2); #ifdef BML_USE_MAGMA magma_int_t ret = magma_free(A->matrix); assert(ret == MAGMA_SUCCESS); @@ -125,6 +125,9 @@ bml_matrix_dense_t *TYPED_FUNC( A->domain = bml_default_domain(matrix_dimension.N_rows, matrix_dimension.N_rows, distrib_mode); + A->domain2 = + bml_default_domain(matrix_dimension.N_rows, matrix_dimension.N_rows, + distrib_mode); return A; } @@ -181,6 +184,9 @@ bml_matrix_dense_t *TYPED_FUNC( A->domain = bml_default_domain(matrix_dimension.N_rows, matrix_dimension.N_rows, distrib_mode); + A->domain2 = + bml_default_domain(matrix_dimension.N_rows, matrix_dimension.N_rows, + distrib_mode); return A; } diff --git a/src/C-interface/dense/bml_copy_dense.c b/src/C-interface/dense/bml_copy_dense.c index bfd60647..822e07ce 100644 --- a/src/C-interface/dense/bml_copy_dense.c +++ b/src/C-interface/dense/bml_copy_dense.c @@ -1,6 +1,5 @@ #include "../bml_allocate.h" #include "../bml_copy.h" -#include "../bml_domain.h" #include "../bml_logger.h" #include "../bml_types.h" #include "bml_allocate_dense.h" @@ -127,8 +126,6 @@ void bml_save_domain_dense( bml_matrix_dense_t * A) { - A->domain2 = bml_default_domain(A->N, A->N, A->distribution_mode); - bml_copy_domain(A->domain, A->domain2); } @@ -143,6 +140,4 @@ bml_restore_domain_dense( bml_matrix_dense_t * A) { bml_copy_domain(A->domain2, A->domain); - - bml_deallocate_domain(A->domain2); } diff --git a/src/C-interface/dense/bml_copy_dense_typed.c b/src/C-interface/dense/bml_copy_dense_typed.c index 6e097a88..8130f793 100644 --- a/src/C-interface/dense/bml_copy_dense_typed.c +++ b/src/C-interface/dense/bml_copy_dense_typed.c @@ -3,7 +3,6 @@ #include "../bml_allocate.h" #include "../bml_copy.h" #include "../bml_types.h" -#include "../bml_domain.h" #include "bml_allocate_dense.h" #include "bml_copy_dense.h" #include "bml_types_dense.h" @@ -60,6 +59,7 @@ bml_matrix_dense_t *TYPED_FUNC( #endif // end of MKL_GPU #endif bml_copy_domain(A->domain, B->domain); + bml_copy_domain(A->domain2, B->domain2); return B; } @@ -99,6 +99,7 @@ void TYPED_FUNC( if (A->distribution_mode == B->distribution_mode) { bml_copy_domain(A->domain, B->domain); + bml_copy_domain(A->domain2, B->domain2); } } diff --git a/src/C-interface/ellblock/bml_copy_ellblock.c b/src/C-interface/ellblock/bml_copy_ellblock.c index f0e2616f..14a563cf 100644 --- a/src/C-interface/ellblock/bml_copy_ellblock.c +++ b/src/C-interface/ellblock/bml_copy_ellblock.c @@ -2,7 +2,6 @@ #include "../bml_logger.h" #include "../bml_parallel.h" #include "../bml_types.h" -#include "../bml_domain.h" #include "bml_copy_ellblock.h" #include "bml_types_ellblock.h" @@ -126,7 +125,7 @@ void bml_save_domain_ellblock( bml_matrix_ellblock_t * A) { - LOG_ERROR("bml_save_domain_ellblock not implemented"); + bml_copy_domain(A->domain, A->domain2); } /** Restore the domain for an ellblock matrix. @@ -139,5 +138,20 @@ void bml_restore_domain_ellblock( bml_matrix_ellblock_t * A) { - LOG_ERROR("bml_restore_domain_ellblock not implemented"); + bml_copy_domain(A->domain2, A->domain); + +/* + if (bml_printRank() == 1) + { + int nprocs = bml_getNRanks(); + printf("Restored Domain\n"); + for (int i = 0; i < nprocs; i++) + { + printf("rank %d localRow %d %d %d localElem %d localDispl %d\n", + i, A->domain->localRowMin[i], A->domain->localRowMax[i], + A->domain->localRowExtent[i], A->domain->localElements[i], + A->domain->localDispl[i]); + } + } +*/ } diff --git a/src/C-interface/ellblock/bml_types_ellblock.h b/src/C-interface/ellblock/bml_types_ellblock.h index 0368fd21..b078cf69 100644 --- a/src/C-interface/ellblock/bml_types_ellblock.h +++ b/src/C-interface/ellblock/bml_types_ellblock.h @@ -42,6 +42,8 @@ struct bml_matrix_ellblock_t int *bsize; /** The domain decomposition when running in parallel. */ bml_domain_t *domain; + /** A copy of the domain decomposition. */ + bml_domain_t *domain2; #ifdef BML_USE_MPI /** Buffer for communications */ void *buffer; diff --git a/src/C-interface/ellpack/bml_allocate_ellpack.c b/src/C-interface/ellpack/bml_allocate_ellpack.c index e1844392..df0cb361 100644 --- a/src/C-interface/ellpack/bml_allocate_ellpack.c +++ b/src/C-interface/ellpack/bml_allocate_ellpack.c @@ -1,5 +1,4 @@ #include "../bml_allocate.h" -#include "../bml_domain.h" #include "../bml_logger.h" #include "../bml_parallel.h" #include "../bml_types.h" @@ -329,5 +328,60 @@ bml_update_domain_ellpack( { bml_domain_t *A_domain = A->domain; - bml_update_domain(A_domain, localPartMin, localPartMax, nnodesInPart); + int nprocs = bml_getNRanks(); + + for (int i = 0; i < nprocs; i++) + { + int rtotal = 0; + for (int j = localPartMin[i] - 1; j <= localPartMax[i] - 1; j++) + { + rtotal += nnodesInPart[j]; +/* + if (bml_printRank() == 1) + printf("rank %d localPart %d %d part %d nnodesPerPart %d rtotal %d\n", + i, localPartMin[i], localPartMax[i], j, nnodesInPart[j-1], rtotal); +*/ + } + + if (i == 0) + A_domain->localRowMin[0] = A_domain->globalRowMin; + else + A_domain->localRowMin[i] = A_domain->localRowMax[i - 1]; + + A_domain->localRowMax[i] = A_domain->localRowMin[i] + rtotal; + A_domain->localRowExtent[i] = + A_domain->localRowMax[i] - A_domain->localRowMin[i]; + A_domain->localElements[i] = + A_domain->localRowExtent[i] * A_domain->totalCols; + + if (i == 0) + A_domain->localDispl[0] = 0; + else + A_domain->localDispl[i] = + A_domain->localDispl[i - 1] + A_domain->localElements[i - 1]; + } + + A_domain->minLocalExtent = A_domain->localRowExtent[0]; + A_domain->maxLocalExtent = A_domain->localRowExtent[0]; + for (int i = 1; i < nprocs; i++) + { + if (A_domain->localRowExtent[i] < A_domain->minLocalExtent) + A_domain->minLocalExtent = A_domain->localRowExtent[i]; + if (A_domain->localRowExtent[i] > A_domain->maxLocalExtent) + A_domain->maxLocalExtent = A_domain->localRowExtent[i]; + } + +/* + if (bml_printRank() == 1) + { + printf("Updated Domain\n"); + for (int i = 0; i < nprocs; i++) + { + printf("rank %d localRow %d %d %d localElem %d localDispl %d\n", + i, A_domain->localRowMin[i], A_domain->localRowMax[i], + A_domain->localRowExtent[i], A_domain->localElements[i], + A_domain->localDispl[i]); + } + } +*/ } diff --git a/src/C-interface/ellpack/bml_allocate_ellpack_typed.c b/src/C-interface/ellpack/bml_allocate_ellpack_typed.c index 594fb086..b790da55 100644 --- a/src/C-interface/ellpack/bml_allocate_ellpack_typed.c +++ b/src/C-interface/ellpack/bml_allocate_ellpack_typed.c @@ -2,7 +2,6 @@ #include "../../typed.h" #include "../bml_allocate.h" #include "../bml_types.h" -#include "../bml_domain.h" #include "bml_allocate_ellpack.h" #include "bml_types_ellpack.h" @@ -53,6 +52,7 @@ void TYPED_FUNC( #endif bml_deallocate_domain(A->domain); + bml_deallocate_domain(A->domain2); bml_free_memory(A->value); bml_free_memory(A->index); bml_free_memory(A->nnz); @@ -160,6 +160,7 @@ bml_matrix_ellpack_t A->nnz = bml_allocate_memory(sizeof(int) * A->N); A->value = bml_noinit_allocate_memory(sizeof(REAL_T) * A->N * A->M); A->domain = bml_default_domain(A->N, A->M, distrib_mode); + A->domain2 = bml_default_domain(A->N, A->M, distrib_mode); #if defined(USE_OMP_OFFLOAD) int N = A->N; @@ -227,6 +228,7 @@ bml_matrix_ellpack_t *TYPED_FUNC( #endif A->domain = bml_default_domain(N, M, distrib_mode); + A->domain2 = bml_default_domain(N, M, distrib_mode); #if defined(USE_OMP_OFFLOAD) REAL_T *A_value = A->value; diff --git a/src/C-interface/ellpack/bml_copy_ellpack.c b/src/C-interface/ellpack/bml_copy_ellpack.c index 0281d9f3..4614ae32 100644 --- a/src/C-interface/ellpack/bml_copy_ellpack.c +++ b/src/C-interface/ellpack/bml_copy_ellpack.c @@ -1,5 +1,4 @@ #include "../bml_copy.h" -#include "../bml_domain.h" #include "../bml_logger.h" #include "../bml_parallel.h" #include "../bml_types.h" @@ -126,8 +125,6 @@ void bml_save_domain_ellpack( bml_matrix_ellpack_t * A) { - A->domain2 = bml_default_domain(A->N, A->M, A->distribution_mode); - bml_copy_domain(A->domain, A->domain2); } @@ -157,5 +154,4 @@ bml_restore_domain_ellpack( } } */ - bml_deallocate_domain(A->domain2); } diff --git a/src/C-interface/ellpack/bml_copy_ellpack_typed.c b/src/C-interface/ellpack/bml_copy_ellpack_typed.c index e4288480..2c378ecc 100644 --- a/src/C-interface/ellpack/bml_copy_ellpack_typed.c +++ b/src/C-interface/ellpack/bml_copy_ellpack_typed.c @@ -3,7 +3,6 @@ #include "../bml_allocate.h" #include "../bml_copy.h" #include "../bml_types.h" -#include "../bml_domain.h" #include "bml_allocate_ellpack.h" #include "bml_copy_ellpack.h" #include "bml_types_ellpack.h" @@ -73,6 +72,7 @@ bml_matrix_ellpack_t *TYPED_FUNC( } #endif bml_copy_domain(A->domain, B->domain); + bml_copy_domain(A->domain2, B->domain2); return B; } @@ -143,6 +143,7 @@ void TYPED_FUNC( if (A->distribution_mode == B->distribution_mode) { bml_copy_domain(A->domain, B->domain); + bml_copy_domain(A->domain2, B->domain2); } }