0

I'm trying to cyclically distribute row blocks of a matrix (represented as a 1D array) across processes using MPI_Type_create_darray.

After creating the datatype and inspecting it with MPI_Type_get_true_extent and MPI_Type_size, everything appears to be calculated correctly. However, when I actually send the data using MPI_Scatterv, all processes receive the same data: the offsets are not being applied during communication.

Here is a minimal example ready to compile and execute.

#mpicc P2_SO.c -o P2 #COMPILE

#mpirun -np 3 ./P2 6 #EXECUTE

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <mpi.h>

// Rank that allocates the full matrix and acts as the source of the distribution.
#define ROOT_RANK 0
// Number of matrix rows per distribution block; multiplied by N to obtain the
// block length in elements passed to MPI_Type_create_darray.
#define ROWS_PER_BLOCK 2

int main(int argc, char **argv)
{
    // Obtain matrix size from argument
    int N = atoi(argv[1]);

    // Initialize the MPI environment
    MPI_Init(NULL, NULL);

    // Get the rank of the process
    int world_rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);

    // Get the number of processes
    int world_size;
    MPI_Comm_size(MPI_COMM_WORLD, &world_size);

    int gsize[1] = {N * N};
    int distrib[1] = {MPI_DISTRIBUTE_CYCLIC}; 
    int dargs[1] = {ROWS_PER_BLOCK * N};      
    int psize[1] = {world_size};              

    MPI_Datatype myType;
    MPI_Type_create_darray(world_size, world_rank, 1,
                           gsize,
                           distrib,
                           dargs,
                           psize,
                           MPI_ORDER_C,
                           MPI_DOUBLE,
                           &myType);
    MPI_Type_commit(&myType);

    ///// DEBUG //////
    MPI_Aint lb, extent;
    MPI_Type_get_extent(myType, &lb, &extent);
    MPI_Aint true_lb, true_extent;
    MPI_Type_get_true_extent(myType, &true_lb, &true_extent);
    int count_bytes;
    MPI_Type_size(myType, &count_bytes);
    int num_elements = count_bytes / (int)sizeof(double);
    ///// DEBUG //////

    int *sendcounts = malloc((size_t)world_size * sizeof(int));
    int *displs = malloc((size_t)world_size * sizeof(int));

    for (int i = 0; i < world_size; i++)
    {
        sendcounts[i] = 1; // Each process receives one datatype of cyclic_type
        displs[i] = 0;
    }

    double *buf = malloc((size_t)num_elements * sizeof(double)); // rows_per_process[world_rank]

    if (world_rank == ROOT_RANK)
    {
        printf("Process %d: true_lb = %ld == %d\n", world_rank, true_lb / 8, N * ROWS_PER_BLOCK * world_rank);
        printf("Process %d has %d elements\n", world_rank, num_elements);

        double *A;

        // Reserves memory and inicializate matrix
        A = (double *)malloc(sizeof(double) * (size_t)N * (size_t)N);
        for (int i = 0; i < N; i++)
        {
            for (int j = 0; j < N; j++)
            {
                A[i * N + j] = i * N + j;
            }
        }

        MPI_Scatterv(A, sendcounts, displs, myType,
                     buf, num_elements, MPI_DOUBLE,
                     ROOT_RANK, MPI_COMM_WORLD);
    }
    else
    {
        sleep((unsigned int)world_rank);
        printf("\nPROCESS %d:\n", world_rank);
        printf("Process %d: true_lb = %ld == %d\n", world_rank, true_lb / 8, N * ROWS_PER_BLOCK * world_rank);
        printf("Process %d has %d elements\n", world_rank, num_elements);

        MPI_Scatterv(NULL, sendcounts, displs, myType,
                     buf, num_elements, MPI_DOUBLE,
                     ROOT_RANK, MPI_COMM_WORLD);
    }

    for (int i = 0; i < num_elements; i++)
    {
        printf("Position %d of process %d has value %f\n", i, world_rank, buf[i]);
    }

    free(sendcounts);
    free(displs);
    free(buf);
    MPI_Type_free(&myType);
    MPI_Finalize();
    return 0;
}
1
  • All displs are 0. What did you expect? Commented 10 hours ago

0

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.