Skip to content
Snippets Groups Projects
Commit 7659c55c authored by alexgara's avatar alexgara
Browse files

.

parent 7676a8c6
No related branches found
No related tags found
No related merge requests found
/*
* Exact genetic sequence alignment
* (Using brute force)
*
* MPI version
*
* Computacion Paralela, Grado en Informatica (Universidad de Valladolid)
* 2023/2024
*
* v1.2.1
*
* (c) 2024, Arturo Gonzalez-Escribano
*/
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<limits.h>
#include<sys/time.h>
#include<mpi.h>
/* Arbitrary value to indicate that no matches are found */
#define NOT_FOUND -1
/* Arbitrary value to restrict the checksums period */
#define CHECKSUM_MAX 65535
#define TAG_NO_COMPLETE 100
#define END_SING -111
/*
* Function: Increment the number of pattern matches on the sequence positions
* This function can be changed and/or optimized by the students
*/
/*
*
* START HERE: DO NOT CHANGE THE CODE ABOVE THIS POINT
*
*/
void increment_matches( int pat, unsigned long *pat_found, unsigned long *pat_length, int *seq_matches ) {
int ind;
for( ind=0; ind<pat_length[pat]; ind++) {
if ( seq_matches[ pat_found[pat] + ind ] == NOT_FOUND )
seq_matches[ pat_found[pat] + ind ] = 0;
else
seq_matches[ pat_found[pat] + ind ] ++;
}
}
/*
*
* STOP HERE: DO NOT CHANGE THE CODE BELOW THIS POINT
*
*/
/*
* Utils: Function to get wall time
*/
double cp_Wtime(){
struct timeval tv;
gettimeofday(&tv, NULL);
return tv.tv_sec + 1.0e-6 * tv.tv_usec;
}
/*
* Utils: Random generator
*/
#include "rng.c"
/*
* Function: Allocate new patttern
*/
char *pattern_allocate( rng_t *random, unsigned long pat_rng_length_mean, unsigned long pat_rng_length_dev, unsigned long seq_length, unsigned long *new_length ) {
/* Random length */
unsigned long length = (unsigned long)rng_next_normal( random, (double)pat_rng_length_mean, (double)pat_rng_length_dev );
if ( length > seq_length ) length = seq_length;
if ( length <= 0 ) length = 1;
/* Allocate pattern */
char *pattern = (char *)malloc( sizeof(char) * length );
if ( pattern == NULL ) {
fprintf(stderr,"\n-- Error allocating a pattern of size: %lu\n", length );
exit( EXIT_FAILURE );
}
/* Return results */
*new_length = length;
return pattern;
}
/*
* Function: Fill random sequence or pattern
*/
void generate_rng_sequence( rng_t *random, float prob_G, float prob_C, float prob_A, char *seq, unsigned long length) {
unsigned long ind;
for( ind=0; ind<length; ind++ ) {
double prob = rng_next( random );
if( prob < prob_G ) seq[ind] = 'G';
else if( prob < prob_C ) seq[ind] = 'C';
else if( prob < prob_A ) seq[ind] = 'A';
else seq[ind] = 'T';
}
}
/*
* Function: Copy a sample of the sequence
*/
void copy_sample_sequence( rng_t *random, char *sequence, unsigned long seq_length, unsigned long pat_samp_loc_mean, unsigned long pat_samp_loc_dev, char *pattern, unsigned long length) {
/* Choose location */
unsigned long location = (unsigned long)rng_next_normal( random, (double)pat_samp_loc_mean, (double)pat_samp_loc_dev );
if ( location > seq_length - length ) location = seq_length - length;
if ( location <= 0 ) location = 0;
/* Copy sample */
unsigned long ind;
for( ind=0; ind<length; ind++ )
pattern[ind] = sequence[ind+location];
}
/*
* Function: Regenerate a sample of the sequence
*/
void generate_sample_sequence( rng_t *random, rng_t random_seq, float prob_G, float prob_C, float prob_A, unsigned long seq_length, unsigned long pat_samp_loc_mean, unsigned long pat_samp_loc_dev, char *pattern, unsigned long length ) {
/* Choose location */
unsigned long location = (unsigned long)rng_next_normal( random, (double)pat_samp_loc_mean, (double)pat_samp_loc_dev );
if ( location > seq_length - length ) location = seq_length - length;
if ( location <= 0 ) location = 0;
/* Regenerate sample */
rng_t local_random = random_seq;
rng_skip( &local_random, location );
generate_rng_sequence( &local_random, prob_G, prob_C, prob_A, pattern, length);
}
/*
* Function: Print usage line in stderr
*/
void show_usage( char *program_name ) {
fprintf(stderr,"Usage: %s ", program_name );
fprintf(stderr,"<seq_length> <prob_G> <prob_C> <prob_A> <pat_rng_num> <pat_rng_length_mean> <pat_rng_length_dev> <pat_samples_num> <pat_samp_length_mean> <pat_samp_length_dev> <pat_samp_loc_mean> <pat_samp_loc_dev> <pat_samp_mix:B[efore]|A[fter]|M[ixed]> <long_seed>\n");
fprintf(stderr,"\n");
}
/*
* MAIN PROGRAM
*/
int main(int argc, char *argv[]) {
/* 0. Default output and error without buffering, forces to write immediately */
setbuf(stdout, NULL);
setbuf(stderr, NULL);
/* 1. Read scenary arguments */
/* 1.0. Init MPI before processing arguments */
MPI_Init( &argc, &argv );
int rank;
MPI_Comm_rank( MPI_COMM_WORLD, &rank );
/* 1.1. Check minimum number of arguments */
if (argc < 15) {
fprintf(stderr, "\n-- Error: Not enough arguments when reading configuration from the command line\n\n");
show_usage( argv[0] );
exit( EXIT_FAILURE );
}
/* 1.2. Read argument values */
unsigned long seq_length = atol( argv[1] );
float prob_G = atof( argv[2] );
float prob_C = atof( argv[3] );
float prob_A = atof( argv[4] );
if ( prob_G + prob_C + prob_A > 1 ) {
fprintf(stderr, "\n-- Error: The sum of G,C,A,T nucleotid probabilities cannot be higher than 1\n\n");
show_usage( argv[0] );
exit( EXIT_FAILURE );
}
prob_C += prob_G;
prob_A += prob_C;
int pat_rng_num = atoi( argv[5] );
unsigned long pat_rng_length_mean = atol( argv[6] );
unsigned long pat_rng_length_dev = atol( argv[7] );
int pat_samp_num = atoi( argv[8] );
unsigned long pat_samp_length_mean = atol( argv[9] );
unsigned long pat_samp_length_dev = atol( argv[10] );
unsigned long pat_samp_loc_mean = atol( argv[11] );
unsigned long pat_samp_loc_dev = atol( argv[12] );
char pat_samp_mix = argv[13][0];
if ( pat_samp_mix != 'B' && pat_samp_mix != 'A' && pat_samp_mix != 'M' ) {
fprintf(stderr, "\n-- Error: Incorrect first character of pat_samp_mix: %c\n\n", pat_samp_mix);
show_usage( argv[0] );
exit( EXIT_FAILURE );
}
unsigned long seed = atol( argv[14] );
#ifdef DEBUG
/* DEBUG: Print arguments */
printf("\nArguments: seq_length=%lu\n", seq_length );
printf("Arguments: Accumulated probabilitiy G=%f, C=%f, A=%f, T=1\n", prob_G, prob_C, prob_A );
printf("Arguments: Random patterns number=%d, length_mean=%lu, length_dev=%lu\n", pat_rng_num, pat_rng_length_mean, pat_rng_length_dev );
printf("Arguments: Sample patterns number=%d, length_mean=%lu, length_dev=%lu, loc_mean=%lu, loc_dev=%lu\n", pat_samp_num, pat_samp_length_mean, pat_samp_length_dev, pat_samp_loc_mean, pat_samp_loc_dev );
printf("Arguments: Type of mix: %c, Random seed: %lu\n", pat_samp_mix, seed );
printf("\n");
#endif // DEBUG
/* 2. Initialize data structures */
/* 2.1. Skip allocate and fill sequence */
rng_t random = rng_new( seed );
rng_skip( &random, seq_length );
int nprocs;
MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
/* 2.2. Allocate and fill patterns */
/* 2.2.1 Allocate main structures */
int pat_number = pat_rng_num + pat_samp_num;
unsigned long *pat_length = (unsigned long *)malloc( sizeof(unsigned long) * pat_number );
char **pattern = (char **)malloc( sizeof(char*) * pat_number );
if ( pattern == NULL || pat_length == NULL ) {
fprintf(stderr,"\n-- Error allocating the basic patterns structures for size: %d\n", pat_number );
exit( EXIT_FAILURE );
}
/* 2.2.2 Allocate and initialize ancillary structure for pattern types */
unsigned long ind;
#define PAT_TYPE_NONE 0
#define PAT_TYPE_RNG 1
#define PAT_TYPE_SAMP 2
char *pat_type = (char *)malloc( sizeof(char) * pat_number );
if ( pat_type == NULL ) {
fprintf(stderr,"\n-- Error allocating ancillary structure for pattern of size: %d\n", pat_number );
exit( EXIT_FAILURE );
}
for( ind=0; ind<pat_number; ind++ ) pat_type[ind] = PAT_TYPE_NONE;
/* 2.2.3 Fill up pattern types using the chosen mode */
switch( pat_samp_mix ) {
case 'A':
for( ind=0; ind<pat_rng_num; ind++ ) pat_type[ind] = PAT_TYPE_RNG;
for( ; ind<pat_number; ind++ ) pat_type[ind] = PAT_TYPE_SAMP;
break;
case 'B':
for( ind=0; ind<pat_samp_num; ind++ ) pat_type[ind] = PAT_TYPE_SAMP;
for( ; ind<pat_number; ind++ ) pat_type[ind] = PAT_TYPE_RNG;
break;
default:
if ( pat_rng_num == 0 ) {
for( ind=0; ind<pat_number; ind++ ) pat_type[ind] = PAT_TYPE_SAMP;
}
else if ( pat_samp_num == 0 ) {
for( ind=0; ind<pat_number; ind++ ) pat_type[ind] = PAT_TYPE_RNG;
}
else if ( pat_rng_num < pat_samp_num ) {
int interval = pat_number / pat_rng_num;
for( ind=0; ind<pat_number; ind++ )
if ( (ind+1) % interval == 0 ) pat_type[ind] = PAT_TYPE_RNG;
else pat_type[ind] = PAT_TYPE_SAMP;
}
else {
int interval = pat_number / pat_samp_num;
for( ind=0; ind<pat_number; ind++ )
if ( (ind+1) % interval == 0 ) pat_type[ind] = PAT_TYPE_SAMP;
else pat_type[ind] = PAT_TYPE_RNG;
}
}
/* 2.2.4 Generate the patterns */
for( ind=0; ind<pat_number; ind++ ) {
if ( pat_type[ind] == PAT_TYPE_RNG ) {
pattern[ind] = pattern_allocate( &random, pat_rng_length_mean, pat_rng_length_dev, seq_length, &pat_length[ind] );
generate_rng_sequence( &random, prob_G, prob_C, prob_A, pattern[ind], pat_length[ind] );
}
else if ( pat_type[ind] == PAT_TYPE_SAMP ) {
pattern[ind] = pattern_allocate( &random, pat_samp_length_mean, pat_samp_length_dev, seq_length, &pat_length[ind] );
#define REGENERATE_SAMPLE_PATTERNS
#ifdef REGENERATE_SAMPLE_PATTERNS
rng_t random_seq_orig = rng_new( seed );
generate_sample_sequence( &random, random_seq_orig, prob_G, prob_C, prob_A, seq_length, pat_samp_loc_mean, pat_samp_loc_dev, pattern[ind], pat_length[ind] );
#else
copy_sample_sequence( &random, sequence, seq_length, pat_samp_loc_mean, pat_samp_loc_dev, pattern[ind], pat_length[ind] );
#endif
}
else {
fprintf(stderr,"\n-- Error internal: Paranoic check! A pattern without type at position %lu\n", ind );
exit( EXIT_FAILURE );
}
}
free( pat_type );
/* Avoid the usage of arguments to take strategic decisions
* In a real case the user only has the patterns and sequence data to analize
*/
argc = 0;
argv = NULL;
pat_rng_num = 0;
pat_rng_length_mean = 0;
pat_rng_length_dev = 0;
pat_samp_num = 0;
pat_samp_length_mean = 0;
pat_samp_length_dev = 0;
pat_samp_loc_mean = 0;
pat_samp_loc_dev = 0;
pat_samp_mix = '0';
pat_samp_mix = '0';
/* 2.3. Other result data and structures */
int pat_matches = 0;
/* 2.3.1. Other results related to patterns */
unsigned long *pat_found;
pat_found = (unsigned long *)malloc( sizeof(unsigned long) * pat_number );
if ( pat_found == NULL ) {
fprintf(stderr,"\n-- Error allocating aux pattern structure for size: %d\n", pat_number );
exit( EXIT_FAILURE );
}
/* 3. Start global timer */
MPI_Barrier( MPI_COMM_WORLD );
double ttotal = cp_Wtime();
/*
*
* START HERE: DO NOT CHANGE THE CODE ABOVE THIS POINT
*
*/
/* 2.1. Allocate and fill sequence */
long my_size = seq_length/nprocs ;
long resto = seq_length% nprocs;
long my_begin = my_size*rank ;
my_begin = my_begin+(rank<resto?rank:resto);
if (rank < resto) {
++my_size;
}
my_size = my_size;
char *sequence = (char *)malloc( sizeof(char) * my_size );
if ( sequence == NULL ) {
fprintf(stderr,"\n-- Error allocating the sequence for size: %lu\n", my_size );
exit( EXIT_FAILURE );
}
random = rng_new( seed );
rng_t random_local = random;
rng_skip( &random_local, my_begin );
generate_rng_sequence( &random_local, prob_G, prob_C, prob_A, sequence, my_size);
rng_skip( &random, seq_length );
#ifdef DEBUG
/* DEBUG: Print sequence and patterns */
printf("-----------------\n");
printf("Sequence: ");
for( ind=0; ind<seq_length; ind++ )
printf( "%c", sequence[ind] );
printf("\n-----------------\n");
printf("Patterns: %d ( rng: %d, samples: %d )\n", pat_number, pat_rng_num, pat_samp_num );
int debug_pat;
for( debug_pat=0; debug_pat<pat_number; debug_pat++ ) {
printf( "Pat[%d]: ", debug_pat );
for( ind=0; ind<pat_length[debug_pat]; ind++ ) {
printf( "%c", pattern[debug_pat][ind] );
printf("\n");
}
printf("-----------------\n\n");
#endif // DEBUG
/* 2.3.2. Other results related to the main sequence */
/* 4. Initialize ancillary structures */
for( ind=0; ind<pat_number; ind++) {
pat_found[ind] = NOT_FOUND;
}
/* 5. Search for each pattern */
unsigned long start;
unsigned long pat;
long *envio_struct;
envio_struct = (long *)malloc(sizeof( long) * 3); // Posicion 0 (my_begin+start), Posicion 1 (tamaño leido) Posicion 2 (patrón)
long *recv_struct;
recv_struct = (long *)malloc(sizeof(long) * 3); // Posicion 0 (my_begin+start), Posicion 1 (tamaño leido) Posicion 2 (patrón)
for( pat=0; pat < pat_number; pat++ ) {
/* 5.1. For each posible starting position */
for( start=0; start <= my_size; start++) {
/* 5.1.1. For each pattern element */
for( ind=0; ind<pat_length[pat]; ind++) {
/* Stop this test when different nucleotids are found */
if (((start + ind) >= my_size) && (rank != nprocs-1)){
envio_struct[0] = my_begin + start;
envio_struct[1] = ind;
envio_struct[2] = pat;
MPI_Send(envio_struct, 3, MPI_UNSIGNED_LONG, rank+1, TAG_NO_COMPLETE, MPI_COMM_WORLD);
break;
}
else if (((start + ind) >= my_size) && (rank == nprocs-1)) break;
if ( sequence[start + ind] != pattern[pat][ind] ) break;
}
/* 5.1.2. Check if the loop ended with a match */
if ( ind == pat_length[pat] ) {
pat_found[pat] = my_begin + start;
break;
}
if( rank != nprocs-1 ){
envio_struct[0] = END_SING;
envio_struct[1] = END_SING;
envio_struct[2] = END_SING;
MPI_Send(envio_struct, 3, MPI_UNSIGNED_LONG, rank+1, TAG_NO_COMPLETE, MPI_COMM_WORLD);
}
}
}
int start_r, pat_r, length_r;
/* Continuar con busqueda*/
while(1 && (rank != 0)){
MPI_Recv(recv_struct, 3, MPI_UNSIGNED_LONG, rank-1, TAG_NO_COMPLETE, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
start_r = recv_struct[0];
length_r = recv_struct[1];
pat_r = recv_struct[2];
if(start_r == END_SING) break;
for( ind = 0; ind < my_size && ind + length_r < pat_length[pat_r]; ind ++){
//if ( ind == my_size && ( rank != nprocs-1)){
if(sequence[ind] != pattern[pat_r][length_r + ind])break;
}
if( ind == my_size && (rank != nprocs-1)){
envio_struct[0] = start_r;
envio_struct[1] = length_r + ind;
envio_struct[2] = pat_r;
MPI_Send(envio_struct, 3, MPI_LONG, rank+1, TAG_NO_COMPLETE, MPI_COMM_WORLD);
}
else if( rank != nprocs-1 ){
envio_struct[0] = END_SING;
envio_struct[1] = END_SING;
envio_struct[2] = END_SING;
MPI_Send(&envio_struct, 3, MPI_UNSIGNED_LONG, rank+1, TAG_NO_COMPLETE, MPI_COMM_WORLD);
}
if (ind+ length_r == pat_length[pat_r] ) pat_found[pat] = start_r;
}
/* 7. Check sums */
unsigned long checksum_matches = 0;
unsigned long checksum_found = 0;
unsigned long * pat_found_reduct = (unsigned long *) malloc( sizeof(unsigned long)*pat_number);
MPI_Reduce(pat_found, pat_found_reduct, pat_number, MPI_UNSIGNED_LONG, MPI_MIN, 0 ,MPI_COMM_WORLD);
if ( rank == 0 ){
for( ind=0; ind < pat_number; ind++) {
if ( pat_found_reduct[ind] != NOT_FOUND ) {
checksum_found = ( checksum_found + pat_found_reduct[ind] ) % CHECKSUM_MAX;
checksum_matches =( checksum_matches + pat_length[ind] ) %CHECKSUM_MAX;
pat_matches ++;
}
}
}
#ifdef DEBUG
/* DEBUG: Write results */
printf("-----------------\n");
printf("Found start:");
for( debug_pat=0; debug_pat<pat_number; debug_pat++ ) {
printf( " %lu", pat_found[debug_pat] );
}
printf("\n");
printf("-----------------\n");
printf("Matches:");
for( ind=0; ind<seq_length; ind++ )
printf( " %d", seq_matches[ind] );
printf("\n");
printf("-----------------\n");
#endif // DEBUG
/* Free local resources */
free( sequence );
free(envio_struct);
free(recv_struct);
free(pat_found_reduct);
/*
*
* STOP HERE: DO NOT CHANGE THE CODE BELOW THIS POINT
*
*/
/* 8. Stop global time */
MPI_Barrier( MPI_COMM_WORLD );
ttotal = cp_Wtime() - ttotal;
/* 9. Output for leaderboard */
if ( rank == 0 ) {
printf("\n");
/* 9.1. Total computation time */
printf("Time: %lf\n", ttotal );
/* 9.2. Results: Statistics */
printf("Result: %d, %lu, %lu\n\n",
pat_matches,
checksum_found,
checksum_matches );
}
/* 10. Free resources */
int i;
for( i=0; i<pat_number; i++ ) free( pattern[i] );
free( pattern );
free( pat_length );
free( pat_found );
/* 11. End */
MPI_Finalize();
return 0;
}
...@@ -3,38 +3,7 @@ ...@@ -3,38 +3,7 @@
Time: 10.761040 Time: 10.761040
Result: 1900, 5291, 30391, 17177 Result: 1900, 5291, 30391, 17177
mpiexec --oversubscribe -n 8 ./align_mpi 500000 0.15 0.4 0.15 1120 20 10 2110 50000 500 7000 4000 M 346536
$ mpiexec --oversubscribe -n 10 ./align_mpi 500000 0.15 0.4 0.15 1120 20 10 2110 50000 500 7000 4000 M 346536 Time: 3.009573
Result: 227, 34783, 0
MPI fallo en linea (426):
MPI_ERR_BUFFER: invalid buffer pointer
MPI fallo en linea (426):
MPI_ERR_BUFFER: invalid buffer pointer
MPI fallo en linea (426):
MPI_ERR_BUFFER: invalid buffer pointer
MPI fallo en linea (426):
MPI_ERR_BUFFER: invalid buffer pointer
MPI fallo en linea (426):
MPI_ERR_BUFFER: invalid buffer pointer
MPI fallo en linea (426):
MPI_ERR_BUFFER: invalid buffer pointer
MPI fallo en linea (426):
MPI_ERR_BUFFER: invalid buffer pointer
MPI fallo en linea (426):
MPI_ERR_BUFFER: invalid buffer pointer
MPI fallo en linea (426):
MPI_ERR_BUFFER: invalid buffer pointer
--------------------------------------------------------------------------
MPI_ABORT was invoked on rank 7 in communicator MPI_COMM_WORLD
Proc: [[46938,1],7]
Errorcode: -1
NOTE: invoking MPI_ABORT causes Open MPI to kill all MPI processes.
You may or may not see output from other processes, depending on
exactly when Open MPI kills them.
--------------------------------------------------------------------------
--------------------------------------------------------------------------
prterun has exited due to process rank 7 with PID 0 on node forge calling
"abort". This may have caused other processes in the application to be
terminated by signals sent by prterun (as reported here).
--------------------------------------------------------------------------
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment