Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
M
mpi-par
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Requirements
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Package registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
alesanc
mpi-par
Commits
1fa46f8e
Commit
1fa46f8e
authored
Jun 6, 2024
by
alexgara
Browse files
Options
Downloads
Patches
Plain Diff
falta impl mensaje de terminar
parent
7bc1b848
Branches
master
No related tags found
No related merge requests found
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
align_cuda.cu
+151
-42
151 additions, 42 deletions
align_cuda.cu
align_mpi.c
+110
-24
110 additions, 24 deletions
align_mpi.c
antiguompi.c
+531
-0
531 additions, 0 deletions
antiguompi.c
with
792 additions
and
66 deletions
align_cuda.cu
+
151
−
42
View file @
1fa46f8e
/*
* Exact genetic sequence alignment
* (Using brute force)
...
...
@@ -11,6 +12,8 @@
*
* (c) 2024, Arturo Gonzalez-Escribano
*/
#include
<complex.h>
#include
<stddef.h>
#include
<stdio.h>
#include
<stdlib.h>
#include
<string.h>
...
...
@@ -53,6 +56,89 @@ double cp_Wtime(){
*/
/* ADD KERNELS AND OTHER FUNCTIONS HERE */
//5-- Hacer la funcion kernel para generar la secuencia con el nuevo rng_next()
__global__
void
new_generate_rng_sequence
(
rng_t
random
,
float
prob_G
,
float
prob_C
,
float
prob_A
,
char
*
seq
,
unsigned
long
length
){
// CADA HILO VA A LA POSICION DE SU ID GLOBAL
unsigned
long
tid_block
=
(
threadIdx
.
x
);
unsigned
block_size
=
(
blockDim
.
x
);
unsigned
long
tid_global
=
(
blockIdx
.
x
*
block_size
+
tid_block
);
//Si su tid esta fuera de rango entonces salirse y no hacer nada
if
(
tid_global
>=
length
)
return
;
// Dar tantos saltos en como la posicion que te toca
rng_skip
(
&
random
,
tid_global
);
float
prob
=
rng_next
(
&
random
);
if
(
prob
<
prob_G
)
seq
[
tid_global
]
=
'G'
;
else
if
(
prob
<
prob_C
)
seq
[
tid_global
]
=
'C'
;
else
if
(
prob
<
prob_A
)
seq
[
tid_global
]
=
'A'
;
else
seq
[
tid_global
]
=
'T'
;
}
__global__
void
search_pat_by_gpu
(
char
*
sequence
,
unsigned
long
seq_length
,
int
pat_number
,
unsigned
long
*
pat_length
,
char
**
pattern
,
unsigned
long
*
pat_found
,
int
*
seq_matches
){
unsigned
long
tid_block
=
(
threadIdx
.
x
);
unsigned
block_size
=
(
blockDim
.
x
);
unsigned
long
tid_global
=
(
blockIdx
.
x
*
block_size
+
tid_block
);
int
pat
;
unsigned
long
lind
;
//Go through all the patterns
if
(
tid_global
<
seq_length
)
{
for
(
pat
=
0
;
pat
<
pat_number
;
pat
++
){
//Check it dosent get more far than the sequence length
if
(
tid_global
>
seq_length
-
pat_length
[
pat
])
continue
;
//Search for the current pattern that belongs to this thread
for
(
lind
=
0
;
lind
<
pat_length
[
pat
];
lind
++
){
// The pattern doesnt match
if
(
sequence
[
lind
+
tid_global
]
!=
pattern
[
pat
][
lind
])
break
;
}
//Check if the loop has ended with a match of the pattern
if
(
lind
==
pat_length
[
pat
]
){
//put in pat found the min position found of the pattern
//BECAUSE notfound is -1 and in unsigned is the biggest number possible
atomicMin
((
unsigned
long
long
*
)
&
pat_found
[
pat
],
tid_global
);
}
}
}
//Wait to all threads to start to cound seq_matches
__syncthreads
();
if
(
pat_found
[
tid_global
]
==
(
unsigned
long
)
NOT_FOUND
)
return
;
if
(
tid_global
>=
pat_number
)
return
;
//Now increment_matches
for
(
lind
=
0
;
lind
<
pat_length
[
tid_global
];
lind
++
){
if
(
seq_matches
[
pat_found
[
tid_global
]
+
lind
]
==
(
unsigned
long
)
NOT_FOUND
)
seq_matches
[
pat_found
[
tid_global
]
+
lind
]
=
0
;
else
atomicAdd
(
&
seq_matches
[
pat_found
[
tid_global
]
+
lind
],
1
);
}
}
__global__
void
count_number_of_pat
(
int
*
return_pat_matches
,
unsigned
long
*
pat_found
,
int
pat_found_length
,
unsigned
long
*
return_checksum_patfound
,
int
seq_length
,
int
*
seq_matches
,
unsigned
long
*
return_checksum_matches
){
unsigned
long
tid_block
=
(
threadIdx
.
x
);
unsigned
block_size
=
(
blockDim
.
x
);
unsigned
long
tid_global
=
(
blockIdx
.
x
*
block_size
+
tid_block
);
if
(
tid_global
<
pat_found_length
){
if
(
pat_found
[
tid_global
]
!=
(
unsigned
long
)
NOT_FOUND
){
atomicAdd
(
return_pat_matches
,
1
);
atomicAdd
((
unsigned
long
long
*
)
return_checksum_patfound
,
pat_found
[
tid_global
]);
}
}
if
(
tid_global
>=
seq_length
)
return
;
if
(
seq_matches
[
tid_global
]
!=
(
unsigned
long
)
NOT_FOUND
)
atomicAdd
((
unsigned
long
long
*
)
return_checksum_matches
,
seq_matches
[
tid_global
]);
}
//TODO Implement later
//__global__ void new_increment_matches(int pat, unsigned long *pat_found, unsigned long *pat_length, int *pat_a);
/*
* Function: Increment the number of pattern matches on the sequence positions
...
...
@@ -346,15 +432,30 @@ int main(int argc, char *argv[]) {
* DO NOT USE OpenMP IN YOUR CODE
*
*/
/* 2.1. Allocate and fill sequence */
char
*
sequence
=
(
char
*
)
malloc
(
sizeof
(
char
)
*
seq_length
);
if
(
sequence
==
NULL
)
{
fprintf
(
stderr
,
"
\n
-- Error allocating the sequence for size: %lu
\n
"
,
seq_length
);
exit
(
EXIT_FAILURE
);
}
//TODO quitar cuando ya no se necesite
char
*
sequence
;
//2-- Reservar espacio suficiente en la GPU
char
*
device_sequence
;
CUDA_CHECK_FUNCTION
(
cudaMalloc
(
&
device_sequence
,
sizeof
(
char
)
*
seq_length
););
random
=
rng_new
(
seed
);
generate_rng_sequence
(
&
random
,
prob_G
,
prob_C
,
prob_A
,
sequence
,
seq_length
);
//generate_rng_sequence( &random, prob_G, prob_C, prob_A, sequence, seq_length);
//3--Generar la sequencia con una funcion Kernel
// El numero de bloques y su tamaño ( que cada hilo inicialice un de las posiciones de la secuencia)
// numero de hilos = tamaño de la secuencia
// numero de grids = numero de threads / 1024
int
blockSize
=
1024
;
//1024 hilos por bloque
int
nunmBlocks
;
if
(
seq_length
%
blockSize
==
0
)
nunmBlocks
=
seq_length
/
blockSize
;
else
nunmBlocks
=
(
seq_length
/
blockSize
)
+
1
;
//Usando la memoria reservada para cuda
new_generate_rng_sequence
<<<
nunmBlocks
,
blockSize
>>>
(
random
,
prob_G
,
prob_C
,
prob_A
,
device_sequence
,
seq_length
);
#ifdef DEBUG
/* DEBUG: Print sequence and patterns */
...
...
@@ -386,49 +487,56 @@ int main(int argc, char *argv[]) {
for
(
ind
=
0
;
ind
<
pat_number
;
ind
++
)
{
pat_found
[
ind
]
=
(
unsigned
long
)
NOT_FOUND
;
}
for
(
lind
=
0
;
lind
<
seq_length
;
lind
++
)
{
seq_matches
[
lind
]
=
0
;
}
/* 5. Search for each pattern */
unsigned
long
start
;
int
pat
;
for
(
pat
=
0
;
pat
<
pat_number
;
pat
++
)
{
/* 5.1. For each posible starting position */
for
(
start
=
0
;
start
<=
seq_length
-
pat_length
[
pat
];
start
++
)
{
/* copy to gpu mem pat_found and pat_matches */
//unsigned long *device_pat_matches;
int
*
device_pat_matches
;
int
*
device_seq_matches
;
unsigned
long
*
device_pat_found
;
unsigned
long
*
device_checksum_found
;
unsigned
long
*
device_checksum_matches
;
unsigned
long
checksum_found
=
0
;
unsigned
long
checksum_matches
=
0
;
/* 5.1.1. For each pattern element */
for
(
lind
=
0
;
lind
<
pat_length
[
pat
];
lind
++
)
{
/* Stop this test when different nucleotids are found */
if
(
sequence
[
start
+
lind
]
!=
pattern
[
pat
][
lind
]
)
break
;
}
/* 5.1.2. Check if the loop ended with a match */
if
(
lind
==
pat_length
[
pat
]
)
{
pat_matches
++
;
pat_found
[
pat
]
=
start
;
break
;
}
}
CUDA_CHECK_FUNCTION
(
cudaMalloc
(
&
device_seq_matches
,
sizeof
(
int
)
*
seq_length
));
CUDA_CHECK_FUNCTION
(
cudaMemcpy
(
device_seq_matches
,
seq_matches
,
sizeof
(
int
)
*
seq_length
,
cudaMemcpyHostToDevice
));
/* 5.2. Pattern found */
if
(
pat_found
[
pat
]
!=
(
unsigned
long
)
NOT_FOUND
)
{
/* 4.2.1. Increment the number of pattern matches on the sequence positions */
increment_matches
(
pat
,
pat_found
,
pat_length
,
seq_matches
);
}
}
CUDA_CHECK_FUNCTION
(
cudaMalloc
(
&
device_pat_matches
,
sizeof
(
int
)));
CUDA_CHECK_FUNCTION
(
cudaMemcpy
(
device_pat_matches
,
&
pat_matches
,
sizeof
(
int
),
cudaMemcpyHostToDevice
));
CUDA_CHECK_FUNCTION
(
cudaMalloc
(
&
device_pat_found
,
sizeof
(
unsigned
long
)
*
pat_number
));
CUDA_CHECK_FUNCTION
(
cudaMemcpy
(
device_pat_found
,
pat_found
,
sizeof
(
unsigned
long
)
*
pat_number
,
cudaMemcpyHostToDevice
));
CUDA_CHECK_FUNCTION
(
cudaMalloc
(
&
device_checksum_found
,
sizeof
(
unsigned
long
)));
CUDA_CHECK_FUNCTION
(
cudaMemcpy
(
device_checksum_found
,
&
checksum_found
,
sizeof
(
unsigned
long
),
cudaMemcpyHostToDevice
));
CUDA_CHECK_FUNCTION
(
cudaMalloc
(
&
device_checksum_matches
,
sizeof
(
unsigned
long
)));
CUDA_CHECK_FUNCTION
(
cudaMemcpy
(
device_checksum_matches
,
&
checksum_matches
,
sizeof
(
unsigned
long
),
cudaMemcpyHostToDevice
));
// copy the length of each pattern
CUDA_CHECK_FUNCTION
(
cudaMemcpy
(
d_pat_length
,
pat_length
,
sizeof
(
unsigned
long
)
*
pat_number
,
cudaMemcpyHostToDevice
));
search_pat_by_gpu
<<<
nunmBlocks
,
blockSize
>>>
(
device_sequence
,
seq_length
,
pat_number
,
d_pat_length
,
d_pattern
,
device_pat_found
,
device_seq_matches
);
CUDA_CHECK_KERNEL
();
count_number_of_pat
<<<
nunmBlocks
,
blockSize
>>>
(
device_pat_matches
,
device_pat_found
,
pat_number
,
device_checksum_found
,
seq_length
,
device_seq_matches
,
device_checksum_matches
);
CUDA_CHECK_KERNEL
();
//copy the data to the host
CUDA_CHECK_FUNCTION
(
cudaMemcpy
(
&
pat_matches
,
device_pat_matches
,
sizeof
(
int
),
cudaMemcpyDeviceToHost
));
CUDA_CHECK_FUNCTION
(
cudaMemcpy
(
&
checksum_found
,
device_checksum_found
,
sizeof
(
unsigned
long
)
,
cudaMemcpyDeviceToHost
));
CUDA_CHECK_FUNCTION
(
cudaMemcpy
(
&
checksum_matches
,
device_checksum_matches
,
sizeof
(
unsigned
long
)
,
cudaMemcpyDeviceToHost
));
/* 7. Check sums */
unsigned
long
checksum_matches
=
0
;
unsigned
long
checksum_found
=
0
;
for
(
ind
=
0
;
ind
<
pat_number
;
ind
++
)
{
if
(
pat_found
[
ind
]
!=
(
unsigned
long
)
NOT_FOUND
)
checksum_found
=
(
checksum_found
+
pat_found
[
ind
]
)
%
CHECKSUM_MAX
;
}
for
(
lind
=
0
;
lind
<
seq_length
;
lind
++
)
{
if
(
seq_matches
[
lind
]
!=
NOT_FOUND
)
checksum_matches
=
(
checksum_matches
+
seq_matches
[
lind
]
)
%
CHECKSUM_MAX
;
}
// calc now the rest operation
checksum_found
=
checksum_found
%
CHECKSUM_MAX
;
checksum_matches
=
checksum_matches
%
CHECKSUM_MAX
;
#ifdef DEBUG
/* DEBUG: Write results */
...
...
@@ -481,3 +589,4 @@ int main(int argc, char *argv[]) {
/* 11. End */
return
0
;
}
This diff is collapsed.
Click to expand it.
align_mpi.c
+
110
−
24
View file @
1fa46f8e
...
...
@@ -25,6 +25,8 @@
/* Arbitrary value to restrict the checksums period */
#define CHECKSUM_MAX 65535
#define TAG_NO_COMPLETE 69
#define TAG_END 8
/*
* Function: Increment the number of pattern matches on the sequence positions
* This function can be changed and/or optimized by the students
...
...
@@ -306,9 +308,6 @@ int main(int argc, char *argv[]) {
pat_samp_mix
=
'0'
;
pat_samp_mix
=
'0'
;
/* 2.3. Other result data and structures */
int
pat_matches
=
0
;
/* 2.3.1. Other results related to patterns */
unsigned
long
*
pat_found
;
pat_found
=
(
unsigned
long
*
)
malloc
(
sizeof
(
unsigned
long
)
*
pat_number
);
...
...
@@ -327,13 +326,26 @@ int main(int argc, char *argv[]) {
*
*/
/* 2.1. Allocate and fill sequence */
char
*
sequence
=
(
char
*
)
malloc
(
sizeof
(
char
)
*
seq_length
);
int
nprocs
;
MPI_Comm_size
(
MPI_COMM_WORLD
,
&
nprocs
);
unsigned
long
my_size
=
seq_length
/
nprocs
;
unsigned
long
resto
=
seq_length
%
nprocs
;
unsigned
long
my_begin
=
my_size
*
rank
;
if
(
rank
<
resto
)
my_size
++
;
char
*
sequence
=
(
char
*
)
malloc
(
sizeof
(
char
)
*
my_size
);
if
(
sequence
==
NULL
)
{
fprintf
(
stderr
,
"
\n
-- Error allocating the sequence for size: %lu
\n
"
,
seq_length
);
fprintf
(
stderr
,
"
\n
-- Error allocating the sequence for size: %lu
\n
"
,
my_size
);
MPI_Abort
(
MPI_COMM_WORLD
,
EXIT_FAILURE
);
}
random
=
rng_new
(
seed
);
generate_rng_sequence
(
&
random
,
prob_G
,
prob_C
,
prob_A
,
sequence
,
seq_length
);
rng_t
random_local
=
random
;
rng_skip
(
&
random_local
,
my_size
);
generate_rng_sequence
(
&
random_local
,
prob_G
,
prob_C
,
prob_A
,
sequence
,
my_size
);
rng_skip
(
&
random
,
seq_length
);
#ifdef DEBUG
/* DEBUG: Print sequence and patterns */
...
...
@@ -354,49 +366,123 @@ int main(int argc, char *argv[]) {
#endif // DEBUG
/* 2.3.2. Other results related to the main sequence */
int
*
seq_matches
;
seq_matches
=
(
int
*
)
malloc
(
sizeof
(
int
)
*
seq_length
);
if
(
seq_matches
==
NULL
)
{
fprintf
(
stderr
,
"
\n
-- Error allocating aux sequence structures for size: %lu
\n
"
,
seq_length
);
MPI_Abort
(
MPI_COMM_WORLD
,
EXIT_FAILURE
);
}
/* 4. Initialize ancillary structures */
for
(
ind
=
0
;
ind
<
pat_number
;
ind
++
)
{
pat_found
[
ind
]
=
(
unsigned
long
)
NOT_FOUND
;
}
for
(
ind
=
0
;
ind
<
seq_length
;
ind
++
)
{
seq_matches
[
ind
]
=
0
;
}
/* 5. Search for each pattern */
unsigned
long
start
;
unsigned
long
pat
;
MPI_Request
request
;
MPI_Status
status
;
int
send_to_continue_flag
=
0
;
// [0]=my_begin+start | [1]= tamaño ya leido del patorn | [2]= numero de patrón
unsigned
long
*
envio_array
=
(
unsigned
long
*
)
malloc
(
sizeof
(
unsigned
long
)
*
3
);
if
(
envio_array
==
NULL
)
{
fprintf
(
stderr
,
"Error al crear envio_array en proc:%d
\n
"
,
rank
);
MPI_Abort
(
MPI_COMM_WORLD
,
EXIT_FAILURE
);
}
unsigned
long
*
recibo_array
=
(
unsigned
long
*
)
malloc
(
sizeof
(
unsigned
long
)
*
3
);
if
(
envio_array
==
NULL
)
{
fprintf
(
stderr
,
"Error al crear recibo_array en proc:%d
\n
"
,
rank
);
MPI_Abort
(
MPI_COMM_WORLD
,
EXIT_FAILURE
);
}
for
(
pat
=
0
;
pat
<
pat_number
;
pat
++
)
{
/* 5.1. For each posible starting position */
for
(
start
=
0
;
start
<=
seq_length
-
pat_length
[
pat
];
start
++
)
{
for
(
start
=
0
;
start
<=
my_size
;
start
++
)
{
/* 5.1.1. For each pattern element */
for
(
ind
=
0
;
ind
<
pat_length
[
pat
];
ind
++
)
{
/* Stop this test when different nucleotids are found */
if
(
sequence
[
start
+
ind
]
!=
pattern
[
pat
][
ind
]
)
break
;
if
(
(
start
+
ind
>=
my_size
)
&&
(
rank
!=
nprocs
-
1
)){
envio_array
[
0
]
=
my_begin
+
start
;
envio_array
[
1
]
=
ind
-
1
;
envio_array
[
2
]
=
pat
;
//Envio el parton para que continue el siguente que le toca
MPI_Isend
(
envio_array
,
3
,
MPI_UNSIGNED_LONG
,
rank
+
1
,
TAG_NO_COMPLETE
,
MPI_COMM_WORLD
,
&
request
);
send_to_continue_flag
=
1
;
break
;
}
else
if
(
sequence
[
start
+
ind
]
!=
pattern
[
pat
][
ind
]
)
break
;
}
/* 5.1.2. Check if the loop ended with a match */
if
(
ind
==
pat_length
[
pat
]
)
{
pat_matches
++
;
pat_found
[
pat
]
=
start
;
//Check if is outside of the loop and the pattern continue in other
if
(
send_to_continue_flag
){
send_to_continue_flag
=
0
;
break
;
}
else
if
(
ind
==
pat_length
[
pat
]
)
{
pat_found
[
pat
]
=
my_begin
+
start
;
break
;
}
}
}
/* 5.2. Pattern found */
if
(
pat_found
[
pat
]
!=
(
unsigned
long
)
NOT_FOUND
)
{
/* 4.2.1. Increment the number of pattern matches on the sequence positions */
increment_matches
(
pat
,
pat_found
,
pat_length
,
seq_matches
);
if
(
rank
==
0
){
short
end
=
0
;
MPI_Isend
(
&
end
,
1
,
MPI_SHORT
,
rank
+
1
,
TAG_END
,
MPI_COMM_WORLD
,
&
request
);
}
//NOW PROCESS CHECK FOR THE REAMAING PATTERN THAT ITS PREVIOUS DOSENT HAVE FOUND
int
start_r
,
pat_r
,
length_r
;
while
(
1
&&
(
rank
!=
0
)){
// TODO Implement if the previous has ended i dosent wait more
//WAIT if the previous process has some pattern that continues in ours
MPI_Recv
(
recibo_array
,
3
,
MPI_UNSIGNED_LONG
,
rank
-
1
,
TAG_NO_COMPLETE
,
MPI_COMM_WORLD
,
&
status
);
start_r
=
recibo_array
[
0
];
length_r
=
recibo_array
[
1
];
pat_r
=
recibo_array
[
3
];
//search for the pattern in my space
start
=
0
;
for
(
ind
=
0
;
ind
+
length_r
<
pat_length
[
pat
];
ind
++
)
{
/* Stop this test when different nucleotids are found */
if
(
(
start
+
ind
>=
my_size
)
&&
(
rank
!=
nprocs
-
1
)){
envio_array
[
0
]
=
start_r
;
envio_array
[
1
]
=
(
ind
-
1
)
+
length_r
;
envio_array
[
2
]
=
pat
;
//Envio el parton para que continue el siguente que le toca
MPI_Isend
(
envio_array
,
3
,
MPI_UNSIGNED_LONG
,
rank
+
1
,
TAG_NO_COMPLETE
,
MPI_COMM_WORLD
,
&
request
);
send_to_continue_flag
=
1
;
break
;
}
else
if
(
sequence
[
start
+
ind
]
!=
pattern
[
pat
][
ind
+
length_r
]
)
break
;
}
/* 5.1.2. Check if the loop ended with a match */
//Check if is outside of the loop and the pattern continue in other
if
(
send_to_continue_flag
){
send_to_continue_flag
=
0
;
break
;
}
else
if
(
ind
==
pat_length
[
pat
]
)
{
pat_found
[
pat
]
=
start_r
;
break
;
}
}
}
/* 5.2. Pattern found */
//if ( pat_found[pat] != (unsigned long)NOT_FOUND ) {
/* 4.2.1. Increment the number of pattern matches on the sequence positions */
//increment_matches( pat, pat_found, pat_length, seq_matches );
// }
/* 7. Check sums */
unsigned
long
checksum_matches
=
0
;
unsigned
long
checksum_found
=
0
;
...
...
This diff is collapsed.
Click to expand it.
antiguompi.c
0 → 100644
+
531
−
0
View file @
1fa46f8e
/*
* Exact genetic sequence alignment
* (Using brute force)
*
* MPI version
*
* Computacion Paralela, Grado en Informatica (Universidad de Valladolid)
* 2023/2024
*
* v1.2.1
*
* (c) 2024, Arturo Gonzalez-Escribano
*/
#include
<stdio.h>
#include
<stdlib.h>
#include
<string.h>
#include
<limits.h>
#include
<sys/time.h>
#include
<mpi.h>
/* Arbitrary value to indicate that no matches are found */
#define NOT_FOUND -1
/* Arbitrary value to restrict the checksums period */
#define CHECKSUM_MAX 65535
#define TAG_NO_COMPLETE 100
#define END_SING -111
/*
* Function: Increment the number of pattern matches on the sequence positions
* This function can be changed and/or optimized by the students
*/
/*
*
* START HERE: DO NOT CHANGE THE CODE ABOVE THIS POINT
*
*/
void
increment_matches
(
int
pat
,
unsigned
long
*
pat_found
,
unsigned
long
*
pat_length
,
int
*
seq_matches
)
{
int
ind
;
for
(
ind
=
0
;
ind
<
pat_length
[
pat
];
ind
++
)
{
if
(
seq_matches
[
pat_found
[
pat
]
+
ind
]
==
NOT_FOUND
)
seq_matches
[
pat_found
[
pat
]
+
ind
]
=
0
;
else
seq_matches
[
pat_found
[
pat
]
+
ind
]
++
;
}
}
/*
*
* STOP HERE: DO NOT CHANGE THE CODE BELOW THIS POINT
*
*/
/*
* Utils: Function to get wall time
*/
double
cp_Wtime
(){
struct
timeval
tv
;
gettimeofday
(
&
tv
,
NULL
);
return
tv
.
tv_sec
+
1.0e-6
*
tv
.
tv_usec
;
}
/*
* Utils: Random generator
*/
#include
"rng.c"
/*
* Function: Allocate new patttern
*/
char
*
pattern_allocate
(
rng_t
*
random
,
unsigned
long
pat_rng_length_mean
,
unsigned
long
pat_rng_length_dev
,
unsigned
long
seq_length
,
unsigned
long
*
new_length
)
{
/* Random length */
unsigned
long
length
=
(
unsigned
long
)
rng_next_normal
(
random
,
(
double
)
pat_rng_length_mean
,
(
double
)
pat_rng_length_dev
);
if
(
length
>
seq_length
)
length
=
seq_length
;
if
(
length
<=
0
)
length
=
1
;
/* Allocate pattern */
char
*
pattern
=
(
char
*
)
malloc
(
sizeof
(
char
)
*
length
);
if
(
pattern
==
NULL
)
{
fprintf
(
stderr
,
"
\n
-- Error allocating a pattern of size: %lu
\n
"
,
length
);
exit
(
EXIT_FAILURE
);
}
/* Return results */
*
new_length
=
length
;
return
pattern
;
}
/*
* Function: Fill random sequence or pattern
*/
void
generate_rng_sequence
(
rng_t
*
random
,
float
prob_G
,
float
prob_C
,
float
prob_A
,
char
*
seq
,
unsigned
long
length
)
{
unsigned
long
ind
;
for
(
ind
=
0
;
ind
<
length
;
ind
++
)
{
double
prob
=
rng_next
(
random
);
if
(
prob
<
prob_G
)
seq
[
ind
]
=
'G'
;
else
if
(
prob
<
prob_C
)
seq
[
ind
]
=
'C'
;
else
if
(
prob
<
prob_A
)
seq
[
ind
]
=
'A'
;
else
seq
[
ind
]
=
'T'
;
}
}
/*
* Function: Copy a sample of the sequence
*/
void
copy_sample_sequence
(
rng_t
*
random
,
char
*
sequence
,
unsigned
long
seq_length
,
unsigned
long
pat_samp_loc_mean
,
unsigned
long
pat_samp_loc_dev
,
char
*
pattern
,
unsigned
long
length
)
{
/* Choose location */
unsigned
long
location
=
(
unsigned
long
)
rng_next_normal
(
random
,
(
double
)
pat_samp_loc_mean
,
(
double
)
pat_samp_loc_dev
);
if
(
location
>
seq_length
-
length
)
location
=
seq_length
-
length
;
if
(
location
<=
0
)
location
=
0
;
/* Copy sample */
unsigned
long
ind
;
for
(
ind
=
0
;
ind
<
length
;
ind
++
)
pattern
[
ind
]
=
sequence
[
ind
+
location
];
}
/*
* Function: Regenerate a sample of the sequence
*/
void
generate_sample_sequence
(
rng_t
*
random
,
rng_t
random_seq
,
float
prob_G
,
float
prob_C
,
float
prob_A
,
unsigned
long
seq_length
,
unsigned
long
pat_samp_loc_mean
,
unsigned
long
pat_samp_loc_dev
,
char
*
pattern
,
unsigned
long
length
)
{
/* Choose location */
unsigned
long
location
=
(
unsigned
long
)
rng_next_normal
(
random
,
(
double
)
pat_samp_loc_mean
,
(
double
)
pat_samp_loc_dev
);
if
(
location
>
seq_length
-
length
)
location
=
seq_length
-
length
;
if
(
location
<=
0
)
location
=
0
;
/* Regenerate sample */
rng_t
local_random
=
random_seq
;
rng_skip
(
&
local_random
,
location
);
generate_rng_sequence
(
&
local_random
,
prob_G
,
prob_C
,
prob_A
,
pattern
,
length
);
}
/*
* Function: Print usage line in stderr
*/
void
show_usage
(
char
*
program_name
)
{
fprintf
(
stderr
,
"Usage: %s "
,
program_name
);
fprintf
(
stderr
,
"<seq_length> <prob_G> <prob_C> <prob_A> <pat_rng_num> <pat_rng_length_mean> <pat_rng_length_dev> <pat_samples_num> <pat_samp_length_mean> <pat_samp_length_dev> <pat_samp_loc_mean> <pat_samp_loc_dev> <pat_samp_mix:B[efore]|A[fter]|M[ixed]> <long_seed>
\n
"
);
fprintf
(
stderr
,
"
\n
"
);
}
/*
* MAIN PROGRAM
*/
int
main
(
int
argc
,
char
*
argv
[])
{
/* 0. Default output and error without buffering, forces to write immediately */
setbuf
(
stdout
,
NULL
);
setbuf
(
stderr
,
NULL
);
/* 1. Read scenary arguments */
/* 1.0. Init MPI before processing arguments */
MPI_Init
(
&
argc
,
&
argv
);
int
rank
;
MPI_Comm_rank
(
MPI_COMM_WORLD
,
&
rank
);
/* 1.1. Check minimum number of arguments */
if
(
argc
<
15
)
{
fprintf
(
stderr
,
"
\n
-- Error: Not enough arguments when reading configuration from the command line
\n\n
"
);
show_usage
(
argv
[
0
]
);
exit
(
EXIT_FAILURE
);
}
/* 1.2. Read argument values */
unsigned
long
seq_length
=
atol
(
argv
[
1
]
);
float
prob_G
=
atof
(
argv
[
2
]
);
float
prob_C
=
atof
(
argv
[
3
]
);
float
prob_A
=
atof
(
argv
[
4
]
);
if
(
prob_G
+
prob_C
+
prob_A
>
1
)
{
fprintf
(
stderr
,
"
\n
-- Error: The sum of G,C,A,T nucleotid probabilities cannot be higher than 1
\n\n
"
);
show_usage
(
argv
[
0
]
);
exit
(
EXIT_FAILURE
);
}
prob_C
+=
prob_G
;
prob_A
+=
prob_C
;
int
pat_rng_num
=
atoi
(
argv
[
5
]
);
unsigned
long
pat_rng_length_mean
=
atol
(
argv
[
6
]
);
unsigned
long
pat_rng_length_dev
=
atol
(
argv
[
7
]
);
int
pat_samp_num
=
atoi
(
argv
[
8
]
);
unsigned
long
pat_samp_length_mean
=
atol
(
argv
[
9
]
);
unsigned
long
pat_samp_length_dev
=
atol
(
argv
[
10
]
);
unsigned
long
pat_samp_loc_mean
=
atol
(
argv
[
11
]
);
unsigned
long
pat_samp_loc_dev
=
atol
(
argv
[
12
]
);
char
pat_samp_mix
=
argv
[
13
][
0
];
if
(
pat_samp_mix
!=
'B'
&&
pat_samp_mix
!=
'A'
&&
pat_samp_mix
!=
'M'
)
{
fprintf
(
stderr
,
"
\n
-- Error: Incorrect first character of pat_samp_mix: %c
\n\n
"
,
pat_samp_mix
);
show_usage
(
argv
[
0
]
);
exit
(
EXIT_FAILURE
);
}
unsigned
long
seed
=
atol
(
argv
[
14
]
);
#ifdef DEBUG
/* DEBUG: Print arguments */
printf
(
"
\n
Arguments: seq_length=%lu
\n
"
,
seq_length
);
printf
(
"Arguments: Accumulated probabilitiy G=%f, C=%f, A=%f, T=1
\n
"
,
prob_G
,
prob_C
,
prob_A
);
printf
(
"Arguments: Random patterns number=%d, length_mean=%lu, length_dev=%lu
\n
"
,
pat_rng_num
,
pat_rng_length_mean
,
pat_rng_length_dev
);
printf
(
"Arguments: Sample patterns number=%d, length_mean=%lu, length_dev=%lu, loc_mean=%lu, loc_dev=%lu
\n
"
,
pat_samp_num
,
pat_samp_length_mean
,
pat_samp_length_dev
,
pat_samp_loc_mean
,
pat_samp_loc_dev
);
printf
(
"Arguments: Type of mix: %c, Random seed: %lu
\n
"
,
pat_samp_mix
,
seed
);
printf
(
"
\n
"
);
#endif // DEBUG
/* 2. Initialize data structures */
/* 2.1. Skip allocate and fill sequence */
rng_t
random
=
rng_new
(
seed
);
rng_skip
(
&
random
,
seq_length
);
int
nprocs
;
MPI_Comm_size
(
MPI_COMM_WORLD
,
&
nprocs
);
/* 2.2. Allocate and fill patterns */
/* 2.2.1 Allocate main structures */
int
pat_number
=
pat_rng_num
+
pat_samp_num
;
unsigned
long
*
pat_length
=
(
unsigned
long
*
)
malloc
(
sizeof
(
unsigned
long
)
*
pat_number
);
char
**
pattern
=
(
char
**
)
malloc
(
sizeof
(
char
*
)
*
pat_number
);
if
(
pattern
==
NULL
||
pat_length
==
NULL
)
{
fprintf
(
stderr
,
"
\n
-- Error allocating the basic patterns structures for size: %d
\n
"
,
pat_number
);
exit
(
EXIT_FAILURE
);
}
/* 2.2.2 Allocate and initialize ancillary structure for pattern types */
unsigned
long
ind
;
#define PAT_TYPE_NONE 0
#define PAT_TYPE_RNG 1
#define PAT_TYPE_SAMP 2
char
*
pat_type
=
(
char
*
)
malloc
(
sizeof
(
char
)
*
pat_number
);
if
(
pat_type
==
NULL
)
{
fprintf
(
stderr
,
"
\n
-- Error allocating ancillary structure for pattern of size: %d
\n
"
,
pat_number
);
exit
(
EXIT_FAILURE
);
}
for
(
ind
=
0
;
ind
<
pat_number
;
ind
++
)
pat_type
[
ind
]
=
PAT_TYPE_NONE
;
/* 2.2.3 Fill up pattern types using the chosen mode */
switch
(
pat_samp_mix
)
{
case
'A'
:
for
(
ind
=
0
;
ind
<
pat_rng_num
;
ind
++
)
pat_type
[
ind
]
=
PAT_TYPE_RNG
;
for
(
;
ind
<
pat_number
;
ind
++
)
pat_type
[
ind
]
=
PAT_TYPE_SAMP
;
break
;
case
'B'
:
for
(
ind
=
0
;
ind
<
pat_samp_num
;
ind
++
)
pat_type
[
ind
]
=
PAT_TYPE_SAMP
;
for
(
;
ind
<
pat_number
;
ind
++
)
pat_type
[
ind
]
=
PAT_TYPE_RNG
;
break
;
default:
if
(
pat_rng_num
==
0
)
{
for
(
ind
=
0
;
ind
<
pat_number
;
ind
++
)
pat_type
[
ind
]
=
PAT_TYPE_SAMP
;
}
else
if
(
pat_samp_num
==
0
)
{
for
(
ind
=
0
;
ind
<
pat_number
;
ind
++
)
pat_type
[
ind
]
=
PAT_TYPE_RNG
;
}
else
if
(
pat_rng_num
<
pat_samp_num
)
{
int
interval
=
pat_number
/
pat_rng_num
;
for
(
ind
=
0
;
ind
<
pat_number
;
ind
++
)
if
(
(
ind
+
1
)
%
interval
==
0
)
pat_type
[
ind
]
=
PAT_TYPE_RNG
;
else
pat_type
[
ind
]
=
PAT_TYPE_SAMP
;
}
else
{
int
interval
=
pat_number
/
pat_samp_num
;
for
(
ind
=
0
;
ind
<
pat_number
;
ind
++
)
if
(
(
ind
+
1
)
%
interval
==
0
)
pat_type
[
ind
]
=
PAT_TYPE_SAMP
;
else
pat_type
[
ind
]
=
PAT_TYPE_RNG
;
}
}
/* 2.2.4 Generate the patterns */
for
(
ind
=
0
;
ind
<
pat_number
;
ind
++
)
{
if
(
pat_type
[
ind
]
==
PAT_TYPE_RNG
)
{
pattern
[
ind
]
=
pattern_allocate
(
&
random
,
pat_rng_length_mean
,
pat_rng_length_dev
,
seq_length
,
&
pat_length
[
ind
]
);
generate_rng_sequence
(
&
random
,
prob_G
,
prob_C
,
prob_A
,
pattern
[
ind
],
pat_length
[
ind
]
);
}
else
if
(
pat_type
[
ind
]
==
PAT_TYPE_SAMP
)
{
pattern
[
ind
]
=
pattern_allocate
(
&
random
,
pat_samp_length_mean
,
pat_samp_length_dev
,
seq_length
,
&
pat_length
[
ind
]
);
#define REGENERATE_SAMPLE_PATTERNS
#ifdef REGENERATE_SAMPLE_PATTERNS
rng_t
random_seq_orig
=
rng_new
(
seed
);
generate_sample_sequence
(
&
random
,
random_seq_orig
,
prob_G
,
prob_C
,
prob_A
,
seq_length
,
pat_samp_loc_mean
,
pat_samp_loc_dev
,
pattern
[
ind
],
pat_length
[
ind
]
);
#else
copy_sample_sequence
(
&
random
,
sequence
,
seq_length
,
pat_samp_loc_mean
,
pat_samp_loc_dev
,
pattern
[
ind
],
pat_length
[
ind
]
);
#endif
}
else
{
fprintf
(
stderr
,
"
\n
-- Error internal: Paranoic check! A pattern without type at position %lu
\n
"
,
ind
);
exit
(
EXIT_FAILURE
);
}
}
free
(
pat_type
);
/* Avoid the usage of arguments to take strategic decisions
* In a real case the user only has the patterns and sequence data to analize
*/
argc
=
0
;
argv
=
NULL
;
pat_rng_num
=
0
;
pat_rng_length_mean
=
0
;
pat_rng_length_dev
=
0
;
pat_samp_num
=
0
;
pat_samp_length_mean
=
0
;
pat_samp_length_dev
=
0
;
pat_samp_loc_mean
=
0
;
pat_samp_loc_dev
=
0
;
pat_samp_mix
=
'0'
;
pat_samp_mix
=
'0'
;
/* 2.3. Other result data and structures */
int
pat_matches
=
0
;
/* 2.3.1. Other results related to patterns */
unsigned
long
*
pat_found
;
pat_found
=
(
unsigned
long
*
)
malloc
(
sizeof
(
unsigned
long
)
*
pat_number
);
if
(
pat_found
==
NULL
)
{
fprintf
(
stderr
,
"
\n
-- Error allocating aux pattern structure for size: %d
\n
"
,
pat_number
);
exit
(
EXIT_FAILURE
);
}
/* 3. Start global timer */
MPI_Barrier
(
MPI_COMM_WORLD
);
double
ttotal
=
cp_Wtime
();
/*
*
* START HERE: DO NOT CHANGE THE CODE ABOVE THIS POINT
*
*/
/* 2.1. Allocate and fill sequence */
long
my_size
=
seq_length
/
nprocs
;
long
resto
=
seq_length
%
nprocs
;
long
my_begin
=
my_size
*
rank
;
my_begin
=
my_begin
+
(
rank
<
resto
?
rank
:
resto
);
if
(
rank
<
resto
)
{
++
my_size
;
}
my_size
=
my_size
;
char
*
sequence
=
(
char
*
)
malloc
(
sizeof
(
char
)
*
my_size
);
if
(
sequence
==
NULL
)
{
fprintf
(
stderr
,
"
\n
-- Error allocating the sequence for size: %lu
\n
"
,
my_size
);
exit
(
EXIT_FAILURE
);
}
random
=
rng_new
(
seed
);
rng_t
random_local
=
random
;
rng_skip
(
&
random_local
,
my_begin
);
generate_rng_sequence
(
&
random_local
,
prob_G
,
prob_C
,
prob_A
,
sequence
,
my_size
);
rng_skip
(
&
random
,
seq_length
);
#ifdef DEBUG
/* DEBUG: Print sequence and patterns */
printf
(
"-----------------
\n
"
);
printf
(
"Sequence: "
);
for
(
ind
=
0
;
ind
<
seq_length
;
ind
++
)
printf
(
"%c"
,
sequence
[
ind
]
);
printf
(
"
\n
-----------------
\n
"
);
printf
(
"Patterns: %d ( rng: %d, samples: %d )
\n
"
,
pat_number
,
pat_rng_num
,
pat_samp_num
);
int
debug_pat
;
for
(
debug_pat
=
0
;
debug_pat
<
pat_number
;
debug_pat
++
)
{
printf
(
"Pat[%d]: "
,
debug_pat
);
for
(
ind
=
0
;
ind
<
pat_length
[
debug_pat
];
ind
++
)
{
printf
(
"%c"
,
pattern
[
debug_pat
][
ind
]
);
printf
(
"
\n
"
);
}
printf
(
"-----------------
\n\n
"
);
#endif // DEBUG
/* 2.3.2. Other results related to the main sequence */
/* 4. Initialize ancillary structures */
for
(
ind
=
0
;
ind
<
pat_number
;
ind
++
)
{
pat_found
[
ind
]
=
NOT_FOUND
;
}
/* 5. Search for each pattern */
unsigned
long
start
;
unsigned
long
pat
;
long
*
envio_struct
;
envio_struct
=
(
long
*
)
malloc
(
sizeof
(
long
)
*
3
);
// Posicion 0 (my_begin+start), Posicion 1 (tamaño leido) Posicion 2 (patrón)
long
*
recv_struct
;
recv_struct
=
(
long
*
)
malloc
(
sizeof
(
long
)
*
3
);
// Posicion 0 (my_begin+start), Posicion 1 (tamaño leido) Posicion 2 (patrón)
for
(
pat
=
0
;
pat
<
pat_number
;
pat
++
)
{
/* 5.1. For each posible starting position */
for
(
start
=
0
;
start
<=
my_size
;
start
++
)
{
/* 5.1.1. For each pattern element */
for
(
ind
=
0
;
ind
<
pat_length
[
pat
];
ind
++
)
{
/* Stop this test when different nucleotids are found */
if
(((
start
+
ind
)
>=
my_size
)
&&
(
rank
!=
nprocs
-
1
)){
envio_struct
[
0
]
=
my_begin
+
start
;
envio_struct
[
1
]
=
ind
;
envio_struct
[
2
]
=
pat
;
MPI_Send
(
envio_struct
,
3
,
MPI_UNSIGNED_LONG
,
rank
+
1
,
TAG_NO_COMPLETE
,
MPI_COMM_WORLD
);
break
;
}
else
if
(((
start
+
ind
)
>=
my_size
)
&&
(
rank
==
nprocs
-
1
))
break
;
if
(
sequence
[
start
+
ind
]
!=
pattern
[
pat
][
ind
]
)
break
;
}
/* 5.1.2. Check if the loop ended with a match */
if
(
ind
==
pat_length
[
pat
]
)
{
pat_found
[
pat
]
=
my_begin
+
start
;
break
;
}
if
(
rank
!=
nprocs
-
1
){
envio_struct
[
0
]
=
END_SING
;
envio_struct
[
1
]
=
END_SING
;
envio_struct
[
2
]
=
END_SING
;
MPI_Send
(
envio_struct
,
3
,
MPI_UNSIGNED_LONG
,
rank
+
1
,
TAG_NO_COMPLETE
,
MPI_COMM_WORLD
);
}
}
}
int
start_r
,
pat_r
,
length_r
;
/* Continuar con busqueda*/
while
(
1
&&
(
rank
!=
0
)){
MPI_Recv
(
recv_struct
,
3
,
MPI_UNSIGNED_LONG
,
rank
-
1
,
TAG_NO_COMPLETE
,
MPI_COMM_WORLD
,
MPI_STATUS_IGNORE
);
start_r
=
recv_struct
[
0
];
length_r
=
recv_struct
[
1
];
pat_r
=
recv_struct
[
2
];
if
(
start_r
==
END_SING
)
break
;
for
(
ind
=
0
;
ind
<
my_size
&&
ind
+
length_r
<
pat_length
[
pat_r
];
ind
++
){
//if ( ind == my_size && ( rank != nprocs-1)){
if
(
sequence
[
ind
]
!=
pattern
[
pat_r
][
length_r
+
ind
])
break
;
}
if
(
ind
==
my_size
&&
(
rank
!=
nprocs
-
1
)){
envio_struct
[
0
]
=
start_r
;
envio_struct
[
1
]
=
length_r
+
ind
;
envio_struct
[
2
]
=
pat_r
;
MPI_Send
(
envio_struct
,
3
,
MPI_LONG
,
rank
+
1
,
TAG_NO_COMPLETE
,
MPI_COMM_WORLD
);
}
else
if
(
rank
!=
nprocs
-
1
){
envio_struct
[
0
]
=
END_SING
;
envio_struct
[
1
]
=
END_SING
;
envio_struct
[
2
]
=
END_SING
;
MPI_Send
(
&
envio_struct
,
3
,
MPI_UNSIGNED_LONG
,
rank
+
1
,
TAG_NO_COMPLETE
,
MPI_COMM_WORLD
);
}
if
(
ind
+
length_r
==
pat_length
[
pat_r
]
)
pat_found
[
pat
]
=
start_r
;
}
/* 7. Check sums */
unsigned
long
checksum_matches
=
0
;
unsigned
long
checksum_found
=
0
;
unsigned
long
*
pat_found_reduct
=
(
unsigned
long
*
)
malloc
(
sizeof
(
unsigned
long
)
*
pat_number
);
MPI_Reduce
(
pat_found
,
pat_found_reduct
,
pat_number
,
MPI_UNSIGNED_LONG
,
MPI_MIN
,
0
,
MPI_COMM_WORLD
);
if
(
rank
==
0
){
for
(
ind
=
0
;
ind
<
pat_number
;
ind
++
)
{
if
(
pat_found_reduct
[
ind
]
!=
NOT_FOUND
)
{
checksum_found
=
(
checksum_found
+
pat_found_reduct
[
ind
]
)
%
CHECKSUM_MAX
;
checksum_matches
=
(
checksum_matches
+
pat_length
[
ind
]
)
%
CHECKSUM_MAX
;
pat_matches
++
;
}
}
}
#ifdef DEBUG
/* DEBUG: Write results */
printf
(
"-----------------
\n
"
);
printf
(
"Found start:"
);
for
(
debug_pat
=
0
;
debug_pat
<
pat_number
;
debug_pat
++
)
{
printf
(
" %lu"
,
pat_found
[
debug_pat
]
);
}
printf
(
"
\n
"
);
printf
(
"-----------------
\n
"
);
printf
(
"Matches:"
);
for
(
ind
=
0
;
ind
<
seq_length
;
ind
++
)
printf
(
" %d"
,
seq_matches
[
ind
]
);
printf
(
"
\n
"
);
printf
(
"-----------------
\n
"
);
#endif // DEBUG
/* Free local resources */
free
(
sequence
);
free
(
envio_struct
);
free
(
recv_struct
);
free
(
pat_found_reduct
);
/*
*
* STOP HERE: DO NOT CHANGE THE CODE BELOW THIS POINT
*
*/
/* 8. Stop global time */
MPI_Barrier
(
MPI_COMM_WORLD
);
ttotal
=
cp_Wtime
()
-
ttotal
;
/* 9. Output for leaderboard */
if
(
rank
==
0
)
{
printf
(
"
\n
"
);
/* 9.1. Total computation time */
printf
(
"Time: %lf
\n
"
,
ttotal
);
/* 9.2. Results: Statistics */
printf
(
"Result: %d, %lu, %lu
\n\n
"
,
pat_matches
,
checksum_found
,
checksum_matches
);
}
/* 10. Free resources */
int
i
;
for
(
i
=
0
;
i
<
pat_number
;
i
++
)
free
(
pattern
[
i
]
);
free
(
pattern
);
free
(
pat_length
);
free
(
pat_found
);
/* 11. End */
MPI_Finalize
();
return
0
;
}
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment