Skip to content
Snippets Groups Projects
Commit ba2dd2a8 authored by davifer's avatar davifer :speech_balloon:
Browse files

Delete build_dataset.py

parent 9c6c4bca
Branches
No related tags found
No related merge requests found
import sys
import pandas as pd
import subprocess
import re
# Speaker IDs assigned to each split. The "fem"/"mas(c)" suffixes come from
# the original variable names; the two groups of a split are treated alike.
train_fem = ["041", "036", "038", "039", "060", "061", "063", "050", "055", "057"]
train_mas = ["033", "034", "043", "044", "032", "022", "023", "062", "064", "045", "053", "059", "094"]
test_fem = ["024", "027", "029", "031", "046", "048", "058"]
test_masc = ["047", "049", "051", "052", "035", "037", "040", "042", "025", "026", "028", "030"]

# Sets built once so the per-row membership test is O(1).
TRAIN_SPEAKERS = frozenset(train_fem + train_mas)
TEST_SPEAKERS = frozenset(test_fem + test_masc)

# Line breaks, tabs and punctuation removed from every transcription.
# The hyphen is escaped so it is a literal character, not a range (an
# unescaped `'-?` would also swallow the digits 0-9), and `<>` sit INSIDE
# the class so each character is stripped on its own.
STRIP_CHARS = re.compile(r'[\n\r\t.,"\'\-?:!;#<>]')


def normalize_transcription(raw):
    """Return *raw* lower-cased with line breaks, tabs and punctuation removed.

    A missing value (``None`` / NaN, which is how pandas represents an empty
    CSV cell) yields the empty string, so callers can skip the row with a
    plain truthiness test.
    """
    if pd.isna(raw):
        return ""
    return STRIP_CHARS.sub("", str(raw)).lower()


def split_for_speaker(speaker_id):
    """Return ``"train"`` or ``"test"`` for *speaker_id*, or ``None`` if unlisted."""
    # Test is checked first so that, should an ID ever appear in both groups,
    # it still resolves to "test" as it did with the original if/if sequence.
    if speaker_id in TEST_SPEAKERS:
        return "test"
    if speaker_id in TRAIN_SPEAKERS:
        return "train"
    return None


def main():
    """Distribute every audio listed in the CSV into train/test folders.

    Reads ``../csv/csv_formated.csv`` (first column: audio path, second
    column: transcription), shuffles the rows, and for each row invokes the
    external ``copy_audio`` script with the audio path, the destination
    folder, a running index, the original file name and the normalized
    transcription.  The destination root is ``<argv[1]>/data``.
    """
    if len(sys.argv) < 2:
        sys.exit("usage: " + sys.argv[0] + " <output_root>")
    out_root = sys.argv[1] + "/data"

    # Read the CSV and shuffle the rows.
    data = pd.read_csv('../csv/csv_formated.csv')
    data = data.sample(frac=1)
    # Rename the columns so the rest of the script works with any dataset.
    data.columns = ["audio", "transcription"]

    num = 0
    for wav, raw_transcription in zip(data["audio"], data["transcription"]):
        wav = str(wav)
        # File name is the last path component.
        wav_name = wav.split("/")[-1]
        # Characters 4..6 of the file name are used as the speaker ID.
        speaker_id = wav_name[4:7]

        split = split_for_speaker(speaker_id)
        if split is None:
            # Previously such rows silently inherited the folder of whichever
            # row was processed before them, which (after shuffling) put the
            # speaker in a random split.  Skip them explicitly instead.
            print("skipping " + wav + ": speaker '" + speaker_id
                  + "' is not assigned to train or test", file=sys.stderr)
            continue
        save_dir = out_root + "/" + split

        transcription = normalize_transcription(raw_transcription)
        if not transcription:
            # Nothing usable to pair with the audio.
            continue

        # Hand the audio over to the helper script (presumably it copies and
        # renames the WAV -- the script itself is not part of this file).
        salida = subprocess.run(['.././scripts/copy_audio', wav, save_dir, str(num), wav_name, transcription])
        if salida.returncode != 0:
            print("copy_audio failed for " + wav + " (exit code "
                  + str(salida.returncode) + ")", file=sys.stderr)
        num += 1


if __name__ == "__main__":
    main()
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please to comment