Phoneme / parallel_phonemize.sh
Alyosha11's picture
Upload parallel_phonemize.sh with huggingface_hub
5a5eaa3 verified
#!/bin/bash
# Settings for the parallel phonemization run.

# Value handed to `parallel -j`. Per the GNU Parallel manual a negative
# number is subtracted from the CPU count, so -1 leaves one CPU free.
num_jobs=-1
# Upper bound on how many input files are taken from the file listing.
num_files=50000
# Language code of the corpus.
lang=hi
# Directory that receives the phonemized files.
output_dir='sangraha_hi_phonemized'
# Directory that is searched for *.txt input files.
input_dir='txt'
#######################################
# Phonemize a single text file with the `phonemize` command.
# Arguments:
#   $1 - path of the input text file
#   $2 - path of the output file; missing parent directories are created
#   $3 - optional language code passed to `phonemize -l` (default: hi)
# Outputs:
#   "Processed: <input> -> <output>" on stdout when phonemize succeeds,
#   an error message on stderr otherwise
# Returns:
#   0 on success, 1 if the output directory cannot be created or
#   phonemize exits with a non-zero status
#######################################
process_file() {
  # Locals keep these names from leaking into (or clobbering) the caller's scope.
  local input_file="$1"
  local output_file="$2"
  local lang="${3:-hi}"

  # Create the output directory and its parent directories if they don't exist
  if ! mkdir -p -- "$(dirname -- "$output_file")"; then
    echo "Error: cannot create directory for $output_file" >&2
    return 1
  fi

  # Only report success when phonemize really succeeded; the non-zero return
  # lets the caller (e.g. GNU Parallel) count this file as a failed job.
  if ! phonemize --quiet -l "$lang" "$input_file" -o "$output_file" --strip --language-switch remove-flags --preserve-punctuation; then
    echo "Error: phonemize failed for $input_file" >&2
    return 1
  fi

  echo "Processed: $input_file -> $output_file"
}
# Export the function so the bash processes started by GNU Parallel can call it.
export -f process_file

# Stop early when prerequisites are missing instead of printing benchmark
# numbers for work that never ran.
if ! command -v parallel >/dev/null 2>&1; then
  echo "Error: GNU Parallel ('parallel') was not found in PATH" >&2
  exit 1
fi
if [[ ! -d "$input_dir" ]]; then
  echo "Error: input directory '$input_dir' does not exist" >&2
  exit 1
fi

# The selected file names are written to a temporary list so they can be
# counted; the list is removed on every exit path.
file_list=$(mktemp) || exit 1
trap 'rm -f -- "$file_list"' EXIT

# Start the timer
start_time=$(date +%s)

# Collect at most $num_files input files.
find "$input_dir" -type f -name "*.txt" | head -n "$num_files" > "$file_list"
# Arithmetic expansion strips the padding some wc implementations emit.
files_selected=$(( $(wc -l < "$file_list") ))

# Use GNU Parallel to process the files in parallel.
# {/} is Parallel's own "basename of the input" replacement string. A shell
# $(basename {}) would be expanded by this shell before Parallel ever runs and
# simply yield "{}", i.e. the full input path. Note that inputs sharing a
# basename in different subdirectories map to the same output file.
# $lang is forwarded as a third argument because non-exported variables of
# this shell are not visible inside the jobs; a process_file that only reads
# $1 and $2 ignores it.
parallel -j "$num_jobs" process_file {} "${output_dir}/phn_{/}" "$lang" < "$file_list"
# Non-zero when Parallel itself failed or at least one job returned non-zero.
parallel_status=$?

# End the timer
end_time=$(date +%s)
# Calculate the elapsed time
elapsed_time=$((end_time - start_time))
# Convert elapsed time to minutes and seconds
minutes=$((elapsed_time / 60))
seconds=$((elapsed_time % 60))

# Print the benchmark results
echo "Benchmark Results:"
# Report the number of files actually handed to Parallel, not the configured cap.
echo "Number of files processed: $files_selected"
echo "Number of parallel jobs: $num_jobs"
echo "Elapsed time: $minutes minutes $seconds seconds"

if (( parallel_status != 0 )); then
  echo "Warning: parallel exited with status $parallel_status" >&2
fi
exit "$parallel_status"