Alyosha11
/

Phoneme

Model card Files Files and versions Community

Phoneme / parallel_phonemize.sh

Alyosha11's picture

Upload parallel_phonemize.sh with huggingface_hub

5a5eaa3 verified 6 months ago

history blame contribute delete

1.16 kB

	#!/bin/bash

	# --- Run configuration ---------------------------------------------------
	# Directory searched for *.txt inputs, and directory that receives the
	# phonemized outputs.
	input_dir='txt'
	output_dir='sangraha_hi_phonemized'

	# Language code. process_file assigns the same value itself when it runs.
	lang='hi'

	# Upper bound on how many paths from the file search are processed.
	num_files=50000

	# Value handed to GNU Parallel's -j option.
	# NOTE(review): GNU Parallel documents a negative -j value as "number of CPU
	# threads minus N", so -1 is not "use every core" - confirm this is intended.
	num_jobs=-1

	#######################################
	# Phonemize a single text file with the `phonemize` command-line tool.
	# Arguments:
	#   $1 - path of the input text file
	#   $2 - path of the output file; missing parent directories are created
	# Outputs:
	#   Writes "Processed: <input> -> <output>" to stdout after a successful run.
	# Returns:
	#   0 on success; otherwise the exit status of the failing mkdir or
	#   phonemize command, so the caller (e.g. GNU Parallel) can see the failure.
	#######################################
	process_file() {
	  local input_file="$1"
	  local output_file="$2"
	  # Assigned here instead of relying on the script-level variable: only the
	  # function itself is exported, so a shell that receives it via `export -f`
	  # does not inherit the script's non-exported variables.
	  local lang=hi

	  # Create the output directory and its parent directories if they don't exist
	  mkdir -p -- "$(dirname -- "$output_file")" || return

	  # Stop before the success message when phonemize fails, so a failed file is
	  # never reported as processed.
	  phonemize --quiet -l "$lang" "$input_file" -o "$output_file" --strip --language-switch remove-flags --preserve-punctuation || return
	  echo "Processed: $input_file -> $output_file"
	}

	# Make the function visible to the child bash shells that run the jobs.
	export -f process_file

	# Start the timer (whole seconds since the epoch).
	start_time=$(date +%s)

	# Gather at most $num_files matching paths first, so the summary below can
	# report how many files were actually dispatched instead of the cap.
	# Paths are newline-delimited: file names containing newlines are unsupported.
	file_list=$(find "$input_dir" -type f -name "*.txt" | head -n "$num_files")

	files_dispatched=0
	parallel_status=0
	if [[ -n "$file_list" ]]; then
	  # The arithmetic expansion strips the padding some wc implementations add.
	  files_dispatched=$(( $(printf '%s\n' "$file_list" | wc -l) ))

	  # Use GNU Parallel to process the files in parallel.
	  # {} is the input path and {/} is GNU Parallel's basename-of-input
	  # replacement string. (A shell $(basename {}) would be expanded once, before
	  # parallel starts, to the literal "{}" and so would insert the full path.)
	  # Inputs that share a basename in different subdirectories map to the same
	  # output file.
	  printf '%s\n' "$file_list" \
	    | parallel -j "$num_jobs" process_file "{}" "${output_dir}/phn_{/}" \
	    || parallel_status=$?
	fi

	# End the timer
	end_time=$(date +%s)

	# Elapsed wall-clock time, split into minutes and seconds.
	elapsed_time=$((end_time - start_time))
	minutes=$((elapsed_time / 60))
	seconds=$((elapsed_time % 60))

	# Print the benchmark results
	echo "Benchmark Results:"
	echo "Number of files processed: $files_dispatched"
	echo "Number of parallel jobs: $num_jobs"
	echo "Elapsed time: $minutes minutes $seconds seconds"

	# Surface a non-zero status from parallel without changing the summary above.
	if (( parallel_status != 0 )); then
	  echo "Warning: parallel exited with status $parallel_status" >&2
	fi