# Spaces: Running
import gradio as gr | |
from openai import OpenAI | |
import pandas as pd | |
import io | |
import tempfile | |
import shutil | |
import google.generativeai as genai | |
import os | |
# API credentials come from the environment; os.getenv returns None when a
# variable is not set.
api_key = os.getenv("OPENAI_API_KEY")
gemni_api_key = os.getenv("GEMNI_API_KEY")

# Language names offered in both the source and the target dropdown.
supported_languages = [
    'English',
    'Brazilian Portuguese',
    'Latin American Spanish',
    'French',
    'European Portuguese',
    'Castilian Spanish',
    'German',
    'Italian',
    'Czech',
    'Danish',
    'Dutch',
    'Finnish',
    'Norwegian',
    'Swedish',
    'Hungarian',
    'Greek',
    'Romanian',
    'Polish',
    'Arabic',
    'Urdu',
]
# OPENAI
# Module-level client used by the OpenAI-backed functions in this file.
client = OpenAI(api_key=api_key)
def translate_text_openai(source_language, target_language, TEXT, max_characters):
    """Translate ``TEXT`` through the OpenAI chat completions endpoint.

    Args:
        source_language: Name of the language ``TEXT`` is written in.
        target_language: Name of the language to translate into.
        TEXT: The text to translate.
        max_characters: Character budget quoted to the model in the prompt.
            It is only interpolated into the prompt here, not enforced.

    Returns:
        The message content of the first completion choice.
    """
    # Each rule is sent as its own "system" message, in this order.
    system_rules = [
        "You are a multilingual translator for movies subtitles.",
        "The number of input characters and output characters should be the same despite the change in language.",
        f"In response, maximum characters allowed are {max_characters}",
        "You SHOULD NOT SKIP ANY LINE OR ANY INFORMATION",
        "The Tranlation should be error proof",
    ]
    user_prompt = f"""Translate the text from {source_language} language to {target_language} language.:
\nTEXT: {TEXT}
NOTE: THE OUTPUT SHOULD BE IN {target_language} language.
\nREMEMBER: MAXIMUM output chaeracters should be {max_characters}
END NOTE: THE OUTPUT SHOULD BE IN {target_language} language.
REMEMBER: THE OUTPUT SHOULD BE IN {target_language} language.
Hey on some instances you give response in languages other than {target_language}, which is wrong
"""

    conversation = [{"role": "system", "content": rule} for rule in system_rules]
    conversation.append({"role": "user", "content": user_prompt})

    completion = client.chat.completions.create(
        model="gpt-3.5-turbo-0125",
        temperature=0.1,
        messages=conversation,
    )
    return completion.choices[0].message.content
def translate_text_correct_openai(source_language, target_language, TEXT, max_characters):
    """Ask the OpenAI chat model to shorten ``TEXT`` without changing its language.

    Args:
        source_language: Unused; kept so the signature matches the other
            translation helpers.
        target_language: Unused; kept so the signature matches the other
            translation helpers.
        TEXT: The text to shorten.
        max_characters: Character budget quoted to the model in the prompt.
            It is only interpolated into the prompt here, not enforced.

    Returns:
        The message content of the first completion choice.
    """
    # The "TEXT:" line now ends with a real newline. Previously a trailing
    # backslash acted as a line continuation inside the string, which glued the
    # text to shorten directly onto the "REMEMBER LLM" instruction after it.
    user_prompt = f"""
DO NOT CHANGE THE LANGUAGE
Reduce the size of the text to less than {max_characters} characters even if there is a change in meaning.
REMEMBER LLM, THE INPUT AND THE OUTPUT SHOULD BE IN THE SAME LANGUAGE
\nWrite the sentence in shortest possible manner
\nTEXT: {TEXT}
REMEMBER LLM, THE INPUT AND THE OUTPUT SHOULD BE IN THE SAME LANGUAGE
"""
    response = client.chat.completions.create(
        model="gpt-3.5-turbo-0125",
        temperature=0.1,
        messages=[
            {"role": "system", "content": "You reduce the size of the sentences."},
            {"role": "system", "content": f"The maximuim output should not be more than {max_characters} characters."},
            {"role": "user", "content": user_prompt},
        ],
    )
    return response.choices[0].message.content
# GEMNI
# Configure the Gemini SDK with the key read from the environment, then create
# the module-level model handle used by the Gemini-backed functions.
genai.configure(api_key=gemni_api_key)
model = genai.GenerativeModel('gemini-1.5-flash')
def translate_text_gemni(source_language, target_language, TEXT, max_characters):
    """Translate ``TEXT`` with the module-level Gemini model.

    Args:
        source_language: Name of the language ``TEXT`` is written in.
        target_language: Name of the language to translate into.
        TEXT: The text to translate.
        max_characters: Character budget quoted to the model in the prompt.
            It is only interpolated into the prompt here, not enforced.

    Returns:
        The ``text`` attribute of the generation response.
    """
    # All instructions are sent as one prompt string.
    prompt = f'''You are a multilingual translator for movies subtitles.
The number of input characters and output characters should be the same despite the change in language.
In response, maximum characters allowed are {max_characters}.
You SHOULD NOT SKIP ANY LINE OR ANY INFORMATION.
The Tranlation should be error proof.
Translate the text from {source_language} language to {target_language} language.:
\nTEXT: {TEXT}
NOTE: THE OUTPUT SHOULD BE IN {target_language} language.
\nREMEMBER: MAXIMUM output chaeracters should be {max_characters}
END NOTE: THE OUTPUT SHOULD BE IN {target_language} language.
REMEMBER: THE OUTPUT SHOULD BE IN {target_language} language.
Hey on some instances you give response in languages other than {target_language}, which is wrong
'''
    generation = model.generate_content(prompt)
    return generation.text
def translate_text_correct_gemni(source_language, target_language, TEXT, max_characters):
    """Ask the Gemini model to shorten ``TEXT`` without changing its language.

    Args:
        source_language: Unused; kept so the signature matches the other
            translation helpers.
        target_language: Unused; kept so the signature matches the other
            translation helpers.
        TEXT: The text to shorten.
        max_characters: Character budget quoted to the model in the prompt.
            It is only interpolated into the prompt here, not enforced.

    Returns:
        The ``text`` attribute of the generation response.
    """
    # The "TEXT:" line now ends with a real newline. Previously a trailing
    # backslash acted as a line continuation inside the string, which glued the
    # text to shorten directly onto the "REMEMBER LLM" instruction after it.
    prompt = f"""
You reduce the size of the sentences.
The maximuim output should not be more than {max_characters} characters.
DO NOT CHANGE THE LANGUAGE
Reduce the size of the text to less than {max_characters} characters even if there is a change in meaning.
REMEMBER LLM, THE INPUT AND THE OUTPUT SHOULD BE IN THE SAME LANGUAGE
\nWrite the sentence in shortest possible manner
\nTEXT: {TEXT}
REMEMBER LLM, THE INPUT AND THE OUTPUT SHOULD BE IN THE SAME LANGUAGE
"""
    response = model.generate_content(prompt)
    return response.text
def check_conditon_openai(source_language, target_language, response, max_characters, max_attempts=5):
    """Return ``response`` shortened to at most ``max_characters`` characters.

    While the text is over the limit it is sent to
    ``translate_text_correct_openai`` for shortening, up to ``max_attempts``
    times. If it is still too long after that, it is cut to the limit so the
    function always terminates.

    Args:
        source_language: Forwarded to ``translate_text_correct_openai``.
        target_language: Forwarded to ``translate_text_correct_openai``.
        response: The text to check.
        max_characters: Maximum allowed length; anything ``int()`` accepts.
        max_attempts: Upper bound on shortening requests. The previous
            implementation recursed without any bound, so a model that never
            met the limit caused request after request until ``RecursionError``.

    Returns:
        ``response`` unchanged when it already fits, otherwise the shortened
        (and, as a last resort, truncated) text.

    Raises:
        ValueError: If ``max_characters`` cannot be converted to ``int``.
    """
    limit = int(max_characters)
    attempts_left = max_attempts
    while len(response) > limit and attempts_left > 0:
        response = translate_text_correct_openai(source_language, target_language, response, max_characters)
        attempts_left -= 1
    if len(response) > limit:
        # Last resort: enforce the limit ourselves. max() keeps a negative
        # limit from being read as "drop characters from the end".
        response = response[:max(limit, 0)]
    return response
def check_conditon_gemni(source_language, target_language, response, max_characters, max_attempts=5):
    """Return ``response`` shortened to at most ``max_characters`` characters.

    While the text is over the limit it is sent to
    ``translate_text_correct_gemni`` for shortening, up to ``max_attempts``
    times. If it is still too long after that, it is cut to the limit so the
    function always terminates.

    Args:
        source_language: Forwarded to ``translate_text_correct_gemni``.
        target_language: Forwarded to ``translate_text_correct_gemni``.
        response: The text to check.
        max_characters: Maximum allowed length; anything ``int()`` accepts.
        max_attempts: Upper bound on shortening requests. The previous
            implementation recursed without any bound, so a model that never
            met the limit caused request after request until ``RecursionError``.

    Returns:
        ``response`` unchanged when it already fits, otherwise the shortened
        (and, as a last resort, truncated) text.

    Raises:
        ValueError: If ``max_characters`` cannot be converted to ``int``.
    """
    limit = int(max_characters)
    attempts_left = max_attempts
    while len(response) > limit and attempts_left > 0:
        response = translate_text_correct_gemni(source_language, target_language, response, max_characters)
        attempts_left -= 1
    if len(response) > limit:
        # Last resort: enforce the limit ourselves. max() keeps a negative
        # limit from being read as "drop characters from the end".
        response = response[:max(limit, 0)]
    return response
def get_translation(source_language, target_language, TEXT, max_characters):
    """Translate ``TEXT`` with both backends and return the path of an Excel report.

    The text is translated by OpenAI and by Gemini, each result is passed
    through its length check, and the original plus both translations are
    written to an Excel file by ``create_excel``.

    Args:
        source_language: Name of the language ``TEXT`` is written in.
        target_language: Name of the language to translate into.
        TEXT: The text to translate.
        max_characters: Maximum output length; anything ``int()`` accepts.

    Returns:
        The path returned by ``create_excel``.

    Raises:
        ValueError: If ``max_characters`` is not a positive whole number.
    """
    # Validate the limit before any API call. Previously a bad value only
    # failed inside the length check, after the first translation request had
    # already been made.
    try:
        limit = int(max_characters)
    except ValueError as err:
        raise ValueError(
            f"Number of output characters must be a whole number, got {max_characters!r}"
        ) from err
    if limit <= 0:
        # No non-empty translation can satisfy a non-positive limit.
        raise ValueError(
            f"Number of output characters must be positive, got {max_characters!r}"
        )

    response_openai = translate_text_openai(source_language, target_language, TEXT, max_characters)
    response_openai = check_conditon_openai(source_language, target_language, response_openai, max_characters)

    response_gemni = translate_text_gemni(source_language, target_language, TEXT, max_characters)
    response_gemni = check_conditon_gemni(source_language, target_language, response_gemni, max_characters)

    return create_excel(TEXT, response_openai, response_gemni)
def create_excel(TEXT, response_openai, response_gemni):
    """Write the original text and both translations to a temporary .xlsx file.

    Args:
        TEXT: Value for the "Original Text" column.
        response_openai: Value for the "OpenAI Translated" column.
        response_gemni: Value for the "Gemni Translated" column.

    Returns:
        Path of the temporary Excel file. It is created with ``delete=False``,
        so it remains on disk after this function returns.
    """
    # One row holding the three values, one column each.
    columns = {
        "Original Text": TEXT,
        "OpenAI Translated": response_openai,
        "Gemni Translated": response_gemni,
    }
    sheet = pd.DataFrame(columns, index=[1])

    # The writer is closed (and the workbook saved) before the temporary file.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.xlsx') as handle, \
            pd.ExcelWriter(handle, engine='xlsxwriter') as workbook:
        sheet.to_excel(workbook, index=False, sheet_name='Sheet1')
        excel_path = handle.name
    return excel_path
# Gradio UI: two language dropdowns, the text to translate and the character
# limit go in; the generated Excel file comes out as a download.
iface = gr.Interface(
    fn=get_translation,
    inputs=[
        # Add more languages as needed
        gr.Dropdown(choices=supported_languages, label="Source Language"),
        gr.Dropdown(choices=supported_languages, label="Target Language"),
        gr.Textbox(lines=2, label="Input Text"),
        gr.Textbox(lines=1, label="Difine number of output characters"),
    ],
    outputs=gr.File(label="Download Excel File"),
    title="MVP Multilingual Translation",
    description="MVP Multilingual Translation by Farhan",
)

# Start the app with a public share link and debug mode enabled.
iface.launch(share=True, debug=True)