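"""Prompt-to-video Gradio app.

Pipeline: GPT-4 drafts five slides as a python list of dicts, gpt-3.5-turbo
renders each slide to HTML, imgkit rasterizes the HTML to images, Google Cloud
Text-to-Speech narrates each slide, and moviepy stitches images and audio into
a single .mp4 served through a Gradio interface.
"""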
import ast
import glob
import os
import re

import google.cloud.texttospeech as tts
import gradio as gr
import imgkit
from google.oauth2 import service_account
from langchain import LLMChain, PromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferWindowMemory
from moviepy.editor import ImageSequenceClip, AudioFileClip, concatenate_videoclips
OPENAI_API_KEY = os.environ['OPENAI_API_KEY']
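# The key must be provided via the environment (e.g. a Space secret); if it is
# missing, the lookup above raises KeyError at import time.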
dict_list_format = '''[{'header': 'slide1_title',
  'paragraphs': ['bullet_point1',
                 'bullet_point2',
                 'bullet_point3',
                 ...]},
 {'header': 'slide2_title',
  'paragraphs': ['bullet_point1',
                 'bullet_point2',
                 'bullet_point3',
                 ...]},
 {'header': 'slide3_title',
  'paragraphs': ['bullet_point1',
                 'bullet_point2',
                 'bullet_point3',
                 ...]},
 {'header': 'slide4_title',
  'paragraphs': ['bullet_point1',
                 'bullet_point2',
                 'bullet_point3',
                 ...]},
 {'header': 'slide5_title',
  'paragraphs': ['bullet_point1',
                 'bullet_point2',
                 'bullet_point3',
                 ...]}]
'''
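# The format spec above is interpolated into the slide prompt so that the
# model's reply can be parsed back into python objects with ast.literal_eval().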
credentials = service_account.Credentials.from_service_account_file("tts_google.json")
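# Assumes tts_google.json (a Google Cloud service-account key) ships alongside
# the app; it authenticates the Text-to-Speech client below.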
def text_to_wav(voice_name: str, text: str, file_name: str):
    """Synthesize `text` with Google Cloud TTS and save it to `file_name` as WAV."""
    # The language code is the first two segments of the voice name, e.g. "en-US-Neural2-F" -> "en-US".
    language_code = "-".join(voice_name.split("-")[:2])
    text_input = tts.SynthesisInput(text=text)
    voice_params = tts.VoiceSelectionParams(
        language_code=language_code, name=voice_name
    )
    audio_config = tts.AudioConfig(audio_encoding=tts.AudioEncoding.LINEAR16)
    client = tts.TextToSpeechClient(credentials=credentials)
    response = client.synthesize_speech(
        input=text_input,
        voice=voice_params,
        audio_config=audio_config,
    )
    with open(file_name, "wb") as out:
        out.write(response.audio_content)
    print(f'Generated speech saved to "{file_name}"')
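# Example: text_to_wav("en-US-Neural2-F", "Hello!", "intro.wav") writes a 16-bit PCM WAV.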
def prompt_to_video(video_prompt):
    # Pass-through prompt: the chain just replays history plus the latest input.
    template = '''
    {history}
    {human_input}
    '''
    prompt = PromptTemplate(
        input_variables=["history", "human_input"],
        template=template
    )
    # First chain: GPT-4 drafts the slide content.
    chatgpt_chain = LLMChain(
        llm=ChatOpenAI(model="gpt-4", temperature=0.5, openai_api_key=OPENAI_API_KEY),
        prompt=prompt,
        verbose=True,
        memory=ConversationBufferWindowMemory(k=10),
    )
    prompt_input1 = f'''
    You are a world expert oracle that knows everything.
    You are also an excellent teacher who explains everything succinctly and simply, as if to a kid.
    You are also an expert slide maker and think through everything step by step.
    You are tasked to create 5 slides today.
    Here is the topic:
    {video_prompt}
    Here is the output python list format:
    {dict_list_format}
    The slides should be created in a python list format.
    The list consists of python dictionary objects.
    Each dictionary object contains the header and paragraphs as keys.
    Do not name a slide "Slide 1" or use any numbering; the header should be the slide's title string.
    The header is the title of the slide and the paragraphs should be a list of string objects.
    Return the output in a python list format.
    Make sure there are exactly 5 objects in the python list.
    Do not declare a new variable, output the python list object only.
    Do not say "Here's your". Directly output the python list object only.
    Make sure there is nothing before or after the python list object. ONLY output the python list object.
    '''
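    # The model does not always return a clean python literal, so keep asking
    # until the reply parses into exactly five slide dicts.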
    slide_str_list = []
    while len(slide_str_list) != 5:
        slide_reply = chatgpt_chain.predict(human_input=prompt_input1)
        try:
            slide_str_list = ast.literal_eval(slide_reply)
        except (ValueError, SyntaxError):
            print("Could not parse the model output as a python list, retrying.")
    print("these are the slides:", slide_str_list)
    print("length is:", len(slide_str_list))
    # Second chain: render each slide dict to standalone HTML with gpt-3.5-turbo.
    html_out_list = []
    for slide in slide_str_list:
        template = '''
        {history}
        {human_input}
        '''
        prompt = PromptTemplate(
            input_variables=["history", "human_input"],
            template=template
        )
        chatgpt_chain = LLMChain(
            llm=ChatOpenAI(model="gpt-3.5-turbo", temperature=0, openai_api_key=OPENAI_API_KEY),
            prompt=prompt,
            verbose=True,
            memory=ConversationBufferWindowMemory(k=10),
        )
        prompt_input2 = f'''
        You are a world expert oracle that knows everything.
        You are also an excellent teacher who explains everything succinctly and simply, as if to a kid.
        You are also an expert slide maker and think about everything step by step.
        You are tasked to convert a python dictionary into formatted HTML code.
        The dictionary object consists of the header and paragraphs keys.
        The paragraphs key is a list of strings.
        Here is the dictionary object:
        {slide}
        The slide should be created in HTML format with a 16:9 aspect ratio.
        The wording of the slide should be formatted appropriately into the header and paragraphs.
        The paragraphs in the slide should be formatted as bullet points with 1.5 line spacing between them.
        The header and paragraphs should be aligned in an aesthetically pleasing way.
        Return the output as a nicely formatted HTML string.
        The header should be centered.
        The font color should be white and the background black.
        The font should be Roboto.
        Do not say "Here's your" or "Sure". Directly output the HTML string only.
        Make sure there is nothing before or after the HTML string. ONLY output the HTML string.
        Do not explain what the HTML code is about.
        Do not declare a new variable, output the HTML string only.
        '''
        html_out_list.append(chatgpt_chain.predict(human_input=prompt_input2))
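    # Persist each slide's HTML to disk so imgkit can render it to an image below.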
    # Slide prompts may contain characters that are not filesystem-safe, so sanitize the directory name.
    extract_path = 'slide_' + re.sub(r'[^\w-]+', '_', video_prompt)
    os.makedirs(extract_path, exist_ok=True)
    for num, html_string in enumerate(html_out_list, start=1):
        print(html_string)
        with open(f"{extract_path}/slide_{num}.html", "w") as file:
            file.write(html_string)
    # Configuration for imgkit; the wkhtmltoimage binary is assumed to be installed at /bin/wkhtmltoimage
    config = imgkit.config(wkhtmltoimage='/bin/wkhtmltoimage')
    # The path to store the images
    image_path = os.path.join(extract_path, 'images')
    os.makedirs(image_path, exist_ok=True)
    # Get the list of HTML files
    html_files = sorted([f for f in os.listdir(extract_path) if f.endswith('.html')])
    # Dictionary to store the file names and their corresponding images
    file_images = {}
    # Loop through the HTML files and convert them to images
    for html_file in html_files:
        # Full path of the HTML file
        full_path = os.path.join(extract_path, html_file)
        # Image file name
        image_file = re.sub(r'\.html$', '.jpg', html_file)
        # Full path of the image file
        full_image_path = os.path.join(image_path, image_file)
        # Convert the HTML to an image
        imgkit.from_file(full_path, full_image_path, config=config)
        # Store the image file name
        file_images[html_file] = image_file
    print(file_images)
    # Third chain: GPT-4 writes a voiceover script, one string per slide.
    template = '''
    {history}
    {human_input}
    '''
    prompt = PromptTemplate(
        input_variables=["history", "human_input"],
        template=template
    )
    chatgpt_chain = LLMChain(
        llm=ChatOpenAI(model="gpt-4", temperature=0.5, openai_api_key=OPENAI_API_KEY),
        prompt=prompt,
        verbose=True,
        memory=ConversationBufferWindowMemory(k=10),
    )
    prompt_input3 = f'''
    You are a world expert oracle that knows everything.
    You are also an excellent teacher who explains everything succinctly and simply, as if to a kid.
    You are an expert orator and presenter.
    You are tasked to create a voiceover for 5 slides.
    The slides are formatted as a python list of dictionary objects.
    Each dictionary object is a slide.
    {slide_str_list}
    Input: Python list of dictionary objects
    Output: Python list of string objects
    The output list consists of string objects.
    The purpose of the voiceover text is a speech presentation of the slide.
    The voiceover text should cover the content of each slide while adding extra information to make the presentation funny and engaging.
    Each string is the voiceover text for one slide of the python list.
    Each voiceover string object should be around 80 words.
    Make sure there are exactly 5 objects in the python list.
    Do not declare a new variable, output the python list object only.
    Make sure there is nothing before or after the python list object. ONLY output the python list object.
    Return the output in a python list format.
    Do not say "Here's your" or "Sure". Directly output the python list of strings only.
    '''
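    # Parse the voiceover reply with the same retry-until-five-strings pattern as the slides.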
    voiceover_list = []
    while len(voiceover_list) != 5:
        voiceover_reply = chatgpt_chain.predict(human_input=prompt_input3)
        try:
            voiceover_list = ast.literal_eval(voiceover_reply)
        except (ValueError, SyntaxError):
            print("Could not parse the model output as a python list, retrying.")
    # Narrate each slide into a .wav next to the HTML files so the glob below finds it.
    for num, voiceover in enumerate(voiceover_list, start=1):
        file_name = f"{extract_path}/slide_{num}.wav"
        text_to_wav("en-US-Neural2-F", voiceover, file_name)
        print(file_name)
    # Get the lists of .jpg and .wav files from the slide directory
    jpg_files = sorted(glob.glob(f"{extract_path}/images/*.jpg"))
    wav_files = sorted(glob.glob(f"{extract_path}/*.wav"))
    print(jpg_files, wav_files)
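    # moviepy (which relies on ffmpeg) turns each slide image into a still clip
    # lasting exactly as long as its narration, then concatenates the clips.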
    # Create a list to store the clips
    clips = []
    # Loop through each jpg and wav file
    for jpg_file, wav_file in zip(jpg_files, wav_files):
        # Load the audio file and get its duration
        audio = AudioFileClip(wav_file)
        duration = audio.duration
        print(duration)
        # Use a single frame that lasts the whole narration: fps is the inverse of the duration
        fps = 1 / duration if duration != 0 else 1
        # Create a video clip from the image and set its duration and fps to match the audio
        clip = ImageSequenceClip([jpg_file], durations=[duration], fps=fps)
        # Set the audio of the clip to the wav file
        clip = clip.set_audio(audio)
        # Add the clip to the list of clips
        clips.append(clip)
    # Concatenate all clips into a single video
    video = concatenate_videoclips(clips)
    video_path = f"{extract_path}/output.mp4"
    # Write the video to a file
    video.write_videofile(video_path)
    return video_path
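# Minimal Gradio UI: a single text prompt in, the rendered .mp4 file out.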
iface = gr.Interface(
    fn=prompt_to_video,
    inputs="text",
    outputs=["file"],
    title="Prompt to Video Tutorial",
    description="Create a video tutorial to learn about anything!")
iface.launch()