"""Gradio front end for a product ad-video generator.

At import time this script loads the ``declare-lab/flan-alpaca-gpt4-xl``
seq2seq model, wraps it in a LangChain ``HuggingFacePipeline``, and builds the
caption and tag chains from the project's ``components`` package. It then
defines a small form (product name + orientation) and launches it.
"""

import os

import gradio as gr
import torch  # noqa: F401  (unused in this file as shown; kept rather than guessed away)
from langchain.llms import HuggingFacePipeline
from transformers import (
    AutoModelForCausalLM,  # noqa: F401  (unused in this file as shown)
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    pipeline,
)

from components import caption_chain, tag_chain
from components import pexels, utils

# Single source of truth for the checkpoint used by both model and tokenizer.
MODEL_ID = "declare-lab/flan-alpaca-gpt4-xl"
# File name joined onto the folder returned by pexels.generate_videos.
FINAL_VIDEO_NAME = "Final_Ad_Video.mp4"

model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_ID)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
pipe = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=120,
)
local_llm = HuggingFacePipeline(pipeline=pipe)

# Both chains share the same local LLM instance.
llm_chain = caption_chain.chain(llm=local_llm)
sum_llm_chain = tag_chain.chain(llm=local_llm)

# May be None when the variable is not set; pred() checks before using it.
pexels_api_key = os.getenv("pexels_api_key")


def pred(product_name, orientation):
    """Generate the ad video for one product and return what the UI displays.

    Args:
        product_name: Text entered in the "Product Name" box.
        orientation: Value selected in the "Video Dimension" dropdown; it is
            forwarded unchanged to ``pexels.generate_videos``.

    Returns:
        A two-item list matching the click handler's ``outputs``: the caption
        sentences joined with newlines, and the path ``Final_Ad_Video.mp4``
        inside the folder returned by ``pexels.generate_videos``.

    Raises:
        gr.Error: If the product name is blank, or if the ``pexels_api_key``
            environment variable was not set when the script started.
    """
    # Fail early with a message the user can act on instead of sending a
    # blank query or a None API key into the generation helpers.
    if not product_name or not product_name.strip():
        raise gr.Error("Please enter a product name.")
    if not pexels_api_key:
        raise gr.Error("The 'pexels_api_key' environment variable is not set.")

    folder_name, sentences = pexels.generate_videos(
        product_name,
        pexels_api_key,
        orientation,
        llm_chain,
        sum_llm_chain,
    )
    # Presumably writes FINAL_VIDEO_NAME into folder_name -- confirm in utils.
    utils.combine_videos(folder_name)

    return ["\n".join(sentences), os.path.join(folder_name, FINAL_VIDEO_NAME)]


with gr.Blocks() as demo:
    # NOTE(review): "Potrait" looks like a misspelling of "Portrait", but the
    # selected string is passed verbatim to pexels.generate_videos, so it is
    # left as-is here -- confirm what that helper expects before renaming.
    # NOTE(review): no default value is given; depending on the gradio
    # version the handler may receive None until the user picks an option.
    dimension = gr.Dropdown(
        ["Potrait", "Landscape", "Square"],
        label="Video Dimension",
        info="Choose orientation",
    )
    product_name = gr.Textbox(label="Product Name")
    captions = gr.Textbox(label="captions")
    video = gr.Video()
    btn = gr.Button("Submit")
    # Output order must match the list returned by pred().
    btn.click(pred, inputs=[product_name, dimension], outputs=[captions, video])

demo.launch()