from langchain.llms import HuggingFacePipeline
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, AutoModelForSeq2SeqLM

from components import caption_chain, tag_chain
from components import pexels, utils
import os
import gradio as gr

# Checkpoint identifier shared by the model and its tokenizer.
MODEL_ID = "declare-lab/flan-alpaca-gpt4-xl"

# Load the seq2seq model and the tokenizer from the same checkpoint.
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_ID)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Text-to-text generation pipeline built on the objects loaded above.
pipe = pipeline(
    task="text2text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=120,
)

# Wrap the transformers pipeline so it can be handed to the LangChain chains.
local_llm = HuggingFacePipeline(pipeline=pipe)

# Both chains are built on the same local LLM instance; `pred` forwards them
# to pexels.generate_videos.
llm_chain = caption_chain.chain(llm=local_llm)
sum_llm_chain = tag_chain.chain(llm=local_llm)

# Read from the environment; os.getenv yields None when the variable is unset.
pexels_api_key = os.getenv("pexels_api_key")

def pred(product_name, orientation):
    """Build the ad video for a product and return its captions and file path.

    Calls ``pexels.generate_videos`` with the module-level Pexels API key and
    the two LLM chains, then runs ``utils.combine_videos`` on the folder it
    returned.

    Args:
        product_name: Product name supplied by the caller (the UI textbox).
        orientation: Orientation value supplied by the caller (the UI
            dropdown); forwarded unchanged to ``pexels.generate_videos``.

    Returns:
        A two-item list: the generated sentences joined with newlines, and
        the path ``<folder_name>/Final_Ad_Video.mp4``.
    """
    output_dir, caption_lines = pexels.generate_videos(
        product_name,
        pexels_api_key,
        orientation,
        llm_chain,
        sum_llm_chain,
    )

    # Presumably writes Final_Ad_Video.mp4 into output_dir -- confirm in
    # components.utils.
    utils.combine_videos(output_dir)

    caption_text = "\n".join(caption_lines)
    final_video_path = os.path.join(output_dir, "Final_Ad_Video.mp4")
    return [caption_text, final_video_path]
    

# Gradio UI: two inputs (orientation dropdown, product name), two outputs
# (captions text, rendered video) and a button that triggers `pred`.
with gr.Blocks() as demo:
    # NOTE(review): "Potrait" looks like a misspelling of "Portrait". The
    # chosen value is forwarded to `pred` as `orientation`, so confirm what
    # the downstream code expects before correcting the spelling.
    dimension = gr.Dropdown(
        choices=["Potrait", "Landscape", "Square"],
        label="Video Dimension",
        info="Choose orientation",
    )
    product_name = gr.Textbox(label="Product Name")
    captions = gr.Textbox(label="captions")
    video = gr.Video()
    btn = gr.Button("Submit")

    # Input order matches pred(product_name, orientation); output order
    # matches the [captions_text, video_path] list that pred returns.
    btn.click(
        fn=pred,
        inputs=[product_name, dimension],
        outputs=[captions, video],
    )

# Launch the app as soon as this module is executed.
demo.launch()