from langchain.llms import HuggingFacePipeline
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, AutoModelForSeq2SeqLM

from components import caption_chain, tag_chain
from components import pexels, utils
import os
import gradio as gr

# Single checkpoint identifier shared by the model and its tokenizer so the
# two can never drift apart.
_MODEL_ID = "declare-lab/flan-alpaca-gpt4-xl"

model = AutoModelForSeq2SeqLM.from_pretrained(_MODEL_ID)
tokenizer = AutoTokenizer.from_pretrained(_MODEL_ID)

# Text-to-text generation pipeline over the seq2seq model; max_length=120 is
# handed to the pipeline as its generation setting.
pipe = pipeline(
    task='text2text-generation',
    model=model,
    tokenizer=tokenizer,
    max_length=120,
)

# Expose the transformers pipeline through LangChain's LLM wrapper so the
# project chains below can use it.
local_llm = HuggingFacePipeline(pipeline=pipe)

# Both chains come from project modules and share the same local LLM.
llm_chain = caption_chain.chain(llm=local_llm)
sum_llm_chain = tag_chain.chain(llm=local_llm)

# Read from the environment; this is None when the variable is not set.
pexels_api_key = os.environ.get('pexels_api_key')

def pred(product_name, orientation):
    """Generate videos for a product and return the result with its captions.

    Calls ``pexels.generate_videos`` with the product name, the module-level
    Pexels API key, the orientation and the two module-level chains, then
    calls ``utils.combine_videos`` on the returned folder name.

    Args:
        product_name: Product name forwarded to ``pexels.generate_videos``.
        orientation: Orientation value forwarded to ``pexels.generate_videos``.

    Returns:
        A dict with two keys: ``'video'`` holding the folder name returned by
        ``pexels.generate_videos`` and ``'captions'`` holding the returned
        sentences joined with newlines.
    """
    generated = pexels.generate_videos(
        product_name,
        pexels_api_key,
        orientation,
        llm_chain,
        sum_llm_chain,
    )
    folder_name, sentences = generated

    # The return value of combine_videos is not used here.
    # NOTE(review): 'video' below is the folder name, not a file path --
    # confirm this is what the consumer of the result expects.
    utils.combine_videos(folder_name)

    caption_text = "\n".join(sentences)
    return dict(video=folder_name, captions=caption_text)

# Gradio UI: pick a video orientation and a product name, then submit to
# generate captions and a video through pred().
with gr.Blocks() as demo:
    # NOTE(review): "Potrait" looks like a misspelling of "Portrait". It is
    # left unchanged because the selected value is passed through pred() to
    # pexels.generate_videos, whose handling of it is not visible here.
    dimension = gr.Dropdown(
        ["Potrait", "Landscape", "Square"],
        label="Video Dimension",
        info="Choose orientation",
    )
    product_name = gr.Textbox(placeholder="Product Name")
    captions = gr.Textbox()
    video = gr.Video()
    btn = gr.Button("Submit")

    def _pred_outputs(product_name_value, orientation_value):
        """Adapt pred()'s dict result to the order of the click outputs.

        pred() returns a dict keyed by the strings 'video' and 'captions'.
        Gradio only maps a returned dict onto outputs when its keys are the
        output components themselves; otherwise it expects one value per
        output, in order. Unpack the dict into (captions, video) to match
        ``outputs=[captions, video]`` below.
        """
        result = pred(product_name_value, orientation_value)
        # NOTE(review): result['video'] is the folder name produced by
        # pred(); confirm it is a path gr.Video can actually load.
        return result['captions'], result['video']

    btn.click(
        _pred_outputs,
        inputs=[product_name, dimension],
        outputs=[captions, video],
    )

demo.launch()