import os

import torch
import gradio as gr
from huggingface_hub import login
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Fetch the API token from an environment variable
api_token = os.getenv("Llama_Token")

# Authenticate with Hugging Face
login(api_token)

# Load the LLaMA 3.2 model and tokenizer with the API token
model_name = "meta-llama/Llama-3.2-1B"
tokenizer = AutoTokenizer.from_pretrained(model_name, token=api_token)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    token=api_token,
    torch_dtype=torch.bfloat16,  # half-precision weights to reduce memory use
    device_map="auto",           # automatic device placement (requires accelerate)
)

# Build the pipeline from the already-loaded model and tokenizer
# instead of reloading them by id
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

# Quick sanity check that generation works
pipe("How are you doing?")

# Define the inference function
def generate_text(prompt, max_length, temperature):
    # Move inputs to the model's device in case device_map placed it on GPU
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    output = model.generate(
        inputs["input_ids"],
        max_length=int(max_length),
        temperature=temperature,
        do_sample=True,  # temperature only takes effect when sampling is enabled
    )
    return tokenizer.decode(output[0], skip_special_tokens=True)

# Create the Gradio interface
iface = gr.Interface(
    fn=generate_text,
    inputs=[
        gr.Textbox(label="Enter your prompt", placeholder="Start typing..."),
        gr.Slider(minimum=50, maximum=200, step=1, label="Max Length", value=100),
        gr.Slider(minimum=0.1, maximum=1.0, label="Temperature", value=0.7),
    ],
    outputs="text",
    title="LLaMA 3.2 Text Generator",
    description="Enter a prompt to generate text using the LLaMA 3.2 model.",
)

# Launch the Gradio app
iface.launch()
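
# --- Usage sketch (hypothetical filename and token value; adjust to your setup) ---
# The environment variable name must match the os.getenv("Llama_Token") call above:
#
#   export Llama_Token=hf_...   # a Hugging Face access token with read scope
#   python app.py               # assuming this script is saved as app.py
#
# meta-llama/Llama-3.2-1B is a gated model, so the account that owns the token
# must also have accepted the model's license on its Hugging Face page.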