"""Gradio front end for text generation with the LLaMA 3.2 1B model.

The Hugging Face access token is read from the ``Llama_Token`` environment
variable. The tokenizer and model are loaded once at start-up and reused
for every request served by the Gradio interface.
"""

import os

import gradio as gr
from huggingface_hub import login
from transformers import AutoModelForCausalLM, AutoTokenizer

# Fetch API token from environment variable. An empty value is treated the
# same as an unset one.
api_token = os.getenv("Llama_Token") or None

# Authenticate with Hugging Face. Calling login() without a token falls back
# to an interactive prompt, which would block a headless deployment, so only
# log in when a token was actually provided.
if api_token:
    login(api_token)

# Load LLaMA 3.2 model and tokenizer with the API token
model_name = "meta-llama/Llama-3.2-1B"
tokenizer = AutoTokenizer.from_pretrained(model_name, token=api_token)
model = AutoModelForCausalLM.from_pretrained(model_name, token=api_token)


def generate_text(prompt: str, max_length: int, temperature: float) -> str:
    """Generate a continuation of *prompt* with the loaded model.

    Args:
        prompt: Text to tokenize and feed to the model.
        max_length: Value forwarded to ``model.generate`` as ``max_length``
            (per the transformers docs this bounds prompt plus generated
            tokens). Coerced to ``int`` because a UI slider may deliver a
            float.
        temperature: Sampling temperature forwarded to ``model.generate``.

    Returns:
        The first generated sequence decoded to text with special tokens
        removed.
    """
    encoded = tokenizer(prompt, return_tensors="pt")

    # Fall back to EOS for padding when the tokenizer defines no pad token,
    # instead of relying on generate() to choose one implicitly.
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    output = model.generate(
        # Pass the whole encoding so the attention mask reaches the model
        # together with the input ids.
        **encoded,
        max_length=int(max_length),
        # Request sampling explicitly so the temperature setting does not
        # depend on the checkpoint's generation-config defaults.
        do_sample=True,
        temperature=temperature,
        pad_token_id=pad_token_id,
    )
    return tokenizer.decode(output[0], skip_special_tokens=True)


# Create the Gradio interface
iface = gr.Interface(
    fn=generate_text,
    inputs=[
        gr.Textbox(label="Enter your prompt", placeholder="Start typing..."),
        gr.Slider(minimum=50, maximum=200, label="Max Length", value=100),
        gr.Slider(minimum=0.1, maximum=1.0, label="Temperature", value=0.7),
    ],
    outputs="text",
    title="LLaMA 3.2 Text Generator",
    description="Enter a prompt to generate text using the LLaMA 3.2 model.",
)

# Launch the Gradio app
iface.launch()