import os

import gradio as gr
import torch
from huggingface_hub import login
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
# Fetch the Hugging Face API token from the environment.
api_token = os.getenv("Llama_Token")
if not api_token:
    # Fail fast with a clear message: without a token, login() would fall
    # back to an interactive prompt, which cannot be answered when this
    # script runs unattended.
    raise RuntimeError(
        "Environment variable 'Llama_Token' is not set; a Hugging Face "
        "access token is required to download the model."
    )

# Authenticate with Hugging Face.
login(api_token)

# Load the LLaMA 3.2 tokenizer and model with the API token. The dtype and
# device placement are chosen here, at load time, so the weights are read
# once and placed directly where they will be used.
model_name = "meta-llama/Llama-3.2-1B"
tokenizer = AutoTokenizer.from_pretrained(model_name, token=api_token)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    token=api_token,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# Build the text-generation pipeline from the objects loaded above instead of
# passing a repo id, which would download and instantiate the model again.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

# Smoke test: run one generation at start-up so load problems surface early.
pipe("How are you doing?")
# # Define the inference function
# def generate_text(prompt, max_length, temperature):
#     inputs = tokenizer(prompt, return_tensors="pt")
#     output = model.generate(inputs['input_ids'], max_length=max_length, temperature=temperature)
#     return tokenizer.decode(output[0], skip_special_tokens=True)
# # Create the Gradio interface
# iface = gr.Interface(
#     fn=generate_text,
#     inputs=[
#         gr.Textbox(label="Enter your prompt", placeholder="Start typing..."),
#         gr.Slider(minimum=50, maximum=200, label="Max Length", value=100),
#         gr.Slider(minimum=0.1, maximum=1.0, label="Temperature", value=0.7),
#     ],
#     outputs="text",
#     title="LLaMA 3.2 Text Generator",
#     description="Enter a prompt to generate text using the LLaMA 3.2 model.",
# )
# # Launch the Gradio app
# iface.launch()