# Install necessary libraries
# !pip install transformers accelerate

from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch

# Model and prompt details
model_name = "mlabonne/llama-2-7b-guanaco"
prompt = "What is a large language model?"

# Load the tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
    offload_folder="offload/folder",  # Replace with the path to the offload folder
)

# Generate text from the prompt.
# Note: the model was loaded with device_map="auto", so do not pass an explicit
# `device` argument to the pipeline; it reuses the model's existing placement.
sequences = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)(
    f"[INST] {prompt} [/INST]",
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
    max_length=200,
)

# Print the generated text
for seq in sequences:
    print(f"Generated Text: {seq['generated_text']}")