Update README.md
Browse files
README.md
CHANGED
@@ -19,17 +19,33 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
|
|
19 |
|
20 |
device = "cuda" # the device to load the model onto
|
21 |
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
```
|
|
|
19 |
|
20 |
device = "cuda" # the device to load the model onto
|
21 |
|
22 |
+
# Example: load the fine-tuned model and prepare an evaluation prompt.
# Requires a CUDA GPU with ~24 GB of memory and the `transformers` package.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer

model_name = "mcysqrd/MODULARMOJO_Mistral_V1"
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    # NOTE(review): `use_flash_attention_2` is deprecated in newer
    # transformers releases in favor of attn_implementation="flash_attention_2";
    # kept as-is for compatibility with the version this example targeted.
    use_flash_attention_2=True,
    max_memory={0: "24GB"},
    device_map="auto",
    trust_remote_code=True,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.bfloat16,
)

eval_prompt = """ what can you tell me about MODULAR_MOJO mojo_roadmap Scoping and mutability of statement variables ? """

tokenizer = AutoTokenizer.from_pretrained(model_name, add_bos_token=True, trust_remote_code=True)

model_input = tokenizer(eval_prompt, return_tensors="pt").to("cuda")

# BUG FIX: the original snippet assigned to `model_to_save`, a name that is
# never defined here and would raise NameError; the loaded model is `model`.
model.config.use_cache = True
def stream(user_prompt):
    """Stream a generated answer for *user_prompt* to stdout.

    Wraps the stripped prompt in Mistral-style [INST]/[/INST] tags behind a
    fixed system preamble, tokenizes onto the GPU, and streams up to 200 new
    tokens through a TextStreamer as they are produced.
    """
    target_device = "cuda:0"
    preamble = 'The following is an excerpt from MODULAR_MOJO from the section on roadmap.'
    inst_open, inst_close = "[INST]", "[/INST]"
    full_prompt = f"{preamble}{inst_open}{user_prompt.strip()}\n{inst_close}"
    encoded = tokenizer([full_prompt], return_tensors="pt").to(target_device)
    token_streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    # Generated text is emitted by the streamer; the returned tensor is unused.
    _ = model.generate(**encoded, streamer=token_streamer, max_new_tokens=200)

stream("What can you tell me about MODULAR_MOJO mojo_roadmap Scoping and mutability of statement variables?")
|
51 |
```
|