Ellight committed
Commit 4a47734
1 Parent(s): 41d1dbb

Update README.md

Files changed (1)
  1. README.md +7 -0
README.md CHANGED
@@ -28,12 +28,18 @@ Hindi-Gemma-2B-instruct is an instruction-tuned Hindi large language model (LLM)
 # To do inference using the LoRA adapters
 
 from unsloth import FastLanguageModel
+
 model, tokenizer = FastLanguageModel.from_pretrained(
+
     model_name = "Ellight/gemma-2b-bnb-4bit", # YOUR MODEL YOU USED FOR TRAINING
+
     max_seq_length = max_seq_length,
+
     dtype = dtype,
+
     load_in_4bit = load_in_4bit,
 )
+
 FastLanguageModel.for_inference(model) # Enable native 2x faster inference
 
 prompt = """
@@ -53,4 +59,5 @@ inputs = tokenizer(
 ], return_tensors = "pt").to("cuda")
 
 outputs = model.generate(**inputs, max_new_tokens = 64, use_cache = True)
+
 tokenizer.batch_decode(outputs)
 
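The snippet in the hunks above is not self-contained: max_seq_length, dtype, and load_in_4bit are used without being defined in the visible context, and the prompt string is cut off at the hunk boundary. For reference, a minimal runnable sketch of the same inference flow, assuming typical unsloth defaults for the three undefined variables and a hypothetical Alpaca-style prompt (the README's real template is not shown in this diff):

# Minimal self-contained sketch of the README snippet above.
# max_seq_length, dtype, and load_in_4bit are assumed values (common unsloth
# defaults); the diff does not show their actual definitions. The prompt
# template below is likewise hypothetical — the real one is truncated here.
from unsloth import FastLanguageModel

max_seq_length = 2048   # assumed; should match the value used during training
dtype = None            # assumed; None lets unsloth auto-detect float16/bfloat16
load_in_4bit = True     # assumed; matches the 4-bit bnb checkpoint name

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "Ellight/gemma-2b-bnb-4bit",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)
FastLanguageModel.for_inference(model)  # enable unsloth's faster inference path

# Hypothetical instruction-style prompt; the README's actual template is cut off.
prompt = """Below is an instruction. Write a response that completes the request.

### Instruction:
{}

### Response:
{}"""

inputs = tokenizer(
    [prompt.format("भारत की राजधानी क्या है?", "")],  # "What is the capital of India?"
    return_tensors = "pt",
).to("cuda")

outputs = model.generate(**inputs, max_new_tokens = 64, use_cache = True)
print(tokenizer.batch_decode(outputs))

With load_in_4bit = True the 2B model fits in a few GB of VRAM, which is the usual reason for pairing LoRA adapters with a 4-bit bnb base checkpoint like this one.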