homebrewltd
/

llama3-s-2024-07-08

Text Generation

sound language model

text-generation-inference

Inference Endpoints

Model card | Files and versions | Community

HoangHa committed on Jul 19

Commit

f707e6b

•

1 Parent(s): f66c9a1

Update README.md

Files changed (1) hide show

README.md +3 -1

README.md CHANGED Viewed

@@ -78,7 +78,7 @@ def setup_pipeline(model_path, use_4bit=True):
     tokenizer = AutoTokenizer.from_pretrained(model_path)
     model_kwargs = {"device_map": "auto"}
     if use_4bit:
         model_kwargs["quantization_config"] = BitsAndBytesConfig(
             load_in_4bit=True,
@@ -86,6 +86,8 @@ def setup_pipeline(model_path, use_4bit=True):
             bnb_4bit_use_double_quant=True,
             bnb_4bit_quant_type="nf4",
         )
     model = AutoModelForCausalLM.from_pretrained(model_path, **model_kwargs)

     tokenizer = AutoTokenizer.from_pretrained(model_path)
     model_kwargs = {"device_map": "auto"}
     if use_4bit:
         model_kwargs["quantization_config"] = BitsAndBytesConfig(
             load_in_4bit=True,
             bnb_4bit_use_double_quant=True,
             bnb_4bit_quant_type="nf4",
         )
+    else:
+        model_kwargs["torch_dtype"] = torch.bfloat16
     model = AutoModelForCausalLM.from_pretrained(model_path, **model_kwargs)