Converting into 4-bit or 8-bit weights from tf/flax weights is currently not supported
I am trying to run meta-llama/Meta-Llama-3-8B-Instruct with 8-bit quantization (bitsandbytes) through a transformers text-generation pipeline:

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig, pipeline


def llm(messages):
    # quantization settings
    bnb_config = BitsAndBytesConfig(
        load_in_8bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
    )

    # load the model
    model_name = "meta-llama/Meta-Llama-3-8B-Instruct"
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map="auto",
        quantization_config=bnb_config,
    )

    # wrap the model in a text-generation pipeline
    pipe = pipeline(
        "text-generation",
        model=model,
        device_map="auto",
        model_kwargs={"torch_dtype": torch.bfloat16},
    )

    # build the prompt from the chat template
    prompt = pipe.tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    print("prompt", prompt)

    # identify the end-of-text tokens
    terminators = [
        pipe.tokenizer.eos_token_id,
        pipe.tokenizer.convert_tokens_to_ids("<|eot_id|>"),
    ]

    response = pipe(
        prompt,
        max_new_tokens=256,
        eos_token_id=terminators,
        do_sample=True,  # text generation strategy
        temperature=0.6,
        top_p=0,
    )
    return response[0]["generated_text"][len(prompt):]
messages = [{"role": "user", "content": "1+1?"}]
llm_output = llm(messages)
print(llm_output)
Running this gives the following error:
File ~/.local/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py:564, in _BaseAutoModelClass.from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
...
73 "Converting into 4-bit or 8-bit weights from tf/flax weights is currently not supported, please make"
74 " sure the weights are in PyTorch format."
75 )
ImportError: Using bitsandbytes 8-bit quantization requires the latest version of bitsandbytes: pip install -U bitsandbytes
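
The message suggests upgrading bitsandbytes. For reference, a quick way to check which versions are actually being picked up (a minimal sketch, assuming it is run in the same Python environment as the script above) is:

import bitsandbytes
import transformers
import torch

# print the versions visible to this interpreter
print("bitsandbytes:", bitsandbytes.__version__)
print("transformers:", transformers.__version__)
print("torch:", torch.__version__, "| CUDA available:", torch.cuda.is_available())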
Does anyone know how to fix it?