We need an `offload_dir` to dispatch this model according to this `device_map`

#3 opened by littleevillin
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("baichuan-inc/baichuan-7B", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained("baichuan-inc/baichuan-7B", device_map="auto", trust_remote_code=True)
inputs = tokenizer('登鹳雀楼->王之涣\n夜雨寄北->', return_tensors='pt')
inputs = inputs.to('cuda:0')
pred = model.generate(**inputs, max_new_tokens=64, repetition_penalty=1.1)
print(tokenizer.decode(pred.cpu()[0], skip_special_tokens=True))

result:
登鹳雀楼->王之涣
夜雨寄北->李商隐
过零丁洋->文天祥
己亥杂诗(其五)->龚自珍
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
from peft import PeftModel

tokenizer = AutoTokenizer.from_pretrained("baichuan-inc/baichuan-7B", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained("baichuan-inc/baichuan-7B", device_map="auto", trust_remote_code=True)
model = PeftModel.from_pretrained(model, "hiyouga/baichuan-7b-sft")
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
query = "晚上睡不着怎么办"
inputs = tokenizer(["<human>:{}\n<bot>:".format(query)], return_tensors="pt")
inputs = inputs.to("cuda")
generate_ids = model.generate(**inputs, max_new_tokens=256, streamer=streamer)

result:
Traceback (most recent call last):
  File "model-sft.py", line 10, in <module>
    model = PeftModel.from_pretrained(model, "/root/.cache/huggingface/hub/models--hiyouga--baichuan-7b-sft/snapshots/64cf906a964bc94bf754cef8aa0d8c05107c7745")
  File "/usr/local/lib/python3.8/dist-packages/peft/peft_model.py", line 181, in from_pretrained
    model.load_adapter(model_id, adapter_name, **kwargs)
  File "/usr/local/lib/python3.8/dist-packages/peft/peft_model.py", line 406, in load_adapter
    dispatch_model(
  File "/usr/local/lib/python3.8/dist-packages/accelerate/big_modeling.py", line 345, in dispatch_model
    raise ValueError(
ValueError: We need an `offload_dir` to dispatch this model according to this `device_map`, the following submodules need to be offloaded: base_model.model.model.layers.18, base_model.model.model.layers.19, base_model.model.model.layers.20, base_model.model.model.layers.21, base_model.model.model.layers.22, base_model.model.model.layers.23, base_model.model.model.layers.24, base_model.model.model.layers.25, base_model.model.model.layers.26, base_model.model.model.layers.27, base_model.model.model.layers.28, base_model.model.model.layers.29, base_model.model.model.layers.30, base_model.model.model.layers.31, base_model.model.model.norm, base_model.model.lm_head.

Update: passing an `offload_folder` fixes it:
model = PeftModel.from_pretrained(model, "hiyouga/baichuan-7b-sft", offload_folder='./')
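For context: `device_map="auto"` could not fit every layer of baichuan-7B on the GPU, so Accelerate mapped the remaining submodules to "disk", and when PEFT re-dispatches the wrapped model it needs a directory to write those offloaded weights to. A minimal sketch of the full fixed loading path (`./offload` is an arbitrary writable directory, not anything the libraries require):

from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

tokenizer = AutoTokenizer.from_pretrained("baichuan-inc/baichuan-7B", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    "baichuan-inc/baichuan-7B",
    device_map="auto",
    offload_folder="./offload",  # base-model layers that spill past GPU/CPU memory are written here
    trust_remote_code=True,
)
# Loading the adapter re-dispatches the whole PeftModel, so it also needs an offload directory.
model = PeftModel.from_pretrained(model, "hiyouga/baichuan-7b-sft", offload_folder="./offload")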

littleevillin changed discussion status to closed

I encountered the same problem, did you solve it?

I'm also facing the same issue. Here's the code:

from unsloth import FastLanguageModel
import torch
max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

MODEL_NAME = "antony-pk/llama-3-8b-Instruct-bnb-4bit-e10-emp-gold-jul16"
offload_dir = "offload_directory"

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = MODEL_NAME,
#     max_seq_length = max_seq_length,
#     dtype = dtype,
    dtype=torch.float32,
    load_in_4bit = False,
#     offload_dir = offload_dir,  # Add the offload directory here
    device_map = "auto"  # Adjust this according to your needs
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

Error

==((====))==  Unsloth: Fast Llama patching release 2024.7
   \\   /|    GPU: Tesla T4. Max memory: 14.741 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.2.2+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.25.post1. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Loading checkpoint shards: 100% 4/4 [01:06<00:00, 14.34s/it]
generation_config.json: 100% 131/131 [00:00<00:00, 13.3kB/s]
tokenizer_config.json: 100% 51.1k/51.1k [00:00<00:00, 4.49MB/s]
tokenizer.json: 100% 9.09M/9.09M [00:00<00:00, 50.2MB/s]
special_tokens_map.json: 100% 459/459 [00:00<00:00, 39.9kB/s]
adapter_model.safetensors: 100% 168M/168M [00:03<00:00, 40.9MB/s]
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[4], line 10
      7 MODEL_NAME = "antony-pk/llama-3-8b-Instruct-bnb-4bit-e10-emp-gold-jul16"
      8 offload_dir = "offload_directory"
---> 10 model, tokenizer = FastLanguageModel.from_pretrained(
     11     model_name = MODEL_NAME,
     12 #     max_seq_length = max_seq_length,
     13 #     dtype = dtype,
     14     dtype=torch.float32,
     15     load_in_4bit = False,
     16 #     offload_dir = offload_dir,  # Add the offload directory here
     17     device_map = "auto"  # Adjust this according to your needs
     18     # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
     19 )

File /opt/conda/lib/python3.10/site-packages/unsloth/models/loader.py:223, in FastLanguageModel.from_pretrained(model_name, max_seq_length, dtype, load_in_4bit, token, device_map, rope_scaling, fix_tokenizer, trust_remote_code, use_gradient_checkpointing, resize_model_vocab, revision, *args, **kwargs)
    219 if is_peft:
    220     # From https://github.com/huggingface/peft/issues/184
    221     # Now add PEFT adapters
    222     model.enable_input_require_grads()
--> 223     model = PeftModel.from_pretrained(
    224         model,
    225         old_model_name,
    226         token = token,
    227         revision = revision,
    228         is_trainable = True,
    229     )
    230     # Patch it as well!
    231     model = dispatch_model.patch_peft_model(model, use_gradient_checkpointing)

File /opt/conda/lib/python3.10/site-packages/peft/peft_model.py:430, in PeftModel.from_pretrained(cls, model, model_id, adapter_name, is_trainable, config, **kwargs)
    428 else:
    429     model = MODEL_TYPE_TO_PEFT_MODEL_MAPPING[config.task_type](model, config, adapter_name)
--> 430 model.load_adapter(model_id, adapter_name, is_trainable=is_trainable, **kwargs)
    431 return model

File /opt/conda/lib/python3.10/site-packages/peft/peft_model.py:1025, in PeftModel.load_adapter(self, model_id, adapter_name, is_trainable, torch_device, **kwargs)
   1022 self._update_offload(offload_index, adapters_weights)
   1023 dispatch_model_kwargs["offload_index"] = offload_index
-> 1025 dispatch_model(
   1026     self,
   1027     device_map=device_map,
   1028     offload_dir=offload_dir,
   1029     **dispatch_model_kwargs,
   1030 )
   1032 hook = AlignDevicesHook(io_same_device=True)
   1033 if self.peft_config[adapter_name].is_prompt_learning:

File /opt/conda/lib/python3.10/site-packages/accelerate/big_modeling.py:376, in dispatch_model(model, device_map, main_device, state_dict, offload_dir, offload_index, offload_buffers, skip_keys, preload_module_classes, force_hooks)
    374 disk_modules = [name for name, device in device_map.items() if device == "disk"]
    375 if offload_dir is None and offload_index is None and len(disk_modules) > 0:
--> 376     raise ValueError(
    377         "We need an `offload_dir` to dispatch this model according to this `device_map`, the following submodules "
    378         f"need to be offloaded: {', '.join(disk_modules)}."
    379     )
    380 if (
    381     len(disk_modules) > 0
    382     and offload_index is None
    383     and (not os.path.isdir(offload_dir) or not os.path.isfile(os.path.join(offload_dir, "index.json")))
    384 ):
    385     disk_state_dict = extract_submodules_state_dict(model.state_dict(), disk_modules)

ValueError: We need an `offload_dir` to dispatch this model according to this `device_map`, the following submodules need to be offloaded: base_model.model.model.layers.9, base_model.model.model.layers.10, base_model.model.model.layers.11, base_model.model.model.layers.12, base_model.model.model.layers.13, base_model.model.model.layers.14, base_model.model.model.layers.15, base_model.model.model.layers.16, base_model.model.model.layers.17, base_model.model.model.layers.18, base_model.model.model.layers.19, base_model.model.model.layers.20, base_model.model.model.layers.21, base_model.model.model.layers.22, base_model.model.model.layers.23, base_model.model.model.layers.24, base_model.model.model.layers.25, base_model.model.model.layers.26, base_model.model.model.layers.27, base_model.model.model.layers.28, base_model.model.model.layers.29, base_model.model.model.layers.30, base_model.model.model.layers.31, base_model.model.model.norm, base_model.model.lm_head.
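The cause looks the same here: an 8B model loaded in float32 does not fit in the T4's 14.7 GB, so `device_map="auto"` sends part of it to disk, and the PEFT dispatch then has no `offload_dir`. Judging from the traceback, `FastLanguageModel.from_pretrained` calls `PeftModel.from_pretrained` internally without passing an offload directory, so one workaround is to load the base model and the adapter manually with transformers + peft and supply `offload_folder` yourself, as in the update above. A rough sketch, assuming the repo is a LoRA adapter repo (which the downloaded adapter_model.safetensors suggests) and using `./offload` as an arbitrary directory:

from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftConfig, PeftModel

ADAPTER_NAME = "antony-pk/llama-3-8b-Instruct-bnb-4bit-e10-emp-gold-jul16"

# The adapter's config records which base model it was trained on.
peft_config = PeftConfig.from_pretrained(ADAPTER_NAME)

base = AutoModelForCausalLM.from_pretrained(
    peft_config.base_model_name_or_path,
    device_map="auto",
    offload_folder="./offload",  # layers that do not fit on GPU/CPU are written here
)
model = PeftModel.from_pretrained(base, ADAPTER_NAME, offload_folder="./offload")
tokenizer = AutoTokenizer.from_pretrained(ADAPTER_NAME)

Alternatively, keeping load_in_4bit = True (as the repo name suggests the adapter was trained that way) may shrink the model enough to avoid disk offload on a T4 entirely.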
