Getting RuntimeError: weight model.layers.0.self_attn.rotary_emb.inv_freq does not exist
I am new to ML and trying to deploy this model on Amazon SageMaker using the deploy script. Can someone guide me?
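For reference, the deploy script I am using is essentially the standard SageMaker TGI snippet, roughly along these lines (the role, TGI image version, instance type, and environment values below are just what I filled in, so treat them as placeholders):

import sagemaker
from sagemaker.huggingface import HuggingFaceModel, get_huggingface_llm_image_uri

role = sagemaker.get_execution_role()  # SageMaker execution role (placeholder)

# Hugging Face TGI (text-generation-inference) container; the version here is a placeholder
llm_image = get_huggingface_llm_image_uri("huggingface", version="0.9.3")

model = HuggingFaceModel(
    role=role,
    image_uri=llm_image,
    env={
        "HF_MODEL_ID": "Phind/Phind-CodeLlama-34B-v2",  # the model I am trying to deploy
        "SM_NUM_GPUS": "4",            # shard across the GPUs on the instance
        "MAX_INPUT_LENGTH": "4096",
        "MAX_TOTAL_TOKENS": "8192",
    },
)

predictor = model.deploy(
    initial_instance_count=1,
    instance_type="ml.g5.12xlarge",    # placeholder instance type
    container_startup_health_check_timeout=600,
)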
I am seeing this error: RuntimeError: weight model.layers.0.self_attn.rotary_emb.inv_freq does not exist
Traceback (most recent call last):
File "/opt/conda/bin/text-generation-server", line 8, in
sys.exit(app())
File "/opt/conda/lib/python3.9/site-packages/typer/main.py", line 311, in call
return get_command(self)(*args, **kwargs)
File "/opt/conda/lib/python3.9/site-packages/click/core.py", line 1130, in call
return self.main(*args, **kwargs)
File "/opt/conda/lib/python3.9/site-packages/typer/core.py", line 778, in main
return _main(
File "/opt/conda/lib/python3.9/site-packages/typer/core.py", line 216, in _main
rv = self.invoke(ctx)
File "/opt/conda/lib/python3.9/site-packages/click/core.py", line 1657, in invoke
return _process_result(sub_ctx.command.invoke(sub_ctx))
File "/opt/conda/lib/python3.9/site-packages/click/core.py", line 1404, in invoke
return ctx.invoke(self.callback, **ctx.params)
File "/opt/conda/lib/python3.9/site-packages/click/core.py", line 760, in invoke
return __callback(*args, **kwargs)
File "/opt/conda/lib/python3.9/site-packages/typer/main.py", line 683, in wrapper
return callback(**use_params) # type: ignore
File "/opt/conda/lib/python3.9/site-packages/text_generation_server/cli.py", line 78, in serve
server.serve(
File "/opt/conda/lib/python3.9/site-packages/text_generation_server/server.py", line 175, in serve
asyncio.run(
File "/opt/conda/lib/python3.9/asyncio/runners.py", line 44, in run
return loop.run_until_complete(main)
File "/opt/conda/lib/python3.9/asyncio/base_events.py", line 634, in run_until_complete
self.run_forever()
File "/opt/conda/lib/python3.9/asyncio/base_events.py", line 601, in run_forever
self._run_once()
File "/opt/conda/lib/python3.9/asyncio/base_events.py", line 1905, in _run_once
handle._run()
File "/opt/conda/lib/python3.9/asyncio/events.py", line 80, in _run
self._context.run(self._callback, *self._args)
File "/opt/conda/lib/python3.9/site-packages/text_generation_server/server.py", line 142, in serve_inner
model = get_model(
File "/opt/conda/lib/python3.9/site-packages/text_generation_server/models/init.py", line 185, in get_model
return FlashLlama(
File "/opt/conda/lib/python3.9/site-packages/text_generation_server/models/flash_llama.py", line 65, in init
model = FlashLlamaForCausalLM(config, weights)
File "/opt/conda/lib/python3.9/site-packages/text_generation_server/models/custom_modeling/flash_llama_modeling.py", line 452, in init
self.model = FlashLlamaModel(config, weights)
File "/opt/conda/lib/python3.9/site-packages/text_generation_server/models/custom_modeling/flash_llama_modeling.py", line 390, in init
[
File "/opt/conda/lib/python3.9/site-packages/text_generation_server/models/custom_modeling/flash_llama_modeling.py", line 391, in
FlashLlamaLayer(
File "/opt/conda/lib/python3.9/site-packages/text_generation_server/models/custom_modeling/flash_llama_modeling.py", line 326, in init
self.self_attn = FlashLlamaAttention(
File "/opt/conda/lib/python3.9/site-packages/text_generation_server/models/custom_modeling/flash_llama_modeling.py", line 183, in init
self.rotary_emb = PositionRotaryEmbedding.load(
File "/opt/conda/lib/python3.9/site-packages/text_generation_server/utils/layers.py", line 395, in load
inv_freq = weights.get_tensor(f"{prefix}.inv_freq")
File "/opt/conda/lib/python3.9/site-packages/text_generation_server/utils/weights.py", line 62, in get_tensor
filename, tensor_name = self.get_filename(tensor_name)
File "/opt/conda/lib/python3.9/site-packages/text_generation_server/utils/weights.py", line 49, in get_filename
raise RuntimeError(f"weight {tensor_name} does not exist")
File "/opt/conda/lib/python3.9/site-packages/text_generation_server/server.py", line 142, in serve_inner model = get_model( File "/opt/conda/lib/python3.9/site-packages/text_generation_server/models/init.py", line 185, in get_model return FlashLlama( File "/opt/conda/lib/python3.9/site-packages/text_generation_server/models/flash_llama.py", line 65, in init model = FlashLlamaForCausalLM(config, weights) File "/opt/conda/lib/python3.9/site-packages/text_generation_server/models/custom_modeling/flash_llama_modeling.py", line 452, in init self.model = FlashLlamaModel(config, weights) File "/opt/conda/lib/python3.9/site-packages/text_generation_server/models/custom_modeling/flash_llama_modeling.py", line 390, in init [ File "/opt/conda/lib/python3.9/site-packages/text_generation_server/models/custom_modeling/flash_llama_modeling.py", line 391, in FlashLlamaLayer( File "/opt/conda/lib/python3.9/site-packages/text_generation_server/models/custom_modeling/flash_llama_modeling.py", line 326, in init self.self_attn = FlashLlamaAttention( File "/opt/conda/lib/python3.9/site-packages/text_generation_server/models/custom_modeling/flash_llama_modeling.py", line 183, in init self.rotary_emb = PositionRotaryEmbedding.load( File "/opt/conda/lib/python3.9/site-packages/text_generation_server/utils/layers.py", line 395, in load inv_freq = weights.get_tensor(f"{prefix}.inv_freq") File "/opt/conda/lib/python3.9/site-packages/text_generation_server/utils/weights.py", line 62, in get_tensor filename, tensor_name = self.get_filename(tensor_name) File "/opt/conda/lib/python3.9/site-packages/text_generation_server/utils/weights.py", line 49, in get_filename raise RuntimeError(f"weight {tensor_name} does not exist")
2023-09-07T01:02:08.580-04:00
Copy
RuntimeError: weight model.layers.0.self_attn.rotary_emb.inv_freq does not exist
I'm sorry, but without code examples it's a little difficult to diagnose what your issue is. The trouble lies in not knowing why the loader is asking for the weight model.layers.0.self_attn.rotary_emb.inv_freq. These are the tensors that are in the checkpoint and that load without problems: "transformer.word_embeddings", "transformer.word_embeddings_layernorm", "model.embed_tokens.weight", "model.layers.0.self_attn.q_proj.weight", "model.layers.0.self_attn.k_proj.weight", "model.layers.0.self_attn.v_proj.weight", "model.layers.0.self_attn.o_proj.weight", "model.layers.0.mlp.gate_proj.weight", "model.layers.0.mlp.up_proj.weight", "model.layers.0.mlp.down_proj.weight", "model.layers.0.input_layernorm.weight" (the same per-layer tensors repeat for layers 1 through 49), plus "model.norm.weight", "lm_head", "transformer.h" and "transformer.ln_f". There may be more, but I'm not getting any errors from the ones I'm not loading.

That said, unless you're doing something similar to me, where I'm running a specific quantization and need to set a custom device_map, you should be able to simply run:

from transformers import LlamaForCausalLM

model_path = "Phind/Phind-CodeLlama-34B-v2"
# bnb_config is a BitsAndBytesConfig defined elsewhere; drop quantization_config if you are not quantizing
model = LlamaForCausalLM.from_pretrained(model_path, quantization_config=bnb_config, device_map="auto")
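If you want to double-check which tensor names your copy of the checkpoint actually contains, here is a minimal sketch, assuming the weights are sharded .safetensors files that you have downloaded locally (the path below is a placeholder):

import glob

from safetensors import safe_open

# Print every tensor name in every safetensors shard of the checkpoint.
for shard in sorted(glob.glob("/path/to/checkpoint/*.safetensors")):
    with safe_open(shard, framework="pt", device="cpu") as f:
        for name in f.keys():
            print(shard, name)

If model.layers.0.self_attn.rotary_emb.inv_freq is not in that output, then whatever is loading the model expects a tensor the checkpoint simply does not ship.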
Also, there's no guarantee that the device_map is actually your problem, but given that the failure happens while it is loading the layers, my guess is that they are being mapped improperly.
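And if it turns out you do need a custom device_map rather than "auto", it is just a dict from module names to devices. A rough sketch for splitting the model across two GPUs follows; the split itself is made up, and the layer count should match num_hidden_layers in the model's config.json (I listed layers up to 49 above), so adjust both to your hardware. This also assumes accelerate is installed.

from transformers import LlamaForCausalLM

num_layers = 50  # layers 0..49 as listed above; check num_hidden_layers in config.json

# Hypothetical split: embeddings and the first half of the layers on GPU 0,
# the remaining layers plus the final norm and lm_head on GPU 1.
device_map = {"model.embed_tokens": 0, "model.norm": 1, "lm_head": 1}
device_map.update({f"model.layers.{i}": 0 if i < num_layers // 2 else 1 for i in range(num_layers)})

model = LlamaForCausalLM.from_pretrained(
    "Phind/Phind-CodeLlama-34B-v2",
    device_map=device_map,
)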