mat1 and mat2 shapes cannot be multiplied (trying to fine-tune falcon-7b, following the approach of https://huggingface.co/timdettmers/guanaco-33b)
0%| | 0/1000 [00:00<?, ?it/s]You're using a PreTrainedTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
Traceback (most recent call last):
  File "/home/philglau/PycharmProjects/tokenizersLLM/medium_article_falcon7b.py", line 87, in <module>
    trainer.train()
  File "/home/philglau/anaconda3/envs/pytorch_hug_llm_203/lib/python3.11/site-packages/transformers/trainer.py", line 1645, in train
    return inner_training_loop(
           ^^^^^^^^^^^^^^^^^^^^
  File "/home/philglau/anaconda3/envs/pytorch_hug_llm_203/lib/python3.11/site-packages/transformers/trainer.py", line 1938, in _inner_training_loop
    tr_loss_step = self.training_step(model, inputs)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/philglau/anaconda3/envs/pytorch_hug_llm_203/lib/python3.11/site-packages/transformers/trainer.py", line 2759, in training_step
    loss = self.compute_loss(model, inputs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/philglau/anaconda3/envs/pytorch_hug_llm_203/lib/python3.11/site-packages/transformers/trainer.py", line 2784, in compute_loss
    outputs = model(**inputs)
              ^^^^^^^^^^^^^^^
  File "/home/philglau/anaconda3/envs/pytorch_hug_llm_203/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/philglau/anaconda3/envs/pytorch_hug_llm_203/lib/python3.11/site-packages/torch/nn/parallel/data_parallel.py", line 171, in forward
    outputs = self.parallel_apply(replicas, inputs, kwargs)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/philglau/anaconda3/envs/pytorch_hug_llm_203/lib/python3.11/site-packages/torch/nn/parallel/data_parallel.py", line 181, in parallel_apply
    return parallel_apply(replicas, inputs, kwargs, self.device_ids[:len(replicas)])
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/philglau/anaconda3/envs/pytorch_hug_llm_203/lib/python3.11/site-packages/torch/nn/parallel/parallel_apply.py", line 89, in parallel_apply
    output.reraise()
  File "/home/philglau/anaconda3/envs/pytorch_hug_llm_203/lib/python3.11/site-packages/torch/_utils.py", line 644, in reraise
    raise exception
RuntimeError: Caught RuntimeError in replica 0 on device 0.
Original Traceback (most recent call last):
  File "/home/philglau/anaconda3/envs/pytorch_hug_llm_203/lib/python3.11/site-packages/torch/nn/parallel/parallel_apply.py", line 64, in _worker
    output = module(*input, **kwargs)
             ^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/philglau/anaconda3/envs/pytorch_hug_llm_203/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/philglau/anaconda3/envs/pytorch_hug_llm_203/lib/python3.11/site-packages/peft/peft_model.py", line 678, in forward
    return self.base_model(
           ^^^^^^^^^^^^^^^^
  File "/home/philglau/anaconda3/envs/pytorch_hug_llm_203/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/philglau/anaconda3/envs/pytorch_hug_llm_203/lib/python3.11/site-packages/accelerate/hooks.py", line 165, in new_forward
    output = old_forward(*args, **kwargs)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/philglau/.cache/huggingface/modules/transformers_modules/tiiuae/falcon-7b/378337427557d1df3e742264a2901a49f25d4eb1/modelling_RW.py", line 753, in forward
    transformer_outputs = self.transformer(
                          ^^^^^^^^^^^^^^^^^
  File "/home/philglau/anaconda3/envs/pytorch_hug_llm_203/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/philglau/anaconda3/envs/pytorch_hug_llm_203/lib/python3.11/site-packages/accelerate/hooks.py", line 165, in new_forward
    output = old_forward(*args, **kwargs)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/philglau/.cache/huggingface/modules/transformers_modules/tiiuae/falcon-7b/378337427557d1df3e742264a2901a49f25d4eb1/modelling_RW.py", line 648, in forward
    outputs = block(
              ^^^^^^
  File "/home/philglau/anaconda3/envs/pytorch_hug_llm_203/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/philglau/anaconda3/envs/pytorch_hug_llm_203/lib/python3.11/site-packages/accelerate/hooks.py", line 165, in new_forward
    output = old_forward(*args, **kwargs)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/philglau/.cache/huggingface/modules/transformers_modules/tiiuae/falcon-7b/378337427557d1df3e742264a2901a49f25d4eb1/modelling_RW.py", line 385, in forward
    attn_outputs = self.self_attention(
                   ^^^^^^^^^^^^^^^^^^^^
  File "/home/philglau/anaconda3/envs/pytorch_hug_llm_203/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/philglau/anaconda3/envs/pytorch_hug_llm_203/lib/python3.11/site-packages/accelerate/hooks.py", line 165, in new_forward
    output = old_forward(*args, **kwargs)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/philglau/.cache/huggingface/modules/transformers_modules/tiiuae/falcon-7b/378337427557d1df3e742264a2901a49f25d4eb1/modelling_RW.py", line 242, in forward
    fused_qkv = self.query_key_value(hidden_states)  # [batch_size, seq_length, 3 x hidden_size]
                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/philglau/anaconda3/envs/pytorch_hug_llm_203/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/philglau/anaconda3/envs/pytorch_hug_llm_203/lib/python3.11/site-packages/peft/tuners/lora.py", line 565, in forward
    result = F.linear(x, transpose(self.weight, self.fan_in_fan_out), bias=self.bias)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
RuntimeError: mat1 and mat2 shapes cannot be multiplied (512x4544 and 1x10614784)
0%| | 0/1000 [00:01<?, ?it/s]
Process finished with exit code 1
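
For what it's worth, the failing shapes line up with Falcon-7B's fused QKV projection. mat1 (512x4544) is the flattened input: 512 tokens by hidden size 4544. mat2 (1x10614784), however, is not a normal 2-D weight matrix; it looks like a bitsandbytes 4-bit packed buffer, since 4544 * 4672 / 2 = 10,614,784 (two 4-bit values per byte). A quick sanity check of that arithmetic (the names below are mine, not from the script, and the 4-bit load is an assumption):

    # Hypothetical shape check; assumes the base model was loaded with load_in_4bit.
    hidden_size = 4544    # Falcon-7B hidden dim (second dim of mat1)
    fused_qkv_out = 4672  # 71 query heads * 64, plus one shared key and one shared value head
    packed_bytes = hidden_size * fused_qkv_out // 2  # two 4-bit weights per uint8
    assert packed_bytes == 10_614_784                # matches mat2's 1x10614784 above

The trace also runs through torch/nn/parallel/data_parallel.py, which Trainer falls back to when it sees more than one GPU. Replicating a quantized model with DataParallel copies the packed uint8 storage as if it were a plain weight tensor, so F.linear in the LoRA layer ends up multiplying against the raw buffer instead of a dequantizable weight matrix.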
System: fresh Ubuntu 22.04 install as of 2023-07-12, NVIDIA driver 535.54.03.
PyTorch installed with CUDA 11.7 via conda.
All other required packages installed via pip inside the new PyTorch environment.
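
Since the traceback goes through DataParallel, one common workaround is to keep the quantized model on a single GPU so Trainer never wraps it. The sketch below is an assumption about the setup, not the actual script; it pins the process to GPU 0 and loads the model with an explicit device_map:

    import os
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # must be set before torch initializes CUDA

    import torch
    from transformers import AutoModelForCausalLM, BitsAndBytesConfig

    # Hypothetical 4-bit load, mirroring a typical QLoRA setup.
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
    )
    model = AutoModelForCausalLM.from_pretrained(
        "tiiuae/falcon-7b",
        quantization_config=bnb_config,
        device_map={"": 0},      # place every module on GPU 0
        trust_remote_code=True,  # needed for falcon-7b's custom modelling_RW code
    )

With only one visible device, Trainer reports n_gpu == 1 and skips the DataParallel wrapper, so the packed 4-bit weights are used in place instead of being replicated across GPUs.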