-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Description
```text
Traceback (most recent call last):
  File "/home/dugis/shihaofeng/llm/ms-swift/swift/cli/export.py", line 5, in <module>
    export_main()
  File "/home/dugis/shihaofeng/llm/ms-swift/swift/llm/export/export.py", line 53, in export_main
    return SwiftExport(args).main()
           ^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/dugis/shihaofeng/llm/ms-swift/swift/llm/base.py", line 49, in main
    result = self.run()
             ^^^^^^^^^^
  File "/home/dugis/shihaofeng/llm/ms-swift/swift/llm/export/export.py", line 30, in run
    quantize_model(args)
  File "/home/dugis/shihaofeng/llm/ms-swift/swift/llm/export/quant.py", line 287, in quantize_model
    QuantEngine(args).quantize()
  File "/home/dugis/shihaofeng/llm/ms-swift/swift/llm/export/quant.py", line 45, in quantize
    gptq_quantizer = self.gptq_model_quantize(v2=(args.quant_method == 'gptq_v2'))
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/dugis/shihaofeng/llm/ms-swift/swift/llm/export/quant.py", line 281, in gptq_model_quantize
    gptq_quantizer.quantize_model(self.model, self.tokenizer)
  File "/home/dugis/miniforge3/envs/ms-swift/lib/python3.11/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/home/dugis/miniforge3/envs/ms-swift/lib/python3.11/site-packages/optimum/gptq/quantizer.py", line 634, in quantize_model
    block(*layer_inputs[j], **layer_input_kwargs[j])
  File "/home/dugis/miniforge3/envs/ms-swift/lib/python3.11/site-packages/transformers/modeling_layers.py", line 83, in __call__
    return super().__call__(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/dugis/miniforge3/envs/ms-swift/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1751, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/dugis/miniforge3/envs/ms-swift/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1762, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/dugis/miniforge3/envs/ms-swift/lib/python3.11/site-packages/accelerate/hooks.py", line 175, in new_forward
    output = module._old_forward(*args, **kwargs)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/dugis/miniforge3/envs/ms-swift/lib/python3.11/site-packages/transformers/models/qwen3/modeling_qwen3.py", line 262, in forward
    hidden_states, self_attn_weights = self.self_attn(
                                       ^^^^^^^^^^^^^^^
  File "/home/dugis/miniforge3/envs/ms-swift/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1751, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/dugis/miniforge3/envs/ms-swift/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1762, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/dugis/miniforge3/envs/ms-swift/lib/python3.11/site-packages/accelerate/hooks.py", line 175, in new_forward
    output = module._old_forward(*args, **kwargs)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/dugis/miniforge3/envs/ms-swift/lib/python3.11/site-packages/transformers/models/qwen3/modeling_qwen3.py", line 201, in forward
    query_states = self.q_norm(self.q_proj(hidden_states).view(hidden_shape)).transpose(1, 2)
                               ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/dugis/miniforge3/envs/ms-swift/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1751, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/dugis/miniforge3/envs/ms-swift/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1857, in _call_impl
    return inner()
           ^^^^^^^
  File "/home/dugis/miniforge3/envs/ms-swift/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1818, in inner
    hook_result = hook(self, args, result)
                  ^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/dugis/miniforge3/envs/ms-swift/lib/python3.11/site-packages/optimum/gptq/quantizer.py", line 620, in tmp
    gptq[name].add_batch(input[0].data, output.data)
  File "/home/dugis/miniforge3/envs/ms-swift/lib/python3.11/site-packages/auto_gptq/quantization/gptq.py", line 60, in add_batch
    self.H += inp.matmul(inp.t())
RuntimeError: CUDA error: an illegal memory access was encountered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with TORCH_USE_CUDA_DSA to enable device-side assertions.
```
命令:
```shell
PYTORCH_CUDA_ALLOC_CONF='expandable_segments:True' \
OMP_NUM_THREADS=14 \
CUDA_VISIBLE_DEVICES=0,1,2,3 \
swift export \
    --model /home/dugis/shihaofeng/llm/ms-swift/sh/output/v10-20251209-194815/checkpoint-2996-merged \
    --dataset '/home/dugis/shihaofeng/llm/ali_address-res-cot.jsonl' \
    --quant_n_samples 256 \
    --quant_batch_size 1 \
    --max_length 8124 \
    --quant_method gptq \
    --quant_bits 4 \
    --output_dir Qwen3-8B-GPTQ-Int4-cot
```