DeepSeek-V3.2 模型在 MindIE 框架下的部署指导
DeepSeek-V3.2是一款在高计算效率与卓越推理能力间取得突破性平衡的先进模型,基于以下关键技术突破:DeepSeek 稀疏注意力(DSA): 引入DSA高效的注意力机制,它显著降低了计算复杂性,同时保持了模型性能,特别针对长上下文场景进行了优化。可扩展的强化学习框架: 通过实施强大的 RL 协议并扩展后训练计算,DeepSeek-V3.2 的表现与 GPT-5 相当。
昇腾实战派
更多内容可查看【DeepSeek推理部署】知识地图
1. 模型概述及场景
DeepSeek-V3.2是一款在高计算效率与卓越推理能力间取得突破性平衡的先进模型,基于以下关键技术突破:
DeepSeek 稀疏注意力(DSA): 引入高效的稀疏注意力机制 DSA,在显著降低计算复杂度的同时保持模型性能,并针对长上下文场景进行了专门优化。
可扩展的强化学习框架: 通过实施强大的 RL 协议并扩展后训练计算,DeepSeek-V3.2 的表现与 GPT-5 相当。
w8a8权重下载链接:https://modelers.cn/models/Eco-Tech/DeepSeek-V3.2-w8a8-QuaRot/
2. 准备运行环境
2.1 硬件版本
| 组件 | 版本 |
|---|---|
| 硬件环境 | Atlas 800T A2(16卡) |
2.2 软件版本
| 组件 | 版本 |
|---|---|
| MindIE | 2.2.T32 |
| HDK | Ascend HDK 25.2.1 |
| CANN | 8.3.RC1 |
| 模型 | DeepSeek V3.2 |
3. 运行指导
3.1 获取镜像
镜像获取链接:https://www.hiascend.com/developer/ascendhub/detail/af85b724a7e5469ebd7ea13c3439d48f
3.2 创建容器
docker run -itd --privileged --name=mindie-test --net=host \
--shm-size 500g \
--device=/dev/davinci0 \
--device=/dev/davinci1 \
--device=/dev/davinci2 \
--device=/dev/davinci3 \
--device=/dev/davinci4 \
--device=/dev/davinci5 \
--device=/dev/davinci6 \
--device=/dev/davinci7 \
--device=/dev/davinci_manager \
--device=/dev/hisi_hdc \
--device /dev/devmm_svm \
-v /usr/local/Ascend/driver:/usr/local/Ascend/driver \
-v /usr/local/Ascend/firmware:/usr/local/Ascend/firmware \
-v /usr/local/sbin/npu-smi:/usr/local/sbin/npu-smi \
-v /usr/local/sbin:/usr/local/sbin \
-v /etc/hccn.conf:/etc/hccn.conf \
-v /home:/home \
-v /disk1:/disk1 \
-v /disk2:/disk2 \
-v /disk3:/disk3 \
-v /opt:/opt \
--entrypoint /bin/bash mindiexxx
3.3 生成ranktable表
{
"version": "1.0",
"server_count": "2",
"server_list": [
{
"server_id": "xxx",
"container_ip": "xxx",
"device": [
{
"device_id": "0",
"device_ip": "xxx",
"rank_id": "0"
},
{
"device_id": "1",
"device_ip": "xxx",
"rank_id": "1"
},
{
"device_id": "2",
"device_ip": "xxx",
"rank_id": "2"
},
{
"device_id": "3",
"device_ip": "xxx",
"rank_id": "3"
},
{
"device_id": "4",
"device_ip": "xxx",
"rank_id": "4"
},
{
"device_id": "5",
"device_ip": "xxx",
"rank_id": "5"
},
{
"device_id": "6",
"device_ip": "xxx",
"rank_id": "6"
},
{
"device_id": "7",
"device_ip": "xxx",
"rank_id": "7"
}
],
"host_nic_ip": "reserve"
},
{
"server_id": "xxx",
"container_ip": "xxx",
"device": [
{
"device_id": "0",
"device_ip": "xxx",
"rank_id": "8"
},
{
"device_id": "1",
"device_ip": "xxx",
"rank_id": "9"
},
{
"device_id": "2",
"device_ip": "xxx",
"rank_id": "10"
},
{
"device_id": "3",
"device_ip": "xxx",
"rank_id": "11"
},
{
"device_id": "4",
"device_ip": "xxx",
"rank_id": "12"
},
{
"device_id": "5",
"device_ip": "xxx",
"rank_id": "13"
},
{
"device_id": "6",
"device_ip": "xxx",
"rank_id": "14"
},
{
"device_id": "7",
"device_ip": "xxx",
"rank_id": "15"
}
],
"host_nic_ip": "reserve"
}
],
"status": "completed"
}
3.4 修改服务化参数
供参考:
3.4.1 主节点:
{
"BackendConfig": {
"ModelDeployConfig": {
"ModelConfig": [
{
"backendType": "atb",
"cpuMemSize": 5,
"dp": 1,
"enable_warmup_with_sampling": false,
"ignore_eos": true,
"modelInstanceType": "Standard",
"modelName": "m_model",
"modelWeightPath": "/disk2/models/dsv3.2",
"models": {
"deepseekv2": {
"enable_mlapo_prefetch": true,
"kv_cache_options": {
"enable_nz": true
}
}
},
"moe_ep": 1,
"moe_tp": 16,
"npuMemSize": 4,
"plugin_params": "{\"plugin_type\":\"mtp\",\"num_speculative_tokens\": 1}",
"sp": 1,
"tp": 16,
"trustRemoteCode": false,
"worldSize": 8
}
],
"maxInputTokenLen": 8384,
"maxSeqLen": 8384,
"truncation": false
},
"ScheduleConfig": {
"cacheBlockSize": 128,
"decodePolicyType": 0,
"decodeTimeMsPerReq": 50,
"maxBatchSize": 200,
"maxIterTimes": 8384,
"maxPreemptCount": 0,
"maxPrefillBatchSize": 1,
"maxPrefillTokens": 8384,
"maxQueueDelayMicroseconds": 5000,
"prefillPolicyType": 0,
"prefillTimeMsPerReq": 150,
"supportSelectBatch": false,
"templateName": "Standard_LLM",
"templateType": "Standard"
},
"backendName": "mindieservice_llm_engine",
"interNodeKmcKsfMaster": "tools/pmt/master/ksfa",
"interNodeKmcKsfStandby": "tools/pmt/standby/ksfb",
"interNodeTLSEnabled": false,
"interNodeTlsCaFiles": [
"ca.pem"
],
"interNodeTlsCaPath": "security/grpc/ca/",
"interNodeTlsCert": "security/grpc/certs/server.pem",
"interNodeTlsCrlFiles": [
"server_crl.pem"
],
"interNodeTlsCrlPath": "security/grpc/certs/",
"interNodeTlsPk": "security/grpc/keys/server.key.pem",
"interNodeTlsPkPwd": "security/grpc/pass/mindie_server_key_pwd.txt",
"modelInstanceNumber": 1,
"multiNodesInferEnabled": true,
"multiNodesInferPort": 1120,
"npuDeviceIds": [
[
0,
1,
2,
3,
4,
5,
6,
7
]
],
"tokenizerProcessNumber": 8
},
"LogConfig": {
"dynamicLogLevel": "",
"dynamicLogLevelValidHours": 2,
"dynamicLogLevelValidTime": ""
},
"ServerConfig": {
"allowAllZeroIpListening": false,
"distDPServerEnabled": false,
"e2eTimeout": 3600,
"fullTextEnabled": false,
"httpsEnabled": false,
"inferMode": "standard",
"interCommPk": "security/grpc/keys/server.key.pem",
"interCommPkPwd": "security/grpc/pass/key_pwd.txt",
"interCommPort": 1121,
"interCommTLSEnabled": false,
"interCommTlsCaFiles": [
"ca.pem"
],
"interCommTlsCaPath": "security/grpc/ca/",
"interCommTlsCert": "security/grpc/certs/server.pem",
"interCommTlsCrlFiles": [
"server_crl.pem"
],
"interCommTlsCrlPath": "security/grpc/certs/",
"ipAddress": "此处为MASTER_IP",
"kmcKsfMaster": "tools/pmt/master/ksfa",
"kmcKsfStandby": "tools/pmt/standby/ksfb",
"managementIpAddress": "127.0.0.2",
"managementPort": 1026,
"managementTlsCaFile": [
"management_ca.pem"
],
"managementTlsCert": "security/certs/management/server.pem",
"managementTlsCrlFiles": [
"server_crl.pem"
],
"managementTlsCrlPath": "security/management/certs/",
"managementTlsPk": "security/keys/management/server.key.pem",
"managementTlsPkPwd": "security/pass/management/key_pwd.txt",
"maxLinkNum": 1000,
"metricsPort": 1027,
"openAiSupport": "vllm",
"port": 8080,
"tlsCaFile": [
"ca.pem"
],
"tlsCaPath": "security/ca/",
"tlsCert": "security/certs/server.pem",
"tlsCrlFiles": [
"server_crl.pem"
],
"tlsCrlPath": "security/certs/",
"tlsPk": "security/keys/server.key.pem",
"tlsPkPwd": "security/pass/key_pwd.txt",
"tokenTimeout": 3600
},
"Version": "1.0.0"
}
3.4.2 从节点:
{
"BackendConfig": {
"ModelDeployConfig": {
"ModelConfig": [
{
"backendType": "atb",
"cpuMemSize": 5,
"dp": 1,
"enable_warmup_with_sampling": false,
"ignore_eos": true,
"modelInstanceType": "Standard",
"modelName": "m_model",
"modelWeightPath": "/disk2/models/dsv3.2",
"models": {
"deepseekv2": {
"enable_mlapo_prefetch": true,
"kv_cache_options": {
"enable_nz": true
}
}
},
"moe_ep": 1,
"moe_tp": 16,
"npuMemSize": 4,
"plugin_params": "{\"plugin_type\":\"mtp\",\"num_speculative_tokens\": 1}",
"sp": 1,
"tp": 16,
"trustRemoteCode": false,
"worldSize": 8
}
],
"maxInputTokenLen": 8384,
"maxSeqLen": 8384,
"truncation": false
},
"ScheduleConfig": {
"cacheBlockSize": 128,
"decodePolicyType": 0,
"decodeTimeMsPerReq": 50,
"maxBatchSize": 200,
"maxIterTimes": 8384,
"maxPreemptCount": 0,
"maxPrefillBatchSize": 1,
"maxPrefillTokens": 8384,
"maxQueueDelayMicroseconds": 5000,
"prefillPolicyType": 0,
"prefillTimeMsPerReq": 150,
"supportSelectBatch": false,
"templateName": "Standard_LLM",
"templateType": "Standard"
},
"backendName": "mindieservice_llm_engine",
"interNodeKmcKsfMaster": "tools/pmt/master/ksfa",
"interNodeKmcKsfStandby": "tools/pmt/standby/ksfb",
"interNodeTLSEnabled": false,
"interNodeTlsCaFiles": [
"ca.pem"
],
"interNodeTlsCaPath": "security/grpc/ca/",
"interNodeTlsCert": "security/grpc/certs/server.pem",
"interNodeTlsCrlFiles": [
"server_crl.pem"
],
"interNodeTlsCrlPath": "security/grpc/certs/",
"interNodeTlsPk": "security/grpc/keys/server.key.pem",
"interNodeTlsPkPwd": "security/grpc/pass/mindie_server_key_pwd.txt",
"modelInstanceNumber": 1,
"multiNodesInferEnabled": true,
"multiNodesInferPort": 1120,
"npuDeviceIds": [
[
0,
1,
2,
3,
4,
5,
6,
7
]
],
"tokenizerProcessNumber": 8
},
"LogConfig": {
"dynamicLogLevel": "",
"dynamicLogLevelValidHours": 2,
"dynamicLogLevelValidTime": ""
},
"ServerConfig": {
"allowAllZeroIpListening": false,
"distDPServerEnabled": false,
"e2eTimeout": 3600,
"fullTextEnabled": false,
"httpsEnabled": false,
"inferMode": "standard",
"interCommPk": "security/grpc/keys/server.key.pem",
"interCommPkPwd": "security/grpc/pass/key_pwd.txt",
"interCommPort": 1121,
"interCommTLSEnabled": false,
"interCommTlsCaFiles": [
"ca.pem"
],
"interCommTlsCaPath": "security/grpc/ca/",
"interCommTlsCert": "security/grpc/certs/server.pem",
"interCommTlsCrlFiles": [
"server_crl.pem"
],
"interCommTlsCrlPath": "security/grpc/certs/",
"ipAddress": "127.0.0.1",
"kmcKsfMaster": "tools/pmt/master/ksfa",
"kmcKsfStandby": "tools/pmt/standby/ksfb",
"managementIpAddress": "127.0.0.2",
"managementPort": 1026,
"managementTlsCaFile": [
"management_ca.pem"
],
"managementTlsCert": "security/certs/management/server.pem",
"managementTlsCrlFiles": [
"server_crl.pem"
],
"managementTlsCrlPath": "security/management/certs/",
"managementTlsPk": "security/keys/management/server.key.pem",
"managementTlsPkPwd": "security/pass/management/key_pwd.txt",
"maxLinkNum": 1000,
"metricsPort": 1027,
"openAiSupport": "vllm",
"port": 8080,
"tlsCaFile": [
"ca.pem"
],
"tlsCaPath": "security/ca/",
"tlsCert": "security/certs/server.pem",
"tlsCrlFiles": [
"server_crl.pem"
],
"tlsCrlPath": "security/certs/",
"tlsPk": "security/keys/server.key.pem",
"tlsPkPwd": "security/pass/key_pwd.txt",
"tokenTimeout": 3600
},
"Version": "1.0.0"
}
3.3 部署服务
3.3.1 修改模型config文件
修改模型配置文件config.json,将model_type改为deepseek_v3:
"model_type": "deepseek_v3"
3.3.2 主节点启动
source /usr/local/Ascend/ascend-toolkit/set_env.sh
source /usr/local/Ascend/nnal/atb/set_env.sh
source /usr/local/Ascend/atb-models/set_env.sh
source /usr/local/Ascend/mindie/set_env.sh
export MIES_CONTAINER_IP=xxx1
# ranktable.json文件位置
export RANKTABLEFILE=xxx/hccl_2s_16p.json
# 主节点 ip 和端口
export MASTER_IP=xxx1
export MASTER_PORT=8088
# 使能内存池扩展段特性
export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
# 提升权重加载速度
export OMP_NUM_THREADS=10
export HCCL_CONNECT_TIMEOUT=7200
export HCCL_HOST_SOCKET_PORT_RANGE=60000-60050
export MINDIE_LOG_LEVEL=INFO
cd /usr/local/Ascend/mindie/latest/mindie-service/
./bin/mindieservice_daemon
3.3.3 从节点启动
source /usr/local/Ascend/ascend-toolkit/set_env.sh
source /usr/local/Ascend/nnal/atb/set_env.sh
source /usr/local/Ascend/atb-models/set_env.sh
source /usr/local/Ascend/mindie/set_env.sh
export MIES_CONTAINER_IP=xxx2
# ranktable.json文件位置
export RANKTABLEFILE=xxx/hccl_2s_16p.json
# 主节点 ip 和端口
export MASTER_IP=xxx1
export MASTER_PORT=8088
# 使能内存池扩展段特性
export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
# 提升权重加载速度
export OMP_NUM_THREADS=10
export HCCL_CONNECT_TIMEOUT=7200
export HCCL_HOST_SOCKET_PORT_RANGE=60000-60050
export MINDIE_LOG_LEVEL=INFO
cd /usr/local/Ascend/mindie/latest/mindie-service/
./bin/mindieservice_daemon
注意:MASTER_PORT不要和config.json内的port冲突。
3.4 发送curl请求验证
curl -X POST http://xxx:8080/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "m_model",
"messages": [
{
"role": "user",
"content": "你是谁?"
}
],
"max_tokens": 100,
"ignore_eos": false,
"stream": false
}'
curl -X POST http://xxx:8080/v1/completions -H "Content-Type: application/json" -d '{
"model": "m_model",
"prompt": "<|begin▁of▁sentence|><|User|>hello<|Assistant|></think>Hello! I am DeepSeek.<|end▁of▁sentence|><|User|>自我介绍<|Assistant|><think>",
"max_tokens": 100,
"temperature": 1.0,
"top_p": 0.95,
"stop": ["<|end▁of▁sentence|>"]
}'
4. 问题记录
4.1 问题描述:The model does not appear to be a chat model because it is not configured with a chat_template.
解决方案:
需要修改tokenizer_config.json:DeepSeek-V3.2内的tokenizer_config.json缺少chat_template,可以采用DeepSeek-V3.2-Exp内的chat_template。
{
"add_bos_token": false,
"add_eos_token": false,
"bos_token": {
"__type": "AddedToken",
"content": "<|begin▁of▁sentence|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false
},
"clean_up_tokenization_spaces": false,
"eos_token": {
"__type": "AddedToken",
"content": "<|end▁of▁sentence|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false
},
"legacy": true,
"model_max_length": 131072,
"pad_token": {
"__type": "AddedToken",
"content": "<|end▁of▁sentence|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false
},
"sp_model_kwargs": {},
"unk_token": null,
"tokenizer_class": "LlamaTokenizerFast",
"chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% if not thinking is defined %}{% set thinking = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, system_prompt='', is_first_sp=true, is_last_user=false, is_only_sys=false, is_prefix=false) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + '\n\n' + message['content'] %}{%- endif %}{% set ns.is_only_sys = true %}{%- endif %}{%- endfor %}{{ bos_token }}{{ ns.system_prompt }}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{%- set ns.is_first = false -%}{%- set ns.is_last_user = true -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %}{%- if ns.is_last_user or ns.is_only_sys %}{{'<|Assistant|></think>'}}{%- endif %}{%- set ns.is_last_user = false -%}{%- set ns.is_first = false %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls'] %}{%- if not ns.is_first %}{%- if message['content'] is none %}{{'<|tool▁calls▁begin|><|tool▁call▁begin|>'+ tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments'] + '<|tool▁call▁end|>'}}{%- else %}{{message['content'] + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments'] + '<|tool▁call▁end|>'}}{%- endif %}{%- set ns.is_first = true -%}{%- else %}{{'<|tool▁call▁begin|>'+ tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments'] + '<|tool▁call▁end|>'}}{%- endif %}{%- endfor %}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none) %}{%- if ns.is_last_user %}{{'<|Assistant|>'}}{%- if message['prefix'] is defined and message['prefix'] and thinking %}{{'<think>'}}{%- else %}{{'</think>'}}{%- endif %}{%- endif %}{%- if message['prefix'] is defined and message['prefix'] %}{%- set ns.is_prefix = true -%}{%- endif %}{%- set ns.is_last_user = false -%}{%- if ns.is_tool %}{{message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{%- set content = message['content'] -%}{%- if '</think>' in content %}{%- set content = content.split('</think>', 1)[1] -%}{%- endif %}{{content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_last_user = false -%}{%- set ns.is_tool = true -%}{{'<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- if message['role'] != 'system' %}{% set ns.is_only_sys = false %}{%- endif %}{%- endfor -%}{% if add_generation_prompt and not ns.is_tool%}{% if ns.is_last_user or ns.is_only_sys or not ns.is_prefix %}{{'<|Assistant|>'}}{%- if not thinking %}{{'</think>'}}{%- else %}{{'<think>'}}{%- endif %}{% endif %}{% endif %}"
}
鲲鹏昇腾开发者社区是面向全社会开放的“联接全球计算开发者,聚合华为+生态”的社区,内容涵盖鲲鹏、昇腾资源,帮助开发者快速获取所需的知识、经验、软件、工具、算力,支撑开发者易学、好用、成功,成为核心开发者。
更多推荐
所有评论(0)