55 个仓库、每月 3 个版本分支、每个 Release 需要跑 200+ 个 CI Job——靠人工手动打 tag、检查 changelog、验证兼容性矩阵,一个版本要耗掉 2 个 Release Manager 整整一周。release-management 仓库把这一切自动化:版本号生成 → changelog 聚合 → 跨仓库兼容性检查 → CI 质量门禁 → 自动打 tag/发版。

版本号规范

CANN 版本号: v{MAJOR}.{MINOR}.{PATCH}.{BUILD}

v8.0.3.b001
  │  │ │  └── BUILD: 构建号/热修复号 (b001=第一个热修复)
  │  │ └──── PATCH: 补丁版本 (bugfix)
  │ └────── MINOR: 功能版本 (新算子/新特性)
  └──────── MAJOR: 大版本 (架构变更)

规则:
- MAJOR 变更: 算子 API 不兼容、Ascend C 语法变化 → 主版本号 +1
- MINOR 变更: 新算子仓库、新框架支持 → 功能版本号 +1
- PATCH 变更: 性能修复、精度 bug、安全补丁 → 补丁版本号 +1
- BUILD: 内部构建号,公开发布版 BUILD=0 (v8.0.3 即 v8.0.3.0)

版本号自动生成

# release-management/versioning/auto_version.py
#
# 基于 git 提交语义自动生成版本号
# 规范: Conventional Commits (feat:/fix:/perf:/refactor:)

import re
import subprocess
from dataclasses import dataclass
from typing import List, Optional, Tuple

@dataclass
class Version:
    """CANN version number: MAJOR.MINOR.PATCH plus an optional BUILD counter.

    ``str()`` renders ``vMAJOR.MINOR.PATCH`` when ``build`` is 0 and
    ``vMAJOR.MINOR.PATCH.bNNN`` (build zero-padded to three digits) otherwise.
    """

    major: int
    minor: int
    patch: int
    build: int = 0

    def bump_major(self):
        """Return the next major version; minor, patch and build restart at 0."""
        return Version(major=self.major + 1, minor=0, patch=0, build=0)

    def bump_minor(self):
        """Return the next minor version; patch and build restart at 0."""
        return Version(major=self.major, minor=self.minor + 1, patch=0, build=0)

    def bump_patch(self):
        """Return the next patch version; build restarts at 0."""
        return Version(
            major=self.major, minor=self.minor, patch=self.patch + 1, build=0
        )

    def __str__(self):
        core = f"v{self.major}.{self.minor}.{self.patch}"
        # The build suffix is only shown for non-zero build numbers.
        suffix = f".b{self.build:03d}" if self.build != 0 else ""
        return core + suffix


class VersionBumper:
    """
    Derive the next version number from Conventional Commits subjects.

    Policy:
    - ``feat:``                  -> bump MINOR (new feature)
    - any type carrying ``!``    -> bump MAJOR (breaking change)
    - ``fix:`` / ``perf:``       -> bump PATCH
    - ``refactor:`` / ``docs:`` / ``test:`` / ``chore:`` -> no semantic bump;
      when nothing else qualifies the BUILD counter is incremented instead
    """

    BUMP_RULES = {
        # Kept for compatibility; the "!" marker itself is detected by
        # classify_commit, so this key is never looked up.
        "feat!": "major",   # breaking change feature
        "feat": "minor",
        "fix": "patch",
        "perf": "patch",
        "refactor": None,
        "docs": None,
        "test": None,
        "chore": None,
    }

    # Bump levels ordered from weakest to strongest.
    _BUMP_RANK = {"none": 0, "patch": 1, "minor": 2, "major": 3}

    # Prefix used in the change list for each bump level.
    _CHANGE_LABELS = {
        "major": "[BREAKING]",
        "minor": "[FEATURE]",
        "patch": "[FIX/PERF]",
    }

    # Abbreviated commit hash that `git log --oneline` puts before the subject.
    _ONELINE_HASH = re.compile(r'^[0-9a-f]{7,40}\s+')

    # "type(scope)!: description". The spec puts "!" after the optional scope;
    # "!" directly after the type is still accepted for older commits.
    _HEADER = re.compile(r'^(\w+)(!)?(\([^()]*\))?(!)?:\s*(.+)')

    def __init__(self, repo_path: str, current_version: "Version"):
        """
        Args:
            repo_path: working directory in which ``git log`` is executed.
            current_version: version the bump is computed from.
        """
        self.repo_path = repo_path
        self.current = current_version

    def get_commits_since(self, tag: str) -> List[str]:
        """Return the ``git log --oneline`` lines for ``tag..HEAD``, merges excluded.

        Best effort: if git cannot be started, or prints nothing (which is
        also what happens when git exits with an error), an empty list is
        returned.
        """
        try:
            result = subprocess.run(
                ["git", "log", f"{tag}..HEAD", "--oneline", "--no-merges"],
                cwd=self.repo_path, capture_output=True, text=True
            )
        except (OSError, subprocess.SubprocessError, UnicodeDecodeError):
            return []
        # Drop blank lines so whitespace-only output is not counted as a commit.
        return [line for line in result.stdout.splitlines() if line.strip()]

    def classify_commit(self, message: str) -> Optional[str]:
        """Return the bump level implied by one commit line.

        Args:
            message: a commit subject, optionally preceded by the abbreviated
                hash that ``git log --oneline`` emits.

        Returns:
            ``"major"``, ``"minor"`` or ``"patch"``; ``None`` when the line is
            not a Conventional Commits header or its type implies no bump.
        """
        subject = message.strip()
        match = self._HEADER.match(subject)
        if not match:
            # Lines from get_commits_since start with the commit hash; retry
            # on the bare subject.
            subject = self._ONELINE_HASH.sub("", subject, count=1)
            match = self._HEADER.match(subject)
        if not match:
            return None  # not a Conventional Commits header, ignore

        # A breaking-change marker outranks whatever the type says.
        if match.group(2) or match.group(4):
            return "major"

        return self.BUMP_RULES.get(match.group(1).lower())

    def _summarize(self, commits: List[str]) -> Tuple[str, List[str]]:
        """Return (highest bump level, one labelled entry per bump-relevant commit)."""
        max_bump = "none"
        changes = []
        for commit in commits:
            bump_type = self.classify_commit(commit)
            label = self._CHANGE_LABELS.get(bump_type)
            if label is None:
                continue
            # Every relevant commit is listed, not only those that raise the
            # level, so the change list does not depend on commit order.
            changes.append(f"{label} {commit}")
            if self._BUMP_RANK[bump_type] > self._BUMP_RANK[max_bump]:
                max_bump = bump_type
        return max_bump, changes

    def compute_bump(self, since_tag: str) -> Tuple["Version", List[str]]:
        """
        Compute the next version from the commits after ``since_tag``.

        Returns:
            ``(new_version, change_descriptions)``. When there are no commits
            the current version is returned unchanged together with a single
            "No changes since last tag" entry.
        """
        commits = self.get_commits_since(since_tag)
        if not commits:
            return self.current, ["No changes since last tag"]

        max_bump, changes = self._summarize(commits)

        if max_bump == "major":
            new_version = self.current.bump_major()
        elif max_bump == "minor":
            new_version = self.current.bump_minor()
        elif max_bump == "patch":
            new_version = self.current.bump_patch()
        else:
            # Only refactor/docs/test/chore or unclassified commits: bump BUILD.
            new_version = Version(
                self.current.major, self.current.minor,
                self.current.patch, self.current.build + 1
            )
            changes.append("[BUILD] Internal build bump")

        return new_version, changes


# 使用示例
if __name__ == "__main__":
    # Demo: compute the version that follows v8.0.3 for ../ops-transformer
    # and print it together with the change entries that justify it.
    demo_bumper = VersionBumper("../ops-transformer", Version(8, 0, 3))
    next_version, change_log = demo_bumper.compute_bump("v8.0.3")
    print(f"Next version: {next_version}")
    for entry in change_log:
        print(f"  {entry}")

跨仓库兼容性矩阵

# release-management/compat/compatibility_matrix.py
#
# 跨仓库兼容性检查: 验证 55 个仓库的版本组合是否合法
# 原则: 下游仓库不能依赖未发布的上游版本

from dataclasses import dataclass, field
from typing import Dict, Set, List, Tuple

@dataclass
class RepoVersion:
    """A repository name paired with a version string."""
    repo: str      # repository name
    version: str   # version string for that repository

@dataclass
class CompatibilityConstraint:
    """Dependency constraint: consumer >= dep_repo@version"""
    consumer: str      # downstream repository
    dep_repo: str      # upstream repository it depends on
    min_version: str   # minimum required version
    constraint_type: str  # ">=" | "==" | "~="

class CompatibilityMatrix:
    """
    Cross-repository compatibility matrix.

    Holds the dependency constraints between repositories and validates a
    proposed set of release versions against them before a release.
    """

    # (consumers, upstream dependencies). Expanded consumer by consumer, then
    # dependency by dependency, which fixes the order of ``self.constraints``
    # and therefore the order of the errors reported by ``check``.
    _DEPENDENCIES = (
        # Core operator libraries depend on opbase.
        (("ops-math", "ops-nn", "ops-blas", "ops-cv",
          "ops-fft", "ops-rand", "ops-tensor", "ops-transformer"),
         ("opbase",)),
        # ATB depends on ops-transformer.
        (("ascend-transformer-boost",), ("ops-transformer",)),
        # catlass depends on opbase and ops-blas.
        (("catlass",), ("opbase", "ops-blas")),
        # Recipes depend on ATB and runtime.
        # NOTE(review): the original comment also named ops-transformer as a
        # direct dependency, but no such constraint was ever registered --
        # confirm whether it should be added.
        (("cann-recipes-infer", "cann-recipes-train"),
         ("ascend-transformer-boost", "runtime")),
        # torchtitan-npu depends on hccl and ATB.
        (("torchtitan-npu",), ("hccl", "ascend-transformer-boost")),
        # ge depends on runtime and metadef.
        (("ge",), ("runtime", "metadef")),
        # The tensorflow adapter depends on ge.
        (("tensorflow",), ("ge",)),
        # hcomm, hixl and shmem depend on hccl.
        (("hcomm", "hixl", "shmem"), ("hccl",)),
        # Operator libraries depend on ascend-boost-comm.
        (("ops-math", "ops-nn", "ops-transformer", "ops-cv", "ops-blas"),
         ("ascend-boost-comm",)),
    )

    # Minimum upstream version used by every built-in constraint.
    _BASELINE_VERSION = "v8.0.0"

    def __init__(self):
        self.constraints: List[CompatibilityConstraint] = []
        self._init_constraints()

    def _init_constraints(self):
        """Register one ``>=`` constraint per (consumer, dependency) pair."""
        for consumers, dependencies in self._DEPENDENCIES:
            for consumer in consumers:
                for dep_repo in dependencies:
                    self.constraints.append(CompatibilityConstraint(
                        consumer=consumer, dep_repo=dep_repo,
                        min_version=self._BASELINE_VERSION,
                        constraint_type=">="
                    ))

    def check(self, release_versions: Dict[str, str]) -> List[str]:
        """
        Validate a set of release versions against all constraints.

        Args:
            release_versions: ``{repo_name: version_string}``.

        Returns:
            A list of error messages; an empty list means the combination
            passed. Constraints whose consumer is absent from
            ``release_versions`` are skipped. A version string that cannot be
            parsed is reported as an error instead of aborting the check.
        """
        errors = []

        for const in self.constraints:
            consumer_ver = release_versions.get(const.consumer)
            if consumer_ver is None:
                continue  # consumer is not part of this release

            requirement = (
                f"{const.dep_repo}{const.constraint_type}{const.min_version}"
            )
            dep_ver = release_versions.get(const.dep_repo)
            if dep_ver is None:
                errors.append(
                    f"Missing dependency: {const.consumer}@{consumer_ver} "
                    f"requires {requirement}"
                )
                continue

            try:
                satisfied = self._version_satisfies(
                    dep_ver, const.min_version, const.constraint_type
                )
            except ValueError as exc:
                errors.append(
                    f"Invalid version: {const.consumer}@{consumer_ver} "
                    f"requires {requirement}, but {const.dep_repo}@{dep_ver} "
                    f"cannot be evaluated ({exc})"
                )
                continue

            if not satisfied:
                errors.append(
                    f"Version conflict: {const.consumer}@{consumer_ver} "
                    f"requires {requirement}, "
                    f"but {const.dep_repo}@{dep_ver} is released"
                )

        return errors

    def _version_satisfies(self, actual: str, required: str,
                           constraint_type: str) -> bool:
        """Return whether ``actual`` satisfies ``required`` under ``constraint_type``.

        Versions are compared as (major, minor, patch, build), so
        ``v8.0.3.b002`` is newer than ``v8.0.3.b001`` and both are newer than
        ``v8.0.3``. An unknown constraint type is never satisfied.

        Raises:
            ValueError: if either version string cannot be parsed.
        """
        act = self._version_key(actual)
        req = self._version_key(required)

        if constraint_type == ">=":
            return act >= req
        if constraint_type == "==":
            return act == req
        if constraint_type == "~=":
            # Compatible release: same major.minor, at least the required
            # patch/build.
            return act[:2] == req[:2] and act[2:] >= req[2:]
        return False

    def _version_key(self, v: str) -> Tuple[int, int, int, int]:
        """Return (major, minor, patch, build) for ordering comparisons.

        The build number is read from an optional fourth component written as
        ``bNNN`` or ``NNN``; anything else in that position counts as build 0.
        """
        major, minor, patch = self._parse_version(v)
        parts = v.lstrip('v').split('.')
        build = 0
        if len(parts) > 3:
            token = parts[3].strip()
            if token[:1] in ("b", "B"):
                token = token[1:]
            if token.isdecimal():
                build = int(token)
        return (major, minor, patch, build)

    def _parse_version(self, v: str) -> Tuple[int, int, int]:
        """v8.0.3 -> (8, 0, 3); missing trailing components default to 0.

        Raises:
            ValueError: if one of the first three components is not an integer.
        """
        parts = v.lstrip('v').split('.')
        try:
            numbers = [int(part) for part in parts[:3]]
        except ValueError as err:
            raise ValueError(f"invalid version string {v!r}") from err
        numbers += [0] * (3 - len(numbers))
        return (numbers[0], numbers[1], numbers[2])

CI 质量门禁

# release-management/ci/quality_gate.yml
#
# Release 质量门禁: 7 道关卡,全部通过后自动打 tag

# Manually triggered release pipeline. The jobs below are chained through
# `needs`, so each gate only starts after the previous one succeeded.
name: CANN Release Quality Gate

on:
  workflow_dispatch:
    inputs:
      # Version to release; the first gate requires the form vX.Y.Z.
      release_version:
        description: "Release version (e.g. v8.0.4)"
        required: true
      # Comma-separated repository names, used under the `cann/` owner.
      repos:
        description: "Repos to release (comma-separated)"
        required: true
        default: "ops-transformer,catlass,hccl,ge,runtime"

jobs:
  # Gate 1: the requested version must look like vX.Y.Z and must not already
  # exist as a tag in any of the repositories being released.
  gate-version-check:
    name: "[1/7] Version Bump Check"
    runs-on: ubuntu-latest
    env:
      # Inputs reach the scripts through the environment instead of being
      # expanded into the script text, so their content is never executed
      # as shell code.
      RELEASE_VERSION: ${{ inputs.release_version }}
      REPOS: ${{ inputs.repos }}
    steps:
      - name: Validate version format
        run: |
          if [[ ! "$RELEASE_VERSION" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
            echo "Invalid version format. Expected vX.Y.Z"
            exit 1
          fi
      - name: Check version not already released
        run: |
          for repo in $(echo "$REPOS" | tr ',' ' '); do
            # Query first and test the output afterwards: a failed lookup
            # (network, auth, unknown repo) must not be read as "tag absent".
            if ! existing=$(git ls-remote --tags "https://atomgit.com/cann/$repo.git" \
               "refs/tags/$RELEASE_VERSION"); then
              echo "Could not query tags of $repo"
              exit 1
            fi
            if [[ -n "$existing" ]]; then
              echo "Tag $RELEASE_VERSION already exists in $repo"
              exit 1
            fi
          done

  # `inputs.repos` is a comma-separated string, which fromJSON() cannot
  # parse. This helper job turns it into a JSON array for the matrix below.
  gate-unit-tests-matrix:
    name: "[2/7] Unit Tests (resolve repo matrix)"
    needs: gate-version-check
    runs-on: ubuntu-latest
    outputs:
      repos: ${{ steps.to-json.outputs.repos }}
    steps:
      - name: Convert comma-separated repos to a JSON array
        id: to-json
        env:
          REPOS: ${{ inputs.repos }}
        run: |
          repos_json=$(python3 -c 'import json, os; print(json.dumps([r.strip() for r in os.environ["REPOS"].split(",") if r.strip()]))')
          echo "repos=$repos_json" >> "$GITHUB_OUTPUT"

  # Gate 2: build and run the unit tests of every released repository, one
  # matrix job per repository.
  gate-unit-tests:
    name: "[2/7] Unit Tests"
    needs: gate-unit-tests-matrix
    strategy:
      matrix:
        repo: ${{ fromJSON(needs.gate-unit-tests-matrix.outputs.repos) }}
    runs-on: [self-hosted, ascend-910]
    steps:
      - uses: actions/checkout@v4
        with:
          repository: cann/${{ matrix.repo }}
      - name: Build & Test
        run: |
          source /usr/local/Ascend/ascend-toolkit/set_env.sh
          # -p: a self-hosted runner may still hold a build/ directory from
          # an earlier run.
          mkdir -p build && cd build
          cmake .. -DCMAKE_BUILD_TYPE=Release
          make -j$(nproc)
          ctest --output-on-failure --timeout 300

  # Gate 3: run the cross-repository compatibility check.
  # NOTE(review): this job has no checkout step -- confirm that
  # release-management/ is present in the runner workspace.
  gate-compatibility:
    name: "[3/7] Cross-Repo Compatibility"
    needs: gate-unit-tests
    runs-on: ubuntu-latest
    env:
      # Passed via the environment so the values are not expanded into the
      # script text.
      RELEASE_VERSION: ${{ inputs.release_version }}
      REPOS: ${{ inputs.repos }}
    steps:
      - name: Check compatibility matrix
        run: |
          python release-management/compat/compatibility_matrix.py \
            --version "$RELEASE_VERSION" \
            --repos "$REPOS"

  # Gate 4: benchmark the release against the previous tag.
  gate-performance:
    name: "[4/7] Performance Regression"
    needs: gate-compatibility
    runs-on: [self-hosted, ascend-910]
    env:
      # Passed via the environment so the value is not expanded into the
      # script text.
      RELEASE_VERSION: ${{ inputs.release_version }}
    steps:
      - name: Benchmark
        run: |
          # Resolve the baseline in its own statement so a failing
          # last_tag.py stops the step instead of yielding an empty argument.
          baseline="$(python release-management/versioning/last_tag.py)"
          python release-management/ci/benchmark_runner.py \
            --version "$RELEASE_VERSION" \
            --baseline "$baseline" \
            --threshold 5  # fail when performance regresses by more than 5%

  # Gate 5: numerical precision validation for the released repositories.
  gate-precision:
    name: "[5/7] Precision Validation"
    needs: gate-performance
    runs-on: [self-hosted, ascend-910]
    env:
      # Passed via the environment so the value is not expanded into the
      # script text.
      REPOS: ${{ inputs.repos }}
    steps:
      - name: Precision tests
        run: |
          python release-management/ci/precision_validator.py \
            --tolerance 1e-5 \
            --repos "$REPOS"

  # Gate 6: static analysis plus a dependency audit.
  gate-security:
    name: "[6/7] Security Scan"
    needs: gate-precision
    runs-on: ubuntu-latest
    permissions:
      contents: read
      actions: read
      security-events: write   # needed to upload CodeQL results
    env:
      # Passed via the environment so the value is not expanded into the
      # script text.
      REPOS: ${{ inputs.repos }}
    steps:
      - uses: actions/checkout@v4
      # `analyze` needs a database created by `init`; with no `languages`
      # input CodeQL auto-detects the languages of the checked-out repository.
      - name: Initialize CodeQL
        uses: github/codeql-action/init@v3
      - name: SAST scan
        uses: github/codeql-action/analyze@v3
      - name: Dependency audit
        run: |
          for repo in $(echo "$REPOS" | tr ',' ' '); do
            echo "Auditing $repo..."
            # Placeholder: check dependency versions for known CVEs.
          done

  # Gate 7: generate the changelog, then tag and release every repository.
  gate-release:
    name: "[7/7] Auto Tag & Release"
    needs: gate-security
    runs-on: ubuntu-latest
    env:
      # Passed via the environment so the values are not expanded into the
      # script text.
      RELEASE_VERSION: ${{ inputs.release_version }}
      REPOS: ${{ inputs.repos }}
    steps:
      - name: Generate changelog
        run: |
          # generate_changelog.py requires --from-tag/--to-tag/--repos/--output
          # and has no --version option.
          # NOTE(review): the release tag is only created in the next step,
          # while the script passes --to-tag straight to `git log` -- confirm
          # that it resolves at this point.
          from_tag="$(python release-management/versioning/last_tag.py)"
          python release-management/versioning/generate_changelog.py \
            --from-tag "$from_tag" \
            --to-tag "$RELEASE_VERSION" \
            --repos "$REPOS" \
            --output "CHANGELOG_${RELEASE_VERSION}.md"
      - name: Create tags & releases
        run: |
          for repo in $(echo "$REPOS" | tr ',' ' '); do
            echo "Releasing $repo @ $RELEASE_VERSION"
            python release-management/ci/auto_release.py \
              --repo "cann/$repo" \
              --version "$RELEASE_VERSION" \
              --changelog "CHANGELOG_${RELEASE_VERSION}.md"
          done
      - name: Notify
        run: |
          echo "Release $RELEASE_VERSION completed!"
          echo "Repos: $REPOS"

Changelog 自动生成

# release-management/versioning/generate_changelog.py
#
# 聚合 55 个仓库的变更记录 → 统一 CHANGELOG

import subprocess
from collections import defaultdict

class ChangelogGenerator:
    """
    Build a release CHANGELOG in Markdown from the commit lines of several
    repositories.

    Output layout (a section only appears when it has entries):

        # CANN <to_tag> Release Notes

        ## Summary
        - <N> changes across <M> repositories

        ## Breaking Changes
        ## New Features
        ## Performance Improvements
        ## Bug Fixes
        ## Known Issues

    Every entry has the form ``- <repo>: <commit line>``.
    """

    # (category key, Markdown heading) in output order.
    _SECTIONS = (
        ("breaking", "## Breaking Changes"),
        ("feature", "## New Features"),
        ("performance", "## Performance Improvements"),
        ("fix", "## Bug Fixes"),
        ("known_issue", "## Known Issues"),
    )

    # Conventional Commits type -> changelog category.
    _TYPE_CATEGORIES = {
        "feat": "feature",
        "perf": "performance",
        "fix": "fix",
    }

    def __init__(self, repos: list, from_tag: str, to_tag: str):
        """
        Args:
            repos: repository directory names, resolved as ``../<repo>``.
            from_tag: start of the commit range (exclusive).
            to_tag: end of the commit range; also used in the title.
        """
        self.repos = repos
        self.from_tag = from_tag
        self.to_tag = to_tag

    def generate(self, output_path: str):
        """Collect commits from all repositories and write the Markdown file."""
        categories = defaultdict(list)

        for repo in self.repos:
            for commit in self._get_commits(repo):
                category, entry = self._parse_commit(repo, commit)
                if category and entry:
                    categories[category].append(entry)

        total = sum(len(entries) for entries in categories.values())
        lines = [
            f"# CANN {self.to_tag} Release Notes",
            "",
            "## Summary",
            f"- {total} changes across {len(self.repos)} repositories",
            "",
        ]

        for key, header in self._SECTIONS:
            if key in categories:
                lines.append(header)
                lines.extend(f"- {entry}" for entry in categories[key])
                lines.append("")

        with open(output_path, 'w', encoding='utf-8') as f:
            f.write('\n'.join(lines))

    def _get_commits(self, repo: str) -> list:
        """Return the non-empty ``git log --oneline`` lines for the tag range.

        Best effort: if git cannot be started in ``../<repo>`` the repository
        contributes no commits.
        """
        try:
            result = subprocess.run(
                ["git", "log", f"{self.from_tag}..{self.to_tag}",
                 "--oneline", "--no-merges"],
                cwd=f"../{repo}", capture_output=True, text=True
            )
        except (OSError, subprocess.SubprocessError, UnicodeDecodeError):
            return []
        return [
            line.strip() for line in result.stdout.split('\n')
            if line.strip()
        ]

    def _parse_commit(self, repo: str, commit: str) -> tuple:
        """Classify one commit line.

        Args:
            repo: repository name used as the entry prefix.
            commit: commit subject, optionally preceded by the abbreviated
                hash that ``git log --oneline`` emits.

        Returns:
            ``(category, markdown_entry)``, or ``(None, None)`` when the
            commit does not belong in the changelog.
        """
        import re

        entry = f"{repo}: {commit}"

        if "BREAKING CHANGE" in commit.upper():
            return ("breaking", entry)

        # Optional leading hash, then "type(scope)!:". The "!" is accepted on
        # either side of the scope.
        header = re.match(
            r'^\s*(?:[0-9a-f]{7,40}\s+)?(\w+)(!)?(?:\([^()]*\))?(!)?:',
            commit
        )
        if header:
            if header.group(2) or header.group(3):
                return ("breaking", entry)
            # Exact type match, so e.g. "fixture:" is not taken for "fix:".
            category = self._TYPE_CATEGORIES.get(header.group(1).lower())
            if category:
                return (category, entry)

        if "known" in commit.lower():
            return ("known_issue", entry)

        return (None, None)


# 命令行入口
if __name__ == "__main__":
    import argparse

    # All four options are mandatory string arguments.
    cli = argparse.ArgumentParser()
    for flag in ("--from-tag", "--to-tag", "--repos", "--output"):
        cli.add_argument(flag, required=True)
    options = cli.parse_args()

    # --repos is a comma-separated list of repository names.
    repo_names = options.repos.split(',')
    ChangelogGenerator(
        repo_names, options.from_tag, options.to_tag
    ).generate(options.output)

踩坑:版本兼容性检查的顺序依赖——先检查底层仓库

# ❌ 按字母序检查: atvc → ge → hccl → opbase → ops-math → ...
# 检查 atvc 时 opbase 版本还不确定 → 报假错误
#
# ✅ 拓扑排序: 先检查无依赖的底层仓库,再检查上层
def topological_order(constraints):
    """Return the repositories in dependency order (Kahn's algorithm).

    Args:
        constraints: iterable of objects with ``consumer`` and ``dep_repo``
            attributes; each one means "consumer depends on dep_repo".

    Returns:
        A list in which every repository appears after all repositories it
        depends on. Repositories without dependencies come first, in
        alphabetical order, so the result is the same on every run.

    Raises:
        ValueError: if the constraints contain a dependency cycle, because
            the repositories on the cycle cannot be ordered.
    """
    from collections import defaultdict, deque

    dependents = defaultdict(list)   # upstream repo -> repos that depend on it
    pending = defaultdict(int)       # repo -> dependencies not yet emitted
    repos = set()

    for c in constraints:
        dependents[c.dep_repo].append(c.consumer)
        pending[c.consumer] += 1
        repos.update((c.consumer, c.dep_repo))

    # Start from repositories that depend on nothing. Sorting removes the
    # run-to-run variation of set iteration order.
    ready = deque(sorted(r for r in repos if pending[r] == 0))
    order = []

    while ready:
        repo = ready.popleft()
        order.append(repo)
        for downstream in dependents[repo]:
            pending[downstream] -= 1
            if pending[downstream] == 0:
                ready.append(downstream)

    if len(order) != len(repos):
        # Whatever was never emitted sits on, or downstream of, a cycle.
        unresolved = sorted(repos.difference(order))
        raise ValueError(
            f"Dependency cycle detected among: {', '.join(unresolved)}"
        )

    return order

踩坑:版本 bump 的"空 Release"问题——所有仓库都没变化时强制跳版本

# ❌ 所有仓库都没有 feat/fix/perf → bump_type="none" → 版本号不变
# 但用户期望看到一个新的 Release (v8.0.3 → v8.0.4)
#
# ✅ 如果所有 bump 都是 "none":
#   1. 检查是否是预定的周期性 Release (如月度 release)
#   2. 如果是 → bump BUILD (+1),例如 v8.0.3.b001 → v8.0.3.b002
#   3. 如果不是 → 提示用户是否真的要发空版本

if max_bump == "none":
    if is_scheduled_release:
        # Scheduled release without any feat/fix/perf commit: publish it as
        # the next BUILD of the current version. The original mixed the names
        # `current_version` and `current` for the same object; one name is
        # used throughout here.
        new_build = current_version.build + 1
        new_version = Version(
            current_version.major, current_version.minor,
            current_version.patch, new_build
        )
        print(f"Scheduled release: bumping BUILD → {new_version}")
    else:
        # Manual release with nothing to ship: ask before going on.
        # NOTE(review): when the user confirms, `new_version` is not assigned
        # in this fragment -- confirm what the surrounding code expects.
        print("WARNING: No changes detected. Are you sure you want to release?")
        if not confirm():
            sys.exit(0)

release-management 的自动化 Release 流程:版本号基于 Conventional Commits 自动 bump(feat→minor、feat!→major、fix/perf→patch、空→build)→兼容性矩阵检查 55 仓依赖拓扑(底层先审,拓扑排序 BFS)→7 道 CI 质量门禁(版本校验→单元测试→跨仓兼容性→性能回归>5% 失败→精度验证 ε<1e-5→安全扫描→自动 tag+changelog)。踩坑:按字母序检查导致上层仓库先于依赖被审→拓扑排序先底层后上层、空 Release 版本号不变→周期 Release 自动 bump BUILD、手动 Release 弹确认。

Logo

鲲鹏昇腾开发者社区是面向全社会开放的“联接全球计算开发者,聚合华为+生态”的社区,内容涵盖鲲鹏、昇腾资源,帮助开发者快速获取所需的知识、经验、软件、工具、算力,支撑开发者易学、好用、成功,成为核心开发者。

更多推荐