46-EFT.WP.Data.Benchmarks v1.0 | 第15章机器可读 Schema 与 Lint

目录／文档-技术白皮书（V5.05）／ 46-EFT.WP.Data.Benchmarks v1.0

第15章机器可读 Schema 与 Lint

I. 章节目的与范围

提供基准套件的规范性 JSON Schema 与 Lint 规则集，用于发布前阻断与门户/CI 自动校验；覆盖结构/类型/正则/依赖/跨卷引用锚点/计量校核/冻结切分与泄漏护栏/评分归一化与显著性最小检查/合规最小检查。键名统一 snake_case；跨卷引用采用“卷名 vX.Y:锚点”；数学表达用反引号并加括号，禁用中文。

II. 规范性工件（发布必备）

artifacts:

- path: "schema/benchmark.schema.json"

- path: "schema/lint_rules.yaml"

- path: "schema/examples/minimal.yaml"

- path: "schema/examples/full.yaml"

以上工件须在 export_manifest.artifacts[] 登记并附 sha256，引用锚点与本卷口径一致。

III. 规范性 JSON Schema（核心摘录）

JSON json

{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://eift.org/schema/benchmark.schema.json",
  "title": "EFT Data Benchmark Suite",
  "type": "object",
  "required": [ "suite", "tasks", "metrology", "export_manifest" ],
  "properties": {
    "suite": {
      "type": "object",
      "required": [ "id", "title", "version", "modalities" ],
      "properties": {
        "id": { "type": "string", "pattern": "^[a-z0-9_.\\-]+$" },
        "title": { "type": "string", "minLength": 3 },
        "version": { "type": "string", "pattern": "^v\\d+\\.\\d+(\\.\\d+)?$" },
        "modalities": { "type": "array", "items": { "type": "string" } },
        "risks": { "type": "array", "items": { "type": "string" } },
        "coverage_matrix": { "type": "object" }
      }
    },
    "tasks": {
      "type": "array",
      "items": {
        "type": "object",
        "required": [ "id", "io_mode", "dataset_ref", "splits", "protocol", "metrics", "leakage_guard" ],
        "properties": {
          "id": { "type": "string", "pattern": "^[a-z0-9_.\\-]+$" },
          "io_mode": { "type": "string", "enum": [ "offline", "online", "stream", "interactive" ] },
          "evaluatee": { "type": "string", "enum": [ "model", "system", "pipeline" ] },
          "dataset_ref": { "type": "string", "pattern": "^datasets/[a-z0-9_\\-]+@v\\d+\\.\\d+$" },
          "splits": {
            "type": "object",
            "required": [ "train", "val", "test" ],
            "properties": {
              "train": {
                "type": "object",
                "required": [ "frozen", "index" ],
                "properties": {
                  "frozen": { "type": "boolean", "const": true },
                  "index": { "type": "string" },
                  "sha256": { "type": "string" }
                }
              },
              "val": {
                "type": "object",
                "required": [ "frozen", "index" ],
                "properties": {
                  "frozen": { "type": "boolean", "const": true },
                  "index": { "type": "string" },
                  "sha256": { "type": "string" }
                }
              },
              "test": {
                "type": "object",
                "required": [ "frozen", "index" ],
                "properties": {
                  "frozen": { "type": "boolean", "const": true },
                  "index": { "type": "string" },
                  "sha256": { "type": "string" }
                }
              },
              "ratio": {
                "type": "object",
                "properties": { "train": { "type": "number" }, "val": { "type": "number" }, "test": { "type": "number" } }
              },
              "freeze_indices": { "type": "boolean", "const": true }
            }
          },
          "leakage_guard": { "type": "array", "items": { "type": "string" } },
          "protocol": { "type": "object" },
          "metrics": { "type": "array", "items": { "type": "object" } },
          "aggregation": { "type": "object" },
          "significance": { "type": "object" }
        }
      }
    },
    "metrology": {
      "type": "object",
      "required": [ "units", "check_dim" ],
      "properties": {
        "units": { "type": "string", "const": "SI" },
        "check_dim": { "type": "boolean", "const": true }
      }
    },
    "export_manifest": {
      "type": "object",
      "required": [ "version", "artifacts", "references" ],
      "properties": {
        "version": { "type": "string" },
        "artifacts": { "type": "array", "items": { "type": "object" } },
        "references": {
          "type": "array",
          "minItems": 1,
          "items": { "type": "string", "pattern": "^[^:]+ v\\d+\\.\\d+:[A-Za-z].+$" }
        }
      }
    }
  },
  "additionalProperties": false
}

references[] 强制“卷名 vX.Y:锚点”；metrology.units="SI" 与 check_dim=true 为强制项。

IV. Lint 规则（规范性）

version: "v1.0"

rules:

# 基础结构与版本

- id: STRUCT.REQUIRED

when: "$"

assert: "has_keys(suite,tasks,metrology,export_manifest)"

level: error

- id: SUITE.VERSION.SEMVER

when: "$.suite.version"

assert: "matches('^v\\d+\\.\\d+(\\.\\d+)?$')"

level: error

- id: SUITE.ID_FORMAT

when: "$.suite.id"

assert: "matches('^[a-z0-9_.\\-]+$')"

level: error

# 任务与数据

- id: TASK.REQUIRED_KEYS

when: "$.tasks[*]"

assert: "has_keys(id, io_mode, dataset_ref, splits, protocol, metrics, leakage_guard)"

level: error

- id: DATASET.REF_FORMAT

when: "$.tasks[*].dataset_ref"

assert: "matches('^datasets/[a-z0-9_\\-]+@v\\d+\\.\\d+$')"

level: error

- id: SPLITS.FROZEN_REQUIRED

when: "$.tasks[*].splits"

assert: "splits.train.frozen and splits.val.frozen and splits.test.frozen and splits.freeze_indices == true"

level: error

- id: SPLITS.RATIO_SUM

when: "$.tasks[*].splits.ratio"

assert: "abs(value.train + value.val + value.test - 1) <= 1e-6"

level: error

- id: LEAKAGE.GUARD_ALLOWED

when: "$.tasks[*].leakage_guard"

assert: "contains_any(['per-object','per-timewindow','per-scene'])"

level: error

# 指标与协议

- id: METRICS.FAMILY_UNIT

when: "$.tasks[*].metrics[*]"

assert: "has_keys(name, family, unit, higher_is_better)"

level: error

- id: METRICS.UNIT_SI_OR_DIMLESS

when: "$.tasks[*].metrics[*].unit"

assert: "all_units_in_SI(value) or value in ['—','%']"

level: error

- id: PROTOCOL.MODE_ALLOWED

when: "$.tasks[*].protocol.mode"

assert: "value in ['offline','online','stream','interactive']"

level: error

- id: SIG.PARAMS

when: "$.tasks[*].significance"

assert: "has_keys(method, alpha)"

level: error

# 评分与归一化

- id: SCORE.AGG_LEVELS

when: "$.tasks[*].aggregation.levels"

assert: "contains_all(['task'])"

level: error

- id: SCORE.NORM_SCHEME

when: "$.tasks[*].aggregation or $.scoring.normalization.scheme"

assert: "value in ['zscore','minmax','fixed-anchor']"

level: warn

# 鲁棒与公平（最小检查）

- id: ROBUST.THRESHOLDS_MIN

when: "$.robustness.thresholds"

assert: "has_keys(drop_rel_max, acc_robust_min)"

level: warn

- id: FAIR.THRESHOLDS_MIN

when: "$.fairness_ethics.thresholds"

assert: "has_keys(fairness_warn, fairness_block)"

level: warn

# 计量与引用

- id: METROLOGY.SI_AND_CHECKDIM

when: "$.metrology"

assert: "units == 'SI' and check_dim == true"

level: error

- id: REFERENCES.FORMAT

when: "$.export_manifest.references[*]"

assert: "matches('^[^:]+ v\\d+\\.\\d+:[A-Za-z].+$')"

level: error

阻断项：STRUCT.REQUIRED、SUITE.VERSION.SEMVER、SUITE.ID_FORMAT、TASK.REQUIRED_KEYS、DATASET.REF_FORMAT、SPLITS.FROZEN_REQUIRED、SPLITS.RATIO_SUM、LEAKAGE.GUARD_ALLOWED、METRICS.FAMILY_UNIT、METRICS.UNIT_SI_OR_DIMLESS、PROTOCOL.MODE_ALLOWED、METROLOGY.SI_AND_CHECKDIM、REFERENCES.FORMAT。

V. 失败样例与诊断（节选）

fail_examples:

- case: "bad reference"

input: {export_manifest:{references:["Core.DataSpec:EXPORT"]}}

expect: {rule:"REFERENCES.FORMAT", level:"error",

fix:"Use 'EFT.WP.Core.DataSpec v1.0:EXPORT'"}

- case: "splits not frozen"

input: {tasks:[{id:"cls", io_mode:"offline", dataset_ref:"datasets/core@v1.0",

splits:{train:{frozen:false,index:"..."}, val:{frozen:true,index:"..."}, test:{frozen:true,index:"..."}, freeze_indices:false},

protocol:{mode:"offline"}, metrics:[], leakage_guard:["per-object"]}]}

expect: {rule:"SPLITS.FROZEN_REQUIRED", level:"error",

fix:"Set all splits to frozen and freeze_indices=true"}

- case: "metric without unit"

input: {tasks:[{id:"cls", io_mode:"offline", dataset_ref:"datasets/core@v1.0",

splits:{train:{frozen:true,index:"..."}, val:{frozen:true,index:"..."}, test:{frozen:true,index:"..."}, freeze_indices:true},

protocol:{mode:"offline"}, metrics:[{name:"F1_macro"}], leakage_guard:["per-object"]}]}

expect: {rule:"METRICS.FAMILY_UNIT", level:"error",

fix:"Provide family/unit/higher_is_better for each metric"}

Lint 输出要求包含 rule/path/message/fix 四要素，便于一键修复。

VI. 最小可用示例（通过 Schema 与 Lint）

suite:

id: "eift.bench.core"

title: "EIFT Core Benchmarks"

version: "v1.0"

modalities: ["text"]

tasks:

- id: "cls.binary"

io_mode: "offline"

dataset_ref: "datasets/core_cls@v1.0"

splits:

train: {frozen:true, index:"splits/train.index", sha256:"..."}

val: {frozen:true, index:"splits/val.index", sha256:"..."}

test: {frozen:true, index:"splits/test.index", sha256:"..."}

freeze_indices: true

ratio: {train:0.8, val:0.1, test:0.1}

leakage_guard: ["per-object"]

protocol: {mode:"offline", seed:1701, repeats:5}

metrics:

- {name:"F1_macro", family:"classification", unit:"—", higher_is_better:true, agg:"macro"}

aggregation: {levels:["task"], weights:{scheme:"uniform"}}

significance: {method:"bootstrap", alpha:0.05}

metrology: {units:"SI", check_dim:true}

export_manifest:

version: "v1.0"

artifacts: [{path:"benchmark.yaml", sha256:"..."}]

references:

- "EFT.WP.Core.DataSpec v1.0:EXPORT"

- "EFT.WP.Core.Metrology v1.0:check_dim"

VII. 与导出清单的耦合（规范性）

export_manifest:

artifacts:

- {path:"schema/benchmark.schema.json", sha256:"..."}

- {path:"schema/lint_rules.yaml", sha256:"..."}

- {path:"schema/examples/minimal.yaml", sha256:"..."}

references:

- "EFT.WP.Core.DataSpec v1.0:EXPORT"

- "EFT.WP.Core.Metrology v1.0:check_dim"

- "EFT.WP.Data.ModelCards v1.0:Ch.11"

Schema 与 Lint 为阻断件，必须列出并可校验；引用携带“卷名 vX.Y:锚点”。

VIII. 验证接口（实现绑定 Ixx-?，统一返回）

def validate_benchmark(spec: dict) -> dict: ...

def lint_benchmark(spec: dict, rules: dict) -> dict: ...

def check_units(spec: dict) -> dict: ... # uses Core.Metrology v1.0:check_dim

def verify_references(spec: dict) -> dict: ...# regex + anchor reachability

返回 {"ok": bool, "errors":[...], "warnings":[...], "metrics":{...}}，供门户/CI 使用。

IX. 本章合规自检

benchmark.schema.json 与 lint_rules.yaml 已生成并在 export_manifest 登记 sha256。
Schema 强制 metrology.units="SI"&check_dim=true 与 export_manifest.references[] 正则；Lint 阻断未冻结切分、泄漏护栏缺失、指标缺单位、协议/引用不合规。
任务 dataset_ref/splits/protocol/metrics/leakage_guard 齐备，比例和为 1±1e-6。
评分/归一化/显著性/公平与鲁棒最小检查启用；跨卷引用有效可达。
最小示例可一次通过 Schema 与 Lint；验证接口已集成并返回统一结构。

版权与许可：除另有说明外，《能量丝理论》（含文本、图表、插图、符号与公式）的著作权由作者（屠广林）享有。
许可方式（CC BY 4.0）：在注明作者与来源的前提下，允许复制、转载、节选、改编与再分发。
署名格式（建议）：作者：屠广林｜作品：《能量丝理论》｜来源：energyfilament.org｜许可证：CC BY 4.0
验证召集： 作者独立自费、无雇主无资助；下一阶段将优先在最愿意公开讨论、公开复现、公开挑错的环境中推进落地，不限国家。欢迎各国媒体与同行抓住窗口组织验证，并与我们联系。
版本信息： 首次发布：2025-11-11 ｜当前版本：v6.0+5.05