目录文档-技术白皮书46-EFT.WP.Data.Benchmarks v1.0

第15章 机器可读 Schema 与 Lint


I. 章节目的与范围

提供基准套件的规范性 JSON Schema 与 Lint 规则集,覆盖结构/类型/正则/依赖/跨卷引用锚点/计量校核/冻结切分与泄漏护栏/评分归一化与显著性最小检查/合规最小检查;用于发布前阻断与门户/CI 自动校验。键名统一 snake_case;跨卷引用采用“卷名 vX.Y:锚点”;数学表达用反引号并加括号,禁用中文。

II. 规范性工件(发布必备)

# Normative artifacts listed as required for publication.
artifacts:
  - path: "schema/benchmark.schema.json"
  - path: "schema/lint_rules.yaml"
  - path: "schema/examples/minimal.yaml"
  - path: "schema/examples/full.yaml"

以上工件须在 export_manifest.artifacts[] 登记并附 sha256,引用锚点与本卷口径一致。

III. 规范性 JSON Schema(核心摘录)

JSON json
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://eift.org/schema/benchmark.schema.json",
  "title": "EFT Data Benchmark Suite",
  "type": "object",
  "required": [ "suite", "tasks", "metrology", "export_manifest" ],
  "properties": {
    "suite": {
      "type": "object",
      "required": [ "id", "title", "version", "modalities" ],
      "properties": {
        "id": { "type": "string", "pattern": "^[a-z0-9_.\\-]+$" },
        "title": { "type": "string", "minLength": 3 },
        "version": { "type": "string", "pattern": "^v\\d+\\.\\d+(\\.\\d+)?$" },
        "modalities": { "type": "array", "items": { "type": "string" } },
        "risks": { "type": "array", "items": { "type": "string" } },
        "coverage_matrix": { "type": "object" }
      }
    },
    "tasks": {
      "type": "array",
      "items": {
        "type": "object",
        "required": [ "id", "io_mode", "dataset_ref", "splits", "protocol", "metrics", "leakage_guard" ],
        "properties": {
          "id": { "type": "string", "pattern": "^[a-z0-9_.\\-]+$" },
          "io_mode": { "type": "string", "enum": [ "offline", "online", "stream", "interactive" ] },
          "evaluatee": { "type": "string", "enum": [ "model", "system", "pipeline" ] },
          "dataset_ref": { "type": "string", "pattern": "^datasets/[a-z0-9_\\-]+@v\\d+\\.\\d+$" },
          "splits": {
            "type": "object",
            "required": [ "train", "val", "test" ],
            "properties": {
              "train": {
                "type": "object",
                "required": [ "frozen", "index" ],
                "properties": {
                  "frozen": { "type": "boolean", "const": true },
                  "index": { "type": "string" },
                  "sha256": { "type": "string" }
                }
              },
              "val": {
                "type": "object",
                "required": [ "frozen", "index" ],
                "properties": {
                  "frozen": { "type": "boolean", "const": true },
                  "index": { "type": "string" },
                  "sha256": { "type": "string" }
                }
              },
              "test": {
                "type": "object",
                "required": [ "frozen", "index" ],
                "properties": {
                  "frozen": { "type": "boolean", "const": true },
                  "index": { "type": "string" },
                  "sha256": { "type": "string" }
                }
              },
              "ratio": {
                "type": "object",
                "properties": { "train": { "type": "number" }, "val": { "type": "number" }, "test": { "type": "number" } }
              },
              "freeze_indices": { "type": "boolean", "const": true }
            }
          },
          "leakage_guard": { "type": "array", "items": { "type": "string" } },
          "protocol": { "type": "object" },
          "metrics": { "type": "array", "items": { "type": "object" } },
          "aggregation": { "type": "object" },
          "significance": { "type": "object" }
        }
      }
    },
    "metrology": {
      "type": "object",
      "required": [ "units", "check_dim" ],
      "properties": {
        "units": { "type": "string", "const": "SI" },
        "check_dim": { "type": "boolean", "const": true }
      }
    },
    "export_manifest": {
      "type": "object",
      "required": [ "version", "artifacts", "references" ],
      "properties": {
        "version": { "type": "string" },
        "artifacts": { "type": "array", "items": { "type": "object" } },
        "references": {
          "type": "array",
          "minItems": 1,
          "items": { "type": "string", "pattern": "^[^:]+ v\\d+\\.\\d+:[A-Za-z].+$" }
        }
      }
    }
  },
  "additionalProperties": false
}
references[] 强制“卷名 vX.Y:锚点”;metrology.units="SI" 与 check_dim=true 为强制项。

IV. Lint 规则(规范性)

# Lint rule set for benchmark specs.
# Every rule carries four keys: id, when, assert, level.
# `when` looks like a JSONPath-style selector for the node under test and
# `assert` an expression evaluated against it — confirm with the lint engine.
# Only two levels occur in this rule set: error and warn.
version: "v1.0"

rules:
  # Basic structure and versioning
  - id: STRUCT.REQUIRED
    when: "$"
    assert: "has_keys(suite,tasks,metrology,export_manifest)"
    level: error

  - id: SUITE.VERSION.SEMVER
    when: "$.suite.version"
    assert: "matches('^v\\d+\\.\\d+(\\.\\d+)?$')"
    level: error

  - id: SUITE.ID_FORMAT
    when: "$.suite.id"
    assert: "matches('^[a-z0-9_.\\-]+$')"
    level: error

  # Tasks and data
  - id: TASK.REQUIRED_KEYS
    when: "$.tasks[*]"
    assert: "has_keys(id, io_mode, dataset_ref, splits, protocol, metrics, leakage_guard)"
    level: error

  - id: DATASET.REF_FORMAT
    when: "$.tasks[*].dataset_ref"
    assert: "matches('^datasets/[a-z0-9_\\-]+@v\\d+\\.\\d+$')"
    level: error

  - id: SPLITS.FROZEN_REQUIRED
    when: "$.tasks[*].splits"
    assert: "splits.train.frozen and splits.val.frozen and splits.test.frozen and splits.freeze_indices == true"
    level: error

  - id: SPLITS.RATIO_SUM
    when: "$.tasks[*].splits.ratio"
    assert: "abs(value.train + value.val + value.test - 1) <= 1e-6"
    level: error

  - id: LEAKAGE.GUARD_ALLOWED
    when: "$.tasks[*].leakage_guard"
    assert: "contains_any(['per-object','per-timewindow','per-scene'])"
    level: error

  # Metrics and protocol
  - id: METRICS.FAMILY_UNIT
    when: "$.tasks[*].metrics[*]"
    assert: "has_keys(name, family, unit, higher_is_better)"
    level: error

  - id: METRICS.UNIT_SI_OR_DIMLESS
    when: "$.tasks[*].metrics[*].unit"
    assert: "all_units_in_SI(value) or value in ['—','%']"
    level: error

  - id: PROTOCOL.MODE_ALLOWED
    when: "$.tasks[*].protocol.mode"
    assert: "value in ['offline','online','stream','interactive']"
    level: error

  - id: SIG.PARAMS
    when: "$.tasks[*].significance"
    assert: "has_keys(method, alpha)"
    level: error

  # Scoring and normalization
  - id: SCORE.AGG_LEVELS
    when: "$.tasks[*].aggregation.levels"
    assert: "contains_all(['task'])"
    level: error

  # NOTE(review): `when` joins two paths with `or`, and the first one
  # ($.tasks[*].aggregation) selects an object rather than a scheme string,
  # so `value in [...]` may never hold for it — confirm the intended target.
  - id: SCORE.NORM_SCHEME
    when: "$.tasks[*].aggregation or $.scoring.normalization.scheme"
    assert: "value in ['zscore','minmax','fixed-anchor']"
    level: warn

  # Robustness and fairness (minimal checks)
  - id: ROBUST.THRESHOLDS_MIN
    when: "$.robustness.thresholds"
    assert: "has_keys(drop_rel_max, acc_robust_min)"
    level: warn

  - id: FAIR.THRESHOLDS_MIN
    when: "$.fairness_ethics.thresholds"
    assert: "has_keys(fairness_warn, fairness_block)"
    level: warn

  # Metrology and references
  - id: METROLOGY.SI_AND_CHECKDIM
    when: "$.metrology"
    assert: "units == 'SI' and check_dim == true"
    level: error

  # The anchor's first character is matched with [A-Za-z] so that anchors
  # beginning with a lowercase letter (e.g. 'check_dim') are accepted.
  - id: REFERENCES.FORMAT
    when: "$.export_manifest.references[*]"
    assert: "matches('^[^:]+ v\\d+\\.\\d+:[A-Za-z].+$')"
    level: error

阻断项:STRUCT.REQUIRED、SUITE.VERSION.SEMVER、SUITE.ID_FORMAT、TASK.REQUIRED_KEYS、DATASET.REF_FORMAT、SPLITS.FROZEN_REQUIRED、SPLITS.RATIO_SUM、LEAKAGE.GUARD_ALLOWED、METRICS.FAMILY_UNIT、METRICS.UNIT_SI_OR_DIMLESS、PROTOCOL.MODE_ALLOWED、METROLOGY.SI_AND_CHECKDIM、REFERENCES.FORMAT。

V. 失败样例与诊断(节选)

# Failing inputs paired with the lint diagnostic each one is expected to yield.
# Flow mappings use "key: value" (colon followed by a space) so that YAML
# reads them as key/value pairs rather than as single plain scalars.
fail_examples:
  - case: "bad reference"
    input: {export_manifest: {references: ["Core.DataSpec:EXPORT"]}}
    expect:
      rule: "REFERENCES.FORMAT"
      level: "error"
      fix: "Use 'EFT.WP.Core.DataSpec v1.0:EXPORT'"

  - case: "splits not frozen"
    input:
      tasks:
        - id: "cls"
          io_mode: "offline"
          dataset_ref: "datasets/core@v1.0"
          splits:
            train: {frozen: false, index: "..."}
            val: {frozen: true, index: "..."}
            test: {frozen: true, index: "..."}
            freeze_indices: false
          protocol: {mode: "offline"}
          metrics: []
          leakage_guard: ["per-object"]
    expect:
      rule: "SPLITS.FROZEN_REQUIRED"
      level: "error"
      fix: "Set all splits to frozen and freeze_indices=true"

  - case: "metric without unit"
    input:
      tasks:
        - id: "cls"
          io_mode: "offline"
          dataset_ref: "datasets/core@v1.0"
          splits:
            train: {frozen: true, index: "..."}
            val: {frozen: true, index: "..."}
            test: {frozen: true, index: "..."}
            freeze_indices: true
          protocol: {mode: "offline"}
          metrics: [{name: "F1_macro"}]
          leakage_guard: ["per-object"]
    expect:
      rule: "METRICS.FAMILY_UNIT"
      level: "error"
      fix: "Provide family/unit/higher_is_better for each metric"

Lint 输出要求包含 rule/path/message/fix 四要素,便于一键修复。

VI. 最小可用示例(通过 Schema 与 Lint)

# Minimal benchmark spec.
# Nesting follows the JSON Schema: freeze_indices and ratio belong to `splits`;
# leakage_guard, protocol, metrics, aggregation and significance belong to the task.
# Flow mappings use "key: value" (colon followed by a space) so that YAML
# reads them as key/value pairs.
suite:
  id: "eift.bench.core"
  title: "EIFT Core Benchmarks"
  version: "v1.0"
  modalities: ["text"]

tasks:
  - id: "cls.binary"
    io_mode: "offline"
    dataset_ref: "datasets/core_cls@v1.0"
    splits:
      train: {frozen: true, index: "splits/train.index", sha256: "..."}
      val: {frozen: true, index: "splits/val.index", sha256: "..."}
      test: {frozen: true, index: "splits/test.index", sha256: "..."}
      freeze_indices: true
      ratio: {train: 0.8, val: 0.1, test: 0.1}
    leakage_guard: ["per-object"]
    protocol: {mode: "offline", seed: 1701, repeats: 5}
    metrics:
      - name: "F1_macro"
        family: "classification"
        unit: "—"
        higher_is_better: true
        agg: "macro"
    aggregation: {levels: ["task"], weights: {scheme: "uniform"}}
    significance: {method: "bootstrap", alpha: 0.05}

metrology: {units: "SI", check_dim: true}

export_manifest:
  version: "v1.0"
  artifacts: [{path: "benchmark.yaml", sha256: "..."}]
  references:
    - "EFT.WP.Core.DataSpec v1.0:EXPORT"
    - "EFT.WP.Core.Metrology v1.0:check_dim"


VII. 与导出清单的耦合(规范性)

# Export-manifest fragment registering this chapter's artifacts with checksums.
# All four normative artifacts are listed, including schema/examples/full.yaml,
# since the chapter requires every one of them to be registered in artifacts[].
export_manifest:
  artifacts:
    - {path: "schema/benchmark.schema.json", sha256: "..."}
    - {path: "schema/lint_rules.yaml", sha256: "..."}
    - {path: "schema/examples/minimal.yaml", sha256: "..."}
    - {path: "schema/examples/full.yaml", sha256: "..."}
  references:
    - "EFT.WP.Core.DataSpec v1.0:EXPORT"
    - "EFT.WP.Core.Metrology v1.0:check_dim"
    - "EFT.WP.Data.ModelCards v1.0:Ch.11"

Schema 与 Lint 为阻断件,必须列出并可校验;引用携带“卷名 vX.Y:锚点”。

VIII. 验证接口(实现绑定 Ixx-?,统一返回)

# Validation entry points for the portal/CI. Only signatures are given here;
# the bodies are elided (`...`). Each takes the benchmark spec as a dict and
# returns a dict; the chapter specifies the unified result shape as
# {"ok": bool, "errors": [...], "warnings": [...], "metrics": {...}}.

# Validates a benchmark spec (presumably against the JSON Schema — confirm
# with the implementation).
def validate_benchmark(spec: dict) -> dict: ...

# Lints a benchmark spec using the supplied rule set.
def lint_benchmark(spec: dict, rules: dict) -> dict: ...

# Unit / dimension check on the spec.
def check_units(spec: dict) -> dict: ... # uses Core.Metrology v1.0:check_dim

# Checks the spec's cross-volume references.
def verify_references(spec: dict) -> dict: ...# regex + anchor reachability

返回 {"ok": bool, "errors":[...], "warnings":[...], "metrics":{...}},供门户/CI 使用。

IX. 本章合规自检


版权与许可(CC BY 4.0)

版权声明:除另有说明外,《能量丝理论》(含文本、图表、插图、符号与公式)的著作权由作者(“屠广林”先生)享有。
许可方式:本作品采用 Creative Commons 署名 4.0 国际许可协议(CC BY 4.0)进行许可;在注明作者与来源的前提下,允许为商业或非商业目的进行复制、转载、节选、改编与再分发。
署名格式(建议):作者:“屠广林”;作品:《能量丝理论》;来源:energyfilament.org;许可证:CC BY 4.0。

首次发布: 2025-11-11|当前版本:v5.1
协议链接:https://creativecommons.org/licenses/by/4.0/