目录文档-技术白皮书44-EFT.WP.Data.ModelCards v1.0

第15章 机器可读 Schema 与 Lint


I. 章节目的与范围

提供模型卡的规范性 JSON Schema 与 Lint 规则集,覆盖结构/类型/正则/依赖/引用锚点/计量校核/冻结切分/泄漏防控/公平性与鲁棒性最小检查;产物直接用于发布前阻断检查与门户自动校验。键名统一 snake_case;跨卷引用采用“卷名 vX.Y:锚点”;数学表达用反引号并加括号,禁用中文。

II. 规范性工件(发布必备)

# Normative artifacts that must ship with every release.
artifacts:
  # Normative JSON Schema
  - path: "schema/model_card.schema.json"
  # Normative Lint rules (extensible)
  - path: "schema/lint_rules.yaml"
  # Minimal working example
  - path: "schema/examples/minimal.yaml"
  # Full-field example
  - path: "schema/examples/full.yaml"

以上工件须在 export_manifest.artifacts[] 中登记并附 sha256;引用锚点与本卷口径一致。

III. 规范性 JSON Schema(核心摘录)

JSON json
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://eift.org/schema/model_card.schema.json",
  "title": "EFT Model Card",
  "type": "object",
  "required": [
    "model_id",
    "title",
    "version",
    "task",
    "io_schema",
    "architecture",
    "training_data",
    "preprocess",
    "optimization",
    "hyperparams",
    "evaluation",
    "calibration",
    "robustness",
    "fairness",
    "safety",
    "deployment",
    "resources",
    "export_manifest"
  ],
  "properties": {
    "model_id": { "type": "string", "pattern": "^[a-z0-9_\\-\\.]+$" },
    "title": { "type": "string", "minLength": 3 },
    "version": { "type": "string", "pattern": "^v\\d+\\.\\d+(\\.\\d+)?$" },
    "task": { "type": "string" },
    "io_schema": {
      "type": "object",
      "required": [ "inputs", "outputs" ],
      "properties": {
        "inputs": { "type": "array", "items": { "type": "object" } },
        "outputs": { "type": "array", "items": { "type": "object" } },
        "batching": { "type": "object" },
        "streaming": { "type": "object" },
        "constraints": { "type": "array", "items": { "type": "object" } }
      }
    },
    "architecture": { "type": "object", "required": [ "version", "backbone", "topology" ] },
    "training_data": {
      "type": "object",
      "required": [ "refs", "splits_ref" ],
      "properties": {
        "refs": { "type": "array", "items": { "type": "string" } },
        "splits_ref": { "type": "string" }
      }
    },
    "preprocess": {
      "type": "object",
      "required": [ "pipeline_id", "steps", "parameter_lock" ],
      "properties": { "steps": { "type": "array", "items": { "type": "object" } } }
    },
    "optimization": { "type": "object" },
    "hyperparams": { "type": "object" },
    "evaluation": {
      "type": "object",
      "required": [ "protocol", "metrics" ],
      "properties": {
        "protocol": { "type": "object", "properties": { "splits": { "type": "string", "const": "frozen" } } },
        "metrics": { "type": "object" }
      }
    },
    "calibration": { "type": "object" },
    "robustness": { "type": "object" },
    "fairness": { "type": "object" },
    "safety": { "type": "object" },
    "deployment": { "type": "object" },
    "resources": {
      "type": "object",
      "properties": {
        "M_param": { "type": "number" },
        "FLOPs": { "type": "number" },
        "T_inf": { "type": "number" }
      }
    },
    "metrology": {
      "type": "object",
      "properties": {
        "units": { "type": "string" },
        "check_dim": { "type": "boolean" }
      }
    },
    "export_manifest": {
      "type": "object",
      "required": [ "version", "artifacts", "references" ],
      "properties": {
        "version": { "type": "string" },
        "artifacts": { "type": "array", "items": { "type": "object" } },
        "references": {
          "type": "array",
          "minItems": 1,
          "items": { "type": "string", "pattern": "^[^:]+ v\\d+\\.\\d+:[A-Za-z].+$" }
        }
      }
    },
    "see": { "type": "array", "items": { "type": "string" } }
  },
  "additionalProperties": false
}
references[] 正则强制“卷名 vX.Y:锚点”;evaluation.protocol.splits 固定为 "frozen";resources 数量单位由计量章统一校核。

IV. Lint 规则(规范性)

# Normative lint rule set for model cards.
# Each rule has: id, when (path selector), assert (expression), level (error|warn).
version: "v1.0"

rules:
  # Structure and versioning
  - id: STRUCT.REQUIRED
    when: "$"
    assert: "has_keys(model_id,title,version,task,io_schema,architecture,training_data,preprocess,optimization,hyperparams,evaluation,calibration,robustness,fairness,safety,deployment,resources,export_manifest)"
    level: error

  - id: VERSION.SEMVER
    when: "$.version"
    assert: "matches('^v\\d+\\.\\d+(\\.\\d+)?$')"
    level: error

  # Frozen splits and reference format
  - id: EVAL.SPLITS_FROZEN
    when: "$.evaluation.protocol.splits"
    assert: "value == 'frozen'"
    level: error

  # Anchors may start with a lowercase letter (e.g. "...:check_dim"),
  # so the first anchor character accepts both cases.
  - id: REFERENCES.FORMAT
    when: "$.export_manifest.references[*]"
    assert: "matches('^[^:]+ v\\d+\\.\\d+:[A-Za-z].+$')"
    level: error

  # Metrology and units
  # A card that declares resources but has no metrology block must fail,
  # matching the documented "missing metrology check" failure example.
  - id: METROLOGY.SI_AND_CHECKDIM
    when: "$.resources"
    assert: "exists($.metrology) and $.metrology.units=='SI' and $.metrology.check_dim==true"
    level: error
    see: ["EFT.WP.Core.Metrology v1.0:check_dim"]

  # I/O compliance
  - id: IO.SOFTMAX_RANGE
    when: "$.io_schema.outputs[?(@.semantics=='softmax')]"
    assert: "all(_.range == '[0,1]' for _ in items)"
    level: error

  - id: IO.SHAPE_NONEMPTY
    when: "$.io_schema.inputs[*].shape"
    assert: "matches('^\\(')"
    level: error

  # Training-data binding and leakage prevention
  - id: TD.REFS_REQUIRED
    when: "$.training_data"
    assert: "len(refs) > 0 and splits_ref"
    level: error

  - id: TD.LEAKAGE_GUARDS
    when: "$.training_data.leakage_guards"
    assert: "contains_any(['per-object','per-timewindow','per-scene'])"
    level: warn

  # Minimal fairness and robustness checks
  - id: FAIRNESS.THRESHOLD
    when: "$.fairness.threshold"
    assert: "value <= 0.10"
    level: warn

  - id: ROBUSTNESS.DROP_MAX
    when: "$.robustness.thresholds.drop_rel_max"
    assert: "value <= 0.20"
    level: warn

  # Text and notation guardrails
  - id: MATH.NO_CHINESE
    when: "$"
    assert: "no_chinese_in_math()"
    level: warn

  - id: SYMBOLS.CONFLICT
    when: "$"
    assert: "not_mixed(['T_fil','T_trans']) and not_mixed(['n','n_eff'])"
    level: error

以上规则与 Schema 联合执行;STRUCT.REQUIRED/EVAL.SPLITS_FROZEN/REFERENCES.FORMAT/METROLOGY.SI_AND_CHECKDIM/SYMBOLS.CONFLICT 为阻断项。

V. 失败样例与诊断(节选)

# Failing inputs paired with the lint diagnostic each one is expected to produce.
fail_examples:
  # Reference lacks the "<volume> vX.Y:<anchor>" form.
  - case: "invalid reference"
    input:
      export_manifest:
        references: ["Core.DataSpec:EXPORT"]
    expect:
      rule: "REFERENCES.FORMAT"
      level: "error"
      fix: "Use 'EFT.WP.Core.DataSpec v1.0:EXPORT'"

  # Evaluation splits set to something other than 'frozen'.
  - case: "splits not frozen"
    input:
      evaluation:
        protocol:
          splits: "custom"
    expect:
      rule: "EVAL.SPLITS_FROZEN"
      level: "error"
      fix: "Set evaluation.protocol.splits to 'frozen'"

  # Resources declared without an accompanying metrology block.
  - case: "missing metrology check"
    input:
      resources:
        M_param: 25.6
        # Signed exponent so YAML 1.1 loaders also read this as a float.
        FLOPs: 4.1e+9
        T_inf: 3.8
    expect:
      rule: "METROLOGY.SI_AND_CHECKDIM"
      level: "error"
      fix: "Add metrology {units:'SI', check_dim:true}"

Lint 报告需包含 rule、path、message 与修复建议。

VI. 机器可读最小示例(通过校验)

# Minimal machine-readable model card example.
model_id: "eift.vision.cls.resnet50"
title: "ResNet50 Classifier"
version: "v1.0"
task: "classification"

io_schema:
  inputs:
    - {name: "image", shape: "(H,W,3)", dtype: "uint8", range: "[0,255]"}
  outputs:
    - {name: "probs", shape: "(K,)", dtype: "float32", range: "[0,1]", semantics: "softmax"}
  batching: {mode: "dynamic", max_batch: 128}

architecture:
  version: "v1.0"
  backbone: "resnet50"
  topology:
    - {name: "stem", type: "conv"}

training_data:
  refs: ["EFT.WP.Data.DatasetCards v1.0:Ch.11"]
  splits_ref: "eift.obs.demo@v1.0"

preprocess:
  pipeline_id: "img-prep-v1"
  steps: []
  parameter_lock: true

optimization:
  objective: {name: "cross_entropy"}

hyperparams: {batch_size: 256, epochs: 200}

evaluation:
  protocol:
    splits: "frozen"
    seeds: [0, 1, 2, 3, 4]
    repeats: 5
  metrics:
    classification: ["f1_macro", "roc_auc", "ece", "brier"]

calibration:
  method: "temperature"
  params: {t: 1.7}

robustness:
  thresholds: {drop_rel_max: 0.10}

fairness:
  axes: ["class", "region"]
  gap_metric: "abs_diff"
  threshold: 0.05

safety:
  allowed_use: ["academic"]
  prohibited_use: ["surveillance"]

deployment:
  forms: ["rest"]
  devices: ["A100"]
  latency_targets_ms: {p50: 5}

resources:
  M_param: 25.6
  # Signed exponent so YAML 1.1 loaders also read this as a float.
  FLOPs: 4.1e+9
  T_inf: 3.8

metrology: {units: "SI", check_dim: true}

export_manifest:
  version: "v1.0"
  artifacts:
    - {path: "model_card.yaml", sha256: "..."}
  references:
    - "EFT.WP.Core.DataSpec v1.0:EXPORT"
    - "EFT.WP.Core.Metrology v1.0:check_dim"


VII. 与导出清单的耦合(规范性)

# Export-manifest coupling: every normative artifact is registered with its
# sha256, and each reference carries the "<volume> vX.Y:<anchor>" form.
export_manifest:
  # `version` is a required key of export_manifest in the JSON Schema.
  version: "v1.0"
  artifacts:
    - {path: "schema/model_card.schema.json", sha256: "..."}
    - {path: "schema/lint_rules.yaml", sha256: "..."}
    - {path: "schema/examples/minimal.yaml", sha256: "..."}
    # The full-field example is one of the four normative artifacts and
    # must be registered as well.
    - {path: "schema/examples/full.yaml", sha256: "..."}
  references:
    - "EFT.WP.Core.DataSpec v1.0:EXPORT"
    - "EFT.WP.Core.Metrology v1.0:check_dim"

Schema 与 Lint 作为阻断件必须列出并可校验;引用携带“卷名 vX.Y:锚点”。

VIII. 验证接口(实现绑定 Ixx-?,返回统一结构)

def validate_model_card(card: dict) -> dict:
    """Interface stub: validate a model card.

    Declared to take the card as a dict and return a dict; the body is
    intentionally left unimplemented here.
    NOTE(review): presumably validates against the normative JSON Schema;
    confirm against the implementation binding.
    """
    ...


def lint_model_card(card: dict, rules: dict) -> dict:
    """Interface stub: lint a model card against a rule set.

    Declared to take the card and the rules as dicts and return a dict;
    the body is intentionally left unimplemented here.
    """
    ...


def check_units(card: dict) -> dict:
    """Interface stub: unit check for a model card.

    Uses Core.Metrology v1.0:check_dim. Declared to return a dict; the
    body is intentionally left unimplemented here.
    """
    ...


def verify_references(card: dict) -> dict:
    """Interface stub: verify the card's references.

    Covers regex matching plus anchor reachability. Declared to return a
    dict; the body is intentionally left unimplemented here.
    """
    ...

返回形如 {"ok": bool, "errors":[...], "warnings":[...], "metrics":{...}},用于门户/CI。

IX. 本章合规自检


版权与许可(CC BY 4.0)

版权声明:除另有说明外,《能量丝理论》(含文本、图表、插图、符号与公式)的著作权由作者(“屠广林”先生)享有。
许可方式:本作品采用 Creative Commons 署名 4.0 国际许可协议(CC BY 4.0)进行许可;在注明作者与来源的前提下,允许为商业或非商业目的进行复制、转载、节选、改编与再分发。
署名格式(建议):作者:“屠广林”;作品:《能量丝理论》;来源:energyfilament.org;许可证:CC BY 4.0。

首次发布: 2025-11-11|当前版本:v5.1
协议链接:https://creativecommons.org/licenses/by/4.0/