44-EFT.WP.Data.ModelCards v1.0 | 第17章示例与最佳实践

目录／文档-技术白皮书（V5.05）／ 44-EFT.WP.Data.ModelCards v1.0

第17章示例与最佳实践

I. 章节目的与范围

本章提供从最小模型卡到完整模型卡的端到端示例，覆盖任务与 I/O、架构与参数、训练数据与采样绑定、预处理与特征工程、目标函数与优化/超参、评测协议与指标、校准与不确定度、鲁棒性/公平性/伦理与使用限制，以及 Schema & Lint 与 API 落地；并给出常见失败模式—修复策略与发布前自检清单。

II. 最小可用模型卡（通过 Schema 与 Lint 的发布级示例）

# Minimal publishable model card: ResNet50 image classifier.
# Nesting is written out explicitly so the repeated `version` keys
# (card / architecture / export_manifest) live in separate mappings, and
# every flow mapping uses "key: value" (space after the colon) so parsers
# do not read `name:"image"` as a single plain scalar.
model_id: "eift.vision.cls.resnet50"
title: "ResNet50 Classifier"
version: "v1.0"
task: "classification"

io_schema:
  inputs:
    - {name: "image", shape: "(H,W,3)", dtype: "uint8", range: "[0,255]", semantics: "rgb"}
  outputs:
    - {name: "probs", shape: "(K,)", dtype: "float32", range: "[0,1]", semantics: "softmax"}
  batching: {mode: "dynamic", max_batch: 128}
  streaming: {enabled: false}

architecture:
  version: "v1.0"
  backbone: "resnet50"
  topology:
    - name: "stem"
      type: "conv"
      params: {out: 64, k: 7, s: 2, norm: "bn", act: "relu"}
    - name: "stage1"
      type: "resblk"
      repeat: 3
      params: {out: 256, bottleneck: true}
    - name: "stage2"
      type: "resblk"
      repeat: 4
      params: {out: 512}
    - name: "head"
      type: "linear"
      params: {out_dim: 1000}

training_data:
  refs:
    - "EFT.WP.Data.DatasetCards v1.0:Ch.11"
  splits_ref: "eift.obs.demo@v1.0"
  contamination_policy: "forbid-cross-split"
  leakage_guards: ["per-object"]

preprocess:
  pipeline_id: "img-prep-v1"
  steps:
    - name: "standardize"
      enabled: true
      idempotent: true
      params:
        type: "zscore"
        mean: [0.485, 0.456, 0.406]
        std: [0.229, 0.224, 0.225]
        stats_from: "train-only"
      inputs: ["image"]
      outputs: ["image_std"]
  feature_space: {type: "dense", shape: "(H',W',C')", dtype: "float32", normalization: "zscore"}
  parameter_lock: true

optimization:
  objective: {name: "cross_entropy", reduction: "mean"}
  optimizer:
    name: "adamw"
    lr: 3.0e-4
    betas: [0.9, 0.999]
    eps: 1.0e-8
    weight_decay: 0.05
  scheduler:
    name: "cosine"
    warmup: {steps: 500, mode: "linear"}
  hyperparams: {batch_size: 256, epochs: 200}

evaluation:
  protocol:
    splits: "frozen"
    seeds: [0, 1, 2, 3, 4]
    repeats: 5
    ci: {method: "bootstrap-bca", level: 0.95, samples: 1000}
    significance: {test: "permutation", alpha: 0.05}
  metrics:
    classification: ["f1_macro", "roc_auc", "ece", "brier"]

calibration:
  method: "temperature"
  params: {t: 1.7}

robustness:
  thresholds: {drop_rel_max: 0.10}

fairness:
  axes: ["class", "region"]
  gap_metric: "abs_diff"
  threshold: 0.05

safety:
  allowed_use: ["academic", "benchmark"]
  prohibited_use: ["surveillance"]

deployment:
  forms: ["rest"]
  devices: ["A100"]
  latency_targets_ms: {p50: 5}
  # NOTE(review): `resources` is assumed to belong to `deployment`; the
  # flattened source does not show its parent - confirm against the Schema.
  resources:
    M_param: 25.6
    # Explicit exponent sign: YAML 1.1 loaders read "4.1e9" as a string.
    FLOPs: 4.1e+9
    T_inf: 3.8

metrology: {units: "SI", check_dim: true}

export_manifest:
  version: "v1.0"
  artifacts:
    - {path: "model_card.yaml", sha256: "..."}
    - {path: "eval/summary.csv", sha256: "..."}
  references:
    - "EFT.WP.Core.DataSpec v1.0:EXPORT"
    - "EFT.WP.Core.Metrology v1.0:check_dim"

III. 含路径量（含 T_arr）的完整示例（多模态 + 鲁棒/公平）

# Complete model card with a path-dependent quantity (T_arr): multi-mode
# ASR + forecasting example with robustness and fairness gates.
# Nesting is written out explicitly so the repeated `version` keys live in
# separate mappings, and every flow mapping uses "key: value" (space after
# the colon) so parsers do not fold `name:"enc"` into one plain scalar.
model_id: "eift.radio.audio.asr_toa"
title: "ASR + TOA Estimator"
version: "v1.2"
task: ["asr", "forecasting"]

io_schema:
  # One named sub-schema per task.
  modes:
    asr:
      inputs:
        - {name: "waveform", shape: "(T,)", dtype: "float32", range: "[-1,1]", semantics: "pcm"}
      outputs:
        - {name: "text", shape: "()", dtype: "string", range: "N/A", semantics: "utf-8"}
    forecasting:
      inputs:
        - {name: "series", shape: "(T,C)", dtype: "float32", semantics: "zscore"}
        - {name: "time_index", shape: "(T,)", dtype: "int64", semantics: "unix_ms"}
      outputs:
        - {name: "y_hat", shape: "(H,C)", dtype: "float32", semantics: "forecast"}
        - {name: "q_hat", shape: "(H,C,Q)", dtype: "float32", semantics: "quantiles"}

architecture:
  version: "v1.0"
  backbone: "conformer-xs"
  topology:
    - name: "enc"
      type: "conformer"
      repeat: 12
      params: {dim: 256, heads: 4, ff_mult: 4, act: "silu", norm: "ln"}
    - name: "toa_head"
      type: "mlp"
      params: {hidden: 512, out_dim: 1}
  # Explicit exponent sign: YAML 1.1 loaders read "9.6e9" as a string.
  params_report: {M_param: 32.4, FLOPs: 9.6e+9, T_inf: 12.1}

training_data:
  refs:
    - "EFT.WP.Data.DatasetCards v1.0:Ch.6"
    - "EFT.WP.Data.DatasetCards v1.0:Ch.11"
    - "EFT.WP.Data.DatasetCards v1.0:Ch.12"
  splits_ref: "eift.radio.toa-set@v1.2"
  sampling_binding:
    strategy: "stratified"
    strata:
      - by: "snr_bin"
        buckets: {"7-10": 300, "10-20": 500, "20+": 700}
  contamination_policy: "forbid-cross-split"
  leakage_guards: ["per-timewindow"]

preprocess:
  pipeline_id: "toa-prep-v2"
  steps:
    - name: "rfi_clean"
      enabled: true
      idempotent: true
      params: {method: "spectral-kurtosis", window: 256, thr_sigma: 5}
    - name: "stft"
      enabled: true
      idempotent: true
      params: {win: 512, hop: 160, window: "hann"}
  feature_space: {type: "audio_spec", shape: "(F,T)", dtype: "float32", normalization: "zscore"}
  parameter_lock: true

# Registration of the path quantity: delta_form / path / measure are the
# three fields the chapter's failure-mode list requires whenever T_arr is used.
path_dependence:
  applies_to: ["T_arr"]
  delta_form: "const-factor"
  path: "gamma(ell)"
  measure: "d ell"
  see:
    - "EFT.WP.Core.Equations v1.1:S20-1"
    - "EFT.WP.Core.Metrology v1.0:check_dim"

optimization:
  objective:
    name: "mse"
    reduction: "mean"
    formula: "L(θ) = ( E_{(x,y)∼D} [ ( y - f_θ(x) )^2 ] )"
  optimizer:
    name: "adamw"
    lr: 2.0e-4
    betas: [0.9, 0.999]
    weight_decay: 0.01
  scheduler:
    name: "cosine"
    warmup: {steps: 2000, mode: "linear"}
  hyperparams: {batch_size: 64, epochs: 120}

evaluation:
  protocol:
    splits: "frozen"
    seeds: [0, 1, 2, 3, 4]
    repeats: 5
    significance: {test: "bootstrap", alpha: 0.05}
    ci: {method: "bootstrap-bca", level: 0.95, samples: 2000}
  metrics:
    timeseries: ["rmse", "mae", "qloss@{0.1,0.5,0.9}"]
    # Calibration metrics for the ASR confidence outputs.
    classification: ["ece", "brier"]

calibration:
  method: "temperature"
  params: {t: 1.6}
  eval:
    report: ["ece", "brier", "calibration_curve"]
    ece_bins: 15

uncertainty:
  model: "linear"
  components:
    - name: "thermal"
      type: "random"
      value: 2.1
      unit: "K"
      distribution: "normal"
      coverage: {k: 1.0}
    - name: "iono"
      type: "systematic"
      value: 0.7e-9
      unit: "s"
      distribution: "normal"
      coverage: {k: 2.0}
      corr_group: "path"
  correlation:
    posture: "groups"
    groups:
      - {name: "path", pairwise: "rho=0.5"}
  propagation: {rule: "linear", linearization: "first-order"}
  coverage_policy: {target_p: 0.95, k: 2.0}

robustness:
  shift_tests:
    - name: "snr_drop"
      severity: [3, 6, 9]
      policy: "additive-noise"
    - name: "spec_notch"
      # Numeric band edges (were quoted strings) to match unit "fraction".
      bands: [[0.3, 0.5], [0.6, 0.7]]
      unit: "fraction"
  thresholds: {drop_rel_max: 0.10}

fairness:
  axes: ["region", "device"]
  gap_metric: "abs_diff"
  threshold: 0.05

deployment:
  forms: ["grpc"]
  devices: ["A100", "CPU-AVX2"]
  latency_targets_ms: {p50: 20, p99: 80}
  concurrency: 256
  # NOTE(review): `resources` is assumed to belong to `deployment`; the
  # flattened source does not show its parent - confirm against the Schema.
  resources: {M_param: 32.4, FLOPs: 9.6e+9, T_inf: 12.1}

metrology: {units: "SI", check_dim: true}

export_manifest:
  version: "v1.2"
  artifacts:
    - {path: "model_card.yaml", sha256: "..."}
    - {path: "eval/summary.csv", sha256: "..."}
    - {path: "robustness/acc_vs_snr.csv", sha256: "..."}
  references:
    - "EFT.WP.Core.DataSpec v1.0:EXPORT"
    - "EFT.WP.Core.Metrology v1.0:check_dim"
    - "EFT.WP.Core.Equations v1.1:S20-1"

IV. 常见失败模式 → 修复策略（对照 Lint/Schema）

引用不合规："Core.DataSpec:EXPORT" → 修复："EFT.WP.Core.DataSpec v1.0:EXPORT"。
评测切分未冻结：evaluation.protocol.splits:"custom" → 修复：改为 "frozen"，并回指数据卡冻结索引。
softmax 输出未限定：range:"(-∞,+∞)" → 修复：range:"[0,1]" 且约束 sum==1±1e-6。
计量未声明：缺少 metrology 或 check_dim:false → 修复：metrology:{units:"SI", check_dim:true}。
路径量未登记：使用 T_arr 但无 delta_form/path/measure → 修复：补齐三字段，并采用两种等价式之一进行一致性校验。
泄漏：对象/时间窗跨 split → 修复：设置 leakage_guards 并重建索引；在 audits 与评测报告中记录。
公平性阈值缺失：未设 threshold → 修复：显式设定并在评测中给出置信区间与显著性。

V. 最佳实践清单（发布必读）

引用与版本：所有跨卷引用使用“卷名 vX.Y:锚点”，公开材料锁定到稳定线（如 v1.* 或次版本）。
切分与随机性：仅使用冻结切分；给出 seeds/repeats 与 CI/显著性，报告批量与设备口径。
计量一致：所有单位/量纲通过 check_dim；合并统计与计量不确定度前先做单位归一。
I/O 对齐：io_schema 与部署端点完全一致；多任务提供命名子模式。
鲁棒/公平：设定阻断阈值（如 drop_rel_max、gap 阈值），并在导出物中附曲线与分层表。
工件可追溯：将日志、锁定配置、评测表/图、告警规则等纳入 export_manifest.artifacts[] 并提供 sha256。

VI. 与验证 API 的联动示例

# Structure + cross-volume validation.
# Continuation lines must directly follow the trailing backslash; a blank
# line in between ends the command and the -H/-d options are lost.
curl -s -X POST https://api.eift.org/api/v1/validate_model_card \
  -H "Authorization: Bearer <token>" \
  -H "Content-Type: application/json" \
  -H "x-eift-idempotency: a0d7b6c4-9478-4e4a-9f13-8c1b9f77f111" \
  -d @model_card.json

# Unit / dimension consistency check.
curl -s -X POST https://api.eift.org/api/v1/check_units \
  -H "Authorization: Bearer <token>" -H "Content-Type: application/json" \
  -d @model_card.json

# Robustness / fairness evaluation.
# Same auth and JSON content type as the calls above: without an explicit
# Content-Type, `curl -d` sends application/x-www-form-urlencoded.
curl -s -X POST https://api.eift.org/api/v1/robustness_eval \
  -H "Authorization: Bearer <token>" -H "Content-Type: application/json" \
  -d @model_card.json

curl -s -X POST https://api.eift.org/api/v1/fairness_eval \
  -H "Authorization: Bearer <token>" -H "Content-Type: application/json" \
  -d @model_card.json

VII. 本章合规自检

示例模型卡可直接通过第15章 Schema 与 Lint，并在 export_manifest 中列出引用锚点与工件 sha256。
涉及 T_arr 的示例已登记 delta_form/path/measure 且通过 check_dim；数学表达使用反引号与括号、不含中文。
io_schema、评测协议与部署端点一致；冻结切分、显著性与置信区间报告齐备。
鲁棒性与公平性条目设置阈值并通过；对抗设置（如启用）含范数/步数/ε/重启。
验证 API 调用示例可复现，阻断项清零后方可发布。

版权与许可：除另有说明外，《能量丝理论》（含文本、图表、插图、符号与公式）的著作权由作者（屠广林）享有。
许可方式（CC BY 4.0）：在注明作者与来源的前提下，允许复制、转载、节选、改编与再分发。
署名格式（建议）：作者：屠广林｜作品：《能量丝理论》｜来源：energyfilament.org｜许可证：CC BY 4.0
验证召集： 作者独立自费、无雇主无资助；下一阶段将优先在最愿意公开讨论、公开复现、公开挑错的环境中推进落地，不限国家。欢迎各国媒体与同行抓住窗口组织验证，并与我们联系。
版本信息： 首次发布：2025-11-11 ｜当前版本：v6.0+5.05