目录 / 文档-技术白皮书 / 44-EFT.WP.Data.ModelCards v1.0
I. 章节目的与范围
本章提供从最小模型卡到完整模型卡的端到端示例,覆盖任务与 I/O、架构与参数、训练数据与采样绑定、预处理与特征工程、目标函数与优化/超参、评测协议与指标、校准与不确定度、鲁棒性/公平性/伦理与使用限制,以及 Schema & Lint 与 API 落地;并给出常见失败模式—修复策略与发布前自检清单。
II. 最小可用模型卡(通过 Schema 与 Lint 的发布级示例)
# Minimal model card example: ResNet50 image classifier.
# NOTE(review): indentation was lost in the flattened original (it showed
# three `version` keys at one level); the nesting below is reconstructed
# from key order and from paths cited in the text such as
# `evaluation.protocol.splits` and `export_manifest.artifacts[]`.
# Confirm against the Schema chapter before publishing.
model_id: "eift.vision.cls.resnet50"
title: "ResNet50 Classifier"
version: "v1.0"
task: "classification"
io_schema:
  inputs:
    - {name: "image", shape: "(H,W,3)", dtype: "uint8", range: "[0,255]", semantics: "rgb"}
  outputs:
    - {name: "probs", shape: "(K,)", dtype: "float32", range: "[0,1]", semantics: "softmax"}
  batching: {mode: "dynamic", max_batch: 128}
  streaming: {enabled: false}
architecture:
  version: "v1.0"
  backbone: "resnet50"
  topology:
    - {name: "stem", type: "conv", params: {out: 64, k: 7, s: 2, norm: "bn", act: "relu"}}
    - {name: "stage1", type: "resblk", repeat: 3, params: {out: 256, bottleneck: true}}
    - {name: "stage2", type: "resblk", repeat: 4, params: {out: 512}}
    - {name: "head", type: "linear", params: {out_dim: 1000}}
training_data:
  refs:
    - "EFT.WP.Data.DatasetCards v1.0:Ch.11"
  splits_ref: "eift.obs.demo@v1.0"
  contamination_policy: "forbid-cross-split"
  leakage_guards: ["per-object"]
preprocess:
  pipeline_id: "img-prep-v1"
  steps:
    - name: "standardize"
      enabled: true
      idempotent: true
      params:
        type: "zscore"
        mean: [0.485, 0.456, 0.406]
        std: [0.229, 0.224, 0.225]
        stats_from: "train-only"
      inputs: ["image"]
      outputs: ["image_std"]
  feature_space: {type: "dense", shape: "(H',W',C')", dtype: "float32", normalization: "zscore"}
  parameter_lock: true
optimization:
  objective: {name: "cross_entropy", reduction: "mean"}
  optimizer: {name: "adamw", lr: 3.0e-4, betas: [0.9, 0.999], eps: 1.0e-8, weight_decay: 0.05}
  scheduler: {name: "cosine", warmup: {steps: 500, mode: "linear"}}
  hyperparams: {batch_size: 256, epochs: 200}
evaluation:
  protocol:
    splits: "frozen"
    seeds: [0, 1, 2, 3, 4]
    repeats: 5
    ci: {method: "bootstrap-bca", level: 0.95, samples: 1000}
    significance: {test: "permutation", alpha: 0.05}
  metrics:
    classification: ["f1_macro", "roc_auc", "ece", "brier"]
calibration:
  method: "temperature"
  params: {t: 1.7}
robustness:
  thresholds: {drop_rel_max: 0.10}
fairness:
  axes: ["class", "region"]
  gap_metric: "abs_diff"
  threshold: 0.05
safety:
  allowed_use: ["academic", "benchmark"]
  prohibited_use: ["surveillance"]
deployment:
  forms: ["rest"]
  devices: ["A100"]
  latency_targets_ms: {p50: 5}
  # NOTE(review): `resources` is placed under `deployment` by position only;
  # it may be a top-level section in the schema - confirm.
  resources:
    M_param: 25.6
    # Explicit exponent sign: YAML 1.1 loaders read `4.1e9` as a string.
    FLOPs: 4.1e+9
    T_inf: 3.8
metrology: {units: "SI", check_dim: true}
export_manifest:
  version: "v1.0"
  artifacts:
    - {path: "model_card.yaml", sha256: "..."}
    - {path: "eval/summary.csv", sha256: "..."}
  references:
    - "EFT.WP.Core.DataSpec v1.0:EXPORT"
    - "EFT.WP.Core.Metrology v1.0:check_dim"
III. 含路径量(含 T_arr)的完整示例(多模态 + 鲁棒/公平)
# Full model card example with a path-dependent quantity (T_arr):
# multi-mode I/O (asr + forecasting) plus robustness and fairness sections.
# NOTE(review): indentation was lost in the flattened original (it showed
# three `version` keys at one level); the nesting below is reconstructed
# from key order and from paths cited in the text such as
# `evaluation.protocol.splits` and `export_manifest.artifacts[]`.
# Confirm against the Schema chapter before publishing.
model_id: "eift.radio.audio.asr_toa"
title: "ASR + TOA Estimator"
version: "v1.2"
task: ["asr", "forecasting"]
io_schema:
  # One named sub-schema per task listed in `task`.
  modes:
    asr:
      inputs:
        - {name: "waveform", shape: "(T,)", dtype: "float32", range: "[-1,1]", semantics: "pcm"}
      outputs:
        - {name: "text", shape: "()", dtype: "string", range: "N/A", semantics: "utf-8"}
    forecasting:
      inputs:
        - {name: "series", shape: "(T,C)", dtype: "float32", semantics: "zscore"}
        - {name: "time_index", shape: "(T,)", dtype: "int64", semantics: "unix_ms"}
      outputs:
        - {name: "y_hat", shape: "(H,C)", dtype: "float32", semantics: "forecast"}
        - {name: "q_hat", shape: "(H,C,Q)", dtype: "float32", semantics: "quantiles"}
architecture:
  version: "v1.0"
  backbone: "conformer-xs"
  topology:
    - {name: "enc", type: "conformer", repeat: 12, params: {dim: 256, heads: 4, ff_mult: 4, act: "silu", norm: "ln"}}
    - {name: "toa_head", type: "mlp", params: {hidden: 512, out_dim: 1}}
  # Explicit exponent sign: YAML 1.1 loaders read `9.6e9` as a string.
  params_report: {M_param: 32.4, FLOPs: 9.6e+9, T_inf: 12.1}
training_data:
  refs:
    - "EFT.WP.Data.DatasetCards v1.0:Ch.6"
    - "EFT.WP.Data.DatasetCards v1.0:Ch.11"
    - "EFT.WP.Data.DatasetCards v1.0:Ch.12"
  splits_ref: "eift.radio.toa-set@v1.2"
  sampling_binding:
    strategy: "stratified"
    strata:
      - {by: "snr_bin", buckets: {"7-10": 300, "10-20": 500, "20+": 700}}
  contamination_policy: "forbid-cross-split"
  leakage_guards: ["per-timewindow"]
preprocess:
  pipeline_id: "toa-prep-v2"
  steps:
    - name: "rfi_clean"
      enabled: true
      idempotent: true
      params: {method: "spectral-kurtosis", window: 256, thr_sigma: 5}
    - name: "stft"
      enabled: true
      idempotent: true
      params: {win: 512, hop: 160, window: "hann"}
  feature_space: {type: "audio_spec", shape: "(F,T)", dtype: "float32", normalization: "zscore"}
  parameter_lock: true
# Registration of the path-dependent quantity: delta_form, path and measure
# are the three fields the failure-mode list requires whenever T_arr is used.
path_dependence:
  applies_to: ["T_arr"]
  delta_form: "const-factor"
  path: "gamma(ell)"
  measure: "d ell"
  see: ["EFT.WP.Core.Equations v1.1:S20-1", "EFT.WP.Core.Metrology v1.0:check_dim"]
optimization:
  objective:
    name: "mse"
    reduction: "mean"
    formula: "L(θ) = ( E_{(x,y)∼D} [ ( y - f_θ(x) )^2 ] )"
  optimizer: {name: "adamw", lr: 2.0e-4, betas: [0.9, 0.999], weight_decay: 0.01}
  scheduler: {name: "cosine", warmup: {steps: 2000, mode: "linear"}}
  hyperparams: {batch_size: 64, epochs: 120}
evaluation:
  protocol:
    splits: "frozen"
    seeds: [0, 1, 2, 3, 4]
    repeats: 5
    significance: {test: "bootstrap", alpha: 0.05}
    ci: {method: "bootstrap-bca", level: 0.95, samples: 2000}
  metrics:
    timeseries: ["rmse", "mae", "qloss@{0.1,0.5,0.9}"]
    classification: ["ece", "brier"]  # calibration of the ASR confidence outputs
calibration:
  method: "temperature"
  params: {t: 1.6}
  eval: {report: ["ece", "brier", "calibration_curve"], ece_bins: 15}
uncertainty:
  model: "linear"
  components:
    - name: "thermal"
      type: "random"
      value: 2.1
      unit: "K"
      distribution: "normal"
      coverage: {k: 1.0}
    - name: "iono"
      type: "systematic"
      value: 0.7e-9
      unit: "s"
      distribution: "normal"
      coverage: {k: 2.0}
      corr_group: "path"
  correlation: {posture: "groups", groups: [{name: "path", pairwise: "rho=0.5"}]}
  propagation: {rule: "linear", linearization: "first-order"}
  coverage_policy: {target_p: 0.95, k: 2.0}
robustness:
  shift_tests:
    - {name: "snr_drop", severity: [3, 6, 9], policy: "additive-noise"}
    - {name: "spec_notch", bands: [["0.3", "0.5"], ["0.6", "0.7"]], unit: "fraction"}
  thresholds: {drop_rel_max: 0.10}
fairness:
  axes: ["region", "device"]
  gap_metric: "abs_diff"
  threshold: 0.05
deployment:
  forms: ["grpc"]
  devices: ["A100", "CPU-AVX2"]
  latency_targets_ms: {p50: 20, p99: 80}
  concurrency: 256
  # NOTE(review): `resources` is placed under `deployment` by position only;
  # it may be a top-level section in the schema - confirm.
  # Explicit exponent sign: YAML 1.1 loaders read `9.6e9` as a string.
  resources: {M_param: 32.4, FLOPs: 9.6e+9, T_inf: 12.1}
metrology: {units: "SI", check_dim: true}
export_manifest:
  version: "v1.2"
  artifacts:
    - {path: "model_card.yaml", sha256: "..."}
    - {path: "eval/summary.csv", sha256: "..."}
    - {path: "robustness/acc_vs_snr.csv", sha256: "..."}
  references:
    - "EFT.WP.Core.DataSpec v1.0:EXPORT"
    - "EFT.WP.Core.Metrology v1.0:check_dim"
    - "EFT.WP.Core.Equations v1.1:S20-1"
IV. 常见失败模式 → 修复策略(对照 Lint/Schema)
- 引用不合规:"Core.DataSpec:EXPORT" → 修复:"EFT.WP.Core.DataSpec v1.0:EXPORT"。
- 评测切分未冻结:evaluation.protocol.splits:"custom" → 修复:改为 "frozen",并回指数据卡冻结索引。
- softmax 输出未限定:range:"(-∞,+∞)" → 修复:range:"[0,1]" 且约束 sum==1±1e-6。
- 计量未声明:缺少 metrology 或 check_dim:false → 修复:metrology:{units:"SI", check_dim:true}。
- 路径量未登记:使用 T_arr 但无 delta_form/path/measure → 修复:补齐三字段,并采用两种等价式之一进行一致性校验。
- 泄漏:对象/时间窗跨 split → 修复:设置 leakage_guards 并重建索引;在 audits 与评测报告中记录。
- 公平性阈值缺失:未设 threshold → 修复:显式设定并在评测中给出置信区间与显著性。
V. 最佳实践清单(发布必读)
- 引用与版本:所有跨卷引用使用“卷名 vX.Y:锚点”,公开材料锁定到稳定线(如 v1.* 或次版本)。
- 切分与随机性:仅使用冻结切分;给出 seeds/repeats 与 CI/显著性,报告批量与设备口径。
- 计量一致:所有单位/量纲通过 check_dim;合并统计与计量不确定度前先做单位归一。
- I/O 对齐:io_schema 与部署端点完全一致;多任务提供命名子模式。
- 鲁棒/公平:设定阻断阈值(如 drop_rel_max、gap 阈值),并在导出物中附曲线与分层表。
- 工件可追溯:将日志、锁定配置、评测表/图、告警规则等纳入 export_manifest.artifacts[] 并提供 sha256。
VI. 与验证 API 的联动示例
# Example calls to the validation API. Each call POSTs model_card.json;
# replace <token> with a real bearer token before running.

# Structure + cross-volume reference validation
curl -s -X POST https://api.eift.org/api/v1/validate_model_card \
  -H "Authorization: Bearer <token>" \
  -H "Content-Type: application/json" \
  -H "x-eift-idempotency: a0d7b6c4-9478-4e4a-9f13-8c1b9f77f111" \
  -d @model_card.json

# Unit / dimension consistency
curl -s -X POST https://api.eift.org/api/v1/check_units \
  -H "Authorization: Bearer <token>" \
  -H "Content-Type: application/json" \
  -d @model_card.json

# Robustness / fairness evaluation
# Content-Type is set explicitly: with -d alone curl sends
# application/x-www-form-urlencoded, not JSON.
# NOTE(review): the bearer token is added to match the two calls above;
# confirm these endpoints require it.
curl -s -X POST https://api.eift.org/api/v1/robustness_eval \
  -H "Authorization: Bearer <token>" \
  -H "Content-Type: application/json" \
  -d @model_card.json
curl -s -X POST https://api.eift.org/api/v1/fairness_eval \
  -H "Authorization: Bearer <token>" \
  -H "Content-Type: application/json" \
  -d @model_card.json
VII. 本章合规自检
- 示例模型卡可直接通过第15章 Schema 与 Lint,并在 export_manifest 中列出引用锚点与工件 sha256。
- 涉及 T_arr 的示例已登记 delta_form/path/measure 且通过 check_dim;数学表达使用反引号与括号、不含中文。
- io_schema、评测协议与部署端点一致;冻结切分、显著性与置信区间报告齐备。
- 鲁棒性与公平性条目设置阈值并通过;对抗设置(如启用)含范数/步数/ε/重启。
- 验证 API 调用示例可复现,阻断项清零后方可发布。
版权与许可(CC BY 4.0)
版权声明:除另有说明外,《能量丝理论》(含文本、图表、插图、符号与公式)的著作权由作者(“屠广林”先生)享有。
许可方式:本作品采用 Creative Commons 署名 4.0 国际许可协议(CC BY 4.0)进行许可;在注明作者与来源的前提下,允许为商业或非商业目的进行复制、转载、节选、改编与再分发。
署名格式(建议):作者:“屠广林”;作品:《能量丝理论》;来源:energyfilament.org;许可证:CC BY 4.0。
首次发布:2025-11-11|当前版本:v5.1
协议链接:https://creativecommons.org/licenses/by/4.0/