目录 / 文档-技术白皮书 / 44-EFT.WP.Data.ModelCards v1.0
I. 模板范围与口径
提供完整骨架模板与最小模板两套可直接落盘的 YAML/JSON 模板;键名统一 snake_case;跨卷引用采用“卷名 vX.Y:锚点”;单位体系遵循 SI 与 check_dim=true;评测一律使用冻结切分。
II. 最小模板(可直接复制)
# ===== Minimal Model Card (release-grade) =====
# Smallest release-grade model card. Values written as <...> are placeholders
# that must be replaced before publishing.
# NOTE(review): nesting was restored from flattened text; confirm the parent
# of each key against the schema before relying on this layout.
model_id: "<org.project.model>"
title: "<Human-readable Title>"
version: "v1.0"
task: "classification"  # or another task name
io_schema:
  inputs: [{name: "<input>", shape: "(<...>)", dtype: "<uint8|float32|...>", range: "<[lo,hi]|N/A>", semantics: "<rgb|tokenized|...>"}]
  outputs: [{name: "<output>", shape: "(<...>)", dtype: "float32", range: "[0,1]", semantics: "softmax"}]
  batching: {mode: "dynamic", max_batch: 128}
  streaming: {enabled: false}
architecture:
  version: "v1.0"
  backbone: "<resnet50|vit-b|...>"
  topology: [{name: "stem", type: "conv"}]
training_data:
  refs: ["EFT.WP.Data.DatasetCards v1.0:Ch.11"]  # reference only
  splits_ref: "<dataset_id@vX.Y>"
  contamination_policy: "forbid-cross-split"
  leakage_guards: ["per-object"]
preprocess:
  pipeline_id: "<prep-name>"
  steps: []
  feature_space: {type: "dense", shape: "(<...>)", dtype: "float32", normalization: "zscore"}
  parameter_lock: true
optimization:
  objective: {name: "cross_entropy", reduction: "mean"}
  optimizer: {name: "adamw", lr: 3.0e-4}
hyperparams: {batch_size: 256, epochs: 200}
evaluation:
  protocol: {splits: "frozen", seeds: [0, 1, 2, 3, 4], repeats: 5}
  metrics: {classification: ["f1_macro", "roc_auc", "ece", "brier"]}
calibration: {method: "temperature", params: {t: 1.7}}
robustness: {thresholds: {drop_rel_max: 0.10}}
fairness: {axes: ["class", "region"], gap_metric: "abs_diff", threshold: 0.05}
# NOTE(review): the full skeleton and the blank template name this section
# `ethics` with `intended_use`; confirm which key pair the schema expects.
safety:
  allowed_use: ["academic", "benchmark"]
  prohibited_use: ["surveillance"]
deployment:
  forms: ["rest"]
  devices: ["A100"]
  latency_targets_ms: {p50: 5}
# Exponent is written with an explicit sign so YAML 1.1 loaders also read it
# as a float rather than a string.
resources: {M_param: 25.6, FLOPs: 4.1e+9, T_inf: 3.8}
metrology: {units: "SI", check_dim: true}
export_manifest:
  version: "v1.0"
  artifacts: [{path: "model_card.yaml", sha256: "<hex>"}]
  references:
    - "EFT.WP.Core.DataSpec v1.0:EXPORT"
    - "EFT.WP.Core.Metrology v1.0:check_dim"
III. 完整骨架模板(发布级,含可选扩展)
# ===== Full Model Card Skeleton =====
# Release-grade skeleton including optional extensions. Tokens written as
# <...> are placeholders to replace; a trailing "?" inside a placeholder
# presumably marks the field as optional (confirm against the schema).
# NOTE(review): nesting was restored from flattened text; confirm the parent
# of each key against the schema before relying on this layout.
model_id: "<org.project.model>"
title: "<Human-readable Title>"
version: "v1.0.0"
task: "<classification|retrieval|generation|asr|segmentation|detection|timeseries|...>"
io_schema:
  version: "v1.0"
  inputs:
    - {name: "<...>", shape: "(<...>)", dtype: "<...>", range: "<[lo,hi]|N/A>", semantics: "<...>"}
  outputs:
    - {name: "<...>", shape: "(<...>)", dtype: "<...>", range: "[0,1]", semantics: "softmax"}
  batching: {mode: "<static|dynamic>", max_batch: <int>}
  streaming: {enabled: <bool>, chunk_ms: <int?>, lookahead_ms: <int?>}
  constraints:
    - {type: "range", target: "outputs[softmax]", rule: "[0,1] & sum==1±1e-6"}
architecture:
  version: "v1.0"
  backbone: "<resnet50|vit-b|conformer-xs|...>"
  topology:
    - {name: "<module>", type: "<conv|resblk|transformer_block|mlp|...>", repeat: <int?>, params: {<k:v>}}
  positional_encoding: {type: "<sinusoidal|learned|none>", dim: <int?>}
  # eps carries a decimal point so YAML 1.1 loaders also read it as a float.
  norm: {type: "<bn|ln|rmsnorm>", eps: 1.0e-5, affine: true}
  act: {type: "<relu|gelu|silu|tanh>"}
  dropout: {p: 0.1}
  attention: {type: "<msa|lsa|flash>", heads: <int?>, window: <int?>}
  mixed_precision: {train: "<fp16|bf16|fp32>", infer: "<fp16|bf16|fp32>", loss_scale: "<dynamic|static|none>"}
  init: {scheme: "<kaiming_uniform|xavier_normal|trunc_normal>", seed: 1701}
  params_report: {M_param: <number>, FLOPs: <number>, T_inf: <ms>}
  constraints: {grad_ckpt: true, amp_safe_ops: ["conv", "gemm"]}
training_data:
  refs:
    - "EFT.WP.Data.DatasetCards v1.0:Ch.6"
    - "EFT.WP.Data.DatasetCards v1.0:Ch.11"
    - "EFT.WP.Data.DatasetCards v1.0:Ch.12"
  splits_ref: "<dataset_id@vX.Y>"
  sampling_binding:
    strategy: "<random|stratified|time-based|spatial-tiles|systematic>"
    strata: [{by: "<class|region|snr_bin>", buckets: {"A": 100, "B": 200}}]
    weights: {class: "inverse_freq"}
  contamination_policy: "forbid-cross-split"
  leakage_guards: ["per-object", "per-timewindow", "per-scene"]
preprocess:
  pipeline_id: "<prep-name>"
  steps:
    - {name: "<clean|filter|normalize|standardize|resample|impute|encode|tokenize|stft|feature_map|pca|custom>",
       enabled: true, idempotent: true, params: {<...>}, inputs: ["<...>"], outputs: ["<...>"]}
  feature_space: {type: "<dense|sparse|sequence|image|audio_spec|tabular|embedding>", shape: "(<...>)", dtype: "<...>", normalization: "<zscore|minmax|robust|unit-norm|none>"}
  parameter_lock: true
  # NOTE(review): randomness, environment, audits and path_dependence are
  # placed under preprocess by inference; confirm their parent key.
  randomness: {seed: 1701, libraries: {numpy: "1.26.4"}}
  environment: {os: "ubuntu22.04", toolchain: ["python3.11"], containers: ["ghcr.io/eift/model-prep:1.0.2"]}
  audits: ["nan-check", "range-check", "leakage", "drift"]
  # Enable when path quantities such as T_arr are involved.
  path_dependence:
    applies_to: ["T_arr"]
    delta_form: "const-factor"  # or "general"
    path: "gamma(ell)"
    measure: "d ell"
    see: ["EFT.WP.Core.Equations v1.1:S20-1", "EFT.WP.Core.Metrology v1.0:check_dim"]
optimization:
  objective: {name: "<cross_entropy|mse|mae|nll|ctc|triplet|contrastive|custom>", reduction: "<mean|sum|none>"}
  regularization: {weight_decay: 0.05, grad_clip: {type: "norm", value: 1.0}}
  optimizer: {name: "<adamw|sgd|...>", lr: 3.0e-4, betas: [0.9, 0.999], eps: 1.0e-8, weight_decay: 0.05}
  scheduler: {name: "<cosine|step|...>", warmup: {steps: 500, mode: "linear"}}
hyperparams:
  batch_size: 256
  accum_steps: 1
  epochs: 200
  search_space: {lr: {type: "loguniform", low: 1.0e-5, high: 1.0e-3}}
evaluation:
  protocol:
    splits: "frozen"
    seeds: [0, 1, 2, 3, 4]
    repeats: 5
    # NOTE(review): significance and ci are placed under protocol by
    # inference; confirm whether they belong directly under evaluation.
    significance: {test: "permutation|bootstrap", alpha: 0.05}
    ci: {method: "bootstrap-bca", level: 0.95, samples: 1000}
  metrics:
    classification: ["f1_macro", "roc_auc", "ece", "brier"]
    detection: ["mAP@0.50:0.95", "mAP@0.50"]
    regression: ["rmse", "mae", "mape", "nll"]
calibration:
  method: "temperature"
  params: {t: 1.7}
  eval: {report: ["ece", "brier", "calibration_curve"], ece_bins: 15}
uncertainty:
  model: "<GUM|linear|montecarlo|bayesian>"
  components:
    - {name: "<...>", type: "<random|systematic>", value: <...>, unit: "<...>", distribution: "<normal|uniform|...>", coverage: {k: <...>}}
  correlation: {posture: "<groups|covariance>"}
  propagation: {rule: "<rss|linear|montecarlo|bayesian>"}
  coverage_policy: {target_p: 0.95, k: 2.0}
robustness:
  shift_tests: [{name: "snr_drop", severity: [3, 6, 9]}]
  thresholds: {drop_rel_max: 0.10}
fairness:
  axes: ["class", "region"]
  gap_metric: "abs_diff"
  threshold: 0.05
ethics:
  intended_use: ["academic", "benchmark"]
  prohibited_use: ["surveillance", "biometric_identification"]
  disclosures: {human_in_the_loop: true}
usage:
  regional_compliance: ["EU-GDPR"]
  access_control: {roles: ["owner", "maintainer", "reader"], enforcement: ["signed-url", "token"]}
deployment:
  forms: ["rest", "grpc"]
  devices: ["A100", "CPU-AVX2"]
  latency_targets_ms: {p50: 5, p99: 20}
  concurrency: 512
resources: {M_param: <number>, FLOPs: <number>, T_inf: <ms>}
metrology: {units: "SI", check_dim: true}
export_manifest:
  version: "v1.0"
  artifacts:
    - {path: "model_card.yaml", sha256: "<hex>"}
    - {path: "eval/summary.csv", sha256: "<hex>"}
  references:
    - "EFT.WP.Core.DataSpec v1.0:EXPORT"
    - "EFT.WP.Core.Metrology v1.0:check_dim"
IV. 字段占位符与最小正则(速查)
- model_id: ^[a-z0-9_\-\.]+$;version: ^v\d+\.\d+(\.\d+)?$;
- export_manifest.references[*]: ^[^:]+ v\d+\.\d+:[A-Za-z].+$;
- evaluation.protocol.splits: 固定 "frozen";softmax 输出 range="[0,1]" 且 sum==1±1e-6;
- 计量:metrology.units="SI" 与 check_dim=true。
V. 导出清单模板(规范性)
# Normative export manifest: every artifact listed carries a sha256 digest,
# and every reference uses the "Volume vX.Y:anchor" form.
export_manifest:
  version: "v1.0"
  artifacts:
    - {path: "model_card.yaml", sha256: "<hex>"}
    - {path: "eval/summary.csv", sha256: "<hex>"}
    - {path: "robustness/summary.csv", sha256: "<hex>"}
    - {path: "fairness/by_axis_metrics.csv", sha256: "<hex>"}
  references:
    - "EFT.WP.Core.DataSpec v1.0:EXPORT"
    - "EFT.WP.Core.Metrology v1.0:check_dim"
VI. 发布前阻断自检(清单)
- 结构/必填:本卷第15章 Schema 必填键齐备;evaluation.protocol.splits="frozen"。
- 引用/版本:export_manifest.references[] 使用“卷名 vX.Y:锚点”,无短码与缺版本。
- 计量/单位:units="SI"、check_dim=true;性能/能耗/时间等单位一致;
- 路径量:如含 T_arr,已登记 delta_form/path/measure 并通过校核。
- 泄漏/公平/鲁棒:泄漏审计通过;公平/鲁棒阈值设定并满足;
- 工件可验:导出清单内所有文件具 sha256,可复现。
VII. 机器可读空白模板(无注释版,CI 友好)
model_id: ""
title: ""
version: "v1.0"
task: ""
io_schema: {inputs: [], outputs: [], batching: {mode: "dynamic", max_batch: 0}, streaming: {enabled: false}}
architecture: {version: "v1.0", backbone: "", topology: []}
training_data: {refs: [], splits_ref: ""}
preprocess: {pipeline_id: "", steps: [], feature_space: {type: "", shape: "", dtype: "", normalization: "none"}, parameter_lock: true}
optimization: {objective: {name: "", reduction: "mean"}}
hyperparams: {batch_size: 0, epochs: 0}
evaluation: {protocol: {splits: "frozen", seeds: [], repeats: 1}, metrics: {}}
calibration: {method: "", params: {}}
uncertainty: {}
robustness: {}
fairness: {axes: [], gap_metric: "abs_diff", threshold: 0.05}
ethics: {intended_use: [], prohibited_use: []}
usage: {regional_compliance: [], access_control: {roles: [], enforcement: []}}
deployment: {forms: [], devices: [], latency_targets_ms: {}}
resources: {M_param: 0, FLOPs: 0, T_inf: 0}
metrology: {units: "SI", check_dim: true}
export_manifest: {version: "v1.0", artifacts: [], references: ["EFT.WP.Core.DataSpec v1.0:EXPORT", "EFT.WP.Core.Metrology v1.0:check_dim"]}
版权与许可(CC BY 4.0)
版权声明:除另有说明外,《能量丝理论》(含文本、图表、插图、符号与公式)的著作权由作者(“屠广林”先生)享有。
许可方式:本作品采用 Creative Commons 署名 4.0 国际许可协议(CC BY 4.0)进行许可;在注明作者与来源的前提下,允许为商业或非商业目的进行复制、转载、节选、改编与再分发。
署名格式(建议):作者:“屠广林”;作品:《能量丝理论》;来源:energyfilament.org;许可证:CC BY 4.0。
首次发布: 2025-11-11|当前版本:v5.1
协议链接:https://creativecommons.org/licenses/by/4.0/