目录文档-技术白皮书18-EFT.WP.Methods.CrossStats v1.0

附录C 清单模板与样例(stats manifest)


一句话目标:提供跨统计产出的标准发布清单 manifest.stats 之最小键集、字段语义、校验与多场景样例,确保估计、检验、漂移、实验与审计在不同系统间可复现、可追溯、可对齐。


I. 范围与对象


II. 最小键集(必须存在)


III. 字段与类型说明

  1. schema_version : string(如 "1.0.0")
  2. book_ref : string(固定 "EFT.WP.Methods.CrossStats v1.0")
  3. TraceID : string(跨系统追溯 ID)
  4. repro_hash : string(hash_sha256(code+params+data_fingerprint))
  5. signature : string(发布方签名)
  6. timebase : object
    • tau_mono_range : [int,int](内部单调时基区间)
    • ts_range : [string,string](ISO8601 对外发布区间)
    • offset/skew/J : {offset: double, skew: double, J: double}
  7. arrival.two_forms : {delta_form: double, tol_Tarr: double}
  8. window.Delta_t : string(统计窗口,如 "PT24H")
  9. dataset : {N: int, N_eff?: double, sampling?: string}
  10. weights : {W_norm: double, cap_w?: double, p_trim?: double}
  11. metrics.core[*] : {name: string, est: double, se?: double, ci?: [double,double], posterior?: {q05: double, q50: double, q95: double}, unit?: string, dim?: string, notes?: string}
  12. metrics.drift? : {W1?: double, KL?: double, psi?: double}
  13. metrics.ab? : {lift: double, se: double, ci: [double,double], mde?: double, alpha_spent?: double}
  14. metrics.causal? : {ATE: double, U?: double, SMD_max?: double, overlap_min?: double}
  15. contracts[*] : {id: string, status: string, severity: string, evidence: object}
  16. actions[*] : {policy_id: string, decision: string, reason: string, at: string}
  17. provenance : {data_uri: string, code_uri: string, env: {python?: string, pkg?: object}}
  18. release_tag : string(发布标签,模板与各样例均携带,如 "stats-prod-2025-08-31T12:00Z")

IV. 模板:最小可发布清单

{

"schema_version": "1.0.0",

"book_ref": "EFT.WP.Methods.CrossStats v1.0",

"release_tag": "stats-prod-2025-08-31T12:00Z",

"TraceID": "trc_01HXYZ...",

"repro_hash": "sha256:REPRO_HASH",

"signature": "SIG_BASE64",

"timebase": {

"tau_mono_range": [1725062400, 1725148800],

"ts_range": ["2025-08-31T00:00:00Z", "2025-09-01T00:00:00Z"],

"offset/skew/J": {"offset": 0.0012, "skew": 2.3e-6, "J": 0.0031}

},

"arrival": {

"two_forms": {

"delta_form": 2.1e-6,

"tol_Tarr": 5.0e-6

}

},

"window": {"Delta_t": "PT24H"},

"dataset": {"N": 125034},

"weights": {"W_norm": 0.9996},

"metrics": {

"core": [

{"name": "conversion_rate", "est": 0.0842, "ci": [0.0831, 0.0853], "unit": "1", "dim": "[]"},

{"name": "avg_order_value", "est": 56.73, "se": 0.42, "unit": "USD", "dim": "[M]"}

]

},

"contracts": [

{"id": "C30-000", "status": "pass", "severity": "info", "evidence": {"checks": 128}},

{"id": "C30-001", "status": "pass", "severity": "info"},

{"id": "C30-004", "status": "pass", "severity": "info", "evidence": {"W_norm": 0.9996}},

{"id": "C30-342", "status": "pass", "severity": "info", "evidence": {"coverage_rate": 0.949}}

],

"actions": [

{"policy_id": "SC-SLO-01", "decision": "ship", "reason": "all guardrails pass", "at": "2025-08-31T12:01:05Z"}

],

"provenance": {

"data_uri": "s3://bucket/ds/2025-08-31/",

"code_uri": "git+https://repo/commit/abcdef",

"env": {"python": "3.11.5", "pkg": {"numpy": "2.0.1", "scipy": "1.14.0"}}

}

}


V. 样例A:A/B 在线实验(序贯 alpha,护栏 SLO)

{

"schema_version": "1.0.0",

"book_ref": "EFT.WP.Methods.CrossStats v1.0",

"release_tag": "ab-exp-42-int-07",

"TraceID": "trc_AB42_07",

"repro_hash": "sha256:HASH_AB42_07",

"signature": "SIG_BASE64",

"timebase": {

"tau_mono_range": [1725148800, 1725235200],

"ts_range": ["2025-09-01T00:00:00Z", "2025-09-02T00:00:00Z"],

"offset/skew/J": {"offset": 0.0007, "skew": 2.0e-6, "J": 0.0025}

},

"arrival": { "two_forms": {"delta_form": 1.8e-6, "tol_Tarr": 5.0e-6} },

"window": {"Delta_t": "PT24H"},

"dataset": {"N": 98023, "sampling": "online_randomized"},

"weights": {"W_norm": 1.0002},

"metrics": {

"core": [

{"name": "lift_cr_B_vs_A", "est": 0.0124, "se": 0.0039, "ci": [0.0048, 0.0200], "unit": "1", "dim": "[]"},

{"name": "guardrail_latency_ms_p99", "est": 245.0, "unit": "ms", "dim": "[T]"}

],

"ab": {

"lift": 0.0124,

"se": 0.0039,

"ci": [0.0048, 0.0200],

"mde": 0.01,

"alpha_spent": 0.043

}

},

"contracts": [

{"id": "C30-382", "status": "pass", "severity": "info", "evidence": {"latency_ms_p99": 245}},

{"id": "C30-383", "status": "pass", "severity": "info", "evidence": {"alpha_spent": 0.043, "alpha_budget": 0.05}},

{"id": "C30-381", "status": "pass", "severity": "info", "evidence": {"p_t": 0.501, "p_c": 0.499, "eps_exp": 0.01}}

],

"actions": [

{"policy_id": "SC-AB-01", "decision": "ship", "reason": "sequential boundary crossed; guardrails pass", "at": "2025-09-02T00:00:30Z"}

],

"provenance": {

"data_uri": "kafka://topic/exp42/day=2025-09-01",

"code_uri": "git+https://repo/commit/1122aabb",

"env": {"python": "3.11.5", "pkg": {"pandas": "2.2.2", "statsmodels": "0.14.2"}}

}

}


VI. 样例B:分布漂移监测(对齐与重校准触发)

{

"schema_version": "1.0.0",

"book_ref": "EFT.WP.Methods.CrossStats v1.0",

"release_tag": "drift-week-2025W36",

"TraceID": "trc_DRIFT_W36",

"repro_hash": "sha256:HASH_DRIFT_W36",

"signature": "SIG_BASE64",

"timebase": {

"tau_mono_range": [1725148800, 1725753600],

"ts_range": ["2025-09-01T00:00:00Z", "2025-09-08T00:00:00Z"],

"offset/skew/J": {"offset": 0.0011, "skew": 2.6e-6, "J": 0.0030}

},

"arrival": { "two_forms": {"delta_form": 2.5e-6, "tol_Tarr": 5.0e-6} },

"window": {"Delta_t": "P7D"},

"dataset": {"N": 705_211},

"weights": {"W_norm": 1.0000},

"metrics": {

"core": [

{"name": "score_calibration_ece", "est": 0.024, "unit": "1", "dim": "[]"}

],

"drift": {"W1": 0.095, "KL": 0.021, "psi": 0.14}

},

"contracts": [

{"id": "C30-370", "status": "fail", "severity": "warn", "evidence": {"W1": 0.095, "W1_max": 0.08}},

{"id": "C30-373", "status": "fail", "severity": "error", "evidence": {"r_win": 3}}

],

"actions": [

{"policy_id": "SC-DRIFT-01", "decision": "align_then_recalibrate", "reason": "persistent W1 breach; psi elevated", "at": "2025-09-08T00:01:00Z"},

{"policy_id": "SC-CAL-01", "decision": "canary_10pct", "reason": "ECE_after improves >= delta_min", "at": "2025-09-09T12:00:00Z"}

],

"provenance": {

"data_uri": "s3://bucket/weekly_snap/2025W36",

"code_uri": "git+https://repo/commit/55cc66dd",

"env": {"python": "3.11.5", "pkg": {"scikit-learn": "1.5.1"}}

}

}


VII. 样例C:因果估计(双稳健与重叠校核)

{

"schema_version": "1.0.0",

"book_ref": "EFT.WP.Methods.CrossStats v1.0",

"release_tag": "causal-ATE-geoQ3",

"TraceID": "trc_CAUSAL_GEO_Q3",

"repro_hash": "sha256:HASH_CAUSAL_GEO_Q3",

"signature": "SIG_BASE64",

"timebase": {

"tau_mono_range": [1719782400, 1727568000],

"ts_range": ["2024-07-01T00:00:00Z", "2024-09-30T23:59:59Z"],

"offset/skew/J": {"offset": 0.0009, "skew": 1.8e-6, "J": 0.0021}

},

"arrival": { "two_forms": {"delta_form": 1.2e-6, "tol_Tarr": 5.0e-6} },

"window": {"Delta_t": "P92D"},

"dataset": {"N": 40211, "N_eff": 27894.5, "sampling": "observational"},

"weights": {"W_norm": 1.0007, "cap_w": 20.0, "p_trim": 0.7},

"metrics": {

"core": [

{"name": "ATE", "est": 1.84, "ci": [0.95, 2.73], "unit": "USD", "dim": "[M]"}

],

"causal": {"ATE": 1.84, "U": 0.62, "SMD_max": 0.06, "overlap_min": 0.04}

},

"contracts": [

{"id": "C30-400", "status": "pass", "severity": "info", "evidence": {"overlap_min": 0.04, "eps_ol": 0.02}},

{"id": "C30-401", "status": "pass", "severity": "info", "evidence": {"SMD_max": 0.06, "smd_max": 0.10}},

{"id": "C30-402", "status": "pass", "severity": "info", "evidence": {"ATE_IPW": 1.79, "ATE_OR": 1.88}},

{"id": "C30-350", "status": "pass", "severity": "info", "evidence": {"B": 2000}}

],

"actions": [

{"policy_id": "SC-COVER-01", "decision": "publish_readonly", "reason": "coverage met; trimmed weights applied", "at": "2024-10-01T10:00:00Z"}

],

"provenance": {

"data_uri": "warehouse://table/geo_q3",

"code_uri": "git+https://repo/commit/aa77bb88",

"env": {"python": "3.10.14", "pkg": {"econml": "0.15.0", "pymc": "5.13.1"}}

}

}


VIII. 校验与断言(自动化)


IX. 与契约库映射


X. 追溯与签名规范


XI. 版本与兼容性


小结


版权与许可(CC BY 4.0)

版权声明:除另有说明外,《能量丝理论》(含文本、图表、插图、符号与公式)的著作权由作者(“屠广林”先生)享有。
许可方式:本作品采用 Creative Commons 署名 4.0 国际许可协议(CC BY 4.0)进行许可;在注明作者与来源的前提下,允许为商业或非商业目的进行复制、转载、节选、改编与再分发。
署名格式(建议):作者:“屠广林”;作品:《能量丝理论》;来源:energyfilament.org;许可证:CC BY 4.0。

首次发布: 2025-11-11|当前版本:v5.1
协议链接:https://creativecommons.org/licenses/by/4.0/