githubnext · mrjf · Jun 9, 2026 · Jun 9, 2026
diff --git a/.crane/scripts/score.go b/.crane/scripts/score.go
@@ -60,6 +60,7 @@ type CutoverGates struct {
 	PythonReferenceRequired bool    `json:"python_reference_required"`
 	SurfaceParity           float64 `json:"surface_parity"`
 	HelpParity              float64 `json:"help_parity"`
+	OptionParity            float64 `json:"option_parity"`
 	FunctionalContracts     float64 `json:"functional_contracts"`
 	StateDiffContracts      float64 `json:"state_diff_contracts"`
 	PythonBehaviorContracts float64 `json:"python_behavior_contracts"`
@@ -96,6 +97,7 @@ type Score struct {
 	PythonReferencePresent bool            `json:"python_reference_present"`
 	SurfaceParity          float64         `json:"surface_parity"`
 	HelpParity             float64         `json:"help_parity"`
+	OptionParity           float64         `json:"option_parity"`
 	FunctionalParity       float64         `json:"functional_parity"`
 	StateDiffParity        float64         `json:"state_diff_parity"`
 	KnownExceptions        int             `json:"known_exceptions"`
@@ -146,6 +148,7 @@ func computeScore(input scanInput, getenv getenvFunc) (Score, error) {
 	benchmarks := RatioGate{}
 	surface := RatioGate{}
 	help := RatioGate{}
+	optionParity := RatioGate{}
 	functional := RatioGate{}
 	stateDiff := RatioGate{}
 	behaviorContracts := RatioGate{}
@@ -165,6 +168,7 @@ func computeScore(input scanInput, getenv getenvFunc) (Score, error) {
 				&pythonReference,
 				&surface,
 				&help,
+				&optionParity,
 				&functional,
 				&stateDiff,
 				&behaviorContracts,
@@ -191,6 +195,7 @@ func computeScore(input scanInput, getenv getenvFunc) (Score, error) {
 					&pythonReference,
 					&surface,
 					&help,
+					&optionParity,
 					&functional,
 					&stateDiff,
 					&behaviorContracts,
@@ -266,6 +271,9 @@ func computeScore(input scanInput, getenv getenvFunc) (Score, error) {
 	if !help.Seen {
 		help = missingRatioGate()
 	}
+	if !optionParity.Seen {
+		optionParity = missingRatioGate()
+	}
 	if !functional.Seen {
 		functional = missingRatioGate()
 	}
@@ -290,6 +298,7 @@ func computeScore(input scanInput, getenv getenvFunc) (Score, error) {
 		PythonReferenceRequired: pythonReferenceSatisfied,
 		SurfaceParity:           surface.Percent(),
 		HelpParity:              help.Percent(),
+		OptionParity:            optionParity.Percent(),
 		FunctionalContracts:     functional.Percent(),
 		StateDiffContracts:      stateDiff.Percent(),
 		PythonBehaviorContracts: behaviorContracts.Percent(),
@@ -315,6 +324,7 @@ func computeScore(input scanInput, getenv getenvFunc) (Score, error) {
 	cutoverReady := gates.PythonReferenceRequired &&
 		gates.SurfaceParity == 1.0 &&
 		gates.HelpParity == 1.0 &&
+		gates.OptionParity == 1.0 &&
 		gates.FunctionalContracts == 1.0 &&
 		gates.StateDiffContracts == 1.0 &&
 		gates.PythonBehaviorContracts == 1.0 &&
@@ -355,6 +365,7 @@ func computeScore(input scanInput, getenv getenvFunc) (Score, error) {
 		PythonReferencePresent: gates.PythonReferenceRequired,
 		SurfaceParity:          gates.SurfaceParity,
 		HelpParity:             gates.HelpParity,
+		OptionParity:           gates.OptionParity,
 		FunctionalParity:       gates.FunctionalContracts,
 		StateDiffParity:        gates.StateDiffContracts,
 		KnownExceptions:        gates.KnownExceptions,
@@ -390,6 +401,7 @@ func applyGateEvent(
 	pythonReference *BoolGate,
 	surface *RatioGate,
 	help *RatioGate,
+	optionParity *RatioGate,
 	functional *RatioGate,
 	stateDiff *RatioGate,
 	behaviorContracts *RatioGate,
@@ -407,6 +419,8 @@ func applyGateEvent(
 		*surface = RatioGate{Seen: true, Passing: gate.Passing, Total: gate.Total}
 	case "help":
 		*help = RatioGate{Seen: true, Passing: gate.Passing, Total: gate.Total}
+	case "option_parity":
+		*optionParity = RatioGate{Seen: true, Passing: gate.Passing, Total: gate.Total}
 	case "functional":
 		*functional = RatioGate{Seen: true, Passing: gate.Passing, Total: gate.Total}
 	case "state_diff":
@@ -467,6 +481,7 @@ func gateResults(gates CutoverGates) []GateResult {
 		{Name: "go_tests_pass", Passing: gates.GoTests == "pass"},
 		{Name: "surface_parity", Passing: gates.SurfaceParity == 1.0},
 		{Name: "help_parity", Passing: gates.HelpParity == 1.0},
+		{Name: "option_parity", Passing: gates.OptionParity == 1.0},
 		{Name: "functional_contracts", Passing: gates.FunctionalContracts == 1.0},
 		{Name: "state_diff_contracts", Passing: gates.StateDiffContracts == 1.0},
 		{Name: "python_behavior_contracts", Passing: gates.PythonBehaviorContracts == 1.0},

diff --git a/.github/workflows/migration-ci.yml b/.github/workflows/migration-ci.yml
@@ -94,6 +94,15 @@ jobs:
         shell: bash
         run: |
           go build -o "$RUNNER_TEMP/apm-go" ./cmd/apm
+          enforce_behavior_contracts=false  # default: do not export the enforcement variable
+          if [ "${{ github.event_name }}" = "workflow_dispatch" ] && [ "${{ inputs.enforce_completion == true }}" = "true" ]; then
+            enforce_behavior_contracts=true
+          elif [ "${{ github.event_name }}" = "pull_request" ] && [[ "${GITHUB_HEAD_REF:-}" == crane/* ]]; then  # read the PR branch from the runner env; never template the untrusted head ref into the script
+            enforce_behavior_contracts=true
+          fi
+          if [ "$enforce_behavior_contracts" = "true" ]; then
+            export APM_ENFORCE_PYTHON_BEHAVIOR_CONTRACTS=1
+          fi
           set +e
           APM_GO_BIN="$RUNNER_TEMP/apm-go" \
             uv run pytest tests/parity/test_python_behavior_contracts.py -q --tb=short \
@@ -141,11 +150,22 @@ jobs:
       - name: Compute migration score
         run: |
           go run .crane/scripts/score.go < "$RUNNER_TEMP/go-test-events.json" | tee "$RUNNER_TEMP/migration-score.json"
+          coverage_args=(
+            --inventory "$RUNNER_TEMP/python-behavior-contracts.json"
+            --coverage tests/parity/python_contract_coverage.yml
+            --summary "$RUNNER_TEMP/python-contract-coverage.md"
+          )
+          if [ "${MIGRATION_COMPLETION_ENFORCED:-false}" != "true" ]; then
+            coverage_args+=(--allow-intentionally-incomplete --allow-obsolete-python-tests)
+          fi
+          set +e
           uv run python scripts/ci/python_behavior_contracts.py check \
-            --inventory "$RUNNER_TEMP/python-behavior-contracts.json" \
-            --coverage tests/parity/python_contract_coverage.yml \
-            --allow-intentionally-incomplete \
-            --summary "$RUNNER_TEMP/python-contract-coverage.md" || true
+            "${coverage_args[@]}"
+          coverage_status=$?
+          set -e
+          if [ "${MIGRATION_COMPLETION_ENFORCED:-false}" = "true" ] && [ "$coverage_status" != "0" ]; then
+            exit "$coverage_status"
+          fi
           python - "$RUNNER_TEMP/migration-score.json" "${MIGRATION_COMPLETION_ENFORCED:-false}" <<'PY'
           import json
           import sys

diff --git a/cmd/apm/CUTOVER.md b/cmd/apm/CUTOVER.md
@@ -6,32 +6,35 @@ framework in issue #78).
 
 ## Current State
 
-**Deletion-grade ready.** All 13 completion gates pass as of iteration 77.
+**Gate hardened; deletion-grade readiness is blocked.** The previous 13-gate
+score accepted representative behavior and help-only coverage mappings. The
+completion gate now requires strict option parity, behavior-backed Python test
+conversion mappings, and real Go-only command fixtures before the Go binary can
+be declared equivalent.
 
-The Go binary (`cmd/apm`) has full functional parity with the Python CLI.
-The Python CLI remains as the reference oracle until the explicit cutover
-steps below are executed, but it is no longer required for correctness.
+The Go binary (`cmd/apm`) is still evaluated against the Python CLI. It must not
+be treated as the shipped `apm` command until the strict gates below pass.
 
-Gate summary (all passing):
+Gate summary:
 
 | Gate | Status |
 |------|--------|
-| python_reference_required | pass |
-| surface_parity | 100% (855/855) |
-| help_parity | 100% |
-| functional_contracts | 100% |
-| state_diff_contracts | 100% |
-| python_behavior_contracts | 100% |
-| golden_fixture_corpus | pass |
-| all_go_golden_tests | pass |
-| no_python_runtime_dependency | pass |
-| known_exceptions | 0 |
-| go_tests | pass (900 tests) |
-| python_tests | pass (247 tests) |
-| benchmarks | pass |
-
-The Go binary is ready to replace Python as the shipped `apm` command once
-the cutover steps below are executed.
+| python_reference_required | required |
+| surface_parity | required |
+| help_parity | required |
+| option_parity | required; every Python CLI option must appear in Go help output |
+| functional_contracts | required |
+| state_diff_contracts | required |
+| python_behavior_contracts | required; no obsolete or help-only mappings |
+| golden_fixture_corpus | required |
+| all_go_golden_tests | required |
+| no_python_runtime_dependency | required |
+| known_exceptions | must be 0 |
+| go_tests | required |
+| python_tests | required, or superseded by the all-Go replay |
+| benchmarks | required |
+
+The Go binary is ready to replace Python only when all rows above pass in CI.
 
 ### Pre-Cutover Verification
 
@@ -48,9 +51,9 @@ The output must show `"migration_score": 1` and `"cutover_ready": true`.
 ## Real Criteria
 
 Every completion criterion must be backed by real command execution. The scorer
-does not infer completion from test names for `surface`, `help`, `functional`,
-`state_diff`, `python_behavior_contracts`, or `benchmarks`; each one must emit an
-explicit ratio gate.
+does not infer completion from test names for `surface`, `help`,
+`option_parity`, `functional`, `state_diff`, `python_behavior_contracts`, or
+`benchmarks`; each one must emit an explicit ratio gate.
 
 Crane must run `APM_PYTHON_BIN= go test ./cmd/apm -run TestGoCutover -json`.
 These fixture-backed tests execute the built Go `apm` binary in temporary
@@ -66,12 +69,20 @@ directly:
 {"crane":"gate","name":"no_python_runtime_dependency","passed":true}
 ```
 
+The Python-vs-Go inventory tests must also emit:
+
+```json
+{"crane":"gate","name":"option_parity","passing":N,"total":N}
+```
+
 `python_behavior_contracts` is not allowed to mean "the Python CLI was
-available." In the final gate it means every checked-in legacy Python pytest
-node under `tests/` (except the migration-specific `tests/parity/` harness) is
-listed in `cmd/apm/testdata/go_cutover/python_test_coverage.json` with one or
-more Go test names that replace it. An empty or partial manifest is a hard
-failure.
+available" or "the test was declared obsolete." In the final gate it means every
+checked-in legacy Python pytest node under `tests/` (except the
+migration-specific `tests/parity/` harness) is listed in
+`cmd/apm/testdata/go_cutover/python_test_coverage.json` with one or more existing
+real Go-only cutover behavior tests that replace it. Empty or partial mappings,
+stale Go test names, `python_tests.obsolete` entries, mappings to Python-vs-Go
+completion tests, and help-only/surface-only mappings are hard failures.
 
 Crane must also run the migration benchmark test. It executes fixture-backed
 Python-vs-Go benchmark workloads and emits:
@@ -111,7 +122,7 @@ completion.
 The Go binary becomes the shipped `apm` command when ALL of the following
 are true:
 
-1. All 26 commands respond correctly to `--help` (done)
+1. All public Python commands and options are present in Go help output
 2. The representative command matrix passes functional tests:
    `init`, `install`, `update`, `compile`, `pack`, `run`, `audit`,
    `policy`, `mcp`, `runtime`, `targets`, `list`, `view`, `cache`,
@@ -120,9 +131,11 @@ are true:
    fixture-backed real-command scenario and emits passing `functional` and
    `state_diff` gates
 4. `TestGoCutoverPythonTestConversionCoverage` proves every legacy Python test
-   has an explicit Go replacement in the cutover coverage manifest
-5. Python-vs-Go parity tests pass for all commands in the matrix while the
-   Python reference is still available
+   has an explicit existing Go-only behavior replacement in the cutover
+   coverage manifest; help-only, surface-only, coverage-only, obsolete, stale,
+   or Python-vs-Go completion mappings do not count
+5. Python-vs-Go parity tests pass for all commands, options, and unknown-option
+   paths while the Python reference is still available
 6. Migration benchmarks pass real fixture-backed command workloads and emit a
    passing counted `benchmarks` gate
 7. The final Python-reference parity run has been frozen into a committed,