From 3aa11ef0c8b5c12908c8bc0d4b63706842c16dbc Mon Sep 17 00:00:00 2001
From: mrjf <mrjf@github.com>
Date: Tue, 9 Jun 2026 14:05:04 -0700
Subject: [PATCH] harden Go migration completion gates

---
 .crane/scripts/score.go                       |  15 +++
 .github/workflows/migration-ci.yml            |  28 ++++-
 cmd/apm/CUTOVER.md                            |  79 +++++++------
 cmd/apm/go_cutover_coverage_test.go           |  90 ++++++++++++++-
 cmd/apm/python_behavior_contracts_test.go     |  76 ++++++++++---
 cmd/apm/real_behavior_test.go                 | 107 ++++++++++++++++++
 .../go_cutover/python_test_coverage.json      |   4 +-
 scripts/ci/python_behavior_contracts.py       |  27 ++++-
 tests/parity/README.md                        |  10 +-
 tests/parity/python_contract_coverage.yml     |   6 +-
 .../parity/test_python_behavior_contracts.py  |  55 ++++++++-
 tests/unit/test_crane_score.py                |   4 +
 tests/unit/test_migration_ci_workflow.py      |   2 +
 13 files changed, 437 insertions(+), 66 deletions(-)

diff --git a/.crane/scripts/score.go b/.crane/scripts/score.go
index e6e22b14..4c1fbacc 100644
--- a/.crane/scripts/score.go
+++ b/.crane/scripts/score.go
@@ -60,6 +60,7 @@ type CutoverGates struct {
 	PythonReferenceRequired bool    `json:"python_reference_required"`
 	SurfaceParity           float64 `json:"surface_parity"`
 	HelpParity              float64 `json:"help_parity"`
+	OptionParity            float64 `json:"option_parity"`
 	FunctionalContracts     float64 `json:"functional_contracts"`
 	StateDiffContracts      float64 `json:"state_diff_contracts"`
 	PythonBehaviorContracts float64 `json:"python_behavior_contracts"`
@@ -96,6 +97,7 @@ type Score struct {
 	PythonReferencePresent bool            `json:"python_reference_present"`
 	SurfaceParity          float64         `json:"surface_parity"`
 	HelpParity             float64         `json:"help_parity"`
+	OptionParity           float64         `json:"option_parity"`
 	FunctionalParity       float64         `json:"functional_parity"`
 	StateDiffParity        float64         `json:"state_diff_parity"`
 	KnownExceptions        int             `json:"known_exceptions"`
@@ -146,6 +148,7 @@ func computeScore(input scanInput, getenv getenvFunc) (Score, error) {
 	benchmarks := RatioGate{}
 	surface := RatioGate{}
 	help := RatioGate{}
+	optionParity := RatioGate{}
 	functional := RatioGate{}
 	stateDiff := RatioGate{}
 	behaviorContracts := RatioGate{}
@@ -165,6 +168,7 @@ func computeScore(input scanInput, getenv getenvFunc) (Score, error) {
 				&pythonReference,
 				&surface,
 				&help,
+				&optionParity,
 				&functional,
 				&stateDiff,
 				&behaviorContracts,
@@ -191,6 +195,7 @@ func computeScore(input scanInput, getenv getenvFunc) (Score, error) {
 					&pythonReference,
 					&surface,
 					&help,
+					&optionParity,
 					&functional,
 					&stateDiff,
 					&behaviorContracts,
@@ -266,6 +271,9 @@ func computeScore(input scanInput, getenv getenvFunc) (Score, error) {
 	if !help.Seen {
 		help = missingRatioGate()
 	}
+	if !optionParity.Seen {
+		optionParity = missingRatioGate()
+	}
 	if !functional.Seen {
 		functional = missingRatioGate()
 	}
@@ -290,6 +298,7 @@ func computeScore(input scanInput, getenv getenvFunc) (Score, error) {
 		PythonReferenceRequired: pythonReferenceSatisfied,
 		SurfaceParity:           surface.Percent(),
 		HelpParity:              help.Percent(),
+		OptionParity:            optionParity.Percent(),
 		FunctionalContracts:     functional.Percent(),
 		StateDiffContracts:      stateDiff.Percent(),
 		PythonBehaviorContracts: behaviorContracts.Percent(),
@@ -315,6 +324,7 @@ func computeScore(input scanInput, getenv getenvFunc) (Score, error) {
 	cutoverReady := gates.PythonReferenceRequired &&
 		gates.SurfaceParity == 1.0 &&
 		gates.HelpParity == 1.0 &&
+		gates.OptionParity == 1.0 &&
 		gates.FunctionalContracts == 1.0 &&
 		gates.StateDiffContracts == 1.0 &&
 		gates.PythonBehaviorContracts == 1.0 &&
@@ -355,6 +365,7 @@ func computeScore(input scanInput, getenv getenvFunc) (Score, error) {
 		PythonReferencePresent: gates.PythonReferenceRequired,
 		SurfaceParity:          gates.SurfaceParity,
 		HelpParity:             gates.HelpParity,
+		OptionParity:           gates.OptionParity,
 		FunctionalParity:       gates.FunctionalContracts,
 		StateDiffParity:        gates.StateDiffContracts,
 		KnownExceptions:        gates.KnownExceptions,
@@ -390,6 +401,7 @@ func applyGateEvent(
 	pythonReference *BoolGate,
 	surface *RatioGate,
 	help *RatioGate,
+	optionParity *RatioGate,
 	functional *RatioGate,
 	stateDiff *RatioGate,
 	behaviorContracts *RatioGate,
@@ -407,6 +419,8 @@ func applyGateEvent(
 		*surface = RatioGate{Seen: true, Passing: gate.Passing, Total: gate.Total}
 	case "help":
 		*help = RatioGate{Seen: true, Passing: gate.Passing, Total: gate.Total}
+	case "option_parity":
+		*optionParity = RatioGate{Seen: true, Passing: gate.Passing, Total: gate.Total}
 	case "functional":
 		*functional = RatioGate{Seen: true, Passing: gate.Passing, Total: gate.Total}
 	case "state_diff":
@@ -467,6 +481,7 @@ func gateResults(gates CutoverGates) []GateResult {
 		{Name: "go_tests_pass", Passing: gates.GoTests == "pass"},
 		{Name: "surface_parity", Passing: gates.SurfaceParity == 1.0},
 		{Name: "help_parity", Passing: gates.HelpParity == 1.0},
+		{Name: "option_parity", Passing: gates.OptionParity == 1.0},
 		{Name: "functional_contracts", Passing: gates.FunctionalContracts == 1.0},
 		{Name: "state_diff_contracts", Passing: gates.StateDiffContracts == 1.0},
 		{Name: "python_behavior_contracts", Passing: gates.PythonBehaviorContracts == 1.0},
diff --git a/.github/workflows/migration-ci.yml b/.github/workflows/migration-ci.yml
index 9041e6d9..d5958e05 100644
--- a/.github/workflows/migration-ci.yml
+++ b/.github/workflows/migration-ci.yml
@@ -94,6 +94,15 @@ jobs:
         shell: bash
         run: |
           go build -o "$RUNNER_TEMP/apm-go" ./cmd/apm
+          enforce_behavior_contracts=false  # PR branch is read from GITHUB_HEAD_REF below so PR-controlled text is never spliced into this script
+          if [ "${{ github.event_name }}" = "workflow_dispatch" ] && [ "${{ inputs.enforce_completion == true }}" = "true" ]; then
+            enforce_behavior_contracts=true
+          elif [ "${{ github.event_name }}" = "pull_request" ] && [[ "${GITHUB_HEAD_REF:-}" == crane/* ]]; then
+            enforce_behavior_contracts=true
+          fi
+          if [ "$enforce_behavior_contracts" = "true" ]; then
+            export APM_ENFORCE_PYTHON_BEHAVIOR_CONTRACTS=1
+          fi
           set +e
           APM_GO_BIN="$RUNNER_TEMP/apm-go" \
             uv run pytest tests/parity/test_python_behavior_contracts.py -q --tb=short \
@@ -141,11 +150,22 @@ jobs:
       - name: Compute migration score
         run: |
           go run .crane/scripts/score.go < "$RUNNER_TEMP/go-test-events.json" | tee "$RUNNER_TEMP/migration-score.json"
+          coverage_args=(
+            --inventory "$RUNNER_TEMP/python-behavior-contracts.json"
+            --coverage tests/parity/python_contract_coverage.yml
+            --summary "$RUNNER_TEMP/python-contract-coverage.md"
+          )
+          if [ "${MIGRATION_COMPLETION_ENFORCED:-false}" != "true" ]; then
+            coverage_args+=(--allow-intentionally-incomplete --allow-obsolete-python-tests)
+          fi
+          set +e
           uv run python scripts/ci/python_behavior_contracts.py check \
-            --inventory "$RUNNER_TEMP/python-behavior-contracts.json" \
-            --coverage tests/parity/python_contract_coverage.yml \
-            --allow-intentionally-incomplete \
-            --summary "$RUNNER_TEMP/python-contract-coverage.md" || true
+            "${coverage_args[@]}"
+          coverage_status=$?
+          set -e
+          if [ "${MIGRATION_COMPLETION_ENFORCED:-false}" = "true" ] && [ "$coverage_status" != "0" ]; then
+            exit "$coverage_status"
+          fi
           python - "$RUNNER_TEMP/migration-score.json" "${MIGRATION_COMPLETION_ENFORCED:-false}" <<'PY'
           import json
           import sys
diff --git a/cmd/apm/CUTOVER.md b/cmd/apm/CUTOVER.md
index b5744eeb..aa772e66 100644
--- a/cmd/apm/CUTOVER.md
+++ b/cmd/apm/CUTOVER.md
@@ -6,32 +6,35 @@ framework in issue #78).
 
 ## Current State
 
-**Deletion-grade ready.** All 13 completion gates pass as of iteration 77.
+**Gate hardened; deletion-grade readiness is blocked.** The previous 13-gate
+score accepted representative behavior and help-only coverage mappings. The
+completion gate now requires strict option parity, behavior-backed Python test
+conversion mappings, and real Go-only command fixtures before the Go binary can
+be declared equivalent.
 
-The Go binary (`cmd/apm`) has full functional parity with the Python CLI.
-The Python CLI remains as the reference oracle until the explicit cutover
-steps below are executed, but it is no longer required for correctness.
+The Go binary (`cmd/apm`) is still evaluated against the Python CLI. It must not
+be treated as the shipped `apm` command until the strict gates below pass.
 
-Gate summary (all passing):
+Gate summary:
 
 | Gate | Status |
 |------|--------|
-| python_reference_required | pass |
-| surface_parity | 100% (855/855) |
-| help_parity | 100% |
-| functional_contracts | 100% |
-| state_diff_contracts | 100% |
-| python_behavior_contracts | 100% |
-| golden_fixture_corpus | pass |
-| all_go_golden_tests | pass |
-| no_python_runtime_dependency | pass |
-| known_exceptions | 0 |
-| go_tests | pass (900 tests) |
-| python_tests | pass (247 tests) |
-| benchmarks | pass |
-
-The Go binary is ready to replace Python as the shipped `apm` command once
-the cutover steps below are executed.
+| python_reference_required | required |
+| surface_parity | required |
+| help_parity | required |
+| option_parity | required; every Python CLI option must appear in Go help |
+| functional_contracts | required |
+| state_diff_contracts | required |
+| python_behavior_contracts | required; no obsolete or help-only mappings |
+| golden_fixture_corpus | required |
+| all_go_golden_tests | required |
+| no_python_runtime_dependency | required |
+| known_exceptions | must be 0 |
+| go_tests | required |
+| python_tests | required, or superseded by the all-Go replay |
+| benchmarks | required |
+
+The Go binary is ready to replace Python only when all rows above pass in CI.
 
 ### Pre-Cutover Verification
 
@@ -48,9 +51,9 @@ The output must show `"migration_score": 1` and `"cutover_ready": true`.
 ## Real Criteria
 
 Every completion criterion must be backed by real command execution. The scorer
-does not infer completion from test names for `surface`, `help`, `functional`,
-`state_diff`, `python_behavior_contracts`, or `benchmarks`; each one must emit an
-explicit ratio gate.
+does not infer completion from test names for `surface`, `help`,
+`option_parity`, `functional`, `state_diff`, `python_behavior_contracts`, or
+`benchmarks`; each one must emit an explicit ratio gate.
 
 Crane must run `APM_PYTHON_BIN= go test ./cmd/apm -run TestGoCutover -json`.
 These fixture-backed tests execute the built Go `apm` binary in temporary
@@ -66,12 +69,20 @@ directly:
 {"crane":"gate","name":"no_python_runtime_dependency","passed":true}
 ```
 
+The Python-vs-Go inventory tests must also emit:
+
+```json
+{"crane":"gate","name":"option_parity","passing":N,"total":N}
+```
+
 `python_behavior_contracts` is not allowed to mean "the Python CLI was
-available." In the final gate it means every checked-in legacy Python pytest
-node under `tests/` (except the migration-specific `tests/parity/` harness) is
-listed in `cmd/apm/testdata/go_cutover/python_test_coverage.json` with one or
-more Go test names that replace it. An empty or partial manifest is a hard
-failure.
+available" or "the test was declared obsolete." In the final gate it means every
+checked-in legacy Python pytest node under `tests/` (except the
+migration-specific `tests/parity/` harness) is listed in
+`cmd/apm/testdata/go_cutover/python_test_coverage.json` with one or more
+existing real Go-only cutover behavior tests that replace it. Empty mappings,
+partial mappings, stale Go test names, `python_tests.obsolete`, Python-vs-Go
+completion tests, and help-only/surface-only mappings are hard failures.
 
 Crane must also run the migration benchmark test. It executes fixture-backed
 Python-vs-Go benchmark workloads and emits:
@@ -111,7 +122,7 @@ completion.
 The Go binary becomes the shipped `apm` command when ALL of the following
 are true:
 
-1. All 26 commands respond correctly to `--help` (done)
+1. All public Python commands and options are present in Go help output
 2. The representative command matrix passes functional tests:
    `init`, `install`, `update`, `compile`, `pack`, `run`, `audit`,
    `policy`, `mcp`, `runtime`, `targets`, `list`, `view`, `cache`,
@@ -120,9 +131,11 @@ are true:
    fixture-backed real-command scenario and emits passing `functional` and
    `state_diff` gates
 4. `TestGoCutoverPythonTestConversionCoverage` proves every legacy Python test
-   has an explicit Go replacement in the cutover coverage manifest
-5. Python-vs-Go parity tests pass for all commands in the matrix while the
-   Python reference is still available
+   has an explicit existing Go-only behavior replacement in the cutover
+   coverage manifest; help-only, surface-only, coverage-only, obsolete, stale,
+   or Python-vs-Go completion mappings do not count
+5. Python-vs-Go parity tests pass for all commands, options, and unknown-option
+   paths while the Python reference is still available
 6. Migration benchmarks pass real fixture-backed command workloads and emit a
    passing counted `benchmarks` gate
 7. The final Python-reference parity run has been frozen into a committed,
diff --git a/cmd/apm/go_cutover_coverage_test.go b/cmd/apm/go_cutover_coverage_test.go
index baeec7ef..5ee56144 100644
--- a/cmd/apm/go_cutover_coverage_test.go
+++ b/cmd/apm/go_cutover_coverage_test.go
@@ -26,27 +26,40 @@ type pythonClassContext struct {
 var (
 	pythonClassRE = regexp.MustCompile(`^class\s+(Test[A-Za-z0-9_]*)\b`)
 	pythonTestRE  = regexp.MustCompile(`^(?:async\s+)?def\s+(test_[A-Za-z0-9_]*)\b`)
+	goTestFuncRE  = regexp.MustCompile(`^func\s+(Test[A-Za-z0-9_]*)\s*\(`)
 )
 
 func TestGoCutoverPythonTestConversionCoverage(t *testing.T) {
 	root := completionModuleRoot(t)
 	pythonTests := discoverPythonTestsForCutover(t, root)
+	goTests := discoverGoTestsForCutover(t, root)
 	coverage := loadGoCutoverPythonTestCoverage(t, root)
 
-	converted := 0
+	behaviorBacked := 0
 	var missing []string
+	var unknown []string
+	var weak []string
 	for _, id := range pythonTests {
 		tests := coverage.ConvertedPythonTests[id]
 		if len(tests) == 0 {
 			missing = append(missing, id)
 			continue
 		}
-		converted++
+		for _, testName := range tests {
+			if _, ok := goTests[testName]; !ok {
+				unknown = append(unknown, fmt.Sprintf("%s -> %s", id, testName))
+			}
+		}
+		if !hasBehaviorBackedGoTest(tests, goTests) {
+			weak = append(weak, fmt.Sprintf("%s -> %s", id, strings.Join(tests, ", ")))
+			continue
+		}
+		behaviorBacked++
 	}
 
-	defer emitCraneRatioGate("python_behavior_contracts", converted, len(pythonTests))
-	defer emitCraneBoolGate("golden_fixture_corpus", converted == len(pythonTests) && len(pythonTests) > 0)
-	defer emitCraneBoolGate("all_go_golden_tests", converted == len(pythonTests) && len(pythonTests) > 0)
+	defer emitCraneRatioGate("python_behavior_contracts", behaviorBacked, len(pythonTests))
+	defer emitCraneBoolGate("golden_fixture_corpus", behaviorBacked == len(pythonTests) && len(pythonTests) > 0)
+	defer emitCraneBoolGate("all_go_golden_tests", behaviorBacked == len(pythonTests) && len(pythonTests) > 0)
 
 	if len(pythonTests) == 0 {
 		t.Fatal("no Python tests discovered under tests/; coverage gate cannot prove conversion")
@@ -57,12 +70,28 @@ func TestGoCutoverPythonTestConversionCoverage(t *testing.T) {
 	if len(missing) > 0 {
 		t.Fatalf(
 			"Go cutover coverage incomplete: %d/%d Python tests mapped to Go tests; %d missing.\nFirst missing tests:\n%s",
-			converted,
+			len(pythonTests)-len(missing), // mapped count; behaviorBacked would also subtract weak mappings
 			len(pythonTests),
 			len(missing),
 			formatCutoverMissing(missing, 80),
 		)
 	}
+	if len(unknown) > 0 {
+		sort.Strings(unknown)
+		t.Fatalf(
+			"Go cutover coverage references Go tests that do not exist: %d stale mappings.\nFirst stale mappings:\n%s",
+			len(unknown),
+			formatCutoverMissing(unknown, 80),
+		)
+	}
+	if len(weak) > 0 {
+		t.Fatalf(
+			"Go cutover coverage is not behavior-backed: %d/%d Python tests do not map to a real Go-only cutover behavior test.\nFirst weak mappings:\n%s",
+			len(weak),
+			len(pythonTests),
+			formatCutoverMissing(weak, 80),
+		)
+	}
 }
 
 func TestGoCutoverNoPythonRuntimeDependency(t *testing.T) {
@@ -166,6 +195,42 @@ func scanPythonTestFile(t *testing.T, root, path string) ([]string, error) {
 	return ids, nil
 }
 
+// discoverGoTestsForCutover returns the Test* function names declared in _test.go files
+// under cmd/apm. It skips testdata directories, which the go tool never compiles, and reads
+// each file whole so a line longer than bufio.Scanner's token limit cannot abort discovery.
+func discoverGoTestsForCutover(t *testing.T, root string) map[string]struct{} {
+	t.Helper()
+	tests := map[string]struct{}{}
+	err := filepath.WalkDir(filepath.Join(root, "cmd", "apm"), func(path string, entry os.DirEntry, err error) error {
+		if err != nil {
+			return err
+		}
+		if entry.IsDir() && entry.Name() == "testdata" {
+			return filepath.SkipDir
+		}
+		if entry.IsDir() || !strings.HasSuffix(entry.Name(), "_test.go") {
+			return nil
+		}
+		data, readErr := os.ReadFile(path)
+		if readErr != nil {
+			return readErr
+		}
+		for _, line := range strings.Split(string(data), "\n") {
+			if match := goTestFuncRE.FindStringSubmatch(strings.TrimSpace(line)); match != nil {
+				tests[match[1]] = struct{}{}
+			}
+		}
+		return nil
+	})
+	if err != nil {
+		t.Fatalf("discover Go tests: %v", err)
+	}
+	if len(tests) == 0 {
+		t.Fatal("no Go tests discovered under cmd/apm; coverage gate cannot prove conversion")
+	}
+	return tests
+}
+
 func loadGoCutoverPythonTestCoverage(t *testing.T, root string) goCutoverPythonTestCoverage {
 	t.Helper()
 	path := filepath.Join(root, "cmd", "apm", "testdata", "go_cutover", "python_test_coverage.json")
@@ -197,6 +262,19 @@ func formatCutoverMissing(missing []string, limit int) string {
 	return strings.Join(lines, "\n")
 }
 
+// hasBehaviorBackedGoTest reports whether any mapped name is a declared behavior-backed test.
+func hasBehaviorBackedGoTest(names []string, existing map[string]struct{}) bool {
+	for _, candidate := range names {
+		if _, declared := existing[candidate]; isBehaviorBackedGoTest(candidate) && declared {
+			return true
+		}
+	}
+	return false
+}
+
+// isBehaviorBackedGoTest reports whether name carries the "TestGoCutoverReal" prefix.
+func isBehaviorBackedGoTest(name string) bool { return strings.HasPrefix(name, "TestGoCutoverReal") }
+
 func leadingWhitespaceWidth(line string) int {
 	width := 0
 	for _, r := range line {
diff --git a/cmd/apm/python_behavior_contracts_test.go b/cmd/apm/python_behavior_contracts_test.go
index 38303711..5530ee76 100644
--- a/cmd/apm/python_behavior_contracts_test.go
+++ b/cmd/apm/python_behavior_contracts_test.go
@@ -97,6 +97,23 @@ func contractHelpArgs(command pythonCommandContract) []string {
 	return args
 }
 
+// pythonCommandOptionNames lists every non-empty flag spelling of the command's Option params.
+func pythonCommandOptionNames(command pythonCommandContract) []string {
+	var names []string
+	for _, param := range command.Params {
+		if param.Type != "Option" {
+			continue
+		}
+		flags := append(append([]string{}, param.Opts...), param.SecondaryOpts...)
+		for _, flag := range flags {
+			if flag != "" {
+				names = append(names, flag)
+			}
+		}
+	}
+	return names
+}
+
 func normalizeContractHelp(text string) string {
 	var lines []string
 	for _, line := range strings.Split(text, "\n") {
@@ -144,35 +161,68 @@ func TestParityPythonOptionsFromSource(t *testing.T) {
 		return
 	}
 	inv := loadPythonBehaviorInventory(t, false)
+	totalOptions := 0
+	missingOptions := 0
+	var missingDetails []string
+	defer func() {
+		passing := totalOptions - missingOptions
+		if totalOptions == 0 {
+			emitCraneRatioGate("option_parity", 0, 1)
+			return
+		}
+		emitCraneRatioGate("option_parity", passing, totalOptions)
+	}()
+
 	for _, command := range inv.Commands {
 		command := command
 		if command.Hidden {
 			continue
 		}
 		t.Run(command.ID, func(t *testing.T) {
+			commandOptions := pythonCommandOptionNames(command)
+			totalOptions += len(commandOptions)
 			goOut, goErr, goCode := runGo(t, contractHelpArgs(command)...)
 			if goCode != 0 {
+				missingOptions += len(commandOptions)
+				for _, opt := range commandOptions {
+					missingDetails = append(missingDetails, fmt.Sprintf("%s missing %s", command.ID, opt))
+				}
 				t.Fatalf("Go help for %s exited %d\nstdout:\n%s\nstderr:\n%s",
 					command.ID, goCode, goOut, goErr)
 			}
 			help := normalizeContractHelp(goOut + goErr)
-			for _, param := range command.Params {
-				if param.Type != "Option" {
-					continue
-				}
-				opts := append([]string{}, param.Opts...)
-				opts = append(opts, param.SecondaryOpts...)
-				for _, opt := range opts {
-					if opt == "" {
-						continue
-					}
-					if !strings.Contains(help, opt) {
-						t.Logf("TRACKING: %s help missing Python option %s (migration in progress)", command.ID, opt)
-					}
+			var commandMissing []string
+			for _, opt := range commandOptions {
+				if !strings.Contains(help, opt) {
+					missingOptions++
+					detail := fmt.Sprintf("%s missing %s", command.ID, opt)
+					commandMissing = append(commandMissing, detail)
+					missingDetails = append(missingDetails, detail)
 				}
 			}
+			if len(commandMissing) == 0 {
+				return
+			}
+			message := "Python option parity incomplete:\n" + formatCutoverMissing(commandMissing, 30)
+			if completionGatesEnforced() {
+				t.Error(message)
+			} else {
+				t.Logf("TRACKING: %s", message)
+			}
 		})
 	}
+	if totalOptions == 0 {
+		completionGateFailure(t, "HARD-GATE FAILED: Python inventory exposed no options; option parity cannot be verified")
+		return
+	}
+	if completionGatesEnforced() && missingOptions > 0 {
+		t.Fatalf(
+			"HARD-GATE FAILED: Go help is missing %d/%d Python CLI options.\nFirst missing options:\n%s",
+			missingOptions,
+			totalOptions,
+			formatCutoverMissing(missingDetails, 80),
+		)
+	}
 }
 
 func TestParityCompletionPythonBehaviorContracts(t *testing.T) {
diff --git a/cmd/apm/real_behavior_test.go b/cmd/apm/real_behavior_test.go
index cd825a09..f117d6cf 100644
--- a/cmd/apm/real_behavior_test.go
+++ b/cmd/apm/real_behavior_test.go
@@ -176,6 +176,71 @@ func TestGoCutoverRealFunctionalAndStateDiffContracts(t *testing.T) {
 				return ok
 			},
 		},
+		{
+			name:  "config get reads persisted configuration value",
+			args:  []string{"config", "get", "auto-integrate"},
+			env:   map[string]string{"APM_CONFIG_PATH": "apm-config.yml"},
+			setup: realBehaviorSetupConfigValue,
+			verify: func(t *testing.T, dir, stdout, stderr string, code int) bool {
+				ok := realBehaviorExpectExit(t, stdout, stderr, code, 0)
+				ok = realBehaviorExpectOutputContains(t, stdout+stderr, "false") && ok
+				ok = realBehaviorExpectFileContains(t, filepath.Join(dir, "apm-config.yml"), "auto-integrate") && ok
+				return ok
+			},
+		},
+		{
+			name:  "config unset removes persisted configuration value",
+			args:  []string{"config", "unset", "auto-integrate"},
+			env:   map[string]string{"APM_CONFIG_PATH": "apm-config.yml"},
+			setup: realBehaviorSetupConfigValue,
+			verify: func(t *testing.T, dir, stdout, stderr string, code int) bool {
+				ok := realBehaviorExpectExit(t, stdout, stderr, code, 0)
+				ok = realBehaviorExpectFileNotContains(t, filepath.Join(dir, "apm-config.yml"), "auto-integrate") && ok
+				return ok
+			},
+		},
+		{
+			name:  "mcp list reads manifest MCP dependencies",
+			args:  []string{"mcp", "list"},
+			setup: realBehaviorSetupMCPProject,
+			verify: func(t *testing.T, _ string, stdout, stderr string, code int) bool {
+				ok := realBehaviorExpectExit(t, stdout, stderr, code, 0)
+				ok = realBehaviorExpectOutputContains(t, stdout+stderr, "example-server") && ok
+				return ok
+			},
+		},
+		{
+			name:  "marketplace remove deletes registered marketplace",
+			args:  []string{"marketplace", "remove", "local"},
+			setup: realBehaviorSetupMarketplaceProject,
+			verify: func(t *testing.T, dir, stdout, stderr string, code int) bool {
+				ok := realBehaviorExpectExit(t, stdout, stderr, code, 0)
+				ok = realBehaviorExpectFileNotContains(t, filepath.Join(dir, "apm.yml"), "file://./marketplace.json") && ok
+				return ok
+			},
+		},
+		{
+			name: "marketplace validate rejects missing registered marketplace",
+			args: []string{"marketplace", "validate", "missing"},
+			verify: func(t *testing.T, _ string, stdout, stderr string, code int) bool {
+				if code == 0 {
+					realBehaviorFailure(t, "expected non-zero exit for missing marketplace validation\nstdout: %s\nstderr: %s", stdout, stderr)
+					return false
+				}
+				return true
+			},
+		},
+		{
+			name:  "runtime remove deletes persisted runtime config",
+			args:  []string{"runtime", "remove", "codex"},
+			env:   map[string]string{"APM_CONFIG_PATH": "apm-config.yml"},
+			setup: realBehaviorSetupRuntimeConfig,
+			verify: func(t *testing.T, dir, stdout, stderr string, code int) bool {
+				ok := realBehaviorExpectExit(t, stdout, stderr, code, 0)
+				ok = realBehaviorExpectFileNotContains(t, filepath.Join(dir, "apm-config.yml"), "codex") && ok
+				return ok
+			},
+		},
 		{
 			name:  "cache clean removes entries but preserves cache root",
 			args:  []string{"cache", "clean"},
@@ -437,6 +502,39 @@ policy:
 `)
 }
 
+func realBehaviorSetupConfigValue(t *testing.T, dir string) {
+	t.Helper() // fixture: apm-config.yml in dir with auto-integrate set to false
+	realBehaviorWriteFile(t, filepath.Join(dir, "apm-config.yml"), "auto-integrate: false\n")
+}
+
+func realBehaviorSetupMCPProject(t *testing.T, dir string) {
+	t.Helper() // fixture: apm.yml in dir whose only dependency is the MCP entry "example-server"
+	realBehaviorWriteFile(t, filepath.Join(dir, "apm.yml"), `name: mcp-fixture
+version: 1.0.0
+dependencies:
+  apm: []
+  mcp:
+    - example-server
+`)
+}
+
+func realBehaviorSetupMarketplaceProject(t *testing.T, dir string) {
+	t.Helper() // fixture: apm.yml in dir with a "local" marketplace entry and no dependencies
+	realBehaviorWriteFile(t, filepath.Join(dir, "apm.yml"), `name: marketplace-fixture
+version: 1.0.0
+dependencies:
+  apm: []
+  mcp: []
+marketplace:
+  local: file://./marketplace.json
+`)
+}
+
+func realBehaviorSetupRuntimeConfig(t *testing.T, dir string) {
+	t.Helper() // fixture: apm-config.yml in dir containing "runtime: codex"
+	realBehaviorWriteFile(t, filepath.Join(dir, "apm-config.yml"), "runtime: codex\n")
+}
+
 func realBehaviorSetupCacheRoot(t *testing.T, dir string) {
 	t.Helper()
 	realBehaviorWriteFile(t, filepath.Join(dir, "cache-root", "http_v1", "old", "body"), "cached\n")
@@ -503,6 +601,15 @@ func realBehaviorExpectFileNotContains(t *testing.T, path, needle string) bool {
 	return true
 }
 
+func realBehaviorExpectOutputContains(t *testing.T, output, needle string) bool {
+	t.Helper()
+	found := strings.Contains(output, needle)
+	if !found {
+		realBehaviorFailure(t, "expected command output to contain %q, got:\n%s", needle, output)
+	}
+	return found // false after a reported failure, true when needle occurs in output
+}
+
 func realBehaviorExpectPathExists(t *testing.T, path string) bool {
 	t.Helper()
 	if _, err := os.Stat(path); err != nil {
diff --git a/cmd/apm/testdata/go_cutover/python_test_coverage.json b/cmd/apm/testdata/go_cutover/python_test_coverage.json
index 25e3b5fa..6bc4d9d6 100644
--- a/cmd/apm/testdata/go_cutover/python_test_coverage.json
+++ b/cmd/apm/testdata/go_cutover/python_test_coverage.json
@@ -73518,6 +73518,6 @@
       "TestGoCutoverRealFunctionalAndStateDiffContracts"
     ]
   },
-  "description": "Go cutover coverage manifest. Every legacy Python pytest node under tests/ (except tests/parity/) must appear here with one or more Go test names before the Go CLI can be declared a 100% migration.",
+  "description": "Go cutover coverage manifest. Every legacy Python pytest node under tests/ (except tests/parity/) must appear here with one or more existing real Go-only cutover behavior tests before the Go CLI can be declared a 100% migration; help-only, surface-only, coverage-only, Python-vs-Go completion, stale-name, and obsolete mappings do not count.",
   "schema_version": 1
-}
\ No newline at end of file
+}
diff --git a/scripts/ci/python_behavior_contracts.py b/scripts/ci/python_behavior_contracts.py
index 56cd982e..91b64ed1 100644
--- a/scripts/ci/python_behavior_contracts.py
+++ b/scripts/ci/python_behavior_contracts.py
@@ -299,7 +299,12 @@ def _has_tests(entry: dict[str, Any], key: str) -> bool:
     )
 
 
-def check_coverage(inventory: dict[str, Any], coverage: dict[str, Any]) -> list[Finding]:
+def check_coverage(
+    inventory: dict[str, Any],
+    coverage: dict[str, Any],
+    *,
+    allow_obsolete: bool = False,
+) -> list[Finding]:
     findings: list[Finding] = []
     command_coverage = coverage.get("commands") or {}
     if not isinstance(command_coverage, dict):
@@ -333,6 +338,15 @@ def check_coverage(inventory: dict[str, Any], coverage: dict[str, Any]) -> list[
     for test in inventory["tests"]:
         test_id = test["id"]
         if test_id in obsolete_tests:
+            if allow_obsolete:
+                continue
+            findings.append(
+                Finding(
+                    "obsolete-python-test-coverage",
+                    "Python test is marked obsolete instead of mapped to Go or CLI-agnostic tests",
+                    test_id,
+                )
+            )
             continue
         entry = test_coverage.get(test_id)
         if not isinstance(entry, dict):
@@ -398,7 +412,11 @@ def cmd_extract(args: argparse.Namespace) -> int:
 def cmd_check(args: argparse.Namespace) -> int:
     inventory = _load_inventory(Path(args.inventory) if args.inventory else None)
     coverage = _load_coverage(Path(args.coverage))
-    findings = check_coverage(inventory, coverage)
+    findings = check_coverage(
+        inventory,
+        coverage,
+        allow_obsolete=args.allow_obsolete_python_tests,
+    )
     summary = render_summary(inventory, findings)
     if args.summary:
         Path(args.summary).write_text(summary, encoding="utf-8")
@@ -439,6 +457,11 @@ def main(argv: list[str] | None = None) -> int:
         action="store_true",
         help="report findings without failing when the manifest is marked incomplete",
     )
+    check.add_argument(
+        "--allow-obsolete-python-tests",
+        action="store_true",
+        help="report-only mode: allow python_tests.obsolete entries instead of requiring conversion mappings",
+    )
     check.set_defaults(func=cmd_check)
 
     args = parser.parse_args(argv)
diff --git a/tests/parity/README.md b/tests/parity/README.md
index 5ac78fb9..525e9dcd 100644
--- a/tests/parity/README.md
+++ b/tests/parity/README.md
@@ -16,7 +16,13 @@ contracts to parity evidence. The completion scorer must not reach
 `migration_score = 1.0` while any extracted command or Python test lacks mapped
 coverage.
 
+Completion coverage must be behavior-backed. `python_tests.obsolete` is allowed
+only in report-only summaries, and a Python test mapped only to help, surface,
+Python-vs-Go completion, stale-name, or coverage bookkeeping tests does not
+count as converted.
+
 `status: intentionally-incomplete` is a progress marker only. It must make
 completion scoring fail; use `--allow-intentionally-incomplete` only for
-report-only summaries. Set `APM_ENFORCE_PYTHON_BEHAVIOR_CONTRACTS=1` when a
-local or CI check should hard-fail instead of reporting incomplete progress.
+report-only summaries. Use `--allow-obsolete-python-tests` only for the same
+report-only mode. Set `APM_ENFORCE_PYTHON_BEHAVIOR_CONTRACTS=1` when a local or
+CI check should hard-fail instead of reporting incomplete progress.
diff --git a/tests/parity/python_contract_coverage.yml b/tests/parity/python_contract_coverage.yml
index c9aed491..d610f6ce 100644
--- a/tests/parity/python_contract_coverage.yml
+++ b/tests/parity/python_contract_coverage.yml
@@ -1,7 +1,7 @@
 schema_version: 1
-description: Coverage manifest for the Python-to-Go CLI migration. All Python unit and integration tests are listed as obsolete
-  because they are reference implementation tests, not parity evidence. Parity evidence is provided by Go and CLI-agnostic
-  contract tests mapped in the commands section.
+description: Coverage manifest for the Python-to-Go CLI migration. Completion mode requires every Python test to be mapped
+  to behavior-backed Go or CLI-agnostic parity evidence. python_tests.obsolete is report-only debt and does not count toward
+  deletion-grade readiness.
 commands:
   apm:
     go_tests:
diff --git a/tests/parity/test_python_behavior_contracts.py b/tests/parity/test_python_behavior_contracts.py
index 4bb5f696..01cd083b 100644
--- a/tests/parity/test_python_behavior_contracts.py
+++ b/tests/parity/test_python_behavior_contracts.py
@@ -161,8 +161,9 @@ def test_every_python_command_rejects_unknown_option_consistently(
 
 def test_python_contract_coverage_manifest_is_complete(inventory: dict[str, object]) -> None:
     coverage = _load_coverage(ROOT / "tests" / "parity" / "python_contract_coverage.yml")
+    enforce = os.environ.get("APM_ENFORCE_PYTHON_BEHAVIOR_CONTRACTS") == "1"
     if coverage.get("status") == "intentionally-incomplete":
-        if os.environ.get("APM_ENFORCE_PYTHON_BEHAVIOR_CONTRACTS") != "1":
+        if not enforce:
             pytest.xfail(
                 "Coverage manifest is intentionally incomplete; completion gate "
                 "is reported by migration_score"
@@ -172,4 +173,56 @@ def test_python_contract_coverage_manifest_is_complete(inventory: dict[str, obje
             "only after all contracts are mapped"
         )
     findings = check_coverage(inventory, coverage)
+    if findings and not enforce:
+        pytest.xfail(render_summary(inventory, findings))
     assert not findings, render_summary(inventory, findings)
+
+
+def test_python_contract_coverage_rejects_obsolete_tests_by_default() -> None:
+    inventory = {  # one Python test; no commands or source contracts
+        "summary": {
+            "commands": 0,
+            "public_commands": 0,
+            "python_tests": 1,
+            "python_test_cases": 1,
+            "source_contracts": 0,
+        },
+        "commands": [],
+        "tests": [{"id": "tests/unit/test_example.py::test_real_behavior"}],
+        "source_contracts": [],
+    }
+    coverage = {  # the only test is listed under python_tests.obsolete
+        "commands": {},
+        "python_tests": {
+            "covered": {},  # nothing is mapped as covered
+            "obsolete": ["tests/unit/test_example.py::test_real_behavior"],
+        },
+    }
+
+    findings = check_coverage(inventory, coverage)  # allow_obsolete is not passed
+
+    assert [finding.code for finding in findings] == ["obsolete-python-test-coverage"]
+
+
+def test_python_contract_coverage_can_allow_obsolete_tests_in_report_only_mode() -> None:
+    inventory = {  # one Python test; no commands or source contracts
+        "summary": {
+            "commands": 0,
+            "public_commands": 0,
+            "python_tests": 1,
+            "python_test_cases": 1,
+            "source_contracts": 0,
+        },
+        "commands": [],
+        "tests": [{"id": "tests/unit/test_example.py::test_real_behavior"}],
+        "source_contracts": [],
+    }
+    coverage = {  # the only test is listed under python_tests.obsolete
+        "commands": {},
+        "python_tests": {
+            "covered": {},  # nothing is mapped as covered
+            "obsolete": ["tests/unit/test_example.py::test_real_behavior"],
+        },
+    }
+
+    assert check_coverage(inventory, coverage, allow_obsolete=True) == []  # opt-in: no findings
diff --git a/tests/unit/test_crane_score.py b/tests/unit/test_crane_score.py
index f7e958b5..243e205b 100644
--- a/tests/unit/test_crane_score.py
+++ b/tests/unit/test_crane_score.py
@@ -71,6 +71,7 @@ def _deletion_gates() -> list[str]:
         '{"crane":"gate","name":"python_reference","passed":true}',
         '{"crane":"gate","name":"surface","passing":1,"total":1}',
         '{"crane":"gate","name":"help","passing":1,"total":1}',
+        '{"crane":"gate","name":"option_parity","passing":1,"total":1}',
         '{"crane":"gate","name":"functional","passing":1,"total":1}',
         '{"crane":"gate","name":"state_diff","passing":1,"total":1}',
         '{"crane":"gate","name":"python_behavior_contracts","passing":1,"total":1}',
@@ -88,6 +89,7 @@ def _completion_gate_events() -> list[str]:
         "TestParityCompletionHardGate",
         "TestParityCompletionSurfaceParity",
         "TestParityCompletionCommandMatrix",
+        "TestParityPythonOptionsFromSource",
         "TestParityCompletionHelpIdentical",
         "TestParityCompletionFunctionalContracts",
         "TestParityCompletionStateDiffContracts",
@@ -181,6 +183,7 @@ def test_crane_score_can_reach_one_with_all_deletion_grade_gates() -> None:
         "python_reference_required": True,
         "surface_parity": 1.0,
         "help_parity": 1.0,
+        "option_parity": 1.0,
         "functional_contracts": 1.0,
         "state_diff_contracts": 1.0,
         "python_behavior_contracts": 1.0,
@@ -219,6 +222,7 @@ def test_crane_score_can_reach_one_with_no_python_all_go_replay() -> None:
     [
         '{"crane":"gate","name":"surface","passing":0,"total":1}',
         '{"crane":"gate","name":"help","passing":0,"total":1}',
+        '{"crane":"gate","name":"option_parity","passing":0,"total":1}',
         '{"crane":"gate","name":"functional","passing":0,"total":1}',
         '{"crane":"gate","name":"state_diff","passing":0,"total":1}',
         '{"crane":"gate","name":"python_behavior_contracts","passing":0,"total":1}',
diff --git a/tests/unit/test_migration_ci_workflow.py b/tests/unit/test_migration_ci_workflow.py
index 9b9fde79..15a0286c 100644
--- a/tests/unit/test_migration_ci_workflow.py
+++ b/tests/unit/test_migration_ci_workflow.py
@@ -16,6 +16,8 @@ def test_migration_ci_enforces_completion_for_crane_prs_and_explicit_manual_runs
     assert "enforce_completion:" in text
     assert "MIGRATION_COMPLETION_ENFORCED=$enforce_completion" in text
     assert "APM_ENFORCE_COMPLETION_GATES=1" in text
+    assert "APM_ENFORCE_PYTHON_BEHAVIOR_CONTRACTS=1" in text
+    assert "--allow-obsolete-python-tests" in text
     assert "inputs.enforce_completion == true" in text
     assert 'github.event.pull_request.head.ref }}" == crane/*' in text
     assert "manual runs with enforce_completion=true" in text