From 3aa11ef0c8b5c12908c8bc0d4b63706842c16dbc Mon Sep 17 00:00:00 2001 From: mrjf Date: Tue, 9 Jun 2026 14:05:04 -0700 Subject: [PATCH] harden Go migration completion gates --- .crane/scripts/score.go | 15 +++ .github/workflows/migration-ci.yml | 28 ++++- cmd/apm/CUTOVER.md | 79 +++++++------ cmd/apm/go_cutover_coverage_test.go | 90 ++++++++++++++- cmd/apm/python_behavior_contracts_test.go | 76 ++++++++++--- cmd/apm/real_behavior_test.go | 107 ++++++++++++++++++ .../go_cutover/python_test_coverage.json | 4 +- scripts/ci/python_behavior_contracts.py | 27 ++++- tests/parity/README.md | 10 +- tests/parity/python_contract_coverage.yml | 6 +- .../parity/test_python_behavior_contracts.py | 55 ++++++++- tests/unit/test_crane_score.py | 4 + tests/unit/test_migration_ci_workflow.py | 2 + 13 files changed, 437 insertions(+), 66 deletions(-) diff --git a/.crane/scripts/score.go b/.crane/scripts/score.go index e6e22b14..4c1fbacc 100644 --- a/.crane/scripts/score.go +++ b/.crane/scripts/score.go @@ -60,6 +60,7 @@ type CutoverGates struct { PythonReferenceRequired bool `json:"python_reference_required"` SurfaceParity float64 `json:"surface_parity"` HelpParity float64 `json:"help_parity"` + OptionParity float64 `json:"option_parity"` FunctionalContracts float64 `json:"functional_contracts"` StateDiffContracts float64 `json:"state_diff_contracts"` PythonBehaviorContracts float64 `json:"python_behavior_contracts"` @@ -96,6 +97,7 @@ type Score struct { PythonReferencePresent bool `json:"python_reference_present"` SurfaceParity float64 `json:"surface_parity"` HelpParity float64 `json:"help_parity"` + OptionParity float64 `json:"option_parity"` FunctionalParity float64 `json:"functional_parity"` StateDiffParity float64 `json:"state_diff_parity"` KnownExceptions int `json:"known_exceptions"` @@ -146,6 +148,7 @@ func computeScore(input scanInput, getenv getenvFunc) (Score, error) { benchmarks := RatioGate{} surface := RatioGate{} help := RatioGate{} + optionParity := RatioGate{} functional := RatioGate{} stateDiff := RatioGate{} behaviorContracts := RatioGate{} @@ -165,6 +168,7 @@ func computeScore(input scanInput, getenv getenvFunc) (Score, error) { &pythonReference, &surface, &help, + &optionParity, &functional, &stateDiff, &behaviorContracts, @@ -191,6 +195,7 @@ func computeScore(input scanInput, getenv getenvFunc) (Score, error) { &pythonReference, &surface, &help, + &optionParity, &functional, &stateDiff, &behaviorContracts, @@ -266,6 +271,9 @@ func computeScore(input scanInput, getenv getenvFunc) (Score, error) { if !help.Seen { help = missingRatioGate() } + if !optionParity.Seen { + optionParity = missingRatioGate() + } if !functional.Seen { functional = missingRatioGate() } @@ -290,6 +298,7 @@ func computeScore(input scanInput, getenv getenvFunc) (Score, error) { PythonReferenceRequired: pythonReferenceSatisfied, SurfaceParity: surface.Percent(), HelpParity: help.Percent(), + OptionParity: optionParity.Percent(), FunctionalContracts: functional.Percent(), StateDiffContracts: stateDiff.Percent(), PythonBehaviorContracts: behaviorContracts.Percent(), @@ -315,6 +324,7 @@ func computeScore(input scanInput, getenv getenvFunc) (Score, error) { cutoverReady := gates.PythonReferenceRequired && gates.SurfaceParity == 1.0 && gates.HelpParity == 1.0 && + gates.OptionParity == 1.0 && gates.FunctionalContracts == 1.0 && gates.StateDiffContracts == 1.0 && gates.PythonBehaviorContracts == 1.0 && @@ -355,6 +365,7 @@ func computeScore(input scanInput, getenv getenvFunc) (Score, error) { PythonReferencePresent: gates.PythonReferenceRequired, SurfaceParity: gates.SurfaceParity, HelpParity: gates.HelpParity, + OptionParity: gates.OptionParity, FunctionalParity: gates.FunctionalContracts, StateDiffParity: gates.StateDiffContracts, KnownExceptions: gates.KnownExceptions, @@ -390,6 +401,7 @@ func applyGateEvent( pythonReference *BoolGate, surface *RatioGate, help *RatioGate, + optionParity *RatioGate, functional *RatioGate, stateDiff *RatioGate, behaviorContracts *RatioGate, @@ -407,6 +419,8 @@ func applyGateEvent( *surface = RatioGate{Seen: true, Passing: gate.Passing, Total: gate.Total} case "help": *help = RatioGate{Seen: true, Passing: gate.Passing, Total: gate.Total} + case "option_parity": + *optionParity = RatioGate{Seen: true, Passing: gate.Passing, Total: gate.Total} case "functional": *functional = RatioGate{Seen: true, Passing: gate.Passing, Total: gate.Total} case "state_diff": @@ -467,6 +481,7 @@ func gateResults(gates CutoverGates) []GateResult { {Name: "go_tests_pass", Passing: gates.GoTests == "pass"}, {Name: "surface_parity", Passing: gates.SurfaceParity == 1.0}, {Name: "help_parity", Passing: gates.HelpParity == 1.0}, + {Name: "option_parity", Passing: gates.OptionParity == 1.0}, {Name: "functional_contracts", Passing: gates.FunctionalContracts == 1.0}, {Name: "state_diff_contracts", Passing: gates.StateDiffContracts == 1.0}, {Name: "python_behavior_contracts", Passing: gates.PythonBehaviorContracts == 1.0}, diff --git a/.github/workflows/migration-ci.yml b/.github/workflows/migration-ci.yml index 9041e6d9..d5958e05 100644 --- a/.github/workflows/migration-ci.yml +++ b/.github/workflows/migration-ci.yml @@ -94,6 +94,15 @@ jobs: shell: bash run: | go build -o "$RUNNER_TEMP/apm-go" ./cmd/apm + enforce_behavior_contracts=false + if [ "${{ github.event_name }}" = "workflow_dispatch" ] && [ "${{ inputs.enforce_completion == true }}" = "true" ]; then + enforce_behavior_contracts=true + elif [ "${{ github.event_name }}" = "pull_request" ] && [[ "${{ github.event.pull_request.head.ref }}" == crane/* ]]; then + enforce_behavior_contracts=true + fi + if [ "$enforce_behavior_contracts" = "true" ]; then + export APM_ENFORCE_PYTHON_BEHAVIOR_CONTRACTS=1 + fi set +e APM_GO_BIN="$RUNNER_TEMP/apm-go" \ uv run pytest tests/parity/test_python_behavior_contracts.py -q --tb=short \ @@ -141,11 +150,22 @@ jobs: - name: Compute migration score run: | go run .crane/scripts/score.go < "$RUNNER_TEMP/go-test-events.json" | tee "$RUNNER_TEMP/migration-score.json" + coverage_args=( + --inventory "$RUNNER_TEMP/python-behavior-contracts.json" + --coverage tests/parity/python_contract_coverage.yml + --summary "$RUNNER_TEMP/python-contract-coverage.md" + ) + if [ "${MIGRATION_COMPLETION_ENFORCED:-false}" != "true" ]; then + coverage_args+=(--allow-intentionally-incomplete --allow-obsolete-python-tests) + fi + set +e uv run python scripts/ci/python_behavior_contracts.py check \ - --inventory "$RUNNER_TEMP/python-behavior-contracts.json" \ - --coverage tests/parity/python_contract_coverage.yml \ - --allow-intentionally-incomplete \ - --summary "$RUNNER_TEMP/python-contract-coverage.md" || true + "${coverage_args[@]}" + coverage_status=$? + set -e + if [ "${MIGRATION_COMPLETION_ENFORCED:-false}" = "true" ] && [ "$coverage_status" != "0" ]; then + exit "$coverage_status" + fi python - "$RUNNER_TEMP/migration-score.json" "${MIGRATION_COMPLETION_ENFORCED:-false}" <<'PY' import json import sys diff --git a/cmd/apm/CUTOVER.md b/cmd/apm/CUTOVER.md index b5744eeb..aa772e66 100644 --- a/cmd/apm/CUTOVER.md +++ b/cmd/apm/CUTOVER.md @@ -6,32 +6,35 @@ framework in issue #78). ## Current State -**Deletion-grade ready.** All 13 completion gates pass as of iteration 77. +**Gate hardened; deletion-grade readiness is blocked.** The previous 13-gate +score accepted representative behavior and help-only coverage mappings. The +completion gate now requires strict option parity, behavior-backed Python test +conversion mappings, and real Go-only command fixtures before the Go binary can +be declared equivalent. -The Go binary (`cmd/apm`) has full functional parity with the Python CLI. -The Python CLI remains as the reference oracle until the explicit cutover -steps below are executed, but it is no longer required for correctness. +The Go binary (`cmd/apm`) is still evaluated against the Python CLI. It must not +be treated as the shipped `apm` command until the strict gates below pass. -Gate summary (all passing): +Gate summary: | Gate | Status | |------|--------| -| python_reference_required | pass | -| surface_parity | 100% (855/855) | -| help_parity | 100% | -| functional_contracts | 100% | -| state_diff_contracts | 100% | -| python_behavior_contracts | 100% | -| golden_fixture_corpus | pass | -| all_go_golden_tests | pass | -| no_python_runtime_dependency | pass | -| known_exceptions | 0 | -| go_tests | pass (900 tests) | -| python_tests | pass (247 tests) | -| benchmarks | pass | - -The Go binary is ready to replace Python as the shipped `apm` command once -the cutover steps below are executed. +| python_reference_required | required | +| surface_parity | required | +| help_parity | required | +| option_parity | required; every Python CLI option must appear in Go help | +| functional_contracts | required | +| state_diff_contracts | required | +| python_behavior_contracts | required; no obsolete or help-only mappings | +| golden_fixture_corpus | required | +| all_go_golden_tests | required | +| no_python_runtime_dependency | required | +| known_exceptions | must be 0 | +| go_tests | required | +| python_tests | required, or superseded by the all-Go replay | +| benchmarks | required | + +The Go binary is ready to replace Python only when all rows above pass in CI. ### Pre-Cutover Verification @@ -48,9 +51,9 @@ The output must show `"migration_score": 1` and `"cutover_ready": true`. ## Real Criteria Every completion criterion must be backed by real command execution. The scorer -does not infer completion from test names for `surface`, `help`, `functional`, -`state_diff`, `python_behavior_contracts`, or `benchmarks`; each one must emit an -explicit ratio gate. +does not infer completion from test names for `surface`, `help`, +`option_parity`, `functional`, `state_diff`, `python_behavior_contracts`, or +`benchmarks`; each one must emit an explicit ratio gate. Crane must run `APM_PYTHON_BIN= go test ./cmd/apm -run TestGoCutover -json`. These fixture-backed tests execute the built Go `apm` binary in temporary @@ -66,12 +69,20 @@ directly: {"crane":"gate","name":"no_python_runtime_dependency","passed":true} ``` +The Python-vs-Go inventory tests must also emit: + +```json +{"crane":"gate","name":"option_parity","passing":N,"total":N} +``` + `python_behavior_contracts` is not allowed to mean "the Python CLI was -available." In the final gate it means every checked-in legacy Python pytest -node under `tests/` (except the migration-specific `tests/parity/` harness) is -listed in `cmd/apm/testdata/go_cutover/python_test_coverage.json` with one or -more Go test names that replace it. An empty or partial manifest is a hard -failure. +available" or "the test was declared obsolete." In the final gate it means every +checked-in legacy Python pytest node under `tests/` (except the +migration-specific `tests/parity/` harness) is listed in +`cmd/apm/testdata/go_cutover/python_test_coverage.json` with one or more +existing real Go-only cutover behavior tests that replace it. Empty mappings, +partial mappings, stale Go test names, `python_tests.obsolete`, Python-vs-Go +completion tests, and help-only/surface-only mappings are hard failures. Crane must also run the migration benchmark test. It executes fixture-backed Python-vs-Go benchmark workloads and emits: @@ -111,7 +122,7 @@ completion. The Go binary becomes the shipped `apm` command when ALL of the following are true: -1. All 26 commands respond correctly to `--help` (done) +1. All public Python commands and options are present in Go help output 2. The representative command matrix passes functional tests: `init`, `install`, `update`, `compile`, `pack`, `run`, `audit`, `policy`, `mcp`, `runtime`, `targets`, `list`, `view`, `cache`, @@ -120,9 +131,11 @@ are true: fixture-backed real-command scenario and emits passing `functional` and `state_diff` gates 4. `TestGoCutoverPythonTestConversionCoverage` proves every legacy Python test - has an explicit Go replacement in the cutover coverage manifest -5. Python-vs-Go parity tests pass for all commands in the matrix while the - Python reference is still available + has an explicit existing Go-only behavior replacement in the cutover + coverage manifest; help-only, surface-only, coverage-only, obsolete, stale, + or Python-vs-Go completion mappings do not count +5. Python-vs-Go parity tests pass for all commands, options, and unknown-option + paths while the Python reference is still available 6. Migration benchmarks pass real fixture-backed command workloads and emit a passing counted `benchmarks` gate 7. The final Python-reference parity run has been frozen into a committed, diff --git a/cmd/apm/go_cutover_coverage_test.go b/cmd/apm/go_cutover_coverage_test.go index baeec7ef..5ee56144 100644 --- a/cmd/apm/go_cutover_coverage_test.go +++ b/cmd/apm/go_cutover_coverage_test.go @@ -26,27 +26,40 @@ type pythonClassContext struct { var ( pythonClassRE = regexp.MustCompile(`^class\s+(Test[A-Za-z0-9_]*)\b`) pythonTestRE = regexp.MustCompile(`^(?:async\s+)?def\s+(test_[A-Za-z0-9_]*)\b`) + goTestFuncRE = regexp.MustCompile(`^func\s+(Test[A-Za-z0-9_]*)\s*\(`) ) func TestGoCutoverPythonTestConversionCoverage(t *testing.T) { root := completionModuleRoot(t) pythonTests := discoverPythonTestsForCutover(t, root) + goTests := discoverGoTestsForCutover(t, root) coverage := loadGoCutoverPythonTestCoverage(t, root) - converted := 0 + behaviorBacked := 0 var missing []string + var unknown []string + var weak []string for _, id := range pythonTests { tests := coverage.ConvertedPythonTests[id] if len(tests) == 0 { missing = append(missing, id) continue } - converted++ + for _, testName := range tests { + if _, ok := goTests[testName]; !ok { + unknown = append(unknown, fmt.Sprintf("%s -> %s", id, testName)) + } + } + if !hasBehaviorBackedGoTest(tests, goTests) { + weak = append(weak, fmt.Sprintf("%s -> %s", id, strings.Join(tests, ", "))) + continue + } + behaviorBacked++ } - defer emitCraneRatioGate("python_behavior_contracts", converted, len(pythonTests)) - defer emitCraneBoolGate("golden_fixture_corpus", converted == len(pythonTests) && len(pythonTests) > 0) - defer emitCraneBoolGate("all_go_golden_tests", converted == len(pythonTests) && len(pythonTests) > 0) + defer emitCraneRatioGate("python_behavior_contracts", behaviorBacked, len(pythonTests)) + defer emitCraneBoolGate("golden_fixture_corpus", behaviorBacked == len(pythonTests) && len(pythonTests) > 0) + defer emitCraneBoolGate("all_go_golden_tests", behaviorBacked == len(pythonTests) && len(pythonTests) > 0) if len(pythonTests) == 0 { t.Fatal("no Python tests discovered under tests/; coverage gate cannot prove conversion") @@ -57,12 +70,28 @@ func TestGoCutoverPythonTestConversionCoverage(t *testing.T) { if len(missing) > 0 { t.Fatalf( "Go cutover coverage incomplete: %d/%d Python tests mapped to Go tests; %d missing.\nFirst missing tests:\n%s", - converted, + behaviorBacked, len(pythonTests), len(missing), formatCutoverMissing(missing, 80), ) } + if len(unknown) > 0 { + sort.Strings(unknown) + t.Fatalf( + "Go cutover coverage references Go tests that do not exist: %d stale mappings.\nFirst stale mappings:\n%s", + len(unknown), + formatCutoverMissing(unknown, 80), + ) + } + if len(weak) > 0 { + t.Fatalf( + "Go cutover coverage is not behavior-backed: %d/%d Python tests do not map to a real Go-only cutover behavior test.\nFirst weak mappings:\n%s", + len(weak), + len(pythonTests), + formatCutoverMissing(weak, 80), + ) + } } func TestGoCutoverNoPythonRuntimeDependency(t *testing.T) { @@ -166,6 +195,42 @@ func scanPythonTestFile(t *testing.T, root, path string) ([]string, error) { return ids, nil } +func discoverGoTestsForCutover(t *testing.T, root string) map[string]struct{} { + t.Helper() + tests := map[string]struct{}{} + err := filepath.WalkDir(filepath.Join(root, "cmd", "apm"), func(path string, entry os.DirEntry, err error) error { + if err != nil { + return err + } + if entry.IsDir() { + return nil + } + if !strings.HasSuffix(entry.Name(), "_test.go") { + return nil + } + file, openErr := os.Open(path) + if openErr != nil { + return openErr + } + defer file.Close() + scanner := bufio.NewScanner(file) + for scanner.Scan() { + match := goTestFuncRE.FindStringSubmatch(strings.TrimSpace(scanner.Text())) + if match != nil { + tests[match[1]] = struct{}{} + } + } + return scanner.Err() + }) + if err != nil { + t.Fatalf("discover Go tests: %v", err) + } + if len(tests) == 0 { + t.Fatal("no Go tests discovered under cmd/apm; coverage gate cannot prove conversion") + } + return tests +} + func loadGoCutoverPythonTestCoverage(t *testing.T, root string) goCutoverPythonTestCoverage { t.Helper() path := filepath.Join(root, "cmd", "apm", "testdata", "go_cutover", "python_test_coverage.json") @@ -197,6 +262,19 @@ func formatCutoverMissing(missing []string, limit int) string { return strings.Join(lines, "\n") } +func hasBehaviorBackedGoTest(names []string, existing map[string]struct{}) bool { + for _, name := range names { + if _, ok := existing[name]; ok && isBehaviorBackedGoTest(name) { + return true + } + } + return false +} + +func isBehaviorBackedGoTest(name string) bool { + return strings.HasPrefix(name, "TestGoCutoverReal") +} + func leadingWhitespaceWidth(line string) int { width := 0 for _, r := range line { diff --git a/cmd/apm/python_behavior_contracts_test.go b/cmd/apm/python_behavior_contracts_test.go index 38303711..5530ee76 100644 --- a/cmd/apm/python_behavior_contracts_test.go +++ b/cmd/apm/python_behavior_contracts_test.go @@ -97,6 +97,23 @@ func contractHelpArgs(command pythonCommandContract) []string { return args } +func pythonCommandOptionNames(command pythonCommandContract) []string { + var options []string + for _, param := range command.Params { + if param.Type != "Option" { + continue + } + opts := append([]string{}, param.Opts...) + opts = append(opts, param.SecondaryOpts...) + for _, opt := range opts { + if opt != "" { + options = append(options, opt) + } + } + } + return options +} + func normalizeContractHelp(text string) string { var lines []string for _, line := range strings.Split(text, "\n") { @@ -144,35 +161,68 @@ func TestParityPythonOptionsFromSource(t *testing.T) { return } inv := loadPythonBehaviorInventory(t, false) + totalOptions := 0 + missingOptions := 0 + var missingDetails []string + defer func() { + passing := totalOptions - missingOptions + if totalOptions == 0 { + emitCraneRatioGate("option_parity", 0, 1) + return + } + emitCraneRatioGate("option_parity", passing, totalOptions) + }() + for _, command := range inv.Commands { command := command if command.Hidden { continue } t.Run(command.ID, func(t *testing.T) { + commandOptions := pythonCommandOptionNames(command) + totalOptions += len(commandOptions) goOut, goErr, goCode := runGo(t, contractHelpArgs(command)...) if goCode != 0 { + missingOptions += len(commandOptions) + for _, opt := range commandOptions { + missingDetails = append(missingDetails, fmt.Sprintf("%s missing %s", command.ID, opt)) + } t.Fatalf("Go help for %s exited %d\nstdout:\n%s\nstderr:\n%s", command.ID, goCode, goOut, goErr) } help := normalizeContractHelp(goOut + goErr) - for _, param := range command.Params { - if param.Type != "Option" { - continue - } - opts := append([]string{}, param.Opts...) - opts = append(opts, param.SecondaryOpts...) - for _, opt := range opts { - if opt == "" { - continue - } - if !strings.Contains(help, opt) { - t.Logf("TRACKING: %s help missing Python option %s (migration in progress)", command.ID, opt) - } + var commandMissing []string + for _, opt := range commandOptions { + if !strings.Contains(help, opt) { + missingOptions++ + detail := fmt.Sprintf("%s missing %s", command.ID, opt) + commandMissing = append(commandMissing, detail) + missingDetails = append(missingDetails, detail) } } + if len(commandMissing) == 0 { + return + } + message := "Python option parity incomplete:\n" + formatCutoverMissing(commandMissing, 30) + if completionGatesEnforced() { + t.Error(message) + } else { + t.Logf("TRACKING: %s", message) + } }) } + if totalOptions == 0 { + completionGateFailure(t, "HARD-GATE FAILED: Python inventory exposed no options; option parity cannot be verified") + return + } + if completionGatesEnforced() && missingOptions > 0 { + t.Fatalf( + "HARD-GATE FAILED: Go help is missing %d/%d Python CLI options.\nFirst missing options:\n%s", + missingOptions, + totalOptions, + formatCutoverMissing(missingDetails, 80), + ) + } } func TestParityCompletionPythonBehaviorContracts(t *testing.T) { diff --git a/cmd/apm/real_behavior_test.go b/cmd/apm/real_behavior_test.go index cd825a09..f117d6cf 100644 --- a/cmd/apm/real_behavior_test.go +++ b/cmd/apm/real_behavior_test.go @@ -176,6 +176,71 @@ func TestGoCutoverRealFunctionalAndStateDiffContracts(t *testing.T) { return ok }, }, + { + name: "config get reads persisted configuration value", + args: []string{"config", "get", "auto-integrate"}, + env: map[string]string{"APM_CONFIG_PATH": "apm-config.yml"}, + setup: realBehaviorSetupConfigValue, + verify: func(t *testing.T, dir, stdout, stderr string, code int) bool { + ok := realBehaviorExpectExit(t, stdout, stderr, code, 0) + ok = realBehaviorExpectOutputContains(t, stdout+stderr, "false") && ok + ok = realBehaviorExpectFileContains(t, filepath.Join(dir, "apm-config.yml"), "auto-integrate") && ok + return ok + }, + }, + { + name: "config unset removes persisted configuration value", + args: []string{"config", "unset", "auto-integrate"}, + env: map[string]string{"APM_CONFIG_PATH": "apm-config.yml"}, + setup: realBehaviorSetupConfigValue, + verify: func(t *testing.T, dir, stdout, stderr string, code int) bool { + ok := realBehaviorExpectExit(t, stdout, stderr, code, 0) + ok = realBehaviorExpectFileNotContains(t, filepath.Join(dir, "apm-config.yml"), "auto-integrate") && ok + return ok + }, + }, + { + name: "mcp list reads manifest MCP dependencies", + args: []string{"mcp", "list"}, + setup: realBehaviorSetupMCPProject, + verify: func(t *testing.T, _ string, stdout, stderr string, code int) bool { + ok := realBehaviorExpectExit(t, stdout, stderr, code, 0) + ok = realBehaviorExpectOutputContains(t, stdout+stderr, "example-server") && ok + return ok + }, + }, + { + name: "marketplace remove deletes registered marketplace", + args: []string{"marketplace", "remove", "local"}, + setup: realBehaviorSetupMarketplaceProject, + verify: func(t *testing.T, dir, stdout, stderr string, code int) bool { + ok := realBehaviorExpectExit(t, stdout, stderr, code, 0) + ok = realBehaviorExpectFileNotContains(t, filepath.Join(dir, "apm.yml"), "file://./marketplace.json") && ok + return ok + }, + }, + { + name: "marketplace validate rejects missing registered marketplace", + args: []string{"marketplace", "validate", "missing"}, + verify: func(t *testing.T, _ string, stdout, stderr string, code int) bool { + if code == 0 { + realBehaviorFailure(t, "expected non-zero exit for missing marketplace validation\nstdout: %s\nstderr: %s", stdout, stderr) + return false + } + return true + }, + }, + { + name: "runtime remove deletes persisted runtime config", + args: []string{"runtime", "remove", "codex"}, + env: map[string]string{"APM_CONFIG_PATH": "apm-config.yml"}, + setup: realBehaviorSetupRuntimeConfig, + verify: func(t *testing.T, dir, stdout, stderr string, code int) bool { + ok := realBehaviorExpectExit(t, stdout, stderr, code, 0) + ok = realBehaviorExpectFileNotContains(t, filepath.Join(dir, "apm-config.yml"), "codex") && ok + return ok + }, + }, { name: "cache clean removes entries but preserves cache root", args: []string{"cache", "clean"}, @@ -437,6 +502,39 @@ policy: `) } +func realBehaviorSetupConfigValue(t *testing.T, dir string) { + t.Helper() + realBehaviorWriteFile(t, filepath.Join(dir, "apm-config.yml"), "auto-integrate: false\n") +} + +func realBehaviorSetupMCPProject(t *testing.T, dir string) { + t.Helper() + realBehaviorWriteFile(t, filepath.Join(dir, "apm.yml"), `name: mcp-fixture +version: 1.0.0 +dependencies: + apm: [] + mcp: + - example-server +`) +} + +func realBehaviorSetupMarketplaceProject(t *testing.T, dir string) { + t.Helper() + realBehaviorWriteFile(t, filepath.Join(dir, "apm.yml"), `name: marketplace-fixture +version: 1.0.0 +dependencies: + apm: [] + mcp: [] +marketplace: + local: file://./marketplace.json +`) +} + +func realBehaviorSetupRuntimeConfig(t *testing.T, dir string) { + t.Helper() + realBehaviorWriteFile(t, filepath.Join(dir, "apm-config.yml"), "runtime: codex\n") +} + func realBehaviorSetupCacheRoot(t *testing.T, dir string) { t.Helper() realBehaviorWriteFile(t, filepath.Join(dir, "cache-root", "http_v1", "old", "body"), "cached\n") @@ -503,6 +601,15 @@ func realBehaviorExpectFileNotContains(t *testing.T, path, needle string) bool { return true } +func realBehaviorExpectOutputContains(t *testing.T, output, needle string) bool { + t.Helper() + if !strings.Contains(output, needle) { + realBehaviorFailure(t, "expected command output to contain %q, got:\n%s", needle, output) + return false + } + return true +} + func realBehaviorExpectPathExists(t *testing.T, path string) bool { t.Helper() if _, err := os.Stat(path); err != nil { diff --git a/cmd/apm/testdata/go_cutover/python_test_coverage.json b/cmd/apm/testdata/go_cutover/python_test_coverage.json index 25e3b5fa..6bc4d9d6 100644 --- a/cmd/apm/testdata/go_cutover/python_test_coverage.json +++ b/cmd/apm/testdata/go_cutover/python_test_coverage.json @@ -73518,6 +73518,6 @@ "TestGoCutoverRealFunctionalAndStateDiffContracts" ] }, - "description": "Go cutover coverage manifest. Every legacy Python pytest node under tests/ (except tests/parity/) must appear here with one or more Go test names before the Go CLI can be declared a 100% migration.", + "description": "Go cutover coverage manifest. Every legacy Python pytest node under tests/ (except tests/parity/) must appear here with one or more existing real Go-only cutover behavior tests before the Go CLI can be declared a 100% migration; help-only, surface-only, coverage-only, Python-vs-Go completion, stale-name, and obsolete mappings do not count.", "schema_version": 1 -} \ No newline at end of file +} diff --git a/scripts/ci/python_behavior_contracts.py b/scripts/ci/python_behavior_contracts.py index 56cd982e..91b64ed1 100644 --- a/scripts/ci/python_behavior_contracts.py +++ b/scripts/ci/python_behavior_contracts.py @@ -299,7 +299,12 @@ def _has_tests(entry: dict[str, Any], key: str) -> bool: ) -def check_coverage(inventory: dict[str, Any], coverage: dict[str, Any]) -> list[Finding]: +def check_coverage( + inventory: dict[str, Any], + coverage: dict[str, Any], + *, + allow_obsolete: bool = False, +) -> list[Finding]: findings: list[Finding] = [] command_coverage = coverage.get("commands") or {} if not isinstance(command_coverage, dict): @@ -333,6 +338,15 @@ def check_coverage(inventory: dict[str, Any], coverage: dict[str, Any]) -> list[ for test in inventory["tests"]: test_id = test["id"] if test_id in obsolete_tests: + if allow_obsolete: + continue + findings.append( + Finding( + "obsolete-python-test-coverage", + "Python test is marked obsolete instead of mapped to Go or CLI-agnostic tests", + test_id, + ) + ) continue entry = test_coverage.get(test_id) if not isinstance(entry, dict): @@ -398,7 +412,11 @@ def cmd_extract(args: argparse.Namespace) -> int: def cmd_check(args: argparse.Namespace) -> int: inventory = _load_inventory(Path(args.inventory) if args.inventory else None) coverage = _load_coverage(Path(args.coverage)) - findings = check_coverage(inventory, coverage) + findings = check_coverage( + inventory, + coverage, + allow_obsolete=args.allow_obsolete_python_tests, + ) summary = render_summary(inventory, findings) if args.summary: Path(args.summary).write_text(summary, encoding="utf-8") @@ -439,6 +457,11 @@ def main(argv: list[str] | None = None) -> int: action="store_true", help="report findings without failing when the manifest is marked incomplete", ) + check.add_argument( + "--allow-obsolete-python-tests", + action="store_true", + help="report-only mode: allow python_tests.obsolete entries instead of requiring conversion mappings", + ) check.set_defaults(func=cmd_check) args = parser.parse_args(argv) diff --git a/tests/parity/README.md b/tests/parity/README.md index 5ac78fb9..525e9dcd 100644 --- a/tests/parity/README.md +++ b/tests/parity/README.md @@ -16,7 +16,13 @@ contracts to parity evidence. The completion scorer must not reach `migration_score = 1.0` while any extracted command or Python test lacks mapped coverage. +Completion coverage must be behavior-backed. `python_tests.obsolete` is allowed +only in report-only summaries, and a Python test mapped only to help, surface, +Python-vs-Go completion, stale-name, or coverage bookkeeping tests does not +count as converted. + `status: intentionally-incomplete` is a progress marker only. It must make completion scoring fail; use `--allow-intentionally-incomplete` only for -report-only summaries. Set `APM_ENFORCE_PYTHON_BEHAVIOR_CONTRACTS=1` when a -local or CI check should hard-fail instead of reporting incomplete progress. +report-only summaries. Use `--allow-obsolete-python-tests` only for the same +report-only mode. Set `APM_ENFORCE_PYTHON_BEHAVIOR_CONTRACTS=1` when a local or +CI check should hard-fail instead of reporting incomplete progress. diff --git a/tests/parity/python_contract_coverage.yml b/tests/parity/python_contract_coverage.yml index c9aed491..d610f6ce 100644 --- a/tests/parity/python_contract_coverage.yml +++ b/tests/parity/python_contract_coverage.yml @@ -1,7 +1,7 @@ schema_version: 1 -description: Coverage manifest for the Python-to-Go CLI migration. All Python unit and integration tests are listed as obsolete - because they are reference implementation tests, not parity evidence. Parity evidence is provided by Go and CLI-agnostic - contract tests mapped in the commands section. +description: Coverage manifest for the Python-to-Go CLI migration. Completion mode requires every Python test to be mapped + to behavior-backed Go or CLI-agnostic parity evidence. python_tests.obsolete is report-only debt and does not count toward + deletion-grade readiness. commands: apm: go_tests: diff --git a/tests/parity/test_python_behavior_contracts.py b/tests/parity/test_python_behavior_contracts.py index 4bb5f696..01cd083b 100644 --- a/tests/parity/test_python_behavior_contracts.py +++ b/tests/parity/test_python_behavior_contracts.py @@ -161,8 +161,9 @@ def test_every_python_command_rejects_unknown_option_consistently( def test_python_contract_coverage_manifest_is_complete(inventory: dict[str, object]) -> None: coverage = _load_coverage(ROOT / "tests" / "parity" / "python_contract_coverage.yml") + enforce = os.environ.get("APM_ENFORCE_PYTHON_BEHAVIOR_CONTRACTS") == "1" if coverage.get("status") == "intentionally-incomplete": - if os.environ.get("APM_ENFORCE_PYTHON_BEHAVIOR_CONTRACTS") != "1": + if not enforce: pytest.xfail( "Coverage manifest is intentionally incomplete; completion gate " "is reported by migration_score" @@ -172,4 +173,56 @@ def test_python_contract_coverage_manifest_is_complete(inventory: dict[str, obje "only after all contracts are mapped" ) findings = check_coverage(inventory, coverage) + if findings and not enforce: + pytest.xfail(render_summary(inventory, findings)) assert not findings, render_summary(inventory, findings) + + +def test_python_contract_coverage_rejects_obsolete_tests_by_default() -> None: + inventory = { + "summary": { + "commands": 0, + "public_commands": 0, + "python_tests": 1, + "python_test_cases": 1, + "source_contracts": 0, + }, + "commands": [], + "tests": [{"id": "tests/unit/test_example.py::test_real_behavior"}], + "source_contracts": [], + } + coverage = { + "commands": {}, + "python_tests": { + "covered": {}, + "obsolete": ["tests/unit/test_example.py::test_real_behavior"], + }, + } + + findings = check_coverage(inventory, coverage) + + assert [finding.code for finding in findings] == ["obsolete-python-test-coverage"] + + +def test_python_contract_coverage_can_allow_obsolete_tests_in_report_only_mode() -> None: + inventory = { + "summary": { + "commands": 0, + "public_commands": 0, + "python_tests": 1, + "python_test_cases": 1, + "source_contracts": 0, + }, + "commands": [], + "tests": [{"id": "tests/unit/test_example.py::test_real_behavior"}], + "source_contracts": [], + } + coverage = { + "commands": {}, + "python_tests": { + "covered": {}, + "obsolete": ["tests/unit/test_example.py::test_real_behavior"], + }, + } + + assert check_coverage(inventory, coverage, allow_obsolete=True) == [] diff --git a/tests/unit/test_crane_score.py b/tests/unit/test_crane_score.py index f7e958b5..243e205b 100644 --- a/tests/unit/test_crane_score.py +++ b/tests/unit/test_crane_score.py @@ -71,6 +71,7 @@ def _deletion_gates() -> list[str]: '{"crane":"gate","name":"python_reference","passed":true}', '{"crane":"gate","name":"surface","passing":1,"total":1}', '{"crane":"gate","name":"help","passing":1,"total":1}', + '{"crane":"gate","name":"option_parity","passing":1,"total":1}', '{"crane":"gate","name":"functional","passing":1,"total":1}', '{"crane":"gate","name":"state_diff","passing":1,"total":1}', '{"crane":"gate","name":"python_behavior_contracts","passing":1,"total":1}', @@ -88,6 +89,7 @@ def _completion_gate_events() -> list[str]: "TestParityCompletionHardGate", "TestParityCompletionSurfaceParity", "TestParityCompletionCommandMatrix", + "TestParityPythonOptionsFromSource", "TestParityCompletionHelpIdentical", "TestParityCompletionFunctionalContracts", "TestParityCompletionStateDiffContracts", @@ -181,6 +183,7 @@ def test_crane_score_can_reach_one_with_all_deletion_grade_gates() -> None: "python_reference_required": True, "surface_parity": 1.0, "help_parity": 1.0, + "option_parity": 1.0, "functional_contracts": 1.0, "state_diff_contracts": 1.0, "python_behavior_contracts": 1.0, @@ -219,6 +222,7 @@ def test_crane_score_can_reach_one_with_no_python_all_go_replay() -> None: [ '{"crane":"gate","name":"surface","passing":0,"total":1}', '{"crane":"gate","name":"help","passing":0,"total":1}', + '{"crane":"gate","name":"option_parity","passing":0,"total":1}', '{"crane":"gate","name":"functional","passing":0,"total":1}', '{"crane":"gate","name":"state_diff","passing":0,"total":1}', '{"crane":"gate","name":"python_behavior_contracts","passing":0,"total":1}', diff --git a/tests/unit/test_migration_ci_workflow.py b/tests/unit/test_migration_ci_workflow.py index 9b9fde79..15a0286c 100644 --- a/tests/unit/test_migration_ci_workflow.py +++ b/tests/unit/test_migration_ci_workflow.py @@ -16,6 +16,8 @@ def test_migration_ci_enforces_completion_for_crane_prs_and_explicit_manual_runs assert "enforce_completion:" in text assert "MIGRATION_COMPLETION_ENFORCED=$enforce_completion" in text assert "APM_ENFORCE_COMPLETION_GATES=1" in text + assert "APM_ENFORCE_PYTHON_BEHAVIOR_CONTRACTS=1" in text + assert "--allow-obsolete-python-tests" in text assert "inputs.enforce_completion == true" in text assert 'github.event.pull_request.head.ref }}" == crane/*' in text assert "manual runs with enforce_completion=true" in text