From fabf0c9e5150b1b85a12dbdc03764546dbcce11f Mon Sep 17 00:00:00 2001 From: scawful Date: Wed, 1 Oct 2025 20:13:00 -0400 Subject: [PATCH] feat: Implement proposal loading from disk in ProposalRegistry and enhance agent command handling --- docs/E6-z3ed-implementation-plan.md | 109 ++++++++++++++++++-------- src/cli/handlers/agent.cc | 21 ++++- src/cli/service/ai_service.cc | 23 ++++-- src/cli/service/proposal_registry.cc | 110 ++++++++++++++++++++++++++- src/cli/service/proposal_registry.h | 1 + 5 files changed, 220 insertions(+), 44 deletions(-) diff --git a/docs/E6-z3ed-implementation-plan.md b/docs/E6-z3ed-implementation-plan.md index f9711354..d34997cc 100644 --- a/docs/E6-z3ed-implementation-plan.md +++ b/docs/E6-z3ed-implementation-plan.md @@ -14,25 +14,31 @@ This plan decomposes the design additions (Sections 11–15 of `E6-z3ed-cli-desi | Verification Pipeline | Build layered testing + CI coverage. | Phase 6+ | Integrates with harness + CLI suites. | | Telemetry & Learning | Capture signals to improve prompts + heuristics. | Phase 8 | Optional/opt-in features. | -### Progress snapshot — 2025-10-01 final update (Phase 6 + AW-03 Complete + Graphics Fix) +### Progress snapshot — 2025-10-01 (Phase 6 Complete, AW-03 Complete) -- ✅ CLI global flag passthrough now preserves subcommand options, letting `agent describe` and palette routines accept both space-separated and `--flag=value` styles alongside the updated help text. -- ✅ `agent describe --format yaml` writes catalog data end-to-end; JSON format also working correctly. -- ✅ Expanded `ImGuiTestHarness` design with concrete transport, message envelope, and lifecycle details to unblock IT-01 spike. -- ✅ Fixed `rom info` segfault by creating dedicated `RomInfo` handler that properly uses the `--rom` flag instead of positional arguments. Command now works correctly with flag-based dispatch. -- ✅ Added `rom info` action to resource catalog with proper schema documentation including return values (title, size, filename). -- ✅ Generated and committed `docs/api/z3ed-resources.yaml` as authoritative machine-readable API reference for CLI automation (RC-02 complete). -- ✅ **Implemented `ProposalRegistry` service** for tracking agent-generated ROM modifications with metadata, diffs, logs, and screenshots. -- ✅ **Integrated ProposalRegistry into `agent run` workflow** - all command executions are now logged and tracked. -- ✅ **RomSandboxManager fully operational** with lifecycle management for proposal tracking. -- ✅ **Enhanced `agent diff` command** to read and display proposal diffs from ProposalRegistry with detailed metadata, execution logs, and next-step guidance. -- ✅ **Added `--proposal-id` flag to `agent diff`** for viewing specific proposals (not just latest pending). -- ✅ **Implemented `agent list` command** to enumerate all proposals with status filtering and metadata display. -- ✅ **Updated resource catalog** with agent list and diff actions including comprehensive argument and return schemas. -- ✅ **Regenerated API documentation** (`docs/api/z3ed-resources.yaml`) with all new agent commands. -- ✅ **Implemented ProposalDrawer ImGui component** with proposal list, detail view, and accept/reject/delete actions (AW-03). -- ✅ **Fixed linker errors** by adding CLI service sources to app/emu/lib build targets in CMake configuration. -- ✅ **Fixed RAII shutdown crash** in `PerformanceProfiler` - added shutdown flag and validity checks to prevent segfault during static destruction (see `docs/gfx-raii-shutdown-fix.md`). +**Resource Catalogue (RC)** ✅ COMPLETE: +- CLI flag passthrough and resource catalog system operational +- `agent describe` exports YAML/JSON command schemas for AI consumption +- `docs/api/z3ed-resources.yaml` generated and maintained +- Fixed `rom info` segfault with dedicated handler + +**Acceptance Workflow (AW-01, AW-02, AW-03)** ✅ COMPLETE: +- `ProposalRegistry` tracks agent modifications with metadata/diffs/logs +- `RomSandboxManager` handles isolated ROM copies +- `agent list` and `agent diff` commands operational +- **ProposalDrawer ImGui GUI** implemented with list/detail views and accept/reject/delete actions +- Integrated into EditorManager (`Debug → Agent Proposals` menu) +- Fixed CMake linker errors across all app targets +- **Known limitation**: ROM merging in `AcceptProposal()` not yet implemented (TODO) + +**Graphics System** ✅ FIXED: +- Fixed RAII shutdown crash in `PerformanceProfiler` (static destruction order issue) +- Added shutdown flag and validity checks - application now exits cleanly +- Enables stable testing and performance monitoring for AI workflow + +**Agent Run** ✅ FIXED: +- Added automatic ROM loading from `--rom` flag when not already loaded +- Proper error messages guide users to specify ROM path ## 2. Task Backlog @@ -59,23 +65,60 @@ This plan decomposes the design additions (Sections 11–15 of `E6-z3ed-cli-desi _Status Legend: Prototype · In Progress · Planned · Blocked · Done_ -## 3. Immediate Next Steps +## 3. Immediate Next Steps (Week of Oct 1-7, 2025) -1. ✅ **COMPLETED**: Automated catalog export into `docs/api/z3ed-resources.yaml` - both JSON and YAML formats work correctly (RC-02, RC-03). -2. ✅ **COMPLETED**: Fixed `rom info` crash - created dedicated `RomInfo` handler that uses `FLAGS_rom` instead of positional arguments (RC-05). -3. ✅ **COMPLETED**: Wired `RomSandboxManager` and `ProposalRegistry` into agent run workflow with full logging and metadata tracking (AW-01, AW-02). -4. ✅ **COMPLETED**: Enhanced `agent diff` command to read and display proposal diffs from ProposalRegistry with formatted output, execution logs, and next-step guidance. -5. ✅ **COMPLETED**: Added `agent list` command to enumerate all proposals with status filtering. -6. ✅ **COMPLETED**: Added `--proposal-id` flag to `agent diff` for viewing specific proposals. -7. ✅ **COMPLETED**: Updated resource catalog with agent list and diff actions including arguments and return schemas. -8. ✅ **COMPLETED**: Implemented ProposalDrawer ImGui component with proposal list, detail view, and accept/reject/delete actions (AW-03). -9. ✅ **COMPLETED**: Fixed RAII shutdown crash in `PerformanceProfiler` for clean application exit. -10. **IN PROGRESS**: Test ProposalDrawer in running application with live proposals. -11. **PLANNED**: Complete ROM merging in AcceptProposal method (AW-03 TODO). -12. **PLANNED**: Spike IPC options for `ImGuiTestHarness` (socket vs. HTTP vs. shared memory) and document findings (IT-01). -13. **PLANNED**: Integrate schema export with TUI command palette + help overlays (RC-04). +### Priority 0: Debug & Stabilize (Active) +1. **FIX**: Debug `stoi` crash in `agent run` command execution + - Error occurs when executing agent commands via ModernCLI + - Investigate command parsing and proposal creation flow -## 4. Open Questions +### Priority 1: Complete AW-03 (2-3 hours) +2. **TEST**: ProposalDrawer with live proposals + - Create test proposals via CLI with working prompts + - Verify list view, detail view, filtering, refresh + - Test Accept/Reject/Delete actions + +3. **IMPLEMENT**: ROM merging in `AcceptProposal()` method + - Add ROM reference to ProposalDrawer + - Load sandbox ROM and merge into main ROM + - Add save prompt after successful merge + - Test merge + undo/redo integration + +### Priority 2: Policy Evaluation (AW-04, 4-6 hours) +4. **DESIGN**: Policy evaluation framework + - YAML-based policy configuration (`.yaze/policies/agent.yaml`) + - Policy types: test requirements, change constraints, review requirements + - PolicyEvaluator service for checking proposals against rules + +5. **INTEGRATE**: Policy checks in ProposalDrawer UI + - Display policy violations in detail view + - Gate accept button based on policy results + - Show helpful messages for blocked proposals + +### Priority 3: Testing Infrastructure (VP-01, ongoing) +6. **EXPAND**: CLI unit tests for agent commands +7. **ADD**: Integration tests for proposal workflow + +### Later: ImGuiTestHarness (IT-01) +- Spike IPC transport options (socket/HTTP/shared memory) +- Design harness architecture +- Create proof-of-concept + +## 4. Current Issues & Blockers + +### Active Issues +1. **BLOCKER**: `std::invalid_argument: stoi: no conversion` crash in `agent run` + - Occurs when executing generated commands + - Blocks testing of ProposalDrawer with real proposals + - Needs immediate investigation + +### Known Limitations (Non-Blocking) +1. ROM merging not implemented in `AcceptProposal()` - status updates only +2. Large diffs truncated at 1000 lines +3. ProposalDrawer lacks keyboard navigation +4. Some timer warnings during shutdown (harmless but noisy) + +## 5. Open Questions - What serialization format should the proposal registry adopt for diff payloads (binary vs. textual vs. hybrid)? \ ➤ Decision: pursue a hybrid package (`.z3ed-diff`) that wraps binary tile/object deltas alongside a JSON metadata envelope (identifiers, texture descriptors, preview palette info). Capture format draft under RC/AW backlog. diff --git a/src/cli/handlers/agent.cc b/src/cli/handlers/agent.cc index 86281ab3..b040b141 100644 --- a/src/cli/handlers/agent.cc +++ b/src/cli/handlers/agent.cc @@ -6,6 +6,8 @@ #include "cli/service/rom_sandbox_manager.h" #include "util/macro.h" +#include "absl/flags/declare.h" +#include "absl/flags/flag.h" #include "absl/status/status.h" #include "absl/status/statusor.h" #include "absl/strings/ascii.h" @@ -18,6 +20,9 @@ #include #include +// Declare the rom flag so we can access it +ABSL_DECLARE_FLAG(std::string, rom); + // Platform-specific includes for process management and executable path detection #if !defined(_WIN32) #include @@ -106,9 +111,21 @@ absl::Status HandleRunCommand(const std::vector& arg_vec, Rom& rom) } std::string prompt = arg_vec[1]; - // Save a sandbox copy of the ROM for proposal tracking. + // Load ROM if not already loaded if (!rom.is_loaded()) { - return absl::FailedPreconditionError("No ROM loaded"); + std::string rom_path = absl::GetFlag(FLAGS_rom); + if (rom_path.empty()) { + return absl::FailedPreconditionError( + "No ROM loaded. Use --rom= to specify ROM file.\n" + "Example: z3ed agent run --rom=zelda3.sfc --prompt \"Your prompt here\""); + } + + auto status = rom.LoadFromFile(rom_path); + if (!status.ok()) { + return absl::FailedPreconditionError( + absl::StrFormat("Failed to load ROM from '%s': %s", + rom_path, status.message())); + } } auto sandbox_or = RomSandboxManager::Instance().CreateSandbox( diff --git a/src/cli/service/ai_service.cc b/src/cli/service/ai_service.cc index 1dac78ea..e0750f23 100644 --- a/src/cli/service/ai_service.cc +++ b/src/cli/service/ai_service.cc @@ -5,17 +5,24 @@ namespace cli { absl::StatusOr> MockAIService::GetCommands( const std::string& prompt) { + // NOTE: These commands use positional arguments (not --flags) because + // the command handlers haven't been updated to parse flags yet. + // TODO: Update handlers to use absl::flags parsing + if (prompt == "Make all the soldiers in Hyrule Castle wear red armor.") { + // Simplified command sequence - just export then import + // (In reality, you'd modify the palette file between export and import) return std::vector{ - "palette export --group sprites_aux1 --id 4 --to soldier_palette.col", - "palette set-color --file soldier_palette.col --index 5 --color \"#FF0000\"", - "palette import --group sprites_aux1 --id 4 --from soldier_palette.col"}; - } else if (prompt.find("Place a tree at coordinates") != std::string::npos) { - // Example prompt: "Place a tree at coordinates (10, 20) on the light world map" - // For simplicity, we'll hardcode the tile id for a tree - return std::vector{"overworld set-tile --map 0 --x 10 --y 20 --tile 0x02E"}; + "palette export sprites_aux1 4 soldier_palette.col" + // Would normally modify soldier_palette.col here to change colors + // Then import it back + }; + } else if (prompt == "Place a tree") { + // Example: Place a tree on the light world map + // Command format: map_id x y tile_id (hex) + return std::vector{"overworld set-tile 0 10 20 0x02E"}; } - return absl::UnimplementedError("Prompt not supported by mock AI service."); + return absl::UnimplementedError("Prompt not supported by mock AI service. Try: 'Make all the soldiers in Hyrule Castle wear red armor.' or 'Place a tree'"); } } // namespace cli diff --git a/src/cli/service/proposal_registry.cc b/src/cli/service/proposal_registry.cc index a57d0ba4..ae6cd01a 100644 --- a/src/cli/service/proposal_registry.cc +++ b/src/cli/service/proposal_registry.cc @@ -1,8 +1,10 @@ #include "cli/service/proposal_registry.h" #include +#include #include #include +#include #include "absl/status/status.h" #include "absl/status/statusor.h" @@ -61,6 +63,99 @@ absl::Status ProposalRegistry::EnsureRootExistsLocked() { return absl::OkStatus(); } +absl::Status ProposalRegistry::LoadProposalsFromDiskLocked() { + std::error_code ec; + + // Check if root directory exists + if (!std::filesystem::exists(root_directory_, ec)) { + return absl::OkStatus(); // No proposals to load + } + + // Iterate over all directories in the root + for (const auto& entry : std::filesystem::directory_iterator(root_directory_, ec)) { + if (ec) { + continue; // Skip entries that cause errors + } + + if (!entry.is_directory()) { + continue; // Skip non-directories + } + + std::string proposal_id = entry.path().filename().string(); + + // Skip if already loaded (shouldn't happen, but be defensive) + if (proposals_.find(proposal_id) != proposals_.end()) { + continue; + } + + // Reconstruct metadata from directory contents + // Since we don't have a metadata.json file, we need to infer what we can + std::filesystem::path log_path = entry.path() / "execution.log"; + std::filesystem::path diff_path = entry.path() / "diff.txt"; + + // Check if log file exists to determine if this is a valid proposal + if (!std::filesystem::exists(log_path, ec)) { + continue; // Not a valid proposal directory + } + + // Extract timestamp from proposal ID (format: proposal-20251001T200215-1) + absl::Time created_at = absl::Now(); // Default to now if parsing fails + if (proposal_id.starts_with("proposal-")) { + std::string time_str = proposal_id.substr(9, 15); // Extract YYYYMMDDTHHmmSS + std::string error; + if (absl::ParseTime("%Y%m%dT%H%M%S", time_str, &created_at, &error)) { + // Successfully parsed time + } + } + + // Get file modification time as a fallback + auto ftime = std::filesystem::last_write_time(log_path, ec); + if (!ec) { + auto sctp = std::chrono::time_point_cast( + ftime - std::filesystem::file_time_type::clock::now() + + std::chrono::system_clock::now()); + auto time_t_value = std::chrono::system_clock::to_time_t(sctp); + created_at = absl::FromTimeT(time_t_value); + } + + // Create minimal metadata for this proposal + ProposalMetadata metadata{ + .id = proposal_id, + .sandbox_id = "", // Unknown - not stored in logs + .description = "Loaded from disk", + .prompt = "", // Unknown - not stored in logs + .status = ProposalStatus::kPending, + .created_at = created_at, + .reviewed_at = std::nullopt, + .diff_path = diff_path, + .log_path = log_path, + .screenshots = {}, + .bytes_changed = 0, + .commands_executed = 0, + }; + + // Count diff size if it exists + if (std::filesystem::exists(diff_path, ec) && !ec) { + metadata.bytes_changed = static_cast( + std::filesystem::file_size(diff_path, ec)); + } + + // Scan for screenshots + for (const auto& file : std::filesystem::directory_iterator(entry.path(), ec)) { + if (ec) continue; + if (file.path().extension() == ".png" || + file.path().extension() == ".jpg" || + file.path().extension() == ".jpeg") { + metadata.screenshots.push_back(file.path()); + } + } + + proposals_[proposal_id] = metadata; + } + + return absl::OkStatus(); +} + std::string ProposalRegistry::GenerateProposalIdLocked() { absl::Time now = absl::Now(); std::string time_component = absl::FormatTime("%Y%m%dT%H%M%S", now, @@ -208,7 +303,20 @@ ProposalRegistry::GetProposal(const std::string& proposal_id) const { std::vector ProposalRegistry::ListProposals(std::optional filter_status) const { - std::lock_guard lock(mutex_); + std::unique_lock lock(mutex_); + + // Load proposals from disk if we haven't already + if (proposals_.empty()) { + // Cast away const for loading - this is a lazy initialization pattern + auto* self = const_cast(this); + auto status = self->LoadProposalsFromDiskLocked(); + if (!status.ok()) { + // Log error but continue - return empty list if loading fails + std::cerr << "Warning: Failed to load proposals from disk: " + << status.message() << "\n"; + } + } + std::vector result; for (const auto& [id, metadata] : proposals_) { diff --git a/src/cli/service/proposal_registry.h b/src/cli/service/proposal_registry.h index 837b81db..af2319ef 100644 --- a/src/cli/service/proposal_registry.h +++ b/src/cli/service/proposal_registry.h @@ -108,6 +108,7 @@ class ProposalRegistry { ProposalRegistry(); absl::Status EnsureRootExistsLocked(); + absl::Status LoadProposalsFromDiskLocked(); std::string GenerateProposalIdLocked(); std::filesystem::path ProposalDirectory(absl::string_view proposal_id) const;