Implement recording functionality in agent test commands

- Added RecordingState structure to manage recording session details.
- Implemented SaveRecordingState and LoadRecordingState functions to handle recording state persistence.
- Enhanced HandleTestRecordCommand to support starting and stopping recording sessions with various options.
- Integrated gRPC calls for starting and stopping recordings in GuiAutomationClient.
- Updated ProposalRegistry to include sandbox directory and ROM path in ProposalMetadata.
- Implemented JSON parsing for Tile16Proposal to handle proposal creation from JSON input.
- Updated CMakeLists.txt to ensure proper linking of emulator targets.
This commit is contained in:
scawful
2025-10-03 21:16:08 -04:00
parent 5419633c52
commit 2c45453dd0
9 changed files with 973 additions and 109 deletions

View File

@@ -1,8 +1,10 @@
#include "cli/handlers/agent/commands.h"
#include <algorithm>
#include <filesystem>
#include <fstream>
#include <optional>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
@@ -154,7 +156,8 @@ absl::Status HandleRunCommand(const std::vector<std::string>& arg_vec,
if (!response_or.ok()) {
return response_or.status();
}
std::vector<std::string> commands = response_or.value().commands;
AgentResponse response = std::move(response_or.value());
const std::vector<std::string>& commands = response.commands;
// 3. Generate a structured proposal from the commands
Tile16ProposalGenerator generator;
@@ -186,11 +189,77 @@ absl::Status HandleRunCommand(const std::vector<std::string>& arg_vec,
absl::StrCat("Failed to save sandbox ROM: ", save_status.message()));
}
// 6. Save the proposal metadata for later use (accept/reject)
// For now, we'll just use the proposal generator's save function.
// A better approach would be to integrate with ProposalRegistry.
auto proposal_path =
RomSandboxManager::Instance().RootDirectory() / (proposal.id + ".json");
// 6. Persist the proposal metadata and artifacts.
auto& registry = ProposalRegistry::Instance();
int executed_commands = 0;
for (const auto& command : commands) {
if (command.empty() || command[0] == '#') {
continue;
}
++executed_commands;
}
std::string description = absl::StrFormat(
"Tile16 overworld edits (%d change%s)", proposal.changes.size(),
proposal.changes.size() == 1 ? "" : "s");
ASSIGN_OR_RETURN(
auto metadata,
registry.CreateProposal(sandbox.id, sandbox.rom_path, prompt, description));
proposal.id = metadata.id;
std::ostringstream diff_stream;
diff_stream << "Tile16 Proposal ID: " << metadata.id << "\n";
diff_stream << "Sandbox ID: " << sandbox.id << "\n";
diff_stream << "Sandbox ROM: " << sandbox.rom_path << "\n\n";
diff_stream << "Changes (" << proposal.changes.size() << "):\n";
for (const auto& change : proposal.changes) {
diff_stream << " - " << change.ToString() << "\n";
}
RETURN_IF_ERROR(registry.RecordDiff(metadata.id, diff_stream.str()));
RETURN_IF_ERROR(registry.AppendLog(
metadata.id, absl::StrCat("Prompt: ", prompt)));
if (!response.text_response.empty()) {
RETURN_IF_ERROR(registry.AppendLog(
metadata.id, absl::StrCat("AI Response: ", response.text_response)));
}
if (!response.reasoning.empty()) {
RETURN_IF_ERROR(registry.AppendLog(
metadata.id, absl::StrCat("Reasoning: ", response.reasoning)));
}
if (!response.tool_calls.empty()) {
RETURN_IF_ERROR(registry.AppendLog(
metadata.id,
absl::StrCat("Tool Calls: ", response.tool_calls.size())));
}
for (const auto& command : commands) {
if (command.empty() || command[0] == '#') {
continue;
}
RETURN_IF_ERROR(registry.AppendLog(
metadata.id, absl::StrCat("Command: ", command)));
}
RETURN_IF_ERROR(registry.AppendLog(
metadata.id,
absl::StrCat("Sandbox ROM saved to ", sandbox.rom_path.string())));
RETURN_IF_ERROR(
registry.UpdateCommandStats(metadata.id, executed_commands));
RETURN_IF_ERROR(registry.AppendLog(
metadata.id,
absl::StrCat("Commands executed: ", executed_commands)));
std::filesystem::path proposal_dir = metadata.log_path.parent_path();
std::filesystem::path proposal_path = proposal_dir / "proposal.json";
auto save_proposal_status =
generator.SaveProposal(proposal, proposal_path.string());
if (!save_proposal_status.ok()) {
@@ -198,16 +267,22 @@ absl::Status HandleRunCommand(const std::vector<std::string>& arg_vec,
save_proposal_status.message()));
}
RETURN_IF_ERROR(registry.AppendLog(
metadata.id,
absl::StrCat("Saved proposal JSON to ", proposal_path.string())));
std::cout
<< "✅ Agent successfully planned and executed changes in a sandbox."
<< std::endl;
std::cout << " Proposal ID: " << proposal.id << std::endl;
std::cout << " Proposal ID: " << metadata.id << std::endl;
std::cout << " Sandbox ROM: " << sandbox.rom_path << std::endl;
std::cout << " Proposal file: " << proposal_path << std::endl;
std::cout << " Proposal dir: " << proposal_dir << std::endl;
std::cout << " Diff file: " << metadata.diff_path << std::endl;
std::cout << " Log file: " << metadata.log_path << std::endl;
std::cout << "\nTo review the changes, run:\n";
std::cout << " z3ed agent diff --proposal-id " << proposal.id << std::endl;
std::cout << " z3ed agent diff --proposal-id " << metadata.id << std::endl;
std::cout << "\nTo accept the changes, run:\n";
std::cout << " z3ed agent accept --proposal-id " << proposal.id << std::endl;
std::cout << " z3ed agent accept --proposal-id " << metadata.id << std::endl;
return absl::OkStatus();
}
@@ -287,6 +362,14 @@ absl::Status HandleDiffCommand(Rom& rom, const std::vector<std::string>& args) {
std::cout << "Commands Executed: " << proposal.commands_executed << "\n";
std::cout << "Bytes Changed: " << proposal.bytes_changed << "\n\n";
if (!proposal.sandbox_rom_path.empty()) {
std::cout << "Sandbox ROM: " << proposal.sandbox_rom_path << "\n";
}
std::cout << "Proposal directory: "
<< proposal.log_path.parent_path() << "\n";
std::cout << "Diff file: " << proposal.diff_path << "\n";
std::cout << "Log file: " << proposal.log_path << "\n\n";
if (std::filesystem::exists(proposal.diff_path)) {
std::cout << "--- Diff Content ---\n";
std::ifstream diff_file(proposal.diff_path);
@@ -490,45 +573,90 @@ absl::Status HandleChatCommand(Rom& rom) {
absl::Status HandleAcceptCommand(const std::vector<std::string>& arg_vec,
Rom& rom) {
if (arg_vec.empty() || arg_vec[0] != "--proposal-id") {
std::optional<std::string> proposal_id;
for (size_t i = 0; i < arg_vec.size(); ++i) {
const std::string& token = arg_vec[i];
if (absl::StartsWith(token, "--proposal-id=")) {
proposal_id = token.substr(14);
break;
}
if (token == "--proposal-id" && i + 1 < arg_vec.size()) {
proposal_id = arg_vec[i + 1];
break;
}
}
if (!proposal_id.has_value() || proposal_id->empty()) {
return absl::InvalidArgumentError(
"Usage: agent accept --proposal-id <proposal_id>");
}
std::string proposal_id = arg_vec[1];
// 1. Load the proposal from disk.
Tile16ProposalGenerator generator;
auto proposal_path =
RomSandboxManager::Instance().RootDirectory() / (proposal_id + ".json");
auto proposal_or = generator.LoadProposal(proposal_path.string());
if (!proposal_or.ok()) {
return absl::InternalError(
absl::StrCat("Failed to load proposal file '", proposal_path.string(),
"': ", proposal_or.status().message()));
auto& registry = ProposalRegistry::Instance();
ASSIGN_OR_RETURN(auto metadata, registry.GetProposal(*proposal_id));
if (metadata.status == ProposalRegistry::ProposalStatus::kAccepted) {
std::cout << "Proposal '" << *proposal_id << "' is already accepted."
<< std::endl;
return absl::OkStatus();
}
auto proposal = proposal_or.value();
// 2. Ensure the main ROM is loaded.
RETURN_IF_ERROR(EnsureRomLoaded(rom, "agent accept --proposal-id <id>"));
if (metadata.sandbox_rom_path.empty()) {
return absl::FailedPreconditionError(absl::StrCat(
"Proposal '", *proposal_id,
"' is missing sandbox ROM metadata. Cannot accept."));
}
// 3. Apply the proposal to the main ROM.
auto apply_status = generator.ApplyProposal(proposal, &rom);
if (!apply_status.ok()) {
if (!std::filesystem::exists(metadata.sandbox_rom_path)) {
return absl::NotFoundError(absl::StrCat(
"Sandbox ROM not found at ", metadata.sandbox_rom_path.string()));
}
RETURN_IF_ERROR(
EnsureRomLoaded(rom, "agent accept --proposal-id <proposal_id>"));
Rom sandbox_rom;
auto sandbox_load_status = sandbox_rom.LoadFromFile(
metadata.sandbox_rom_path.string(), RomLoadOptions::CliDefaults());
if (!sandbox_load_status.ok()) {
return absl::InternalError(absl::StrCat(
"Failed to apply proposal to main ROM: ", apply_status.message()));
"Failed to load sandbox ROM: ", sandbox_load_status.message()));
}
if (rom.size() != sandbox_rom.size()) {
rom.Expand(static_cast<int>(sandbox_rom.size()));
}
auto copy_status = rom.WriteVector(0, sandbox_rom.vector());
if (!copy_status.ok()) {
return absl::InternalError(absl::StrCat(
"Failed to copy sandbox ROM data: ", copy_status.message()));
}
// 4. Save the changes to the main ROM file.
auto save_status = rom.SaveToFile({.save_new = false});
if (!save_status.ok()) {
return absl::InternalError(absl::StrCat(
"Failed to save changes to main ROM: ", save_status.message()));
}
std::cout << "✅ Proposal '" << proposal_id << "' accepted and applied to '"
<< rom.filename() << "'." << std::endl;
RETURN_IF_ERROR(registry.UpdateStatus(
*proposal_id, ProposalRegistry::ProposalStatus::kAccepted));
RETURN_IF_ERROR(registry.AppendLog(
*proposal_id,
absl::StrCat("Proposal accepted and applied to ", rom.filename())));
// TODO: Clean up sandbox and proposal files.
if (!metadata.sandbox_id.empty()) {
auto remove_status =
RomSandboxManager::Instance().RemoveSandbox(metadata.sandbox_id);
if (!remove_status.ok()) {
std::cerr << "Warning: Failed to remove sandbox '" << metadata.sandbox_id
<< "': " << remove_status.message() << "\n";
}
}
std::cout << "✅ Proposal '" << *proposal_id << "' accepted and applied to '"
<< rom.filename() << "'." << std::endl;
std::cout << " Source sandbox ROM: " << metadata.sandbox_rom_path
<< std::endl;
return absl::OkStatus();
}

View File

@@ -1,12 +1,21 @@
#include "cli/handlers/agent/commands.h"
#include <filesystem>
#include <fstream>
#include <iostream>
#include <optional>
#include <string>
#include <vector>
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "absl/strings/numbers.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_format.h"
#include "absl/time/time.h"
#include "cli/handlers/agent/common.h"
#include "nlohmann/json.hpp"
#include "util/macro.h"
#ifdef YAZE_WITH_GRPC
#include "cli/service/gui/gui_automation_client.h"
@@ -19,6 +28,100 @@ namespace agent {
#ifdef YAZE_WITH_GRPC
namespace {
// Details of one GUI recording session: which harness endpoint it runs on,
// the id of the recording, and where its script is to be written. Every field
// has a default, so a default-constructed value targets localhost:50052 with
// an empty id and an empty output path.
struct RecordingState {
  // Identifier of the recording session; empty until one is assigned.
  std::string recording_id{};
  // Host name of the automation server.
  std::string host{"localhost"};
  // TCP port of the automation server.
  int port{50052};
  // Path of the script file the recording is written to.
  std::string output_path{};
};
// Returns the path of the JSON file that holds the active recording state:
// <base>/yaze/agent/recording_state.json.
//
// <base> is the system temporary directory. If that cannot be determined the
// current working directory is used instead, and if even that lookup fails
// the result is a path relative to wherever the process happens to run.
// This function never throws on filesystem errors and does not create any
// directories.
std::filesystem::path RecordingStateFilePath() {
  std::error_code temp_ec;
  std::filesystem::path base = std::filesystem::temp_directory_path(temp_ec);
  if (temp_ec) {
    // Use the error_code overload so a failing lookup (for example a deleted
    // working directory) cannot escape as std::filesystem::filesystem_error.
    // On failure it yields an empty path, which makes the result relative.
    std::error_code cwd_ec;
    base = std::filesystem::current_path(cwd_ec);
    if (cwd_ec) {
      base.clear();
    }
  }
  return base / "yaze" / "agent" / "recording_state.json";
}
// Writes `state` as pretty-printed JSON (2-space indent) to the recording
// state file, replacing any previous contents.
//
// The parent directory is created on a best-effort basis; a failure there is
// not fatal by itself, but its reason is included in the error if the file
// then cannot be opened.
//
// Returns OkStatus on success, or InternalError if the file cannot be opened
// or the data cannot be written out.
absl::Status SaveRecordingState(const RecordingState& state) {
  const std::filesystem::path path = RecordingStateFilePath();

  std::error_code dir_ec;
  std::filesystem::create_directories(path.parent_path(), dir_ec);

  nlohmann::json json;
  json["recording_id"] = state.recording_id;
  json["host"] = state.host;
  json["port"] = state.port;
  json["output_path"] = state.output_path;

  std::ofstream out(path, std::ios::out | std::ios::trunc);
  if (!out.is_open()) {
    if (dir_ec) {
      // Surface the underlying cause instead of silently dropping it.
      return absl::InternalError(absl::StrCat(
          "Failed to write recording state to ", path.string(),
          " (could not create directory: ", dir_ec.message(), ")"));
    }
    return absl::InternalError(absl::StrCat(
        "Failed to write recording state to ", path.string()));
  }

  out << json.dump(2);
  // The payload is small enough to sit entirely in the stream buffer, so the
  // stream state only reflects real write errors after an explicit flush.
  out.flush();
  if (!out.good()) {
    return absl::InternalError(
        absl::StrCat("Failed to flush recording state to ", path.string()));
  }
  return absl::OkStatus();
}
// Reads the recording state file and returns its contents.
//
// Fields absent from the JSON document fall back to the RecordingState
// defaults ("localhost", 50052, empty strings).
//
// Returns:
//   - NotFoundError if the state file cannot be opened.
//   - InternalError if the file is not valid JSON, is not a JSON object, or
//     holds a field of the wrong type.
//   - InvalidArgumentError if the document has no (or an empty) recording_id.
absl::StatusOr<RecordingState> LoadRecordingState() {
  const std::filesystem::path path = RecordingStateFilePath();
  std::ifstream in(path);
  if (!in.is_open()) {
    return absl::NotFoundError("No active recording session found. Run 'z3ed agent test record start' first.");
  }

  RecordingState state;
  try {
    nlohmann::json json;
    in >> json;
    // value() throws type_error when the document is not an object or when a
    // present key holds an incompatible type, so the field reads must sit
    // inside the try block together with the parse.
    state.recording_id = json.value("recording_id", "");
    state.host = json.value("host", "localhost");
    state.port = json.value("port", 50052);
    state.output_path = json.value("output_path", "");
  } catch (const nlohmann::json::exception& error) {
    // Base class of parse_error and type_error: a corrupt or hand-edited
    // state file becomes a status instead of an uncaught exception.
    return absl::InternalError(
        absl::StrCat("Failed to parse recording state at ", path.string(),
                     ": ", error.what()));
  }

  if (state.recording_id.empty()) {
    return absl::InvalidArgumentError(
        absl::StrCat("Recording state at ", path.string(),
                     " is missing a recording_id"));
  }
  return state;
}
// Deletes the recording state file. A file that is already absent counts as
// success; any other removal failure is reported as InternalError carrying
// the system error message.
absl::Status ClearRecordingState() {
  std::error_code remove_error;
  std::filesystem::remove(RecordingStateFilePath(), remove_error);

  const bool nothing_went_wrong =
      !remove_error ||
      remove_error == std::errc::no_such_file_or_directory;
  if (nothing_went_wrong) {
    return absl::OkStatus();
  }
  return absl::InternalError(absl::StrCat("Failed to clear recording state: ",
                                          remove_error.message()));
}
// Builds the output path used when the caller supplies none:
// "tests/gui/recording-<timestamp>.json", where <timestamp> is the current
// local time formatted as YYYYMMDDTHHMMSS. The path is relative.
std::string DefaultRecordingOutputPath() {
  const std::string timestamp = absl::FormatTime(
      "%Y%m%dT%H%M%S", absl::Now(), absl::LocalTimeZone());
  return absl::StrCat("tests/gui/recording-", timestamp, ".json");
}
} // namespace
// Forward declarations for subcommand handlers
absl::Status HandleTestRunCommand(const std::vector<std::string>& args);
absl::Status HandleTestReplayCommand(const std::vector<std::string>& args);
@@ -307,7 +410,10 @@ absl::Status HandleTestResultsCommand(const std::vector<std::string>& args) {
absl::Status HandleTestRecordCommand(const std::vector<std::string>& args) {
if (args.empty()) {
return absl::InvalidArgumentError(
"Usage: agent test record <start|stop> [--output <file>]");
"Usage: agent test record <start|stop> [options]\n"
" start [--output <file>] [--description <text>] [--session <id>]\n"
" [--host <host>] [--port <port>]\n"
" stop [--validate] [--discard] [--host <host>] [--port <port>]");
}
std::string action = args[0];
@@ -315,10 +421,167 @@ absl::Status HandleTestRecordCommand(const std::vector<std::string>& args) {
return absl::InvalidArgumentError("Record action must be 'start' or 'stop'");
}
// TODO: Implement recording functionality
return absl::UnimplementedError(
"Test recording is not yet implemented.\n"
"This feature will allow capturing GUI interactions for replay.");
if (action == "start") {
std::string host = "localhost";
int port = 50052;
std::string description;
std::string session_name;
std::string output_path;
for (size_t i = 1; i < args.size(); ++i) {
const std::string& token = args[i];
if (token == "--output" && i + 1 < args.size()) {
output_path = args[++i];
} else if (token == "--description" && i + 1 < args.size()) {
description = args[++i];
} else if (token == "--session" && i + 1 < args.size()) {
session_name = args[++i];
} else if (token == "--host" && i + 1 < args.size()) {
host = args[++i];
} else if (token == "--port" && i + 1 < args.size()) {
std::string port_value = args[++i];
int parsed_port = 0;
if (!absl::SimpleAtoi(port_value, &parsed_port)) {
return absl::InvalidArgumentError(
absl::StrCat("Invalid --port value: ", port_value));
}
port = parsed_port;
}
}
if (output_path.empty()) {
output_path = DefaultRecordingOutputPath();
}
std::filesystem::path absolute_output =
std::filesystem::absolute(output_path);
std::error_code ec;
std::filesystem::create_directories(absolute_output.parent_path(), ec);
GuiAutomationClient client(absl::StrCat(host, ":", port));
RETURN_IF_ERROR(client.Connect());
if (session_name.empty()) {
session_name = std::filesystem::path(output_path).stem().string();
}
ASSIGN_OR_RETURN(auto start_result,
client.StartRecording(absolute_output.string(),
session_name, description));
if (!start_result.success) {
return absl::InternalError(
absl::StrCat("Harness rejected start-recording request: ",
start_result.message));
}
RecordingState state;
state.recording_id = start_result.recording_id;
state.host = host;
state.port = port;
state.output_path = absolute_output.string();
RETURN_IF_ERROR(SaveRecordingState(state));
std::cout << "\n=== Recording Session Started ===\n";
std::cout << "Recording ID: " << start_result.recording_id << "\n";
std::cout << "Server: " << host << ":" << port << "\n";
std::cout << "Output: " << absolute_output << "\n";
if (!description.empty()) {
std::cout << "Description: " << description << "\n";
}
if (start_result.started_at.has_value()) {
std::cout << "Started: "
<< absl::FormatTime("%Y-%m-%d %H:%M:%S",
*start_result.started_at,
absl::LocalTimeZone())
<< "\n";
}
std::cout << "\nPress Ctrl+C to abort the recording session.\n";
return absl::OkStatus();
}
// Stop
bool validate = false;
bool discard = false;
std::optional<std::string> host_override;
std::optional<int> port_override;
for (size_t i = 1; i < args.size(); ++i) {
const std::string& token = args[i];
if (token == "--validate") {
validate = true;
} else if (token == "--discard") {
discard = true;
} else if (token == "--host" && i + 1 < args.size()) {
host_override = args[++i];
} else if (token == "--port" && i + 1 < args.size()) {
std::string port_value = args[++i];
int parsed_port = 0;
if (!absl::SimpleAtoi(port_value, &parsed_port)) {
return absl::InvalidArgumentError(
absl::StrCat("Invalid --port value: ", port_value));
}
port_override = parsed_port;
}
}
if (discard && validate) {
return absl::InvalidArgumentError(
"Cannot use --validate and --discard together");
}
ASSIGN_OR_RETURN(auto state, LoadRecordingState());
if (host_override.has_value()) {
state.host = *host_override;
}
if (port_override.has_value()) {
state.port = *port_override;
}
GuiAutomationClient client(absl::StrCat(state.host, ":", state.port));
RETURN_IF_ERROR(client.Connect());
ASSIGN_OR_RETURN(auto stop_result,
client.StopRecording(state.recording_id, discard));
if (!stop_result.success) {
return absl::InternalError(
absl::StrCat("Stop recording failed: ", stop_result.message));
}
RETURN_IF_ERROR(ClearRecordingState());
std::cout << "\n=== Recording Session Completed ===\n";
std::cout << "Recording ID: " << state.recording_id << "\n";
std::cout << "Server: " << state.host << ":" << state.port << "\n";
std::cout << "Steps captured: " << stop_result.step_count << "\n";
std::cout << "Duration: " << stop_result.duration.count() << " ms\n";
if (!stop_result.message.empty()) {
std::cout << "Message: " << stop_result.message << "\n";
}
if (!discard && !stop_result.output_path.empty()) {
std::cout << "Output saved to: " << stop_result.output_path << "\n";
}
if (discard) {
std::cout << "Recording discarded; no script file was produced." << std::endl;
return absl::OkStatus();
}
if (!validate || stop_result.output_path.empty()) {
std::cout << std::endl;
return absl::OkStatus();
}
std::cout << "\nReplaying recorded script to validate...\n";
ASSIGN_OR_RETURN(auto replay_result,
client.ReplayTest(stop_result.output_path, false, {}));
if (!replay_result.success) {
return absl::InternalError(
absl::StrCat("Replay failed: ", replay_result.message));
}
std::cout << "Replay succeeded. Steps executed: "
<< replay_result.steps_executed << "\n";
return absl::OkStatus();
}
#endif // YAZE_WITH_GRPC