feat: Implement Proposal Saving and GUI Automation Enhancements

- Added functionality to save AI agent plans to disk using ProposalRegistry, including directory creation and error handling for failed saves.
- Enhanced AIGUIController with new gRPC GUI automation actions, including click, type, wait, and verify actions, improving interaction capabilities.
- Added parsing for the `overworld set-area` and `overworld replace-tile` commands in Tile16ProposalGenerator, so a single command can fill a rectangular area or replace every occurrence of one tile within optional bounds.
- Added integration and unit tests for AIGUIController and Tile16ProposalGenerator to ensure robust functionality and error handling.
This commit is contained in:
scawful
2025-10-05 13:07:03 -04:00
parent c3df55d787
commit 3b7a961884
8 changed files with 1143 additions and 30 deletions

View File

@@ -261,10 +261,23 @@ absl::Status HandlePlanCommand(const std::vector<std::string>& arg_vec) {
}
auto proposal = proposal_or.value();
// TODO: Save the proposal to disk using ProposalRegistry
// For now, just print it.
auto& registry = ProposalRegistry::Instance();
auto plans_dir = registry.RootDirectory() / "plans";
std::error_code ec;
std::filesystem::create_directories(plans_dir, ec);
if (ec) {
return absl::InternalError(absl::StrCat("Failed to create plans directory: ", ec.message()));
}
auto plan_path = plans_dir / (proposal.id + ".json");
auto save_status = generator.SaveProposal(proposal, plan_path.string());
if (!save_status.ok()) {
return save_status;
}
std::cout << "AI Agent Plan (Proposal ID: " << proposal.id << "):\n";
std::cout << proposal.ToJson() << std::endl;
std::cout << "\n✅ Plan saved to: " << plan_path.string() << std::endl;
return absl::OkStatus();
}

View File

@@ -3,6 +3,7 @@
#include <chrono>
#include <thread>
#include "absl/strings/str_cat.h"
#include "absl/strings/str_format.h"
#include "absl/time/clock.h"
#include "absl/time/time.h"
@@ -18,7 +19,7 @@ namespace cli {
namespace ai {
AIGUIController::AIGUIController(GeminiAIService* gemini_service,
gui::GuiAutomationClient* gui_client)
GuiAutomationClient* gui_client)
: gemini_service_(gemini_service),
gui_client_(gui_client),
vision_refiner_(std::make_unique<VisionActionRefiner>(gemini_service)) {
@@ -244,11 +245,174 @@ absl::Status AIGUIController::ExecuteGRPCAction(const AIAction& action) {
return test_script_result.status();
}
// TODO: Implement gRPC GUI automation when GuiAutomationClient is ready
// For now, just log the generated test script
#ifdef YAZE_WITH_GRPC
if (!gui_client_) {
return absl::FailedPreconditionError("GUI automation client not initialized");
}
// Execute the action based on its type
if (action.type == AIActionType::kClickButton) {
// Extract target from parameters
std::string target = "button:Unknown";
if (action.parameters.count("target") > 0) {
target = action.parameters.at("target");
}
// Determine click type
ClickType click_type = ClickType::kLeft;
if (action.parameters.count("click_type") > 0) {
const std::string& type = action.parameters.at("click_type");
if (type == "right") {
click_type = ClickType::kRight;
} else if (type == "middle") {
click_type = ClickType::kMiddle;
} else if (type == "double") {
click_type = ClickType::kDouble;
}
}
auto result = gui_client_->Click(target, click_type);
if (!result.ok()) {
return result.status();
}
if (!result->success) {
return absl::InternalError(
absl::StrCat("Click action failed: ", result->message));
}
return absl::OkStatus();
}
else if (action.type == AIActionType::kSelectTile) {
// Extract target and text from parameters (treating select as a type-like action)
std::string target = "input:Unknown";
std::string text = "";
bool clear_first = true;
if (action.parameters.count("target") > 0) {
target = action.parameters.at("target");
}
if (action.parameters.count("text") > 0) {
text = action.parameters.at("text");
}
if (action.parameters.count("clear_first") > 0) {
clear_first = (action.parameters.at("clear_first") == "true");
}
auto result = gui_client_->Type(target, text, clear_first);
if (!result.ok()) {
return result.status();
}
if (!result->success) {
return absl::InternalError(
absl::StrCat("Type action failed: ", result->message));
}
return absl::OkStatus();
}
else if (action.type == AIActionType::kWait) {
// Extract condition and timeout from parameters
std::string condition = "visible";
int timeout_ms = 5000;
int poll_interval_ms = 100;
if (action.parameters.count("condition") > 0) {
condition = action.parameters.at("condition");
}
if (action.parameters.count("timeout_ms") > 0) {
timeout_ms = std::stoi(action.parameters.at("timeout_ms"));
}
if (action.parameters.count("poll_interval_ms") > 0) {
poll_interval_ms = std::stoi(action.parameters.at("poll_interval_ms"));
}
auto result = gui_client_->Wait(condition, timeout_ms, poll_interval_ms);
if (!result.ok()) {
return result.status();
}
if (!result->success) {
return absl::InternalError(
absl::StrCat("Wait action failed: ", result->message));
}
return absl::OkStatus();
}
else if (action.type == AIActionType::kVerifyTile) {
// Extract condition from parameters (treating verify as assert)
std::string condition = "";
if (action.parameters.count("condition") > 0) {
condition = action.parameters.at("condition");
}
auto result = gui_client_->Assert(condition);
if (!result.ok()) {
return result.status();
}
if (!result->success) {
return absl::InternalError(
absl::StrCat("Assert action failed: ", result->message,
" (expected: ", result->expected_value,
", actual: ", result->actual_value, ")"));
}
return absl::OkStatus();
}
else if (action.type == AIActionType::kPlaceTile) {
// This is a special action for setting overworld tiles
// Extract map_id, x, y, tile from parameters
if (action.parameters.count("map_id") == 0 ||
action.parameters.count("x") == 0 ||
action.parameters.count("y") == 0 ||
action.parameters.count("tile") == 0) {
return absl::InvalidArgumentError(
"set_tile action requires map_id, x, y, and tile parameters");
}
int map_id = std::stoi(action.parameters.at("map_id"));
int x = std::stoi(action.parameters.at("x"));
int y = std::stoi(action.parameters.at("y"));
std::string tile_str = action.parameters.at("tile");
// Navigate to overworld editor
auto click_result = gui_client_->Click("menu:Overworld", ClickType::kLeft);
if (!click_result.ok() || !click_result->success) {
return absl::InternalError("Failed to open Overworld editor");
}
// Wait for overworld editor to be visible
auto wait_result = gui_client_->Wait("window:Overworld Editor", 2000, 100);
if (!wait_result.ok() || !wait_result->success) {
return absl::InternalError("Overworld editor did not appear");
}
// Set the map ID
auto type_result = gui_client_->Type("input:Map ID", std::to_string(map_id), true);
if (!type_result.ok() || !type_result->success) {
return absl::InternalError("Failed to set map ID");
}
// Click on the tile position (approximate based on editor layout)
// This is a simplified implementation
std::string target = absl::StrCat("canvas:overworld@", x * 16, ",", y * 16);
click_result = gui_client_->Click(target, ClickType::kLeft);
if (!click_result.ok() || !click_result->success) {
return absl::InternalError("Failed to click tile position");
}
return absl::OkStatus();
}
else {
return absl::UnimplementedError(
absl::StrCat("Action type not implemented: ",
static_cast<int>(action.type)));
}
#else
return absl::UnimplementedError(
"gRPC GUI automation not yet fully implemented. "
"GuiAutomationClient integration pending.");
"gRPC GUI automation requires building with -DYAZE_WITH_GRPC=ON");
#endif
}

View File

@@ -17,9 +17,7 @@ namespace cli {
// Forward declares
class GeminiAIService;
namespace gui {
class GuiAutomationClient;
}
class GuiAutomationClient; // In cli namespace, not cli::gui
namespace ai {
@@ -87,7 +85,7 @@ class AIGUIController {
* @param gui_client gRPC client for GUI automation
*/
AIGUIController(GeminiAIService* gemini_service,
gui::GuiAutomationClient* gui_client);
GuiAutomationClient* gui_client);
~AIGUIController() = default;
@@ -141,7 +139,7 @@ class AIGUIController {
private:
GeminiAIService* gemini_service_; // Not owned
gui::GuiAutomationClient* gui_client_; // Not owned
GuiAutomationClient* gui_client_; // Not owned
std::unique_ptr<VisionActionRefiner> vision_refiner_;
gui::GuiActionGenerator action_generator_;
ControlLoopConfig config_;

View File

@@ -299,11 +299,12 @@ absl::StatusOr<AgentResponse> GeminiAIService::GenerateResponse(
return absl::UnimplementedError(
"Gemini AI service requires JSON support. Build with -DYAZE_WITH_JSON=ON");
#else
// TODO: Implement history-aware prompting.
if (history.empty()) {
return absl::InvalidArgumentError("History cannot be empty.");
}
// Build a structured conversation history for better context
// Gemini supports multi-turn conversations via the contents array
std::string prompt = prompt_builder_.BuildPromptFromHistory(history);
// Skip availability check - causes segfault with current SSL setup
@@ -319,6 +320,40 @@ absl::StatusOr<AgentResponse> GeminiAIService::GenerateResponse(
try {
if (config_.verbose) {
std::cerr << "[DEBUG] Using curl for HTTPS request" << std::endl;
std::cerr << "[DEBUG] Processing " << history.size() << " messages in history" << std::endl;
}
// Build conversation history for multi-turn context
// Gemini supports alternating user/model messages for better context
nlohmann::json contents = nlohmann::json::array();
// Add conversation history (up to last 10 messages for context window)
int start_idx = std::max(0, static_cast<int>(history.size()) - 10);
for (size_t i = start_idx; i < history.size(); ++i) {
const auto& msg = history[i];
std::string role = (msg.sender == agent::ChatMessage::Sender::kUser) ? "user" : "model";
nlohmann::json message = {
{"role", role},
{"parts", {{
{"text", msg.message}
}}}
};
contents.push_back(message);
}
// If the last message is from the model, we need to ensure the conversation
// ends with a user message for Gemini
if (!history.empty() &&
history.back().sender == agent::ChatMessage::Sender::kAgent) {
// Add a continuation prompt
nlohmann::json user_continuation = {
{"role", "user"},
{"parts", {{
{"text", "Please continue or clarify your response."}
}}}
};
contents.push_back(user_continuation);
}
// Build request with proper Gemini API v1beta format
@@ -328,17 +363,17 @@ absl::StatusOr<AgentResponse> GeminiAIService::GenerateResponse(
{"text", config_.system_instruction}
}}
}},
{"contents", {{
{"parts", {{
{"text", prompt}
}}}
}}},
{"contents", contents},
{"generationConfig", {
{"temperature", config_.temperature},
{"maxOutputTokens", config_.max_output_tokens}
}}
};
if (config_.verbose) {
std::cerr << "[DEBUG] Sending " << contents.size() << " conversation turns to Gemini" << std::endl;
}
// Only add responseMimeType if NOT using function calling
// (Gemini doesn't support both at the same time)
if (!function_calling_enabled_) {

View File

@@ -302,6 +302,189 @@ absl::StatusOr<Tile16Change> Tile16ProposalGenerator::ParseSetTileCommand(
return change;
}
/**
 * @brief Parse an "overworld set-area" command into one Tile16Change per tile.
 *
 * Expected format:
 *   "overworld set-area --map 0 --x 10 --y 20 --width 5 --height 3 --tile 0x02E"
 *
 * --map, --x, --y, --width and --height are parsed as decimal integers and
 * --tile as hexadecimal. Width and height default to 1 when omitted.
 * Unrecognized flags are ignored, as is a trailing flag without a value.
 *
 * @param command Full command string; tokens are separated by spaces.
 * @param rom ROM used to look up the tile currently at each position. When
 *            null or not loaded, old_tile is recorded as 0x0000.
 * @return One change per tile of the width x height rectangle anchored at
 *         (x, y), in row-major order. InvalidArgumentError for a malformed
 *         command, an unparsable or out-of-range number, or a non-positive
 *         size; otherwise any error returned while loading the overworld.
 */
absl::StatusOr<std::vector<Tile16Change>> Tile16ProposalGenerator::ParseSetAreaCommand(
    const std::string& command,
    Rom* rom) {
  // SkipEmpty keeps repeated spaces from producing empty tokens that would
  // shift the flag/value pairing below.
  const std::vector<std::string> parts =
      absl::StrSplit(command, ' ', absl::SkipEmpty());

  if (parts.size() < 12) {
    return absl::InvalidArgumentError(
        absl::StrCat("Invalid set-area command format: ", command));
  }

  if (parts[0] != "overworld" || parts[1] != "set-area") {
    return absl::InvalidArgumentError(
        absl::StrCat("Not a set-area command: ", command));
  }

  int map_id = 0;
  int x = 0;
  int y = 0;
  int width = 1;
  int height = 1;
  uint16_t new_tile = 0;

  // std::stoi throws on non-numeric or overflowing input; convert that into a
  // status so a malformed AI-generated command cannot terminate the caller.
  try {
    for (size_t i = 2; i + 1 < parts.size(); i += 2) {
      const std::string& flag = parts[i];
      const std::string& value = parts[i + 1];

      if (flag == "--map") {
        map_id = std::stoi(value);
      } else if (flag == "--x") {
        x = std::stoi(value);
      } else if (flag == "--y") {
        y = std::stoi(value);
      } else if (flag == "--width") {
        width = std::stoi(value);
      } else if (flag == "--height") {
        height = std::stoi(value);
      } else if (flag == "--tile") {
        const int tile = std::stoi(value, nullptr, 16);
        // Reject instead of silently truncating to 16 bits.
        if (tile < 0 || tile > 0xFFFF) {
          return absl::InvalidArgumentError(absl::StrCat(
              "Tile ID out of 16-bit range in set-area command: ", command));
        }
        new_tile = static_cast<uint16_t>(tile);
      }
    }
  } catch (const std::exception&) {
    return absl::InvalidArgumentError(
        absl::StrCat("Invalid numeric value in set-area command: ", command));
  }

  if (width <= 0 || height <= 0) {
    return absl::InvalidArgumentError(absl::StrCat(
        "set-area width and height must be positive: ", command));
  }

  // Builds the row-major change list; old_tile_at supplies the tile that is
  // currently stored at a given (x, y).
  const auto build_changes = [&](auto&& old_tile_at) {
    std::vector<Tile16Change> changes;
    for (int dy = 0; dy < height; ++dy) {
      for (int dx = 0; dx < width; ++dx) {
        Tile16Change change;
        change.map_id = map_id;
        change.x = x + dx;
        change.y = y + dy;
        change.new_tile = new_tile;
        change.old_tile = old_tile_at(change.x, change.y);
        changes.push_back(change);
      }
    }
    return changes;
  };

  // Without a loaded ROM the previous tile values are unknown.
  if (!rom || !rom->is_loaded()) {
    return build_changes([](int, int) -> uint16_t { return 0x0000; });
  }

  zelda3::Overworld overworld(rom);
  auto status = overworld.Load(rom);
  if (!status.ok()) {
    return status;
  }

  // Set the correct world based on map_id
  if (map_id < 0x40) {
    overworld.set_current_world(0);  // Light World
  } else if (map_id < 0x80) {
    overworld.set_current_world(1);  // Dark World
  } else {
    overworld.set_current_world(2);  // Special World
  }

  return build_changes([&overworld](int tile_x, int tile_y) {
    return overworld.GetTile(tile_x, tile_y);
  });
}
/**
 * @brief Parse an "overworld replace-tile" command into one Tile16Change per
 *        matching tile.
 *
 * Expected format:
 *   "overworld replace-tile --map 0 --old-tile 0x02E --new-tile 0x030"
 * Optional bounds (inclusive, default 0..31 on both axes):
 *   --x-min 0 --y-min 0 --x-max 31 --y-max 31
 *
 * --map and the bounds are parsed as decimal integers; --old-tile and
 * --new-tile as hexadecimal. Unrecognized flags are ignored, as is a trailing
 * flag without a value.
 *
 * @param command Full command string; tokens are separated by spaces.
 * @param rom Loaded ROM whose overworld is scanned for old-tile.
 * @return One change per tile inside the bounds that currently equals
 *         old-tile, scanned row by row. InvalidArgumentError for a malformed
 *         command, an unparsable or out-of-range number, or invalid bounds;
 *         FailedPreconditionError when the ROM is not loaded; NotFoundError
 *         when nothing matches; otherwise any error returned while loading
 *         the overworld.
 */
absl::StatusOr<std::vector<Tile16Change>> Tile16ProposalGenerator::ParseReplaceTileCommand(
    const std::string& command,
    Rom* rom) {
  // SkipEmpty keeps repeated spaces from producing empty tokens that would
  // shift the flag/value pairing below.
  const std::vector<std::string> parts =
      absl::StrSplit(command, ' ', absl::SkipEmpty());

  if (parts.size() < 8) {
    return absl::InvalidArgumentError(
        absl::StrCat("Invalid replace-tile command format: ", command));
  }

  if (parts[0] != "overworld" || parts[1] != "replace-tile") {
    return absl::InvalidArgumentError(
        absl::StrCat("Not a replace-tile command: ", command));
  }

  int map_id = 0;
  int old_tile_value = 0;
  int new_tile_value = 0;
  int x_min = 0;
  int y_min = 0;
  int x_max = 31;
  int y_max = 31;

  // std::stoi throws on non-numeric or overflowing input; convert that into a
  // status so a malformed AI-generated command cannot terminate the caller.
  try {
    for (size_t i = 2; i + 1 < parts.size(); i += 2) {
      const std::string& flag = parts[i];
      const std::string& value = parts[i + 1];

      if (flag == "--map") {
        map_id = std::stoi(value);
      } else if (flag == "--old-tile") {
        old_tile_value = std::stoi(value, nullptr, 16);
      } else if (flag == "--new-tile") {
        new_tile_value = std::stoi(value, nullptr, 16);
      } else if (flag == "--x-min") {
        x_min = std::stoi(value);
      } else if (flag == "--y-min") {
        y_min = std::stoi(value);
      } else if (flag == "--x-max") {
        x_max = std::stoi(value);
      } else if (flag == "--y-max") {
        y_max = std::stoi(value);
      }
    }
  } catch (const std::exception&) {
    return absl::InvalidArgumentError(absl::StrCat(
        "Invalid numeric value in replace-tile command: ", command));
  }

  // Reject instead of silently truncating tile IDs to 16 bits.
  if (old_tile_value < 0 || old_tile_value > 0xFFFF ||
      new_tile_value < 0 || new_tile_value > 0xFFFF) {
    return absl::InvalidArgumentError(absl::StrCat(
        "Tile ID out of 16-bit range in replace-tile command: ", command));
  }
  const uint16_t old_tile = static_cast<uint16_t>(old_tile_value);
  const uint16_t new_tile = static_cast<uint16_t>(new_tile_value);

  // An inverted range would otherwise surface as a misleading "not found",
  // and negative coordinates would be handed to the tile lookup unchecked.
  if (x_min < 0 || y_min < 0 || x_min > x_max || y_min > y_max) {
    return absl::InvalidArgumentError(absl::StrCat(
        "Invalid bounds in replace-tile command: ", command));
  }

  if (!rom || !rom->is_loaded()) {
    return absl::FailedPreconditionError(
        "ROM must be loaded to scan for tiles to replace");
  }

  zelda3::Overworld overworld(rom);
  auto status = overworld.Load(rom);
  if (!status.ok()) {
    return status;
  }

  // Set the correct world based on map_id
  if (map_id < 0x40) {
    overworld.set_current_world(0);  // Light World
  } else if (map_id < 0x80) {
    overworld.set_current_world(1);  // Dark World
  } else {
    overworld.set_current_world(2);  // Special World
  }

  // Scan the specified area for tiles to replace
  std::vector<Tile16Change> changes;
  for (int y = y_min; y <= y_max; ++y) {
    for (int x = x_min; x <= x_max; ++x) {
      if (overworld.GetTile(x, y) != old_tile) {
        continue;
      }
      Tile16Change change;
      change.map_id = map_id;
      change.x = x;
      change.y = y;
      change.old_tile = old_tile;
      change.new_tile = new_tile;
      changes.push_back(change);
    }
  }

  if (changes.empty()) {
    // absl::Hex prints lowercase hex without padding, matching std::hex.
    return absl::NotFoundError(absl::StrCat(
        "No tiles matching 0x", absl::Hex(old_tile),
        " found in specified area"));
  }

  return changes;
}
absl::StatusOr<Tile16Proposal> Tile16ProposalGenerator::GenerateFromCommands(
const std::string& prompt,
const std::vector<std::string>& commands,
@@ -322,7 +505,7 @@ absl::StatusOr<Tile16Proposal> Tile16ProposalGenerator::GenerateFromCommands(
continue;
}
// Check if it's a set-tile command
// Check for different command types
if (absl::StrContains(command, "overworld set-tile")) {
auto change_or = ParseSetTileCommand(command, rom);
if (change_or.ok()) {
@@ -330,8 +513,25 @@ absl::StatusOr<Tile16Proposal> Tile16ProposalGenerator::GenerateFromCommands(
} else {
return change_or.status();
}
} else if (absl::StrContains(command, "overworld set-area")) {
auto changes_or = ParseSetAreaCommand(command, rom);
if (changes_or.ok()) {
proposal.changes.insert(proposal.changes.end(),
changes_or.value().begin(),
changes_or.value().end());
} else {
return changes_or.status();
}
} else if (absl::StrContains(command, "overworld replace-tile")) {
auto changes_or = ParseReplaceTileCommand(command, rom);
if (changes_or.ok()) {
proposal.changes.insert(proposal.changes.end(),
changes_or.value().begin(),
changes_or.value().end());
} else {
return changes_or.status();
}
}
// TODO: Add support for other command types (set-area, replace-tile, etc.)
}
if (proposal.changes.empty()) {
@@ -381,18 +581,121 @@ absl::Status Tile16ProposalGenerator::ApplyProposal(
}
/**
 * @brief Build a placeholder side-by-side diff bitmap for a proposal.
 *
 * The bitmap covers the bounding box of all changes (padded by two tiles and
 * clamped to tile coordinates 0..31). Each tile is drawn as a solid 16x16
 * square, once in the left ("before") half and once in the right ("after")
 * half: red where the tile differs between the two ROMs, green where it does
 * not. The halves are separated by an 8-pixel gray bar. Actual tile graphics
 * are not rendered yet.
 *
 * The world is selected from the map_id of the first change only.
 *
 * @param proposal Proposal whose changes define the region to visualize.
 * @param before_rom Loaded ROM holding the state before the proposal.
 * @param after_rom Loaded ROM holding the state after the proposal.
 * @return The diff bitmap. FailedPreconditionError when either ROM is missing
 *         or not loaded; InvalidArgumentError when the proposal has no
 *         changes or all of them lie outside the 0..31 tile range; otherwise
 *         any error returned while loading an overworld.
 */
absl::StatusOr<gfx::Bitmap> Tile16ProposalGenerator::GenerateDiff(
    const Tile16Proposal& proposal,
    Rom* before_rom,
    Rom* after_rom) {
  constexpr int kTileSize = 16;        // Pixels per tile edge.
  constexpr int kSeparatorWidth = 8;   // Pixels between the two halves.
  constexpr int kMaxTileCoord = 31;    // Highest tile coordinate rendered.
  constexpr int kPadding = 2;          // Context tiles around the changes.

  if (!before_rom || !before_rom->is_loaded()) {
    return absl::FailedPreconditionError("Before ROM not loaded");
  }

  if (!after_rom || !after_rom->is_loaded()) {
    return absl::FailedPreconditionError("After ROM not loaded");
  }

  if (proposal.changes.empty()) {
    return absl::InvalidArgumentError("No changes to visualize");
  }

  // Find the bounding box of all changes
  int min_x = INT_MAX;
  int min_y = INT_MAX;
  int max_x = INT_MIN;
  int max_y = INT_MIN;
  const int map_id = proposal.changes[0].map_id;

  for (const auto& change : proposal.changes) {
    if (change.x < min_x) min_x = change.x;
    if (change.y < min_y) min_y = change.y;
    if (change.x > max_x) max_x = change.x;
    if (change.y > max_y) max_y = change.y;
  }

  // Add some padding around the changes
  min_x = std::max(0, min_x - kPadding);
  min_y = std::max(0, min_y - kPadding);
  max_x = std::min(kMaxTileCoord, max_x + kPadding);
  max_y = std::min(kMaxTileCoord, max_y + kPadding);

  // If every change lies outside 0..31 the clamped box is inverted; without
  // this check the sizes below go negative and the buffer request explodes.
  if (min_x > max_x || min_y > max_y) {
    return absl::InvalidArgumentError(
        "Proposal changes lie outside the visualizable tile area");
  }

  const int width = (max_x - min_x + 1) * kTileSize;
  const int height = (max_y - min_y + 1) * kTileSize;

  // Create a side-by-side diff bitmap (before on left, after on right)
  const int diff_width = width * 2 + kSeparatorWidth;
  const int diff_height = height;
  const int right_offset = width + kSeparatorWidth;

  std::vector<uint8_t> diff_data(
      static_cast<size_t>(diff_width) * static_cast<size_t>(diff_height), 0x00);
  gfx::Bitmap diff_bitmap(diff_width, diff_height, 8, diff_data);

  // Load overworld data from both ROMs
  zelda3::Overworld before_overworld(before_rom);
  zelda3::Overworld after_overworld(after_rom);

  auto before_status = before_overworld.Load(before_rom);
  if (!before_status.ok()) {
    return before_status;
  }

  auto after_status = after_overworld.Load(after_rom);
  if (!after_status.ok()) {
    return after_status;
  }

  // Set the correct world for both overworlds
  int world = 0;
  if (map_id < 0x40) {
    world = 0;  // Light World
  } else if (map_id < 0x80) {
    world = 1;  // Dark World
  } else {
    world = 2;  // Special World
  }

  before_overworld.set_current_world(world);
  after_overworld.set_current_world(world);

  // For now, create a simple colored diff representation
  // Red = changed tiles, Green = unchanged tiles
  // This is a placeholder until full tile rendering is implemented
  gfx::SnesColor red_color(31, 0, 0);          // Red for changed
  gfx::SnesColor green_color(0, 31, 0);        // Green for unchanged
  gfx::SnesColor separator_color(15, 15, 15);  // Gray separator

  for (int y = min_y; y <= max_y; ++y) {
    for (int x = min_x; x <= max_x; ++x) {
      uint16_t before_tile = before_overworld.GetTile(x, y);
      uint16_t after_tile = after_overworld.GetTile(x, y);

      bool is_changed = (before_tile != after_tile);
      gfx::SnesColor color = is_changed ? red_color : green_color;

      // The same status colour is painted in both halves.
      int pixel_x = (x - min_x) * kTileSize;
      int pixel_y = (y - min_y) * kTileSize;
      for (int py = 0; py < kTileSize; ++py) {
        for (int px = 0; px < kTileSize; ++px) {
          // "before" tile on the left side
          diff_bitmap.SetPixel(pixel_x + px, pixel_y + py, color);
          // "after" tile on the right side
          diff_bitmap.SetPixel(right_offset + pixel_x + px, pixel_y + py, color);
        }
      }
    }
  }

  // Draw separator line
  for (int y = 0; y < diff_height; ++y) {
    for (int x = 0; x < kSeparatorWidth; ++x) {
      diff_bitmap.SetPixel(width + x, y, separator_color);
    }
  }

  return diff_bitmap;
}
absl::Status Tile16ProposalGenerator::SaveProposal(

View File

@@ -64,6 +64,9 @@ class Tile16ProposalGenerator {
public:
Tile16ProposalGenerator() = default;
// Allow testing of private methods
friend class Tile16ProposalGeneratorTest;
/**
* @brief Generate a tile16 proposal from an AI-generated command list.
*
@@ -134,6 +137,25 @@ class Tile16ProposalGenerator {
const std::string& command,
Rom* rom);
/**
* @brief Parse a "overworld set-area" command into multiple Tile16Changes.
*
* Expected format: "overworld set-area --map 0 --x 10 --y 20 --width 5 --height 3 --tile 0x02E"
*/
absl::StatusOr<std::vector<Tile16Change>> ParseSetAreaCommand(
const std::string& command,
Rom* rom);
/**
* @brief Parse a "overworld replace-tile" command into multiple Tile16Changes.
*
* Expected format: "overworld replace-tile --map 0 --old-tile 0x02E --new-tile 0x030"
* Can also specify optional bounds: --x-min 0 --y-min 0 --x-max 31 --y-max 31
*/
absl::StatusOr<std::vector<Tile16Change>> ParseReplaceTileCommand(
const std::string& command,
Rom* rom);
/**
* @brief Generate a unique proposal ID.
*/