feat: Implement Proposal Saving and GUI Automation Enhancements

- Added functionality to save AI agent plans to disk using ProposalRegistry, including directory creation and error handling for failed saves.
- Enhanced AIGUIController with new gRPC GUI automation actions, including click, type, wait, and verify actions, improving interaction capabilities.
- Introduced new command parsing for set-area and replace-tile commands in Tile16ProposalGenerator, allowing for more complex tile modifications.
- Added integration and unit tests for AIGUIController and Tile16ProposalGenerator to ensure robust functionality and error handling.
This commit is contained in:
scawful
2025-10-05 13:07:03 -04:00
parent c3df55d787
commit 3b7a961884
8 changed files with 1143 additions and 30 deletions

View File

@@ -0,0 +1,334 @@
// Integration tests for AIGUIController
// Tests the gRPC GUI automation with vision feedback
#include "cli/service/ai/ai_gui_controller.h"
#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include "cli/service/ai/gemini_ai_service.h"
#include "cli/service/gui/gui_automation_client.h"
namespace yaze {
namespace cli {
namespace ai {
namespace {
using ::testing::_;
using ::testing::Return;
// gMock stand-in for GuiAutomationClient so the controller can be exercised
// without an actual GUI. The base class is constructed with the address
// "localhost:50052"; every call listed below is replaced by a mock method
// whose result is scripted per test with EXPECT_CALL.
// NOTE(review): none of the MOCK_METHOD declarations carry an `override`
// specifier, so the compiler does not verify that they override virtual
// methods of GuiAutomationClient. If the base methods are not virtual, calls
// made through a GuiAutomationClient* will not reach these mocks — confirm
// against gui_automation_client.h.
class MockGuiAutomationClient : public GuiAutomationClient {
public:
MockGuiAutomationClient() : GuiAutomationClient("localhost:50052") {}
// Connection / liveness calls.
MOCK_METHOD(absl::Status, Connect, ());
MOCK_METHOD(absl::StatusOr<AutomationResult>, Ping, (const std::string&));
// Click(target, click_type).
MOCK_METHOD(absl::StatusOr<AutomationResult>, Click,
(const std::string&, ClickType));
// Type(target, text, clear_first) — argument roles as used by the tests below.
MOCK_METHOD(absl::StatusOr<AutomationResult>, Type,
(const std::string&, const std::string&, bool));
// Wait(condition, timeout_ms, <third int>) — the tests always expect 100 for
// the third argument; presumably a poll interval in ms, TODO confirm.
MOCK_METHOD(absl::StatusOr<AutomationResult>, Wait,
(const std::string&, int, int));
// Assert(condition).
MOCK_METHOD(absl::StatusOr<AutomationResult>, Assert,
(const std::string&));
};
// Shared fixture: builds an AIGUIController on top of a GeminiAIService
// instance and a MockGuiAutomationClient, then initializes it with vision
// verification and iterative refinement turned off.
class AIGUIControllerTest : public ::testing::Test {
 protected:
  void SetUp() override {
    // Service configuration handed to the Gemini service constructor.
    GeminiConfig gemini_config;
    gemini_config.api_key = "test_key";
    gemini_config.model = "gemini-2.5-flash";

    // The controller receives raw pointers, so both collaborators are created
    // first and stay owned by the fixture.
    gemini_service_ = std::make_unique<GeminiAIService>(gemini_config);
    gui_client_ = std::make_unique<MockGuiAutomationClient>();
    controller_ = std::make_unique<AIGUIController>(gemini_service_.get(),
                                                    gui_client_.get());

    ControlLoopConfig loop_settings;
    loop_settings.max_iterations = 5;
    loop_settings.enable_vision_verification = false;  // Disable for unit tests
    loop_settings.enable_iterative_refinement = false;
    controller_->Initialize(loop_settings);
  }

  // Members are destroyed in reverse declaration order, so the controller is
  // released before the service and client it points at.
  std::unique_ptr<GeminiAIService> gemini_service_;
  std::unique_ptr<MockGuiAutomationClient> gui_client_;
  std::unique_ptr<AIGUIController> controller_;
};
// ============================================================================
// Basic Action Execution Tests
// ============================================================================
// A kClickButton action with an explicit "left" click type: the mock reports
// success and the controller is expected to return an OK result flagged as
// successful.
TEST_F(AIGUIControllerTest, ExecuteClickAction_Success) {
  AutomationResult click_ok;
  click_ok.success = true;
  click_ok.message = "Click successful";
  EXPECT_CALL(*gui_client_, Click("button:Test", ClickType::kLeft))
      .WillOnce(Return(click_ok));

  AIAction click(AIActionType::kClickButton);
  click.parameters["target"] = "button:Test";
  click.parameters["click_type"] = "left";

  auto outcome = controller_->ExecuteSingleAction(click, false);
  ASSERT_TRUE(outcome.ok()) << outcome.status().message();
  EXPECT_TRUE(outcome->action_successful);
}
// A click whose target the mock reports as missing. No click_type parameter is
// supplied; the expectation is written against ClickType::kLeft. The
// controller is expected to return a non-OK status mentioning the failed click.
TEST_F(AIGUIControllerTest, ExecuteClickAction_Failure) {
  AutomationResult not_found;
  not_found.success = false;
  not_found.message = "Button not found";
  EXPECT_CALL(*gui_client_, Click("button:NonExistent", ClickType::kLeft))
      .WillOnce(Return(not_found));

  AIAction click(AIActionType::kClickButton);
  click.parameters["target"] = "button:NonExistent";

  auto outcome = controller_->ExecuteSingleAction(click, false);
  EXPECT_FALSE(outcome.ok());
  EXPECT_THAT(outcome.status().message(),
              ::testing::HasSubstr("Click action failed"));
}
// ============================================================================
// Type Action Tests
// ============================================================================
// A kSelectTile action carrying target/text/clear_first parameters is expected
// to be forwarded to the client's Type call with clear_first parsed as true.
TEST_F(AIGUIControllerTest, ExecuteTypeAction_Success) {
  AutomationResult typed_ok;
  typed_ok.success = true;
  typed_ok.message = "Text entered";
  EXPECT_CALL(*gui_client_, Type("input:TileID", "0x42", true))
      .WillOnce(Return(typed_ok));

  // Using SelectTile as a type action
  AIAction select_tile(AIActionType::kSelectTile);
  select_tile.parameters["target"] = "input:TileID";
  select_tile.parameters["text"] = "0x42";
  select_tile.parameters["clear_first"] = "true";

  auto outcome = controller_->ExecuteSingleAction(select_tile, false);
  ASSERT_TRUE(outcome.ok());
  EXPECT_TRUE(outcome->action_successful);
}
// ============================================================================
// Wait Action Tests
// ============================================================================
// A kWait action with timeout_ms "2000" is expected to reach the client as
// Wait(condition, 2000, 100) and succeed when the mock reports the condition
// as met.
TEST_F(AIGUIControllerTest, ExecuteWaitAction_Success) {
  AutomationResult condition_met;
  condition_met.success = true;
  condition_met.message = "Condition met";
  EXPECT_CALL(*gui_client_, Wait("window:OverworldEditor", 2000, 100))
      .WillOnce(Return(condition_met));

  AIAction wait_for_window(AIActionType::kWait);
  wait_for_window.parameters["condition"] = "window:OverworldEditor";
  wait_for_window.parameters["timeout_ms"] = "2000";

  auto outcome = controller_->ExecuteSingleAction(wait_for_window, false);
  ASSERT_TRUE(outcome.ok());
  EXPECT_TRUE(outcome->action_successful);
}
// When the mocked Wait call reports a timeout (success == false), the
// controller is expected to return a non-OK status.
TEST_F(AIGUIControllerTest, ExecuteWaitAction_Timeout) {
  AutomationResult timed_out;
  timed_out.success = false;
  timed_out.message = "Timeout waiting for condition";
  EXPECT_CALL(*gui_client_, Wait("window:NonExistentWindow", 100, 100))
      .WillOnce(Return(timed_out));

  AIAction wait_for_window(AIActionType::kWait);
  wait_for_window.parameters["condition"] = "window:NonExistentWindow";
  wait_for_window.parameters["timeout_ms"] = "100";

  auto outcome = controller_->ExecuteSingleAction(wait_for_window, false);
  EXPECT_FALSE(outcome.ok());
}
// ============================================================================
// Verify/Assert Action Tests
// ============================================================================
// A kVerifyTile action is expected to call the client's Assert with the given
// condition; matching expected/actual values from the mock yield success.
TEST_F(AIGUIControllerTest, ExecuteVerifyAction_Success) {
  AutomationResult assertion_ok;
  assertion_ok.success = true;
  assertion_ok.message = "Assertion passed";
  assertion_ok.expected_value = "0x42";
  assertion_ok.actual_value = "0x42";
  EXPECT_CALL(*gui_client_, Assert("tile_placed"))
      .WillOnce(Return(assertion_ok));

  AIAction verify(AIActionType::kVerifyTile);
  verify.parameters["condition"] = "tile_placed";

  auto outcome = controller_->ExecuteSingleAction(verify, false);
  ASSERT_TRUE(outcome.ok());
  EXPECT_TRUE(outcome->action_successful);
}
// A failed assertion from the mock must produce a non-OK status whose message
// names the failure and includes both the expected and the actual value.
TEST_F(AIGUIControllerTest, ExecuteVerifyAction_Failure) {
  AutomationResult assertion_failed;
  assertion_failed.success = false;
  assertion_failed.message = "Assertion failed";
  assertion_failed.expected_value = "0x42";
  assertion_failed.actual_value = "0x00";
  EXPECT_CALL(*gui_client_, Assert("tile_placed"))
      .WillOnce(Return(assertion_failed));

  AIAction verify(AIActionType::kVerifyTile);
  verify.parameters["condition"] = "tile_placed";

  auto outcome = controller_->ExecuteSingleAction(verify, false);
  EXPECT_FALSE(outcome.ok());

  // Each fragment is checked separately so a failure pinpoints what is missing.
  EXPECT_THAT(outcome.status().message(),
              ::testing::HasSubstr("Assert action failed"));
  EXPECT_THAT(outcome.status().message(),
              ::testing::HasSubstr("expected: 0x42"));
  EXPECT_THAT(outcome.status().message(),
              ::testing::HasSubstr("actual: 0x00"));
}
// ============================================================================
// Complex Tile Placement Action Tests
// ============================================================================
// A kPlaceTile action is expected to expand into four client calls, in this
// exact order: open the Overworld menu, wait for the editor window, type the
// map ID, then click a position (the click target is not constrained).
TEST_F(AIGUIControllerTest, ExecutePlaceTileAction_CompleteFlow) {
  AIAction place_tile(AIActionType::kPlaceTile);
  place_tile.parameters["map_id"] = "5";
  place_tile.parameters["x"] = "10";
  place_tile.parameters["y"] = "20";
  place_tile.parameters["tile"] = "0x42";

  AutomationResult step_ok;
  step_ok.success = true;

  // Expect sequence: open menu, wait for window, set map ID, click position
  ::testing::InSequence ordered;
  EXPECT_CALL(*gui_client_, Click("menu:Overworld", ClickType::kLeft))
      .WillOnce(Return(step_ok));
  EXPECT_CALL(*gui_client_, Wait("window:Overworld Editor", 2000, 100))
      .WillOnce(Return(step_ok));
  EXPECT_CALL(*gui_client_, Type("input:Map ID", "5", true))
      .WillOnce(Return(step_ok));
  EXPECT_CALL(*gui_client_, Click(::testing::_, ClickType::kLeft))
      .WillOnce(Return(step_ok));

  auto outcome = controller_->ExecuteSingleAction(place_tile, false);
  ASSERT_TRUE(outcome.ok()) << outcome.status().message();
  EXPECT_TRUE(outcome->action_successful);
}
// ============================================================================
// Multiple Actions Execution Tests
// ============================================================================
// Runs a two-step batch (click, then wait). The wait action carries no
// timeout_ms parameter and the expectation is written against 5000. Both
// actions must be reported as executed.
TEST_F(AIGUIControllerTest, ExecuteActions_MultipleActionsSuccess) {
  AIAction open_overworld(AIActionType::kClickButton);
  open_overworld.parameters["target"] = "button:Overworld";

  AIAction await_editor(AIActionType::kWait);
  await_editor.parameters["condition"] = "window:OverworldEditor";

  std::vector<AIAction> batch;
  batch.push_back(open_overworld);
  batch.push_back(await_editor);

  AutomationResult step_ok;
  step_ok.success = true;
  EXPECT_CALL(*gui_client_, Click("button:Overworld", ClickType::kLeft))
      .WillOnce(Return(step_ok));
  EXPECT_CALL(*gui_client_, Wait("window:OverworldEditor", 5000, 100))
      .WillOnce(Return(step_ok));

  auto batch_result = controller_->ExecuteActions(batch);
  ASSERT_TRUE(batch_result.ok()) << batch_result.status().message();
  EXPECT_TRUE(batch_result->success);
  EXPECT_EQ(batch_result->actions_executed.size(), 2);
}
// A batch must be abandoned as soon as one action fails: the first click is
// scripted to fail, the second click must never reach the client, and the
// overall call must return a non-OK status.
TEST_F(AIGUIControllerTest, ExecuteActions_StopsOnFirstFailure) {
  std::vector<AIAction> actions;
  AIAction action1(AIActionType::kClickButton);
  action1.parameters["target"] = "button:Test";
  actions.push_back(action1);
  AIAction action2(AIActionType::kClickButton);
  action2.parameters["target"] = "button:NeverReached";
  actions.push_back(action2);

  AutomationResult failure_result;
  failure_result.success = false;
  failure_result.message = "First action failed";
  EXPECT_CALL(*gui_client_, Click("button:Test", ClickType::kLeft))
      .WillOnce(Return(failure_result));
  // Second action should never be called
  EXPECT_CALL(*gui_client_, Click("button:NeverReached", _))
      .Times(0);

  auto result = controller_->ExecuteActions(actions);
  // A non-OK StatusOr holds no value, so `result->...` must not be evaluated
  // on the expected path (doing so crashes or is undefined behaviour). The
  // Times(0) expectation above is what proves execution stopped after the
  // first action.
  EXPECT_FALSE(result.ok());
}
// ============================================================================
// Error Handling Tests
// ============================================================================
// An action of type kInvalidAction is expected to be rejected with a status
// message stating that the action type is not implemented. No client call is
// scripted for this case.
TEST_F(AIGUIControllerTest, ExecuteAction_InvalidActionType) {
  AIAction unsupported(AIActionType::kInvalidAction);

  auto outcome = controller_->ExecuteSingleAction(unsupported, false);

  EXPECT_FALSE(outcome.ok());
  EXPECT_THAT(outcome.status().message(),
              ::testing::HasSubstr("Action type not implemented"));
}
// A kPlaceTile action with an empty parameter set must fail with a message
// listing the required parameters.
TEST_F(AIGUIControllerTest, ExecutePlaceTileAction_MissingParameters) {
  // Deliberately left without map_id / x / y / tile.
  AIAction incomplete(AIActionType::kPlaceTile);

  auto outcome = controller_->ExecuteSingleAction(incomplete, false);

  EXPECT_FALSE(outcome.ok());
  EXPECT_THAT(outcome.status().message(),
              ::testing::HasSubstr("requires map_id, x, y, and tile"));
}
} // namespace
} // namespace ai
} // namespace cli
} // namespace yaze

View File

@@ -0,0 +1,244 @@
// Test suite for Tile16ProposalGenerator
// Tests the new ParseSetAreaCommand and ParseReplaceTileCommand functionality
#include "cli/service/planning/tile16_proposal_generator.h"
#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include "app/rom.h"
#include "test/mocks/mock_rom.h"
namespace yaze {
namespace cli {
namespace {
using ::testing::_;
using ::testing::Return;
// Fixture that gives every test case its own default-constructed
// Tile16ProposalGenerator.
class Tile16ProposalGeneratorTest : public ::testing::Test {
 protected:
  // Runs before each test body; replaces any previous generator instance.
  void SetUp() override {
    generator_ = std::make_unique<Tile16ProposalGenerator>();
  }

  // Generator under test, owned by the fixture.
  std::unique_ptr<Tile16ProposalGenerator> generator_;
};
// ============================================================================
// ParseSetTileCommand Tests
// ============================================================================
// A fully specified set-tile command (no ROM supplied) must parse into a
// change carrying the map, coordinates and hex tile ID from the command line.
TEST_F(Tile16ProposalGeneratorTest, ParseSetTileCommand_ValidCommand) {
  std::string cli_line =
      "overworld set-tile --map 0 --x 10 --y 20 --tile 0x02E";

  auto parsed = generator_->ParseSetTileCommand(cli_line, nullptr);

  ASSERT_TRUE(parsed.ok()) << parsed.status().message();
  EXPECT_EQ(parsed->map_id, 0);
  EXPECT_EQ(parsed->x, 10);
  EXPECT_EQ(parsed->y, 20);
  EXPECT_EQ(parsed->new_tile, 0x02E);
}
// A set-tile command that stops after --map must be rejected as malformed.
TEST_F(Tile16ProposalGeneratorTest, ParseSetTileCommand_InvalidFormat) {
  // Missing required args
  std::string cli_line = "overworld set-tile --map 0";

  auto parsed = generator_->ParseSetTileCommand(cli_line, nullptr);

  EXPECT_FALSE(parsed.ok());
  EXPECT_THAT(parsed.status().message(),
              ::testing::HasSubstr("Invalid command format"));
}
// Feeding a get-tile command to the set-tile parser must fail with a message
// saying the input is not a set-tile command.
TEST_F(Tile16ProposalGeneratorTest, ParseSetTileCommand_WrongCommandType) {
  std::string cli_line = "overworld get-tile --map 0 --x 10 --y 20";

  auto parsed = generator_->ParseSetTileCommand(cli_line, nullptr);

  EXPECT_FALSE(parsed.ok());
  EXPECT_THAT(parsed.status().message(),
              ::testing::HasSubstr("Not a set-tile command"));
}
// ============================================================================
// ParseSetAreaCommand Tests
// ============================================================================
// A 5x3 set-area command must expand into 15 individual tile changes. The
// first entry is expected at the area's origin and the last entry at its
// opposite corner.
TEST_F(Tile16ProposalGeneratorTest, ParseSetAreaCommand_ValidCommand) {
  std::string command =
      "overworld set-area --map 0 --x 10 --y 20 --width 5 --height 3 --tile 0x02E";
  auto result = generator_->ParseSetAreaCommand(command, nullptr);
  ASSERT_TRUE(result.ok()) << result.status().message();
  // Fatal on mismatch: the element accesses below index [0] and [14] and would
  // read out of bounds if fewer changes were produced.
  ASSERT_EQ(result->size(), 15u);  // 5 width * 3 height = 15 tiles
  // Check first tile
  EXPECT_EQ((*result)[0].map_id, 0);
  EXPECT_EQ((*result)[0].x, 10);
  EXPECT_EQ((*result)[0].y, 20);
  EXPECT_EQ((*result)[0].new_tile, 0x02E);
  // Check last tile
  EXPECT_EQ((*result)[14].x, 14);  // 10 + 4
  EXPECT_EQ((*result)[14].y, 22);  // 20 + 2
}
// Degenerate 1x1 area: exactly one change is expected.
TEST_F(Tile16ProposalGeneratorTest, ParseSetAreaCommand_SingleTile) {
  std::string cli_line =
      "overworld set-area --map 0 --x 10 --y 20 --width 1 --height 1 --tile 0x02E";

  auto parsed = generator_->ParseSetAreaCommand(cli_line, nullptr);

  ASSERT_TRUE(parsed.ok());
  EXPECT_EQ(parsed->size(), 1);
}
// A 32x32 area starting at the origin must expand into 1024 changes.
TEST_F(Tile16ProposalGeneratorTest, ParseSetAreaCommand_LargeArea) {
  std::string cli_line =
      "overworld set-area --map 0 --x 0 --y 0 --width 32 --height 32 --tile 0x000";

  auto parsed = generator_->ParseSetAreaCommand(cli_line, nullptr);

  ASSERT_TRUE(parsed.ok());
  EXPECT_EQ(parsed->size(), 1024);  // 32 * 32
}
// A set-area command cut off after --x must be rejected with the
// set-area-specific format error.
TEST_F(Tile16ProposalGeneratorTest, ParseSetAreaCommand_InvalidFormat) {
  // Missing args
  std::string cli_line = "overworld set-area --map 0 --x 10";

  auto parsed = generator_->ParseSetAreaCommand(cli_line, nullptr);

  EXPECT_FALSE(parsed.ok());
  EXPECT_THAT(parsed.status().message(),
              ::testing::HasSubstr("Invalid set-area command format"));
}
// ============================================================================
// ParseReplaceTileCommand Tests
// ============================================================================
// A well-formed replace-tile command passed with a null ROM pointer must fail
// with a message stating that a ROM has to be loaded.
TEST_F(Tile16ProposalGeneratorTest, ParseReplaceTileCommand_NoROM) {
  std::string cli_line =
      "overworld replace-tile --map 0 --old-tile 0x02E --new-tile 0x030";

  auto parsed = generator_->ParseReplaceTileCommand(cli_line, nullptr);

  EXPECT_FALSE(parsed.ok());
  EXPECT_THAT(parsed.status().message(),
              ::testing::HasSubstr("ROM must be loaded"));
}
// A replace-tile command without the old/new tile arguments must be reported
// as a format error (the ROM pointer is also null here).
TEST_F(Tile16ProposalGeneratorTest, ParseReplaceTileCommand_InvalidFormat) {
  // Missing tiles
  std::string cli_line = "overworld replace-tile --map 0";

  auto parsed = generator_->ParseReplaceTileCommand(cli_line, nullptr);

  EXPECT_FALSE(parsed.ok());
  EXPECT_THAT(parsed.status().message(),
              ::testing::HasSubstr("Invalid replace-tile command format"));
}
// ============================================================================
// GenerateFromCommands Tests
// ============================================================================
// One set-tile plus one 2x2 set-area command must yield a pending proposal
// with five changes that echoes back the prompt and AI service name.
TEST_F(Tile16ProposalGeneratorTest, GenerateFromCommands_MultipleCommands) {
  std::vector<std::string> cli_lines = {
      "overworld set-tile --map 0 --x 10 --y 20 --tile 0x02E",
      "overworld set-area --map 0 --x 5 --y 5 --width 2 --height 2 --tile 0x030"
  };

  auto proposal = generator_->GenerateFromCommands("Test prompt", cli_lines,
                                                   "test_ai", nullptr);

  ASSERT_TRUE(proposal.ok()) << proposal.status().message();
  EXPECT_EQ(proposal->changes.size(), 5);  // 1 from set-tile + 4 from set-area
  EXPECT_EQ(proposal->prompt, "Test prompt");
  EXPECT_EQ(proposal->ai_service, "test_ai");
  EXPECT_EQ(proposal->status, Tile16Proposal::Status::PENDING);
}
// An empty command list must be rejected: there is nothing to turn into a
// proposal.
TEST_F(Tile16ProposalGeneratorTest, GenerateFromCommands_EmptyCommands) {
  std::vector<std::string> cli_lines = {};

  auto proposal = generator_->GenerateFromCommands("Test prompt", cli_lines,
                                                   "test_ai", nullptr);

  EXPECT_FALSE(proposal.ok());
  EXPECT_THAT(proposal.status().message(),
              ::testing::HasSubstr("No valid tile16 changes found"));
}
// Lines starting with '#' and empty lines must be skipped, leaving only the
// single real command to contribute a change.
TEST_F(Tile16ProposalGeneratorTest, GenerateFromCommands_IgnoresComments) {
  std::vector<std::string> cli_lines = {
      "# This is a comment",
      "overworld set-tile --map 0 --x 10 --y 20 --tile 0x02E",
      "# Another comment",
      ""  // Empty line
  };

  auto proposal = generator_->GenerateFromCommands("Test prompt", cli_lines,
                                                   "test_ai", nullptr);

  ASSERT_TRUE(proposal.ok());
  EXPECT_EQ(proposal->changes.size(), 1);  // Only the valid command
}
// ============================================================================
// Tile16Change Tests
// ============================================================================
// The textual form of a change must mention the map, the coordinate pair, and
// both tile IDs rendered as lowercase hex without zero padding.
TEST_F(Tile16ProposalGeneratorTest, Tile16Change_ToString) {
  Tile16Change delta;
  delta.map_id = 5;
  delta.x = 10;
  delta.y = 20;
  delta.old_tile = 0x02E;
  delta.new_tile = 0x030;

  std::string rendered = delta.ToString();

  EXPECT_THAT(rendered, ::testing::HasSubstr("Map 5"));
  EXPECT_THAT(rendered, ::testing::HasSubstr("(10,20)"));
  EXPECT_THAT(rendered, ::testing::HasSubstr("0x2e"));
  EXPECT_THAT(rendered, ::testing::HasSubstr("0x30"));
}
// ============================================================================
// Proposal Serialization Tests
// ============================================================================
// Round-trips a proposal with one change through ToJson()/FromJson() and
// checks that the top-level fields and the change list survive.
TEST_F(Tile16ProposalGeneratorTest, Proposal_ToJsonAndFromJson) {
  Tile16Proposal original;
  original.id = "test_id_123";
  original.prompt = "Test prompt";
  original.ai_service = "gemini";
  original.reasoning = "Test reasoning";
  original.status = Tile16Proposal::Status::PENDING;
  Tile16Change change;
  change.map_id = 5;
  change.x = 10;
  change.y = 20;
  change.old_tile = 0x02E;
  change.new_tile = 0x030;
  original.changes.push_back(change);

  std::string json = original.ToJson();
  auto result = Tile16Proposal::FromJson(json);
  ASSERT_TRUE(result.ok()) << result.status().message();
  EXPECT_EQ(result->id, original.id);
  EXPECT_EQ(result->prompt, original.prompt);
  EXPECT_EQ(result->ai_service, original.ai_service);
  EXPECT_EQ(result->reasoning, original.reasoning);
  EXPECT_EQ(result->status, original.status);
  // Fatal on mismatch: changes[0] below would read out of bounds if the change
  // list came back empty.
  ASSERT_EQ(result->changes.size(), 1u);
  EXPECT_EQ(result->changes[0].map_id, 5);
}
} // namespace
} // namespace cli
} // namespace yaze