From 13bbe8078a2c00d82d1212bb8f2f8c3889108416 Mon Sep 17 00:00:00 2001 From: scawful Date: Sat, 4 Oct 2025 22:23:31 -0400 Subject: [PATCH] feat: Enhance GUI action generation and testing for AI tile placement - Introduced `GuiActionGenerator` class to convert high-level AI actions into executable GUI test scripts, supporting JSON output for automated testing. - Added comprehensive test suite for AI tile placement commands, validating command parsing and generated test scripts. - Updated `README.md` to reflect recent enhancements and completed features in the project. - Improved CMake configuration to include new source files for GUI action generation and related services. --- docs/z3ed/README.md | 12 +- src/cli/agent.cmake | 2 + src/cli/handlers/agent/general_commands.cc | 7 +- src/cli/service/gui/gui_action_generator.cc | 227 ++++++++++++++++++++ src/cli/service/gui/gui_action_generator.h | 71 ++++++ src/cli/z3ed.cmake | 2 + test/integration/test_ai_tile_placement.cc | 168 +++++++++++++++ 7 files changed, 481 insertions(+), 8 deletions(-) create mode 100644 src/cli/service/gui/gui_action_generator.cc create mode 100644 src/cli/service/gui/gui_action_generator.h create mode 100644 test/integration/test_ai_tile_placement.cc diff --git a/docs/z3ed/README.md b/docs/z3ed/README.md index d90718b0..4374b3c1 100644 --- a/docs/z3ed/README.md +++ b/docs/z3ed/README.md @@ -887,11 +887,15 @@ The AI response appears in your chat history and can reference specific details 4. **Collaboration UI Enhancements (1 day)**: Add UI elements for ROM sync, snapshot sharing, and proposal management in the Agent Chat widget. 5. **Windows Cross-Platform Testing (8-10h)**: Validate `z3ed` and the test harness on Windows. 
-### ✅ Recently Completed (v0.2.0-alpha) +### ✅ Recently Completed (v0.2.0-alpha - October 5, 2025) -- **Enhanced System Prompt (v3)**: Proactive tool chaining with implicit iteration to minimize back-and-forth -- **Learn Command**: Full implementation with preferences, ROM patterns, project context, and conversation memory -- **gRPC Windows Build Optimization**: Documented vcpkg approach and optimization strategies +- **Enhanced System Prompt (v3)**: Proactive tool chaining with implicit iteration to minimize back-and-forth conversations +- **Learn Command**: Full implementation with preferences, ROM patterns, project context, and conversation memory storage +- **Native Gemini Function Calling**: Upgraded from manual curl to native function calling API with automatic tool schema generation +- **Multimodal Vision Testing**: Comprehensive test suite for Gemini vision capabilities with screenshot integration +- **AI-Controlled GUI Automation**: Natural language parsing (`AIActionParser`) and test script generation (`GuiActionGenerator`) for automated tile placement +- **gRPC Windows Build Optimization**: vcpkg integration for 10-20x faster Windows builds, removed abseil-cpp submodule +- **Improved Documentation**: Consolidated architecture, enhancement plans, and build instructions with JSON-first approach ## 12. 
Troubleshooting diff --git a/src/cli/agent.cmake b/src/cli/agent.cmake index e85980f0..7b17f33f 100644 --- a/src/cli/agent.cmake +++ b/src/cli/agent.cmake @@ -72,9 +72,11 @@ set(YAZE_AGENT_SOURCES cli/service/agent/tool_dispatcher.cc cli/service/agent/learned_knowledge_service.cc cli/service/ai/ai_service.cc + cli/service/ai/ai_action_parser.cc cli/service/ai/ollama_ai_service.cc cli/service/ai/prompt_builder.cc cli/service/ai/service_factory.cc + cli/service/gui/gui_action_generator.cc cli/service/planning/policy_evaluator.cc cli/service/planning/proposal_registry.cc cli/service/planning/tile16_proposal_generator.cc diff --git a/src/cli/handlers/agent/general_commands.cc b/src/cli/handlers/agent/general_commands.cc index 5b1eca99..df87bcfa 100644 --- a/src/cli/handlers/agent/general_commands.cc +++ b/src/cli/handlers/agent/general_commands.cc @@ -29,6 +29,7 @@ #include "cli/service/ai/gemini_ai_service.h" #include "cli/service/ai/ollama_ai_service.h" #include "cli/service/ai/service_factory.h" +#include "cli/service/agent/learned_knowledge_service.h" #include "cli/service/agent/proposal_executor.h" #include "cli/service/agent/simple_chat_session.h" #include "cli/service/planning/proposal_registry.h" @@ -386,9 +387,7 @@ absl::Status HandleDiffCommand(Rom& rom, const std::vector& args) { } absl::Status HandleLearnCommand(const std::vector& args) { - using namespace yaze::cli::agent; - - static LearnedKnowledgeService learn_service; + static yaze::cli::agent::LearnedKnowledgeService learn_service; static bool initialized = false; if (!initialized) { @@ -545,7 +544,7 @@ absl::Status HandleLearnCommand(const std::vector& args) { std::cout << "\n"; } } - return absl::OkStatus(); + return absl::OkStatus(); } return absl::InvalidArgumentError("Unknown learn command. 
Use 'z3ed agent learn' for usage.");
diff --git a/src/cli/service/gui/gui_action_generator.cc b/src/cli/service/gui/gui_action_generator.cc
new file mode 100644
index 00000000..7ef01f50
--- /dev/null
+++ b/src/cli/service/gui/gui_action_generator.cc
@@ -0,0 +1,313 @@
+#include "cli/service/gui/gui_action_generator.h"
+
+#include <cstdint>
+#include <optional>
+#include <utility>
+
+#include "absl/strings/numbers.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/str_format.h"
+#include "absl/strings/string_view.h"
+
+namespace yaze {
+namespace cli {
+namespace gui {
+
+namespace {
+
+// Edge length of one tile16 cell on the selector and map canvases, in pixels.
+constexpr int kTileSizePx = 16;
+// Number of tile16 cells per row in the tile selector canvas.
+constexpr int kSelectorTilesPerRow = 8;
+// Pause used for a wait action that carries no usable duration_ms parameter.
+constexpr int kDefaultWaitMs = 500;
+
+// Parses `text` as a non-negative decimal integer. Unlike std::stoi this never
+// throws: malformed, negative, or out-of-range input yields std::nullopt.
+std::optional<int> ParseNonNegativeInt(absl::string_view text) {
+  int value = 0;
+  if (!absl::SimpleAtoi(text, &value) || value < 0) {
+    return std::nullopt;
+  }
+  return value;
+}
+
+// Pixel offset of the centre of the `index`-th tile along one axis. Computed
+// in 64 bits so that a large index cannot overflow.
+int64_t TileCenterPx(int index) {
+  return static_cast<int64_t>(index) * kTileSizePx + kTileSizePx / 2;
+}
+
+}  // namespace
+
+// Serializes `actions` as a pretty-printed JSON test script. Builds without
+// YAZE_WITH_JSON return an Unimplemented error instead.
+absl::StatusOr<std::string> GuiActionGenerator::GenerateTestScript(
+    const std::vector<ai::AIAction>& actions) {
+#ifdef YAZE_WITH_JSON
+  auto json_result = GenerateTestJSON(actions);
+  if (!json_result.ok()) {
+    return json_result.status();
+  }
+  return json_result->dump(2);  // Pretty-print with 2-space indent
+#else
+  return absl::UnimplementedError(
+      "JSON support required for test script generation");
+#endif
+}
+
+#ifdef YAZE_WITH_JSON
+// Builds the JSON test object: fixed metadata plus one numbered step per
+// action, in input order.
+absl::StatusOr<nlohmann::json> GuiActionGenerator::GenerateTestJSON(
+    const std::vector<ai::AIAction>& actions) {
+  nlohmann::json test_script;
+  test_script["test_name"] = "ai_generated_test";
+  test_script["description"] = "Automatically generated from AI actions";
+  test_script["steps"] = nlohmann::json::array();
+
+  for (size_t i = 0; i < actions.size(); ++i) {
+    nlohmann::json step = ActionToJSON(actions[i]);
+    step["step_number"] = i + 1;  // Steps are numbered from 1.
+    test_script["steps"].push_back(std::move(step));
+  }
+
+  return test_script;
+}
+
+// Converts one action into a JSON step. A numeric parameter that is present
+// but not a non-negative integer produces an "error" step rather than
+// throwing; a missing parameter leaves the step without target/position.
+nlohmann::json GuiActionGenerator::ActionToJSON(const ai::AIAction& action) {
+  // Builds the step reported for an unparseable numeric parameter.
+  const auto invalid_number = [](absl::string_view key,
+                                 absl::string_view raw) {
+    nlohmann::json error;
+    error["action"] = "error";
+    error["message"] =
+        absl::StrCat("Invalid value for '", key, "': '", raw, "'");
+    return error;
+  };
+
+  nlohmann::json step;
+
+  switch (action.type) {
+    case ai::AIActionType::kOpenEditor: {
+      step["action"] = "click";
+      auto it = action.parameters.find("editor");
+      if (it != action.parameters.end()) {
+        step["target"] = absl::StrCat("button:", it->second, " Editor");
+        step["wait_after"] = 500;  // Wait 500ms for editor to open
+      }
+      break;
+    }
+
+    case ai::AIActionType::kSelectTile: {
+      step["action"] = "click";
+      auto it = action.parameters.find("tile_id");
+      if (it != action.parameters.end()) {
+        const std::optional<int> tile_id = ParseNonNegativeInt(it->second);
+        if (!tile_id.has_value()) {
+          return invalid_number("tile_id", it->second);
+        }
+        // Click the centre of the tile's cell in the selector grid.
+        step["target"] = "canvas:tile16_selector";
+        step["position"] = {
+            {"x", TileCenterPx(*tile_id % kSelectorTilesPerRow)},
+            {"y", TileCenterPx(*tile_id / kSelectorTilesPerRow)}};
+        step["wait_after"] = 200;
+      }
+      break;
+    }
+
+    case ai::AIActionType::kPlaceTile: {
+      step["action"] = "click";
+      auto x_it = action.parameters.find("x");
+      auto y_it = action.parameters.find("y");
+
+      if (x_it != action.parameters.end() && y_it != action.parameters.end()) {
+        const std::optional<int> map_x = ParseNonNegativeInt(x_it->second);
+        if (!map_x.has_value()) {
+          return invalid_number("x", x_it->second);
+        }
+        const std::optional<int> map_y = ParseNonNegativeInt(y_it->second);
+        if (!map_y.has_value()) {
+          return invalid_number("y", y_it->second);
+        }
+        // Convert map tile coordinates to the centre pixel of that tile.
+        step["target"] = "canvas:overworld_map";
+        step["position"] = {{"x", TileCenterPx(*map_x)},
+                            {"y", TileCenterPx(*map_y)}};
+        step["wait_after"] = 200;
+      }
+      break;
+    }
+
+    case ai::AIActionType::kSaveTile: {
+      step["action"] = "click";
+      step["target"] = "button:Save to ROM";
+      step["wait_after"] = 300;
+      break;
+    }
+
+    case ai::AIActionType::kClickButton: {
+      step["action"] = "click";
+      auto it = action.parameters.find("button");
+      if (it != action.parameters.end()) {
+        step["target"] = absl::StrCat("button:", it->second);
+        step["wait_after"] = 200;
+      }
+      break;
+    }
+
+    case ai::AIActionType::kWait: {
+      step["action"] = "wait";
+      int duration = kDefaultWaitMs;
+      auto it = action.parameters.find("duration_ms");
+      if (it != action.parameters.end()) {
+        const std::optional<int> parsed = ParseNonNegativeInt(it->second);
+        if (!parsed.has_value()) {
+          return invalid_number("duration_ms", it->second);
+        }
+        duration = *parsed;
+      }
+      step["duration_ms"] = duration;
+      break;
+    }
+
+    case ai::AIActionType::kScreenshot: {
+      step["action"] = "screenshot";
+      auto it = action.parameters.find("filename");
+      if (it != action.parameters.end()) {
+        step["filename"] = it->second;
+      } else {
+        step["filename"] = "verification.png";
+      }
+      break;
+    }
+
+    case ai::AIActionType::kVerifyTile: {
+      step["action"] = "verify";
+      step["target"] = "tile_placement";
+      // Add verification parameters from action.parameters
+      for (const auto& [key, value] : action.parameters) {
+        step[key] = value;
+      }
+      break;
+    }
+
+    case ai::AIActionType::kInvalidAction:
+      step["action"] = "error";
+      step["message"] = "Invalid action type";
+      break;
+  }
+
+  return step;
+}
+#endif
+
+// Renders one action as a plain-text step. Action types without a text form
+// (kVerifyTile, kInvalidAction) are reported as "Unknown action".
+std::string GuiActionGenerator::ActionToTestStep(const ai::AIAction& action,
+                                                 int step_number) {
+  switch (action.type) {
+    case ai::AIActionType::kOpenEditor:
+      return GenerateOpenEditorStep(action);
+    case ai::AIActionType::kSelectTile:
+      return GenerateSelectTileStep(action);
+    case ai::AIActionType::kPlaceTile:
+      return GeneratePlaceTileStep(action);
+    case ai::AIActionType::kSaveTile:
+      return GenerateSaveTileStep(action);
+    case ai::AIActionType::kClickButton:
+      return GenerateClickButtonStep(action);
+    case ai::AIActionType::kWait:
+      return GenerateWaitStep(action);
+    case ai::AIActionType::kScreenshot:
+      return GenerateScreenshotStep(action);
+    default:
+      return absl::StrFormat("# Step %d: Unknown action", step_number);
+  }
+}
+
+std::string GuiActionGenerator::GenerateOpenEditorStep(
+    const ai::AIAction& action) {
+  auto it = action.parameters.find("editor");
+  if (it != action.parameters.end()) {
+    return absl::StrFormat("Click button:'%s Editor'\nWait 500ms", it->second);
+  }
+  return "Click button:'Editor'\nWait 500ms";
+}
+
+// Falls back to an untargeted selector click when tile_id is missing or is not
+// a non-negative integer.
+std::string GuiActionGenerator::GenerateSelectTileStep(
+    const ai::AIAction& action) {
+  auto it = action.parameters.find("tile_id");
+  if (it != action.parameters.end()) {
+    if (const auto tile_id = ParseNonNegativeInt(it->second)) {
+      return absl::StrFormat(
+          "Click canvas:'tile16_selector' at (%d, %d)\nWait 200ms",
+          TileCenterPx(*tile_id % kSelectorTilesPerRow),
+          TileCenterPx(*tile_id / kSelectorTilesPerRow));
+    }
+  }
+  return "Click canvas:'tile16_selector'\nWait 200ms";
+}
+
+// Falls back to an untargeted map click when x or y is missing or is not a
+// non-negative integer.
+std::string GuiActionGenerator::GeneratePlaceTileStep(
+    const ai::AIAction& action) {
+  auto x_it = action.parameters.find("x");
+  auto y_it = action.parameters.find("y");
+
+  if (x_it != action.parameters.end() && y_it != action.parameters.end()) {
+    const auto map_x = ParseNonNegativeInt(x_it->second);
+    const auto map_y = ParseNonNegativeInt(y_it->second);
+    if (map_x.has_value() && map_y.has_value()) {
+      return absl::StrFormat(
+          "Click canvas:'overworld_map' at (%d, %d)\nWait 200ms",
+          TileCenterPx(*map_x), TileCenterPx(*map_y));
+    }
+  }
+  return "Click canvas:'overworld_map'\nWait 200ms";
+}
+
+std::string GuiActionGenerator::GenerateSaveTileStep(
+    const ai::AIAction& /*action*/) {
+  return "Click button:'Save to ROM'\nWait 300ms";
+}
+
+std::string GuiActionGenerator::GenerateClickButtonStep(
+    const ai::AIAction& action) {
+  auto it = action.parameters.find("button");
+  if (it != action.parameters.end()) {
+    return absl::StrFormat("Click button:'%s'\nWait 200ms", it->second);
+  }
+  return "Click button\nWait 200ms";
+}
+
+// Uses the default wait when duration_ms is missing or is not a non-negative
+// integer.
+std::string GuiActionGenerator::GenerateWaitStep(const ai::AIAction& action) {
+  auto it = action.parameters.find("duration_ms");
+  const int duration =
+      it != action.parameters.end()
+          ? ParseNonNegativeInt(it->second).value_or(kDefaultWaitMs)
+          : kDefaultWaitMs;
+  return absl::StrFormat("Wait %dms", duration);
+}
+
+std::string GuiActionGenerator::GenerateScreenshotStep(
+    const ai::AIAction& action) {
+  auto it = action.parameters.find("filename");
+  if (it != action.parameters.end()) {
+    return absl::StrFormat("Screenshot '%s'", it->second);
+  }
+  return "Screenshot 'verification.png'";
+}
+
+}  // namespace gui
+}  // namespace cli
+}  // namespace yaze
diff --git a/src/cli/service/gui/gui_action_generator.h b/src/cli/service/gui/gui_action_generator.h
new file mode 100644
index 00000000..3a470740
--- /dev/null
+++ b/src/cli/service/gui/gui_action_generator.h
@@ -0,0 +1,71 @@
+#ifndef YAZE_CLI_SERVICE_GUI_GUI_ACTION_GENERATOR_H_
+#define YAZE_CLI_SERVICE_GUI_GUI_ACTION_GENERATOR_H_
+
+#include <string>
+#include <vector>
+
+#include "absl/status/statusor.h"
+#include "cli/service/ai/ai_action_parser.h"
+
+#ifdef YAZE_WITH_JSON
+#include "nlohmann/json.hpp"
+#endif
+
+namespace yaze {
+namespace cli {
+namespace gui {
+
+/**
+ * @class GuiActionGenerator
+ * @brief Converts high-level AI actions into executable GUI test scripts
+ *
+ * Takes parsed AI actions and generates gRPC test harness commands or
+ * JSON test scripts that can be executed to control the GUI.
+ */
+class GuiActionGenerator {
+ public:
+  GuiActionGenerator() = default;
+
+  /**
+   * Generate a test script from a sequence of AI actions
+   * @param actions Vector of actions to convert, in execution order
+   * @return Pretty-printed JSON script; Unimplemented without YAZE_WITH_JSON
+   */
+  absl::StatusOr<std::string> GenerateTestScript(
+      const std::vector<ai::AIAction>& actions);
+
+#ifdef YAZE_WITH_JSON
+  /**
+   * Generate a JSON test object from actions
+   * @param actions Vector of actions to convert, in execution order
+   * @return JSON object with "test_name", "description" and a "steps" array
+   */
+  absl::StatusOr<nlohmann::json> GenerateTestJSON(
+      const std::vector<ai::AIAction>& actions);
+#endif
+
+  /**
+   * Convert a single action to a plain-text test step
+   */
+  std::string ActionToTestStep(const ai::AIAction& action, int step_number);
+
+ private:
+  // Plain-text step builders, one per action type that has a text form
+  std::string GenerateOpenEditorStep(const ai::AIAction& action);
+  std::string GenerateSelectTileStep(const ai::AIAction& action);
+  std::string GeneratePlaceTileStep(const ai::AIAction& action);
+  std::string GenerateSaveTileStep(const ai::AIAction& action);
+  std::string GenerateClickButtonStep(const ai::AIAction& action);
+  std::string GenerateWaitStep(const ai::AIAction& action);
+  std::string GenerateScreenshotStep(const ai::AIAction& action);
+
+#ifdef YAZE_WITH_JSON
+  nlohmann::json ActionToJSON(const ai::AIAction& action);
+#endif
+};
+
+}  // namespace gui
+}  // namespace cli
+}  // namespace yaze
+
+#endif  // YAZE_CLI_SERVICE_GUI_GUI_ACTION_GENERATOR_H_
diff --git a/src/cli/z3ed.cmake b/src/cli/z3ed.cmake
index 8d6900a6..9a98d938 100644
--- a/src/cli/z3ed.cmake
+++ b/src/cli/z3ed.cmake
@@ -136,12 +136,14 @@ if(YAZE_USE_MODULAR_BUILD)
   target_link_libraries(
     z3ed PRIVATE
     yaze_core
+    yaze_agent
     ftxui::component
   )
 else()
   target_link_libraries(
     z3ed PRIVATE
     yaze_core
+    yaze_agent
     ftxui::component
     absl::flags
     absl::flags_parse
diff --git a/test/integration/test_ai_tile_placement.cc b/test/integration/test_ai_tile_placement.cc
new file mode 100644
index 00000000..a72307c6
--- /dev/null
+++ 
b/test/integration/test_ai_tile_placement.cc
@@ -0,0 +1,178 @@
+#include <chrono>
+#include <filesystem>
+#include <iostream>
+#include <string>
+#include <thread>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "absl/status/status.h"
+#include "absl/strings/str_cat.h"
+#include "cli/service/ai/ai_action_parser.h"
+#include "cli/service/gui/gui_action_generator.h"
+
+#ifdef YAZE_WITH_GRPC
+#include "cli/service/gui/gui_automation_client.h"
+#endif
+
+namespace yaze {
+namespace test {
+
+class AITilePlacementTest : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    test_dir_ = std::filesystem::temp_directory_path() / "yaze_ai_tile_test";
+    std::filesystem::create_directories(test_dir_);
+  }
+
+  void TearDown() override {
+    if (std::filesystem::exists(test_dir_)) {
+      std::filesystem::remove_all(test_dir_);
+    }
+  }
+
+  std::filesystem::path test_dir_;
+};
+
+TEST_F(AITilePlacementTest, ParsePlaceTileCommand) {
+  std::string command = "Place tile 0x42 at position (5, 7)";
+
+  auto actions = cli::ai::AIActionParser::ParseCommand(command);
+  ASSERT_TRUE(actions.ok()) << actions.status().message();
+
+  // Should generate: SelectTile, PlaceTile, SaveTile
+  ASSERT_EQ(actions->size(), 3u);
+
+  EXPECT_EQ((*actions)[0].type, cli::ai::AIActionType::kSelectTile);
+  EXPECT_EQ((*actions)[0].parameters.at("tile_id"), "66");  // 0x42 = 66
+
+  EXPECT_EQ((*actions)[1].type, cli::ai::AIActionType::kPlaceTile);
+  EXPECT_EQ((*actions)[1].parameters.at("x"), "5");
+  EXPECT_EQ((*actions)[1].parameters.at("y"), "7");
+
+  EXPECT_EQ((*actions)[2].type, cli::ai::AIActionType::kSaveTile);
+}
+
+TEST_F(AITilePlacementTest, GenerateTestScript) {
+  std::string command = "Place tile 100 at position (10, 15)";
+
+  auto actions = cli::ai::AIActionParser::ParseCommand(command);
+  ASSERT_TRUE(actions.ok());
+
+  cli::gui::GuiActionGenerator generator;
+  auto script = generator.GenerateTestScript(*actions);
+
+#ifdef YAZE_WITH_JSON
+  ASSERT_TRUE(script.ok()) << script.status().message();
+
+  // Verify it's valid JSON
+  nlohmann::json parsed;
+  ASSERT_NO_THROW(parsed = nlohmann::json::parse(*script));
+
+  ASSERT_TRUE(parsed.contains("steps"));
+  ASSERT_TRUE(parsed["steps"].is_array());
+  ASSERT_EQ(parsed["steps"].size(), 3u);
+
+  // Verify first step is select tile
+  EXPECT_EQ(parsed["steps"][0]["action"], "click");
+  EXPECT_EQ(parsed["steps"][0]["target"], "canvas:tile16_selector");
+
+  // Verify second step is place tile
+  EXPECT_EQ(parsed["steps"][1]["action"], "click");
+  EXPECT_EQ(parsed["steps"][1]["target"], "canvas:overworld_map");
+  EXPECT_EQ(parsed["steps"][1]["position"]["x"], 168);  // 10 * 16 + 8
+  EXPECT_EQ(parsed["steps"][1]["position"]["y"], 248);  // 15 * 16 + 8
+
+  // Verify third step is save
+  EXPECT_EQ(parsed["steps"][2]["action"], "click");
+  EXPECT_EQ(parsed["steps"][2]["target"], "button:Save to ROM");
+#else
+  // Without JSON support the generator reports Unimplemented, not a script.
+  EXPECT_EQ(script.status().code(), absl::StatusCode::kUnimplemented);
+#endif
+}
+
+TEST_F(AITilePlacementTest, ParseMultipleFormats) {
+  std::vector<std::string> commands = {
+    "Place tile 0x10 at (3, 4)",
+    "Put tile 20 at position 3,4",
+    "Set tile 30 at x=3 y=4",
+    "Place tile 40 at overworld 0 position (3, 4)"
+  };
+
+  for (const auto& cmd : commands) {
+    auto actions = cli::ai::AIActionParser::ParseCommand(cmd);
+    EXPECT_TRUE(actions.ok()) << "Failed to parse: " << cmd
+                              << " - " << actions.status().message();
+    if (actions.ok()) {
+      EXPECT_GE(actions->size(), 2u) << "Command: " << cmd;
+    }
+  }
+}
+
+TEST_F(AITilePlacementTest, GenerateActionDescription) {
+  cli::ai::AIAction select_action(cli::ai::AIActionType::kSelectTile);
+  select_action.parameters["tile_id"] = "42";
+
+  std::string desc = cli::ai::AIActionParser::ActionToString(select_action);
+  EXPECT_EQ(desc, "Select tile 42");
+
+  cli::ai::AIAction place_action(cli::ai::AIActionType::kPlaceTile);
+  place_action.parameters["x"] = "5";
+  place_action.parameters["y"] = "7";
+
+  desc = cli::ai::AIActionParser::ActionToString(place_action);
+  EXPECT_EQ(desc, "Place tile at position (5, 7)");
+}
+
+// GenerateTestJSON only exists in JSON-enabled builds, so this test needs both
+// features.
+#if defined(YAZE_WITH_GRPC) && defined(YAZE_WITH_JSON)
+// Integration test with actual gRPC test harness
+// This test requires YAZE to be running with test harness enabled
+TEST_F(AITilePlacementTest, DISABLED_ExecuteViaGRPC) {
+  // This test is disabled by default as it requires YAZE to be running
+  // Enable it manually when testing with a running instance
+
+  std::string command = "Place tile 50 at position (2, 3)";
+
+  // Parse command
+  auto actions = cli::ai::AIActionParser::ParseCommand(command);
+  ASSERT_TRUE(actions.ok());
+
+  // Generate test script
+  cli::gui::GuiActionGenerator generator;
+  auto script_json = generator.GenerateTestJSON(*actions);
+  ASSERT_TRUE(script_json.ok());
+
+  // Connect to test harness
+  cli::gui::GuiAutomationClient client("localhost:50051");
+
+  // Execute each step
+  for (const auto& step : (*script_json)["steps"]) {
+    if (step["action"] == "click") {
+      [[maybe_unused]] const auto target = step["target"].get<std::string>();
+      // Execute click via gRPC
+      // (Implementation depends on GuiAutomationClient interface)
+    } else if (step["action"] == "wait") {
+      const int duration_ms = step["duration_ms"].get<int>();
+      std::this_thread::sleep_for(std::chrono::milliseconds(duration_ms));
+    }
+  }
+
+  // Verify tile was placed
+  // (Would require ROM inspection via gRPC)
+}
+#endif
+
+}  // namespace test
+}  // namespace yaze
+
+int main(int argc, char** argv) {
+  ::testing::InitGoogleTest(&argc, argv);
+
+  std::cout << "\n=== AI Tile Placement Tests ===" << std::endl;
+  std::cout << "Testing AI command parsing and GUI action generation.\n" << std::endl;
+
+  return RUN_ALL_TESTS();
+}