feat: Enhance GUI action generation and testing for AI tile placement

- Introduced `GuiActionGenerator` class to convert high-level AI actions into executable GUI test scripts, supporting JSON output for automated testing.
- Added comprehensive test suite for AI tile placement commands, validating command parsing and generated test scripts.
- Updated `README.md` to reflect recent enhancements and completed features in the project.
- Improved CMake configuration to include new source files for GUI action generation and related services.
This commit is contained in:
scawful
2025-10-04 22:23:31 -04:00
parent 9d5919adb5
commit 13bbe8078a
7 changed files with 481 additions and 8 deletions

View File

@@ -887,11 +887,15 @@ The AI response appears in your chat history and can reference specific details
4. **Collaboration UI Enhancements (1 day)**: Add UI elements for ROM sync, snapshot sharing, and proposal management in the Agent Chat widget.
5. **Windows Cross-Platform Testing (8-10h)**: Validate `z3ed` and the test harness on Windows.
### ✅ Recently Completed (v0.2.0-alpha)
### ✅ Recently Completed (v0.2.0-alpha - October 5, 2025)
- **Enhanced System Prompt (v3)**: Proactive tool chaining with implicit iteration to minimize back-and-forth
- **Learn Command**: Full implementation with preferences, ROM patterns, project context, and conversation memory
- **gRPC Windows Build Optimization**: Documented vcpkg approach and optimization strategies
- **Enhanced System Prompt (v3)**: Proactive tool chaining with implicit iteration to minimize back-and-forth conversations
- **Learn Command**: Full implementation with preferences, ROM patterns, project context, and conversation memory storage
- **Native Gemini Function Calling**: Upgraded from manual curl to native function calling API with automatic tool schema generation
- **Multimodal Vision Testing**: Comprehensive test suite for Gemini vision capabilities with screenshot integration
- **AI-Controlled GUI Automation**: Natural language parsing (`AIActionParser`) and test script generation (`GuiActionGenerator`) for automated tile placement
- **gRPC Windows Build Optimization**: vcpkg integration for 10-20x faster Windows builds, removed abseil-cpp submodule
- **Improved Documentation**: Consolidated architecture, enhancement plans, and build instructions with JSON-first approach
## 12. Troubleshooting

View File

@@ -72,9 +72,11 @@ set(YAZE_AGENT_SOURCES
cli/service/agent/tool_dispatcher.cc
cli/service/agent/learned_knowledge_service.cc
cli/service/ai/ai_service.cc
cli/service/ai/ai_action_parser.cc
cli/service/ai/ollama_ai_service.cc
cli/service/ai/prompt_builder.cc
cli/service/ai/service_factory.cc
cli/service/gui/gui_action_generator.cc
cli/service/planning/policy_evaluator.cc
cli/service/planning/proposal_registry.cc
cli/service/planning/tile16_proposal_generator.cc

View File

@@ -29,6 +29,7 @@
#include "cli/service/ai/gemini_ai_service.h"
#include "cli/service/ai/ollama_ai_service.h"
#include "cli/service/ai/service_factory.h"
#include "cli/service/agent/learned_knowledge_service.h"
#include "cli/service/agent/proposal_executor.h"
#include "cli/service/agent/simple_chat_session.h"
#include "cli/service/planning/proposal_registry.h"
@@ -386,9 +387,7 @@ absl::Status HandleDiffCommand(Rom& rom, const std::vector<std::string>& args) {
}
absl::Status HandleLearnCommand(const std::vector<std::string>& args) {
using namespace yaze::cli::agent;
static LearnedKnowledgeService learn_service;
static yaze::cli::agent::LearnedKnowledgeService learn_service;
static bool initialized = false;
if (!initialized) {
@@ -545,7 +544,7 @@ absl::Status HandleLearnCommand(const std::vector<std::string>& args) {
std::cout << "\n";
}
}
return absl::OkStatus();
return absl::OkStatus();
}
return absl::InvalidArgumentError("Unknown learn command. Use 'z3ed agent learn' for usage.");

View File

@@ -0,0 +1,227 @@
#include "cli/service/gui/gui_action_generator.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_format.h"
namespace yaze {
namespace cli {
namespace gui {
absl::StatusOr<std::string> GuiActionGenerator::GenerateTestScript(
    const std::vector<ai::AIAction>& actions) {
#ifdef YAZE_WITH_JSON
  // Build the structured test first, then serialize it for callers that
  // want the script as plain text.
  absl::StatusOr<nlohmann::json> json_script = GenerateTestJSON(actions);
  if (!json_script.ok()) {
    return json_script.status();
  }
  // Serialize with a 2-space indent so the script stays human-readable.
  return json_script->dump(2);
#else
  return absl::UnimplementedError("JSON support required for test script generation");
#endif
}
#ifdef YAZE_WITH_JSON
absl::StatusOr<nlohmann::json> GuiActionGenerator::GenerateTestJSON(
    const std::vector<ai::AIAction>& actions) {
  // Assemble the top-level test object, then append one numbered step
  // per action.
  nlohmann::json script;
  script["test_name"] = "ai_generated_test";
  script["description"] = "Automatically generated from AI actions";
  script["steps"] = nlohmann::json::array();

  size_t index = 0;
  for (const auto& action : actions) {
    nlohmann::json step = ActionToJSON(action);
    step["step_number"] = ++index;  // steps are 1-based
    script["steps"].push_back(std::move(step));
  }
  return script;
}
nlohmann::json GuiActionGenerator::ActionToJSON(const ai::AIAction& action) {
  // Parse an integer parameter without throwing: parameter values come from
  // AI-generated text, and std::stoi raises std::invalid_argument /
  // std::out_of_range on malformed input.
  const auto parse_int = [](const std::string& text, int fallback) {
    try {
      return std::stoi(text);
    } catch (const std::exception&) {
      return fallback;
    }
  };

  nlohmann::json step;
  switch (action.type) {
    case ai::AIActionType::kOpenEditor: {
      step["action"] = "click";
      auto it = action.parameters.find("editor");
      if (it != action.parameters.end()) {
        step["target"] = absl::StrCat("button:", it->second, " Editor");
        step["wait_after"] = 500;  // Wait 500ms for editor to open
      }
      break;
    }
    case ai::AIActionType::kSelectTile: {
      step["action"] = "click";
      auto it = action.parameters.find("tile_id");
      if (it != action.parameters.end()) {
        int tile_id = parse_int(it->second, 0);
        // Position in tile selector (8 tiles per row, 16x16 pixels each);
        // +8 targets the center of the tile.
        int tile_x = (tile_id % 8) * 16 + 8;
        int tile_y = (tile_id / 8) * 16 + 8;
        step["target"] = "canvas:tile16_selector";
        step["position"] = {{"x", tile_x}, {"y", tile_y}};
        step["wait_after"] = 200;
      }
      break;
    }
    case ai::AIActionType::kPlaceTile: {
      step["action"] = "click";
      auto x_it = action.parameters.find("x");
      auto y_it = action.parameters.find("y");
      if (x_it != action.parameters.end() && y_it != action.parameters.end()) {
        // Convert map coordinates to screen coordinates, assuming 16x16
        // tiles and clicking the tile center.
        int screen_x = parse_int(x_it->second, 0) * 16 + 8;
        int screen_y = parse_int(y_it->second, 0) * 16 + 8;
        step["target"] = "canvas:overworld_map";
        step["position"] = {{"x", screen_x}, {"y", screen_y}};
        step["wait_after"] = 200;
      }
      break;
    }
    case ai::AIActionType::kSaveTile: {
      step["action"] = "click";
      step["target"] = "button:Save to ROM";
      step["wait_after"] = 300;
      break;
    }
    case ai::AIActionType::kClickButton: {
      step["action"] = "click";
      auto it = action.parameters.find("button");
      if (it != action.parameters.end()) {
        step["target"] = absl::StrCat("button:", it->second);
        step["wait_after"] = 200;
      }
      break;
    }
    case ai::AIActionType::kWait: {
      step["action"] = "wait";
      auto it = action.parameters.find("duration_ms");
      // Default (and malformed-input fallback) is 500ms.
      step["duration_ms"] =
          it != action.parameters.end() ? parse_int(it->second, 500) : 500;
      break;
    }
    case ai::AIActionType::kScreenshot: {
      step["action"] = "screenshot";
      auto it = action.parameters.find("filename");
      if (it != action.parameters.end()) {
        step["filename"] = it->second;
      } else {
        step["filename"] = "verification.png";
      }
      break;
    }
    case ai::AIActionType::kVerifyTile: {
      step["action"] = "verify";
      step["target"] = "tile_placement";
      // Copy verification parameters from action.parameters, but never let
      // them clobber the reserved "action"/"target" keys the executor reads.
      for (const auto& [key, value] : action.parameters) {
        if (key != "action" && key != "target") {
          step[key] = value;
        }
      }
      break;
    }
    case ai::AIActionType::kInvalidAction:
      step["action"] = "error";
      step["message"] = "Invalid action type";
      break;
  }
  return step;
}
#endif
std::string GuiActionGenerator::ActionToTestStep(const ai::AIAction& action,
                                                 int step_number) {
  // Dispatch to the per-action renderer. Action types without a textual
  // renderer (kVerifyTile, kInvalidAction) fall through to a comment line.
  using ai::AIActionType;
  const AIActionType kind = action.type;
  if (kind == AIActionType::kOpenEditor) return GenerateOpenEditorStep(action);
  if (kind == AIActionType::kSelectTile) return GenerateSelectTileStep(action);
  if (kind == AIActionType::kPlaceTile) return GeneratePlaceTileStep(action);
  if (kind == AIActionType::kSaveTile) return GenerateSaveTileStep(action);
  if (kind == AIActionType::kClickButton) return GenerateClickButtonStep(action);
  if (kind == AIActionType::kWait) return GenerateWaitStep(action);
  if (kind == AIActionType::kScreenshot) return GenerateScreenshotStep(action);
  return absl::StrFormat("# Step %d: Unknown action", step_number);
}
std::string GuiActionGenerator::GenerateOpenEditorStep(const ai::AIAction& action) {
  // Use the editor named in the action; otherwise click a generic button.
  const auto param = action.parameters.find("editor");
  if (param == action.parameters.end()) {
    return "Click button:'Editor'\nWait 500ms";
  }
  return absl::StrFormat("Click button:'%s Editor'\nWait 500ms", param->second);
}
std::string GuiActionGenerator::GenerateSelectTileStep(const ai::AIAction& action) {
  // Render a click on the tile16 selector. The tile_id parameter is
  // AI-generated text, so guard std::stoi against malformed input instead
  // of letting it throw; malformed ids fall back to a generic click.
  auto it = action.parameters.find("tile_id");
  if (it != action.parameters.end()) {
    bool parsed = true;
    int tile_id = 0;
    try {
      tile_id = std::stoi(it->second);
    } catch (const std::exception&) {
      parsed = false;
    }
    if (parsed) {
      // Selector layout: 8 tiles per row, 16x16 px; +8 hits the center.
      int tile_x = (tile_id % 8) * 16 + 8;
      int tile_y = (tile_id / 8) * 16 + 8;
      return absl::StrFormat(
          "Click canvas:'tile16_selector' at (%d, %d)\nWait 200ms", tile_x,
          tile_y);
    }
  }
  return "Click canvas:'tile16_selector'\nWait 200ms";
}
std::string GuiActionGenerator::GeneratePlaceTileStep(const ai::AIAction& action) {
  // Render a click on the overworld map at the tile's screen position.
  // Coordinates are AI-generated text, so guard std::stoi against malformed
  // input instead of letting it throw; bad input falls back to a generic click.
  auto x_it = action.parameters.find("x");
  auto y_it = action.parameters.find("y");
  if (x_it != action.parameters.end() && y_it != action.parameters.end()) {
    bool parsed = true;
    int map_x = 0;
    int map_y = 0;
    try {
      map_x = std::stoi(x_it->second);
      map_y = std::stoi(y_it->second);
    } catch (const std::exception&) {
      parsed = false;
    }
    if (parsed) {
      // 16x16 tiles; +8 clicks the tile center.
      int screen_x = map_x * 16 + 8;
      int screen_y = map_y * 16 + 8;
      return absl::StrFormat(
          "Click canvas:'overworld_map' at (%d, %d)\nWait 200ms", screen_x,
          screen_y);
    }
  }
  return "Click canvas:'overworld_map'\nWait 200ms";
}
std::string GuiActionGenerator::GenerateSaveTileStep(const ai::AIAction& action) {
return "Click button:'Save to ROM'\nWait 300ms";
}
std::string GuiActionGenerator::GenerateClickButtonStep(const ai::AIAction& action) {
  // Click the named button if provided; otherwise emit an unnamed click.
  const auto param = action.parameters.find("button");
  if (param == action.parameters.end()) {
    return "Click button\nWait 200ms";
  }
  return absl::StrFormat("Click button:'%s'\nWait 200ms", param->second);
}
std::string GuiActionGenerator::GenerateWaitStep(const ai::AIAction& action) {
  // Render a wait step. duration_ms is AI-generated text, so guard
  // std::stoi against malformed input; missing or malformed values fall
  // back to the 500ms default instead of throwing.
  int duration = 500;
  auto it = action.parameters.find("duration_ms");
  if (it != action.parameters.end()) {
    try {
      duration = std::stoi(it->second);
    } catch (const std::exception&) {
      duration = 500;
    }
  }
  return absl::StrFormat("Wait %dms", duration);
}
std::string GuiActionGenerator::GenerateScreenshotStep(const ai::AIAction& action) {
  // Use the requested filename, defaulting to the standard verification
  // capture when none is given.
  const auto param = action.parameters.find("filename");
  const bool has_name = param != action.parameters.end();
  return has_name ? absl::StrFormat("Screenshot '%s'", param->second)
                  : std::string("Screenshot 'verification.png'");
}
} // namespace gui
} // namespace cli
} // namespace yaze

View File

@@ -0,0 +1,71 @@
#ifndef YAZE_CLI_SERVICE_GUI_GUI_ACTION_GENERATOR_H_
#define YAZE_CLI_SERVICE_GUI_GUI_ACTION_GENERATOR_H_
#include <string>
#include <vector>
#include "absl/status/statusor.h"
#include "cli/service/ai/ai_action_parser.h"
#ifdef YAZE_WITH_JSON
#include "nlohmann/json.hpp"
#endif
namespace yaze {
namespace cli {
namespace gui {
/**
* @class GuiActionGenerator
* @brief Converts high-level AI actions into executable GUI test scripts
*
* Takes parsed AI actions and generates gRPC test harness commands or
* JSON test scripts that can be executed to control the GUI.
*/
class GuiActionGenerator {
 public:
  GuiActionGenerator() = default;

  /**
   * Generate a test script from a sequence of AI actions
   * @param actions Vector of actions to convert
   * @return JSON-formatted test script (2-space indented), or error status
   * @note Returns kUnimplemented when built without YAZE_WITH_JSON.
   */
  absl::StatusOr<std::string> GenerateTestScript(
      const std::vector<ai::AIAction>& actions);

#ifdef YAZE_WITH_JSON
  /**
   * Generate a JSON test object from actions
   * @param actions Vector of actions to convert
   * @return JSON object with "test_name", "description", and a "steps"
   *         array; each step carries a 1-based "step_number"
   */
  absl::StatusOr<nlohmann::json> GenerateTestJSON(
      const std::vector<ai::AIAction>& actions);
#endif

  /**
   * Convert a single action to a test step
   * @param action Action to render as human-readable step text
   * @param step_number 1-based index; used only in the fallback line
   *        emitted for unrecognized action types
   */
  std::string ActionToTestStep(const ai::AIAction& action, int step_number);

 private:
  // Helper functions for specific action types; each renders one action as
  // textual test-step line(s).
  std::string GenerateOpenEditorStep(const ai::AIAction& action);
  std::string GenerateSelectTileStep(const ai::AIAction& action);
  std::string GeneratePlaceTileStep(const ai::AIAction& action);
  std::string GenerateSaveTileStep(const ai::AIAction& action);
  std::string GenerateClickButtonStep(const ai::AIAction& action);
  std::string GenerateWaitStep(const ai::AIAction& action);
  std::string GenerateScreenshotStep(const ai::AIAction& action);

#ifdef YAZE_WITH_JSON
  // Render a single action as one JSON step object.
  nlohmann::json ActionToJSON(const ai::AIAction& action);
#endif
};
} // namespace gui
} // namespace cli
} // namespace yaze
#endif // YAZE_CLI_SERVICE_GUI_GUI_ACTION_GENERATOR_H_

View File

@@ -136,12 +136,14 @@ if(YAZE_USE_MODULAR_BUILD)
target_link_libraries(
z3ed PRIVATE
yaze_core
yaze_agent
ftxui::component
)
else()
target_link_libraries(
z3ed PRIVATE
yaze_core
yaze_agent
ftxui::component
absl::flags
absl::flags_parse

View File

@@ -0,0 +1,168 @@
#include <chrono>
#include <filesystem>
#include <fstream>
#include <thread>

#include "gtest/gtest.h"

#include "absl/strings/str_cat.h"

#include "cli/service/ai/ai_action_parser.h"
#include "cli/service/gui/gui_action_generator.h"

#ifdef YAZE_WITH_GRPC
#include "cli/service/gui/gui_automation_client.h"
#endif
namespace yaze {
namespace test {
// Fixture: provides a scratch directory under the system temp path for any
// files a test writes, cleaned up after each test.
class AITilePlacementTest : public ::testing::Test {
 protected:
  void SetUp() override {
    // NOTE(review): fixed directory name — concurrent runs of this suite
    // would share (and delete) the same scratch directory.
    test_dir_ = std::filesystem::temp_directory_path() / "yaze_ai_tile_test";
    std::filesystem::create_directories(test_dir_);
  }

  void TearDown() override {
    if (std::filesystem::exists(test_dir_)) {
      std::filesystem::remove_all(test_dir_);
    }
  }

  // Per-test scratch directory, created in SetUp and removed in TearDown.
  std::filesystem::path test_dir_;
};
// A placement command should expand into the canonical select -> place ->
// save sequence, with the hex tile id normalized to decimal.
TEST_F(AITilePlacementTest, ParsePlaceTileCommand) {
  auto actions = cli::ai::AIActionParser::ParseCommand(
      "Place tile 0x42 at position (5, 7)");
  ASSERT_TRUE(actions.ok()) << actions.status().message();
  ASSERT_EQ(actions->size(), 3);

  const auto& select = (*actions)[0];
  EXPECT_EQ(select.type, cli::ai::AIActionType::kSelectTile);
  EXPECT_EQ(select.parameters.at("tile_id"), "66");  // 0x42 = 66

  const auto& place = (*actions)[1];
  EXPECT_EQ(place.type, cli::ai::AIActionType::kPlaceTile);
  EXPECT_EQ(place.parameters.at("x"), "5");
  EXPECT_EQ(place.parameters.at("y"), "7");

  EXPECT_EQ((*actions)[2].type, cli::ai::AIActionType::kSaveTile);
}
// End-to-end: parse a placement command, render it as a JSON test script,
// and verify the generated steps and their computed screen coordinates.
TEST_F(AITilePlacementTest, GenerateTestScript) {
  std::string command = "Place tile 100 at position (10, 15)";
  auto actions = cli::ai::AIActionParser::ParseCommand(command);
  ASSERT_TRUE(actions.ok());

  cli::gui::GuiActionGenerator generator;
  auto script = generator.GenerateTestScript(*actions);
  ASSERT_TRUE(script.ok()) << script.status().message();

  // Verify it's valid JSON (only when JSON support is compiled in; without
  // it, GenerateTestScript returns kUnimplemented and script.ok() fails above)
#ifdef YAZE_WITH_JSON
  nlohmann::json parsed;
  ASSERT_NO_THROW(parsed = nlohmann::json::parse(*script));
  ASSERT_TRUE(parsed.contains("steps"));
  ASSERT_TRUE(parsed["steps"].is_array());
  EXPECT_EQ(parsed["steps"].size(), 3);

  // Verify first step is select tile
  EXPECT_EQ(parsed["steps"][0]["action"], "click");
  EXPECT_EQ(parsed["steps"][0]["target"], "canvas:tile16_selector");

  // Verify second step is place tile; screen coord = map coord * 16 + 8
  EXPECT_EQ(parsed["steps"][1]["action"], "click");
  EXPECT_EQ(parsed["steps"][1]["target"], "canvas:overworld_map");
  EXPECT_EQ(parsed["steps"][1]["position"]["x"], 168);  // 10 * 16 + 8
  EXPECT_EQ(parsed["steps"][1]["position"]["y"], 248);  // 15 * 16 + 8

  // Verify third step is save
  EXPECT_EQ(parsed["steps"][2]["action"], "click");
  EXPECT_EQ(parsed["steps"][2]["target"], "button:Save to ROM");
#endif
}
// Each supported phrasing of a placement command should parse into at least
// a select+place pair.
TEST_F(AITilePlacementTest, ParseMultipleFormats) {
  const std::vector<std::string> variants = {
      "Place tile 0x10 at (3, 4)",
      "Put tile 20 at position 3,4",
      "Set tile 30 at x=3 y=4",
      "Place tile 40 at overworld 0 position (3, 4)",
  };
  for (const auto& variant : variants) {
    auto actions = cli::ai::AIActionParser::ParseCommand(variant);
    EXPECT_TRUE(actions.ok()) << "Failed to parse: " << variant << " - "
                              << actions.status().message();
    if (!actions.ok()) continue;
    EXPECT_GE(actions->size(), 2) << "Command: " << variant;
  }
}
// ActionToString should produce the expected human-readable descriptions
// for select and place actions.
TEST_F(AITilePlacementTest, GenerateActionDescription) {
  using cli::ai::AIAction;
  using cli::ai::AIActionParser;
  using cli::ai::AIActionType;

  AIAction select(AIActionType::kSelectTile);
  select.parameters["tile_id"] = "42";
  EXPECT_EQ(AIActionParser::ActionToString(select), "Select tile 42");

  AIAction place(AIActionType::kPlaceTile);
  place.parameters["x"] = "5";
  place.parameters["y"] = "7";
  EXPECT_EQ(AIActionParser::ActionToString(place),
            "Place tile at position (5, 7)");
}
#ifdef YAZE_WITH_GRPC
// Integration test with actual gRPC test harness
// This test requires YAZE to be running with test harness enabled
// Integration test with actual gRPC test harness.
// Disabled by default: it requires a running YAZE instance with the test
// harness enabled on localhost:50051. Enable manually for live testing.
// NOTE(review): GenerateTestJSON is only declared under YAZE_WITH_JSON, so
// this YAZE_WITH_GRPC-only test presumably also requires a JSON-enabled
// build — confirm the build configuration guarantees both.
TEST_F(AITilePlacementTest, DISABLED_ExecuteViaGRPC) {
  std::string command = "Place tile 50 at position (2, 3)";

  // Parse command into discrete GUI actions.
  auto actions = cli::ai::AIActionParser::ParseCommand(command);
  ASSERT_TRUE(actions.ok());

  // Generate test script as a structured JSON object.
  cli::gui::GuiActionGenerator generator;
  auto script_json = generator.GenerateTestJSON(*actions);
  ASSERT_TRUE(script_json.ok());

  // Connect to test harness.
  cli::gui::GuiAutomationClient client("localhost:50051");

  // Execute each step. Click dispatch is still a stub; only waits run.
  for (const auto& step : (*script_json)["steps"]) {
    if (step["action"] == "click") {
      std::string target = step["target"];
      // Execute click via gRPC
      // (Implementation depends on GuiAutomationClient interface)
    } else if (step["action"] == "wait") {
      int duration_ms = step["duration_ms"];
      std::this_thread::sleep_for(std::chrono::milliseconds(duration_ms));
    }
  }

  // Verify tile was placed
  // (Would require ROM inspection via gRPC)
}
#endif
} // namespace test
} // namespace yaze
// Test entry point: print a banner, then hand control to gtest.
int main(int argc, char** argv) {
  ::testing::InitGoogleTest(&argc, argv);
  std::cout << "\n=== AI Tile Placement Tests ===" << std::endl
            << "Testing AI command parsing and GUI action generation.\n"
            << std::endl;
  return RUN_ALL_TESTS();
}