feat: Implement widget discovery feature in GUI automation

- Added `DiscoverWidgets` RPC to the ImGuiTestHarness service for enumerating GUI widgets.
- Introduced `WidgetDiscoveryService` to handle widget collection and filtering based on various criteria.
- Updated `agent gui discover` command to support new options for filtering and output formats.
- Enhanced `GuiAutomationClient` to facilitate widget discovery requests and responses.
- Added necessary protobuf messages for widget discovery in `imgui_test_harness.proto`.
- Updated CLI command handling to include new GUI discovery functionality.
- Improved documentation for the `agent gui discover` command with examples and output formats.
This commit is contained in:
scawful
2025-10-02 16:56:15 -04:00
parent 3944861b38
commit 22f0e5006b
11 changed files with 947 additions and 31 deletions

View File

@@ -25,6 +25,8 @@
#include <algorithm>
#include <charconv>
#include <limits>
#include <system_error>
#include <thread>
#include <utility>
// Declare the rom flag so we can access it
ABSL_DECLARE_FLAG(std::string, rom);
@@ -105,6 +107,45 @@ std::optional<TestRunStatus> ParseStatusFilter(absl::string_view value) {
return std::nullopt;
}
// Translates a widget type name given on the command line into a
// WidgetTypeFilter. Matching is ASCII case-insensitive and several aliases are
// accepted per filter; the empty string maps to kUnspecified. Returns
// std::nullopt when the name matches none of the known aliases.
std::optional<WidgetTypeFilter> ParseWidgetTypeFilter(
    absl::string_view value) {
  struct Alias {
    const char* name;
    WidgetTypeFilter filter;
  };
  // Every spelling the CLI understands, paired with the filter it selects.
  static const Alias kAliases[] = {
      {"", WidgetTypeFilter::kUnspecified},
      {"unspecified", WidgetTypeFilter::kUnspecified},
      {"any", WidgetTypeFilter::kUnspecified},
      {"all", WidgetTypeFilter::kAll},
      {"button", WidgetTypeFilter::kButton},
      {"buttons", WidgetTypeFilter::kButton},
      {"input", WidgetTypeFilter::kInput},
      {"textbox", WidgetTypeFilter::kInput},
      {"field", WidgetTypeFilter::kInput},
      {"menu", WidgetTypeFilter::kMenu},
      {"menuitem", WidgetTypeFilter::kMenu},
      {"menu-item", WidgetTypeFilter::kMenu},
      {"tab", WidgetTypeFilter::kTab},
      {"tabs", WidgetTypeFilter::kTab},
      {"checkbox", WidgetTypeFilter::kCheckbox},
      {"toggle", WidgetTypeFilter::kCheckbox},
      {"slider", WidgetTypeFilter::kSlider},
      {"drag", WidgetTypeFilter::kSlider},
      {"sliderfloat", WidgetTypeFilter::kSlider},
      {"canvas", WidgetTypeFilter::kCanvas},
      {"viewport", WidgetTypeFilter::kCanvas},
      {"selectable", WidgetTypeFilter::kSelectable},
      {"list-item", WidgetTypeFilter::kSelectable},
      {"other", WidgetTypeFilter::kOther},
  };

  const std::string normalized = std::string(absl::AsciiStrToLower(value));
  for (const Alias& alias : kAliases) {
    if (normalized == alias.name) {
      return alias.filter;
    }
  }
  return std::nullopt;
}
// Joins a host name and a port number into a single "host:port" string.
std::string HarnessAddress(const std::string& host, int port) {
  std::string address = absl::StrCat(host, ":", port);
  return address;
}
@@ -1051,6 +1092,327 @@ absl::Status HandleTestCommand(const std::vector<std::string>& arg_vec) {
return HandleTestRunCommand(arg_vec);
}
// Handles `agent gui discover`: parses the command's flags, asks the GUI test
// harness for its widgets and prints them either as a human-readable table or
// as JSON.
//
// Every value-taking flag is accepted both as `--flag value` and as
// `--flag=value`. `--window-filter` is an alias of `--window`.
//
// Returns:
//   - OkStatus after printing the result (or the usage text for --help / -h).
//   - InvalidArgumentError for an unknown flag, a missing value, a malformed
//     or out-of-range number, an unknown widget type, or an unsupported
//     output format.
//   - UnimplementedError when the binary was built without YAZE_WITH_GRPC.
//   - Otherwise the error propagated from connecting to the harness or from
//     the discovery request.
absl::Status HandleGuiDiscoverCommand(const std::vector<std::string>& arg_vec) {
  std::string host = "localhost";
  int port = 50052;
  std::string window_filter;
  std::string path_prefix;
  std::optional<WidgetTypeFilter> type_filter;
  std::optional<std::string> type_filter_label;
  bool include_invisible = false;
  bool include_disabled = false;
  std::string format = "table";
  // A negative limit (the default) means "show everything".
  int limit = -1;

  // Parses a base-10 integer without throwing. The whole text must be
  // consumed, so empty input, trailing garbage ("80abc"), leading whitespace
  // and values that do not fit in an int are all reported as errors rather
  // than escaping as exceptions the way std::stoi would.
  auto parse_int = [](const std::string& text,
                      const std::string& flag) -> absl::StatusOr<int> {
    int parsed = 0;
    const char* const first = text.data();
    const char* const last = first + text.size();
    const auto [end, ec] = std::from_chars(first, last, parsed);
    if (ec != std::errc() || end != last) {
      return absl::InvalidArgumentError(absl::StrFormat(
          "Flag %s expects an integer value, got '%s'", flag, text));
    }
    return parsed;
  };

  for (size_t i = 0; i < arg_vec.size(); ++i) {
    const std::string& token = arg_vec[i];

    // Switches that never take a value.
    if (token == "--include-invisible") {
      include_invisible = true;
      continue;
    }
    if (token == "--include-disabled") {
      include_disabled = true;
      continue;
    }
    if (token == "--help" || token == "-h") {
      std::cout << "Usage: agent gui discover [options]\n"
                << "  --host <host>\n"
                << "  --port <port>\n"
                << "  --window <name>\n"
                << "  --type <widget-type>\n"
                << "  --path-prefix <path>\n"
                << "  --include-invisible\n"
                << "  --include-disabled\n"
                << "  --format <table|json>\n"
                << "  --limit <n>\n";
      return absl::OkStatus();
    }

    // Split `--flag=value` into its name and inline value. When there is no
    // '=', substr(0, npos) yields the whole token.
    const size_t equals_pos = token.find('=');
    const std::string flag = token.substr(0, equals_pos);

    const bool takes_value =
        flag == "--host" || flag == "--port" || flag == "--window" ||
        flag == "--window-filter" || flag == "--path-prefix" ||
        flag == "--type" || flag == "--format" || flag == "--limit";
    if (!takes_value) {
      return absl::InvalidArgumentError(absl::StrFormat(
          "Unknown flag for agent gui discover: %s", token));
    }

    // The value is either inline (after '=') or the next argument.
    std::string value;
    if (equals_pos != std::string::npos) {
      value = token.substr(equals_pos + 1);
    } else if (i + 1 < arg_vec.size()) {
      value = arg_vec[++i];
    } else {
      return absl::InvalidArgumentError(
          absl::StrFormat("Flag %s requires a value", flag));
    }

    if (flag == "--host") {
      host = std::move(value);
    } else if (flag == "--port") {
      const absl::StatusOr<int> parsed_port = parse_int(value, flag);
      if (!parsed_port.ok()) {
        return parsed_port.status();
      }
      if (*parsed_port < 1 || *parsed_port > 65535) {
        return absl::InvalidArgumentError(
            "--port must be between 1 and 65535");
      }
      port = *parsed_port;
    } else if (flag == "--window" || flag == "--window-filter") {
      window_filter = std::move(value);
    } else if (flag == "--path-prefix") {
      path_prefix = std::move(value);
    } else if (flag == "--type") {
      const auto parsed_type = ParseWidgetTypeFilter(value);
      if (!parsed_type.has_value()) {
        return absl::InvalidArgumentError(
            absl::StrFormat("Unknown widget type filter: %s", value));
      }
      type_filter = parsed_type;
      // Remember what the user typed (lower-cased) for the table header.
      type_filter_label = absl::AsciiStrToLower(value);
    } else if (flag == "--format") {
      format = std::move(value);
    } else {  // --limit
      const absl::StatusOr<int> parsed_limit = parse_int(value, flag);
      if (!parsed_limit.ok()) {
        return parsed_limit.status();
      }
      limit = *parsed_limit;
    }
  }

  format = absl::AsciiStrToLower(format);
  if (format != "table" && format != "json") {
    return absl::InvalidArgumentError(
        "--format must be either 'table' or 'json'");
  }
  // Only zero is rejected; a negative value keeps the "no limit" meaning.
  if (limit == 0) {
    return absl::InvalidArgumentError("--limit must be positive");
  }

#ifndef YAZE_WITH_GRPC
  // Without gRPC there is nothing to talk to; silence unused-variable warnings.
  (void)host;
  (void)port;
  (void)window_filter;
  (void)path_prefix;
  (void)type_filter;
  (void)type_filter_label;
  (void)include_invisible;
  (void)include_disabled;
  (void)format;
  (void)limit;
  return absl::UnimplementedError(
      "GUI automation requires YAZE_WITH_GRPC=ON at build time.\n"
      "Rebuild with: cmake -B build -DYAZE_WITH_GRPC=ON");
#else
  GuiAutomationClient client(HarnessAddress(host, port));
  RETURN_IF_ERROR(client.Connect());

  DiscoverWidgetsQuery query;
  query.window_filter = window_filter;
  query.path_prefix = path_prefix;
  if (type_filter.has_value()) {
    query.type_filter = type_filter.value();
  }
  query.include_invisible = include_invisible;
  query.include_disabled = include_disabled;

  ASSIGN_OR_RETURN(auto response, client.DiscoverWidgets(query));

  // Apply --limit on the client side: copy widgets window by window until the
  // budget is used up. Windows that end up with no widgets are left out.
  int max_items = limit > 0 ? limit : std::numeric_limits<int>::max();
  int remaining = max_items;
  std::vector<DiscoveredWindowInfo> trimmed_windows;
  trimmed_windows.reserve(response.windows.size());
  int rendered_widgets = 0;
  for (const auto& window : response.windows) {
    if (remaining <= 0) {
      break;
    }
    DiscoveredWindowInfo trimmed;
    trimmed.name = window.name;
    trimmed.visible = window.visible;
    for (const auto& widget : window.widgets) {
      if (remaining <= 0) {
        break;
      }
      trimmed.widgets.push_back(widget);
      --remaining;
      ++rendered_widgets;
    }
    if (!trimmed.widgets.empty()) {
      trimmed_windows.push_back(std::move(trimmed));
    }
  }

  // Fewer widgets rendered than the response's total means output was cut.
  bool truncated = rendered_widgets < response.total_widgets;

  if (format == "json") {
    std::cout << "{\n";
    std::cout << "  \"server\": \""
              << JsonEscape(HarnessAddress(host, port)) << "\",\n";
    std::cout << "  \"totalWidgets\": " << response.total_widgets << ",\n";
    std::cout << "  \"returnedWidgets\": " << rendered_widgets << ",\n";
    std::cout << "  \"truncated\": " << (truncated ? "true" : "false")
              << ",\n";
    // The snapshot time is emitted in UTC, or as null when absent.
    std::cout << "  \"generatedAt\": "
              << (response.generated_at.has_value()
                      ? absl::StrCat("\"",
                                     JsonEscape(absl::FormatTime(
                                         "%Y-%m-%dT%H:%M:%SZ",
                                         *response.generated_at,
                                         absl::UTCTimeZone())),
                                     "\"")
                      : std::string("null"))
              << ",\n";
    std::cout << "  \"windows\": [\n";
    for (size_t w = 0; w < trimmed_windows.size(); ++w) {
      const auto& window = trimmed_windows[w];
      std::cout << "    {\n";
      std::cout << "      \"name\": \"" << JsonEscape(window.name)
                << "\",\n";
      std::cout << "      \"visible\": "
                << (window.visible ? "true" : "false") << ",\n";
      std::cout << "      \"widgets\": [\n";
      for (size_t i = 0; i < window.widgets.size(); ++i) {
        const auto& widget = window.widgets[i];
        std::cout << "        {\n";
        std::cout << "          \"path\": \""
                  << JsonEscape(widget.path) << "\",\n";
        std::cout << "          \"label\": \""
                  << JsonEscape(widget.label) << "\",\n";
        std::cout << "          \"type\": \""
                  << JsonEscape(widget.type) << "\",\n";
        std::cout << "          \"description\": \""
                  << JsonEscape(widget.description) << "\",\n";
        std::cout << "          \"suggestedAction\": \""
                  << JsonEscape(widget.suggested_action) << "\",\n";
        std::cout << "          \"visible\": "
                  << (widget.visible ? "true" : "false") << ",\n";
        std::cout << "          \"enabled\": "
                  << (widget.enabled ? "true" : "false") << ",\n";
        std::cout << "          \"bounds\": { \"min\": ["
                  << widget.bounds.min_x << ", "
                  << widget.bounds.min_y << "], \"max\": ["
                  << widget.bounds.max_x << ", "
                  << widget.bounds.max_y << "] },\n";
        std::cout << "          \"widgetId\": " << widget.widget_id
                  << "\n";
        std::cout << "        }";
        // JSON forbids a trailing comma after the last array element.
        if (i + 1 < window.widgets.size()) {
          std::cout << ",";
        }
        std::cout << "\n";
      }
      std::cout << "      ]\n";
      std::cout << "    }";
      if (w + 1 < trimmed_windows.size()) {
        std::cout << ",";
      }
      std::cout << "\n";
    }
    std::cout << "  ]\n";
    std::cout << "}\n";
    return absl::OkStatus();
  }

  // Table output: echo the active filters first, then one section per window.
  std::cout << "\n=== Widget Discovery ===\n";
  std::cout << "Server: " << HarnessAddress(host, port) << "\n";
  if (!window_filter.empty()) {
    std::cout << "Window filter: " << window_filter << "\n";
  }
  if (!path_prefix.empty()) {
    std::cout << "Path prefix: " << path_prefix << "\n";
  }
  if (type_filter_label.has_value()) {
    std::cout << "Type filter: " << *type_filter_label << "\n";
  }
  std::cout << "Include invisible: " << (include_invisible ? "yes" : "no")
            << "\n";
  std::cout << "Include disabled: " << (include_disabled ? "yes" : "no")
            << "\n\n";

  if (trimmed_windows.empty()) {
    std::cout << "No widgets matched the provided filters." << std::endl;
    return absl::OkStatus();
  }

  for (const auto& window : trimmed_windows) {
    std::cout << "Window: " << window.name
              << (window.visible ? " (visible)" : " (hidden)")
              << "\n";
    for (const auto& widget : window.widgets) {
      std::cout << " • [" << widget.type << "] " << widget.label
                << "\n";
      std::cout << " Path: " << widget.path << "\n";
      if (!widget.description.empty()) {
        std::cout << " Description: " << widget.description
                  << "\n";
      }
      std::cout << " Suggested: " << widget.suggested_action << "\n";
      std::cout << " State: "
                << (widget.visible ? "visible" : "hidden") << ", "
                << (widget.enabled ? "enabled" : "disabled") << "\n";
      std::cout << absl::StrFormat(
          " Bounds: (%.1f, %.1f) → (%.1f, %.1f)\n",
          widget.bounds.min_x, widget.bounds.min_y,
          widget.bounds.max_x, widget.bounds.max_y);
      // Print the id in hex, then restore decimal for later output.
      std::cout << " Widget ID: 0x" << std::hex << widget.widget_id
                << std::dec << "\n";
    }
    std::cout << "\n";
  }

  std::cout << "Widgets shown: " << rendered_widgets << " of "
            << response.total_widgets;
  if (truncated) {
    std::cout << " (truncated)";
  }
  std::cout << "\n";
  if (response.generated_at.has_value()) {
    std::cout << "Snapshot: "
              << absl::FormatTime("%Y-%m-%d %H:%M:%S",
                                  *response.generated_at,
                                  absl::LocalTimeZone())
              << "\n";
  }
  return absl::OkStatus();
#endif
}
// Dispatches `agent gui <subcommand>`. The first argument selects the
// subcommand and the remaining arguments are forwarded to its handler.
// `discover` is the only subcommand recognized here; an empty argument list or
// any other subcommand yields InvalidArgumentError.
absl::Status HandleGuiCommand(const std::vector<std::string>& arg_vec) {
  if (arg_vec.empty()) {
    return absl::InvalidArgumentError(
        "Usage: agent gui <discover> [options]");
  }

  const std::string& subcommand = arg_vec.front();
  if (subcommand != "discover") {
    return absl::InvalidArgumentError(
        absl::StrFormat("Unknown agent gui subcommand: %s", subcommand));
  }

  // Everything after the subcommand name belongs to the subcommand.
  const std::vector<std::string> remaining_args(arg_vec.begin() + 1,
                                                arg_vec.end());
  return HandleGuiDiscoverCommand(remaining_args);
}
absl::Status HandleLearnCommand() {
std::cout << "Agent learn not yet implemented." << std::endl;
return absl::OkStatus();
@@ -1184,7 +1546,7 @@ absl::Status HandleDescribeCommand(const std::vector<std::string>& arg_vec) {
absl::Status Agent::Run(const std::vector<std::string>& arg_vec) {
if (arg_vec.empty()) {
return absl::InvalidArgumentError(
"Usage: agent <run|plan|diff|test|learn|list|commit|revert|describe> [options]");
"Usage: agent <run|plan|diff|test|gui|learn|list|commit|revert|describe> [options]");
}
std::string subcommand = arg_vec[0];
@@ -1198,6 +1560,8 @@ absl::Status Agent::Run(const std::vector<std::string>& arg_vec) {
return HandleDiffCommand(rom_, subcommand_args);
} else if (subcommand == "test") {
return HandleTestCommand(subcommand_args);
} else if (subcommand == "gui") {
return HandleGuiCommand(subcommand_args);
} else if (subcommand == "learn") {
return HandleLearnCommand();
} else if (subcommand == "list") {