feat: Enhance Agent Chat Widget with SDL Image Loading and Screenshot Capture

- Integrated SDL (via SDL_LoadBMP, so BMP files only) for image loading in the Agent Chat Widget, allowing for dynamic screenshot previews.
- Updated the screenshot capture functionality to save selected regions and display previews, improving user interaction.
- Refactored theme color usage to utilize the new text_secondary_color for consistent styling across the widget.
- Added error handling for image loading and rendering processes, enhancing robustness and user feedback.
This commit is contained in:
scawful
2025-10-06 01:20:14 -04:00
parent 8d2f8e478e
commit 7ba8d5b443
4 changed files with 244 additions and 51 deletions

View File

@@ -27,9 +27,10 @@
#include "app/gui/icons.h"
#include "app/rom.h"
#include "imgui/imgui.h"
#include "imgui/misc/cpp/imgui_stdlib.h"
#include "util/file_util.h"
#include <SDL.h>
#if defined(YAZE_WITH_GRPC)
#include "app/test/test_manager.h"
#endif
@@ -1561,7 +1562,7 @@ void AgentChatWidget::RenderAutomationPanel() {
action_color = theme.status_error;
status_icon = ICON_MD_ERROR;
} else {
action_color = theme.text_secondary;
action_color = theme.text_secondary_color;
status_icon = ICON_MD_HELP;
}
@@ -2741,8 +2742,9 @@ void AgentChatWidget::UpdateHarnessTelemetry(
}
}
void AgentChatWidget::SetLastPlanSummary(const std::string& summary) {
void AgentChatWidget::SetLastPlanSummary(const std::string& /* summary */) {
// Store the plan summary for display in the automation panel
// TODO: Implement plan summary storage and display
// This could be shown in the harness panel or logged
if (toast_manager_) {
toast_manager_->Show("Plan summary received", ToastType::kInfo, 2.0f);
@@ -2763,23 +2765,70 @@ void AgentChatWidget::SyncHistoryToPopup() {
// Screenshot Preview Implementation
/// Loads a screenshot from disk into an SDL texture for the preview panel.
///
/// Any previously loaded preview is released first via
/// UnloadScreenshotPreview(). The file is read with SDL_LoadBMP, so only BMP
/// images are supported. On any failure an error toast is shown (when a toast
/// manager is available) and the function returns without setting the preview
/// texture.
///
/// @param image_path Path of the BMP file to load.
void AgentChatWidget::LoadScreenshotPreview(const std::filesystem::path& image_path) {
  // Unload any existing preview first so the old texture is not leaked.
  UnloadScreenshotPreview();

  // Load the image using SDL (BMP only).
  SDL_Surface* surface = SDL_LoadBMP(image_path.string().c_str());
  if (!surface) {
    if (toast_manager_) {
      toast_manager_->Show(absl::StrFormat("Failed to load image: %s", SDL_GetError()),
                           ToastType::kError, 3.0f);
    }
    return;
  }

  // Get the renderer from the ImGui backend.
  // NOTE(review): this assumes the SDL renderer backend is active and that its
  // backend data struct stores the SDL_Renderer* as its first member — confirm
  // against the imgui_impl_sdlrenderer version in use.
  ImGuiIO& io = ImGui::GetIO();
  SDL_Renderer* renderer = nullptr;
  if (io.BackendRendererUserData) {
    renderer = *static_cast<SDL_Renderer**>(io.BackendRendererUserData);
  }
  if (!renderer) {
    SDL_FreeSurface(surface);
    if (toast_manager_) {
      toast_manager_->Show("Failed to get SDL renderer", ToastType::kError, 3.0f);
    }
    return;
  }

  // Cache the dimensions now: the surface is freed right after the texture is
  // created, and must not be dereferenced afterwards.
  const int image_width = surface->w;
  const int image_height = surface->h;

  // Create texture from surface; the surface is no longer needed either way.
  SDL_Texture* texture = SDL_CreateTextureFromSurface(renderer, surface);
  SDL_FreeSurface(surface);
  surface = nullptr;
  if (!texture) {
    if (toast_manager_) {
      toast_manager_->Show(absl::StrFormat("Failed to create texture: %s", SDL_GetError()),
                           ToastType::kError, 3.0f);
    }
    return;
  }

  // Store texture info; ownership of the texture passes to the preview state
  // and is released in UnloadScreenshotPreview().
  multimodal_state_.preview.texture_id = reinterpret_cast<void*>(texture);
  multimodal_state_.preview.width = image_width;
  multimodal_state_.preview.height = image_height;
  multimodal_state_.preview.loaded = true;
  multimodal_state_.preview.show_preview = true;

  if (toast_manager_) {
    toast_manager_->Show(absl::StrFormat("Screenshot preview loaded (%dx%d)",
                                         image_width, image_height),
                         ToastType::kSuccess, 2.0f);
  }
}
void AgentChatWidget::UnloadScreenshotPreview() {
if (multimodal_state_.preview.texture_id != nullptr) {
// TODO: Free the texture from GPU
// This requires backend-specific cleanup
// Destroy the SDL texture
SDL_Texture* texture = reinterpret_cast<SDL_Texture*>(multimodal_state_.preview.texture_id);
SDL_DestroyTexture(texture);
multimodal_state_.preview.texture_id = nullptr;
}
multimodal_state_.preview.loaded = false;
@@ -2797,7 +2846,7 @@ void AgentChatWidget::RenderScreenshotPreview() {
// Display filename
std::string filename = multimodal_state_.last_capture_path->filename().string();
ImGui::TextColored(theme.text_secondary, "%s", filename.c_str());
ImGui::TextColored(theme.text_secondary_color, "%s", filename.c_str());
// Preview controls
if (ImGui::SmallButton(ICON_MD_CLOSE " Hide")) {
@@ -2821,7 +2870,7 @@ void AgentChatWidget::RenderScreenshotPreview() {
// Placeholder when texture not loaded
ImGui::BeginChild("PreviewPlaceholder", ImVec2(200, 150), true);
ImGui::SetCursorPos(ImVec2(60, 60));
ImGui::TextColored(theme.text_secondary, ICON_MD_IMAGE);
ImGui::TextColored(theme.text_secondary_color, ICON_MD_IMAGE);
ImGui::SetCursorPosX(40);
ImGui::TextWrapped("Preview placeholder");
ImGui::TextDisabled("(Texture loading not yet implemented)");
@@ -2952,21 +3001,104 @@ void AgentChatWidget::CaptureSelectedRegion() {
return;
}
// TODO: Implement actual region capture
// This would involve:
// 1. Capturing the full screenshot
// 2. Cropping to the selected region
// 3. Saving the cropped image
// Get the renderer from ImGui backend
ImGuiIO& io = ImGui::GetIO();
auto* backend_data = static_cast<void**>(io.BackendRendererUserData);
SDL_Renderer* renderer = nullptr;
if (backend_data) {
renderer = *reinterpret_cast<SDL_Renderer**>(backend_data);
}
if (!renderer) {
if (toast_manager_) {
toast_manager_->Show("Failed to get SDL renderer", ToastType::kError, 3.0f);
}
return;
}
// Get renderer size
int full_width = 0;
int full_height = 0;
if (SDL_GetRendererOutputSize(renderer, &full_width, &full_height) != 0) {
if (toast_manager_) {
toast_manager_->Show(absl::StrFormat("Failed to get renderer size: %s", SDL_GetError()),
ToastType::kError, 3.0f);
}
return;
}
// Clamp region to renderer bounds
int capture_x = std::max(0, static_cast<int>(min.x));
int capture_y = std::max(0, static_cast<int>(min.y));
int capture_width = std::min(static_cast<int>(width), full_width - capture_x);
int capture_height = std::min(static_cast<int>(height), full_height - capture_y);
if (capture_width <= 0 || capture_height <= 0) {
if (toast_manager_) {
toast_manager_->Show("Invalid capture region", ToastType::kError);
}
return;
}
// Create surface for the capture region
SDL_Surface* surface = SDL_CreateRGBSurface(0, capture_width, capture_height,
32, 0x00FF0000, 0x0000FF00,
0x000000FF, 0xFF000000);
if (!surface) {
if (toast_manager_) {
toast_manager_->Show(absl::StrFormat("Failed to create surface: %s", SDL_GetError()),
ToastType::kError, 3.0f);
}
return;
}
// Read pixels from the selected region
SDL_Rect region_rect = {capture_x, capture_y, capture_width, capture_height};
if (SDL_RenderReadPixels(renderer, &region_rect, SDL_PIXELFORMAT_ARGB8888,
surface->pixels, surface->pitch) != 0) {
SDL_FreeSurface(surface);
if (toast_manager_) {
toast_manager_->Show(absl::StrFormat("Failed to read pixels: %s", SDL_GetError()),
ToastType::kError, 3.0f);
}
return;
}
// Generate output path
std::filesystem::path screenshot_dir = std::filesystem::temp_directory_path() / "yaze" / "screenshots";
std::error_code ec;
std::filesystem::create_directories(screenshot_dir, ec);
const int64_t timestamp_ms = absl::ToUnixMillis(absl::Now());
std::filesystem::path output_path = screenshot_dir /
std::filesystem::path(absl::StrFormat("region_%lld.bmp", static_cast<long long>(timestamp_ms)));
// Save the cropped image
if (SDL_SaveBMP(surface, output_path.string().c_str()) != 0) {
SDL_FreeSurface(surface);
if (toast_manager_) {
toast_manager_->Show(absl::StrFormat("Failed to save screenshot: %s", SDL_GetError()),
ToastType::kError, 3.0f);
}
return;
}
SDL_FreeSurface(surface);
// Store the capture path and load preview
multimodal_state_.last_capture_path = output_path;
LoadScreenshotPreview(output_path);
if (toast_manager_) {
toast_manager_->Show(
absl::StrFormat("Region captured: %.0fx%.0f", width, height),
absl::StrFormat("Region captured: %dx%d", capture_width, capture_height),
ToastType::kSuccess, 3.0f
);
}
// For now, just call the regular capture callback
if (multimodal_callbacks_.capture_snapshot) {
// Call the Gemini callback if available
if (multimodal_callbacks_.send_to_gemini) {
std::filesystem::path captured_path;
auto status = multimodal_callbacks_.capture_snapshot(&captured_path);
if (status.ok()) {

View File

@@ -41,6 +41,8 @@ AgentUITheme AgentUITheme::FromCurrentTheme() {
theme.json_text_color = ImVec4(0.78f, 0.83f, 0.90f, 1.0f);
theme.command_text_color = ImVec4(1.0f, 0.647f, 0.0f, 1.0f);
theme.code_bg_color = ImVec4(0.08f, 0.08f, 0.10f, 0.95f);
theme.text_secondary_color = ConvertColorToImVec4(current.text_secondary);
// UI element colors
theme.panel_bg_color = ImVec4(0.12f, 0.14f, 0.18f, 0.95f);

View File

@@ -20,6 +20,8 @@ struct AgentUITheme {
ImVec4 user_message_color;
ImVec4 agent_message_color;
ImVec4 system_message_color;
ImVec4 text_secondary_color;
// Content colors
ImVec4 json_text_color;

View File

@@ -1,19 +1,20 @@
#include "cli/handlers/agent/commands.h"
#include <iostream>
#include <set>
#include <algorithm>
#include "absl/status/status.h"
#include "absl/strings/str_format.h"
#include "absl/strings/match.h"
#include "absl/strings/numbers.h"
#include "app/rom.h"
#include "app/editor/message/message_data.h"
namespace yaze {
namespace cli {
namespace agent {
absl:Status HandleDialogueListCommand(
absl::Status HandleDialogueListCommand(
const std::vector<std::string>& arg_vec, Rom* rom_context) {
if (!rom_context || !rom_context->is_loaded()) {
return absl::FailedPreconditionError("ROM not loaded");
@@ -40,43 +41,60 @@ absl:Status HandleDialogueListCommand(
}
}
// Get all dialogue IDs from ROM
// This is a simplified implementation - real one would parse dialogue data
std::vector<int> dialogue_ids;
// Read all dialogue messages from ROM using ReadAllTextData
constexpr int kTextData1 = 0xE0000; // Bank $0E in ALTTP
auto messages = editor::ReadAllTextData(rom_context->mutable_data(), kTextData1);
// Limit the results
int actual_limit = std::min(limit, static_cast<int>(messages.size()));
// ALTTP has dialogue messages from 0x00 to ~0x1FF
for (int i = 0; i < std::min(limit, 512); ++i) {
dialogue_ids.push_back(i);
}
if (format == "json") {
std::cout << "{\n";
std::cout << " \"dialogue_messages\": [\n";
for (size_t i = 0; i < dialogue_ids.size(); ++i) {
int id = dialogue_ids[i];
for (int i = 0; i < actual_limit; ++i) {
const auto& msg = messages[i];
// Create a preview (first 50 chars)
std::string preview = msg.ContentsParsed;
if (preview.length() > 50) {
preview = preview.substr(0, 47) + "...";
}
// Replace newlines with spaces for preview
for (char& c : preview) {
if (c == '\n') c = ' ';
}
std::cout << " {\n";
std::cout << " \"id\": \"0x" << std::hex << std::uppercase << id << std::dec << "\",\n";
std::cout << " \"decimal_id\": " << id << ",\n";
std::cout << " \"preview\": \"Message " << id << "...\"\n";
std::cout << " \"id\": \"0x" << std::hex << std::uppercase << msg.ID << std::dec << "\",\n";
std::cout << " \"decimal_id\": " << msg.ID << ",\n";
std::cout << " \"preview\": \"" << preview << "\"\n";
std::cout << " }";
if (i < dialogue_ids.size() - 1) {
if (i < actual_limit - 1) {
std::cout << ",";
}
std::cout << "\n";
}
std::cout << " ],\n";
std::cout << " \"total\": " << dialogue_ids.size() << ",\n";
std::cout << " \"total\": " << messages.size() << ",\n";
std::cout << " \"showing\": " << actual_limit << ",\n";
std::cout << " \"rom\": \"" << rom_context->filename() << "\"\n";
std::cout << "}\n";
} else {
// Table format
std::cout << "Dialogue Messages (showing " << dialogue_ids.size() << "):\n";
std::cout << "Dialogue Messages (showing " << actual_limit << " of " << messages.size() << "):\n";
std::cout << "----------------------------------------\n";
for (int id : dialogue_ids) {
std::cout << absl::StrFormat("0x%03X (%3d) | Message %d\n", id, id, id);
for (int i = 0; i < actual_limit; ++i) {
const auto& msg = messages[i];
std::string preview = msg.ContentsParsed;
if (preview.length() > 40) {
preview = preview.substr(0, 37) + "...";
}
for (char& c : preview) {
if (c == '\n') c = ' ';
}
std::cout << absl::StrFormat("0x%03X (%3d) | %s\n", msg.ID, msg.ID, preview);
}
std::cout << "----------------------------------------\n";
std::cout << "Total: " << dialogue_ids.size() << " messages\n";
std::cout << "Total: " << messages.size() << " messages\n";
}
return absl::OkStatus();
@@ -96,7 +114,7 @@ absl::Status HandleDialogueReadCommand(
const std::string& token = arg_vec[i];
if (token == "--id" || token == "--message") {
if (i + 1 < arg_vec.size()) {
std::string id_str = arg_vec[++i];
const std::string& id_str = arg_vec[++i];
if (absl::StartsWith(id_str, "0x") || absl::StartsWith(id_str, "0X")) {
message_id = std::stoi(id_str, nullptr, 16);
} else {
@@ -124,10 +142,25 @@ absl::Status HandleDialogueReadCommand(
"Usage: dialogue-read --id <message_id> [--format json|text]");
}
// Simplified dialogue text - real implementation would decode from ROM
std::string dialogue_text = absl::StrFormat(
"This is dialogue message %d. Real implementation would decode from ROM data.",
message_id);
// Read all dialogue messages from ROM
constexpr int kTextData1 = 0xE0000;
auto messages = editor::ReadAllTextData(rom_context->mutable_data(), kTextData1);
// Find the specific message
std::string dialogue_text;
bool found = false;
for (const auto& msg : messages) {
if (msg.ID == message_id) {
dialogue_text = msg.ContentsParsed;
found = true;
break;
}
}
if (!found) {
return absl::NotFoundError(
absl::StrFormat("Message ID 0x%X not found in ROM", message_id));
}
if (format == "json") {
std::cout << "{\n";
@@ -188,11 +221,35 @@ absl::Status HandleDialogueSearchCommand(
"Usage: dialogue-search --query <search_text> [--format json|text] [--limit N]");
}
// Simplified search - real implementation would search actual dialogue data
// Read all dialogue messages from ROM and search
constexpr int kTextData1 = 0xE0000;
auto messages = editor::ReadAllTextData(rom_context->mutable_data(), kTextData1);
// Search for messages containing the query string (case-insensitive)
std::vector<std::pair<int, std::string>> results;
results.push_back({0x01, absl::StrFormat("Message 1 containing '%s'", query)});
results.push_back({0x15, absl::StrFormat("Another message with '%s'", query)});
results.push_back({0x42, absl::StrFormat("Found '%s' in message 66", query)});
std::string query_lower = query;
std::transform(query_lower.begin(), query_lower.end(), query_lower.begin(), ::tolower);
for (const auto& msg : messages) {
std::string msg_lower = msg.ContentsParsed;
std::transform(msg_lower.begin(), msg_lower.end(), msg_lower.begin(), ::tolower);
if (msg_lower.find(query_lower) != std::string::npos) {
// Create preview with matched text
std::string preview = msg.ContentsParsed;
if (preview.length() > 60) {
preview = preview.substr(0, 57) + "...";
}
for (char& c : preview) {
if (c == '\n') c = ' ';
}
results.push_back({msg.ID, preview});
if (results.size() >= static_cast<size_t>(limit)) {
break;
}
}
}
if (format == "json") {
std::cout << "{\n";