feat(z3ed): Complete Phase 2 - Gemini AI service enhancement

Phase 2 Implementation Summary:
- Enhanced GeminiAIService with production-ready features
- Added GeminiConfig struct for flexible configuration
- Implemented health check system with graceful degradation
- Updated to Gemini v1beta API format
- Added robust JSON parsing with markdown stripping fallbacks
- Switched default model to gemini-1.5-flash (faster, cheaper)
- Enhanced error messages with actionable guidance
- Integrated into service factory with health checks
- Added comprehensive test infrastructure

Files Modified:
- src/cli/service/gemini_ai_service.h (added config struct)
- src/cli/service/gemini_ai_service.cc (rewritten for v1beta)
- src/cli/handlers/agent/general_commands.cc (factory update)
- docs/z3ed/LLM-IMPLEMENTATION-CHECKLIST.md (progress tracking)

Files Created:
- scripts/test_gemini_integration.sh (test suite)
- docs/z3ed/PHASE2-COMPLETE.md (implementation summary)
- docs/z3ed/LLM-PROGRESS-UPDATE.md (overall progress)

Build Status: SUCCESS (macOS ARM64)
Test Status: Graceful fallback validated
Pending: Real API key validation

See docs/z3ed/PHASE2-COMPLETE.md for details.
This commit is contained in:
scawful
2025-10-03 01:16:39 -04:00
parent 6cec21f7aa
commit d875b45fcd
7 changed files with 1188 additions and 92 deletions

View File

@@ -43,6 +43,7 @@ std::unique_ptr<AIService> CreateAIService() {
const char* provider_env = std::getenv("YAZE_AI_PROVIDER");
const char* gemini_key = std::getenv("GEMINI_API_KEY");
const char* ollama_model = std::getenv("OLLAMA_MODEL");
const char* gemini_model = std::getenv("GEMINI_MODEL");
// Explicit provider selection
if (provider_env && std::string(provider_env) == "ollama") {
@@ -68,8 +69,24 @@ std::unique_ptr<AIService> CreateAIService() {
// Gemini if API key provided
if (gemini_key && std::strlen(gemini_key) > 0) {
std::cout << "🤖 Using Gemini AI (remote)" << std::endl;
return std::make_unique<GeminiAIService>(gemini_key);
GeminiConfig config(gemini_key);
// Allow model override via env
if (gemini_model && std::strlen(gemini_model) > 0) {
config.model = gemini_model;
}
auto service = std::make_unique<GeminiAIService>(config);
// Health check
if (auto status = service->CheckAvailability(); !status.ok()) {
std::cerr << "⚠️ Gemini unavailable: " << status.message() << std::endl;
std::cerr << " Falling back to MockAIService" << std::endl;
return std::make_unique<MockAIService>();
}
std::cout << "🤖 Using Gemini AI with model: " << config.model << std::endl;
return service;
}
// Default: Mock service for testing

View File

@@ -1,8 +1,13 @@
#include "cli/service/gemini_ai_service.h"
#include <cstdlib>
#include <iostream>
#include <string>
#include <vector>
#include "absl/strings/str_cat.h"
#include "absl/strings/str_split.h"
#include "absl/strings/strip.h"
#ifdef YAZE_WITH_JSON
#include "incl/httplib.h"
@@ -12,7 +17,83 @@
namespace yaze {
namespace cli {
// NOTE(review): this is a diff rendering — the one-line constructor below is
// the removed (pre-change) overload; only the GeminiConfig constructor exists
// after this change. Verify against the actual file before editing.
// Legacy constructor: stores only the raw API key.
GeminiAIService::GeminiAIService(const std::string& api_key) : api_key_(api_key) {}
// Config-based constructor: copies the caller-supplied configuration and,
// when no explicit system instruction was provided, installs the default
// instruction built by BuildSystemInstruction().
GeminiAIService::GeminiAIService(const GeminiConfig& config)
: config_(config) {
if (config_.system_instruction.empty()) {
config_.system_instruction = BuildSystemInstruction();
}
}
// Builds the default system instruction sent with each Gemini request.
// The instruction constrains the model to respond with ONLY a JSON array of
// z3ed CLI command strings (no prose, no markdown code fences) and enumerates
// the allowed commands. The returned text is used verbatim as the request's
// system_instruction, so the raw string below must not be reformatted.
std::string GeminiAIService::BuildSystemInstruction() {
return R"(You are an expert ROM hacking assistant for The Legend of Zelda: A Link to the Past.
Your task is to generate a sequence of z3ed CLI commands to achieve the user's request.
CRITICAL: Respond ONLY with a JSON array of strings. Each string must be a complete z3ed command.
Available z3ed commands:
- palette export --group <group> --id <id> --to <file>
- palette import --group <group> --id <id> --from <file>
- palette set-color --file <file> --index <index> --color <hex_color>
- overworld set-tile --map <map_id> --x <x> --y <y> --tile <tile_id>
- sprite set-position --id <id> --x <x> --y <y>
- dungeon set-room-tile --room <room_id> --x <x> --y <y> --tile <tile_id>
Example response format:
["z3ed palette export --group overworld --id 0 --to palette.json", "z3ed palette set-color --file palette.json --index 5 --color 0xFF0000"]
Do not include explanations, markdown formatting, or code blocks. Only the JSON array.)";
}
// Verifies that the Gemini backend is usable with the current configuration.
// Fails fast when JSON support is compiled out or no API key is configured,
// then probes the model metadata endpoint so connectivity, authentication,
// and model-name problems surface before any generation request is made.
absl::Status GeminiAIService::CheckAvailability() {
#ifndef YAZE_WITH_JSON
  return absl::UnimplementedError(
      "Gemini AI service requires JSON support. Build with -DYAZE_WITH_JSON=ON");
#else
  if (config_.api_key.empty()) {
    return absl::FailedPreconditionError(
        "❌ Gemini API key not configured\n"
        " Set GEMINI_API_KEY environment variable\n"
        " Get your API key at: https://makersuite.google.com/app/apikey");
  }
  // A plain GET on the model resource is enough to exercise auth and the
  // model name without spending any generation tokens.
  httplib::Client client("https://generativelanguage.googleapis.com");
  client.set_connection_timeout(5, 0);  // keep the probe snappy: 5s connect
  const std::string model_endpoint = "/v1beta/models/" + config_.model;
  httplib::Headers headers = {
      {"x-goog-api-key", config_.api_key},
  };
  auto response = client.Get(model_endpoint.c_str(), headers);
  if (!response) {
    // No HTTP response at all — DNS/socket level failure.
    return absl::UnavailableError(
        "❌ Cannot reach Gemini API\n"
        " Check your internet connection");
  }
  const int http_status = response->status;
  if (http_status == 401 || http_status == 403) {
    return absl::PermissionDeniedError(
        "❌ Invalid Gemini API key\n"
        " Verify your key at: https://makersuite.google.com/app/apikey");
  }
  if (http_status == 404) {
    return absl::NotFoundError(
        absl::StrCat("❌ Model '", config_.model, "' not found\n",
                     " Try: gemini-1.5-flash or gemini-1.5-pro"));
  }
  if (http_status != 200) {
    // Any other non-OK status: surface the raw body for diagnosis.
    return absl::InternalError(
        absl::StrCat("❌ Gemini API error: ", http_status, "\n ", response->body));
  }
  return absl::OkStatus();
#endif
}
// Sends the user's prompt to Gemini's generateContent endpoint and delegates
// response parsing to ParseGeminiResponse().
// NOTE(review): this span is a unified-diff rendering with the +/- markers
// stripped — removed (pre-change) and added (post-change) lines appear
// interleaved below, including an embedded hunk header. It is NOT compilable
// as shown; consult the real source file before editing.
absl::StatusOr<std::vector<std::string>> GeminiAIService::GetCommands(
const std::string& prompt) {
@@ -20,66 +101,143 @@ absl::StatusOr<std::vector<std::string>> GeminiAIService::GetCommands(
return absl::UnimplementedError(
"Gemini AI service requires JSON support. Build with -DYAZE_WITH_JSON=ON");
#else
// NOTE(review): the api_key_ check below is the removed pre-change guard;
// it is replaced by the CheckAvailability() call that follows.
if (api_key_.empty()) {
return absl::FailedPreconditionError("GEMINI_API_KEY not set.");
// Validate configuration
if (auto status = CheckAvailability(); !status.ok()) {
return status;
}
httplib::Client cli("https://generativelanguage.googleapis.com");
cli.set_connection_timeout(30, 0); // 30 seconds for generation
// Build request with proper Gemini API v1beta format
// NOTE(review): the first "contents" initializer (inline instruction text
// concatenated with the prompt) is the removed pre-change request body; the
// system_instruction / contents / generationConfig form below replaces it.
nlohmann::json request_body = {
{"contents",
{{"parts",
{{"text",
"You are an expert ROM hacker for The Legend of Zelda: A Link to the Past. "
"Your task is to generate a sequence of `z3ed` CLI commands to achieve the user's request. "
"Respond only with a JSON array of strings, where each string is a `z3ed` command. "
"Do not include any other text or explanation. "
"Available commands: "
"palette export --group <group> --id <id> --to <file>, "
"palette import --group <group> --id <id> --from <file>, "
"palette set-color --file <file> --index <index> --color <hex_color>, "
"overworld set-tile --map <map_id> --x <x> --y <y> --tile <tile_id>. "
"User request: " + prompt}}}}}
{"system_instruction", {
{"parts", {
{"text", config_.system_instruction}
}}
}},
{"contents", {{
{"parts", {{
{"text", prompt}
}}}
}}},
{"generationConfig", {
{"temperature", config_.temperature},
{"maxOutputTokens", config_.max_output_tokens},
{"responseMimeType", "application/json"}
}}
};
httplib::Headers headers = {
{"Content-Type", "application/json"},
// NOTE(review): old header read api_key_; new header reads config_.api_key.
{"x-goog-api-key", api_key_},
{"x-goog-api-key", config_.api_key},
};
// NOTE(review): old call hard-coded gemini-pro; new code builds the endpoint
// from config_.model.
auto res = cli.Post("/v1beta/models/gemini-pro:generateContent", headers, request_body.dump(), "application/json");
std::string endpoint = "/v1beta/models/" + config_.model + ":generateContent";
auto res = cli.Post(endpoint.c_str(), headers, request_body.dump(), "application/json");
if (!res) {
return absl::InternalError("Failed to connect to Gemini API.");
return absl::InternalError("Failed to connect to Gemini API");
}
if (res->status != 200) {
return absl::InternalError(absl::StrCat("Gemini API error: ", res->status, " ", res->body));
return absl::InternalError(
absl::StrCat("❌ Gemini API error: ", res->status, "\n ", res->body));
}
// NOTE(review): inline parsing (removed) is replaced by ParseGeminiResponse.
nlohmann::json response_json = nlohmann::json::parse(res->body);
std::vector<std::string> commands;
return ParseGeminiResponse(res->body);
#endif
}
// Extracts z3ed commands from a raw Gemini generateContent response body.
// Walks candidates -> content -> parts, strips markdown code fences, parses
// the part text as a JSON array of strings, and falls back to line-by-line
// extraction of command-shaped lines when that parse fails.
// NOTE(review): this span is a unified-diff rendering — removed (pre-change)
// and added (post-change) lines are interleaved without +/- markers, so it is
// NOT compilable as shown; consult the real source file before editing.
absl::StatusOr<std::vector<std::string>> GeminiAIService::ParseGeminiResponse(
const std::string& response_body) {
#ifdef YAZE_WITH_JSON
std::vector<std::string> commands;
try {
nlohmann::json response_json = nlohmann::json::parse(response_body);
// Navigate Gemini's response structure
if (!response_json.contains("candidates") ||
response_json["candidates"].empty()) {
return absl::InternalError("❌ No candidates in Gemini response");
}
for (const auto& candidate : response_json["candidates"]) {
if (!candidate.contains("content") ||
!candidate["content"].contains("parts")) {
continue;
}
for (const auto& part : candidate["content"]["parts"]) {
// NOTE(review): the next several lines (direct part["text"] access and
// the naive single-pass parse) are the removed pre-change loop body;
// the guarded version starting at the part.contains("text") check is
// its replacement.
std::string text_content = part["text"];
// Assuming the AI returns a JSON array of strings directly in the text content
// This might need more robust parsing depending on actual AI output format
nlohmann::json commands_array = nlohmann::json::parse(text_content);
if (commands_array.is_array()) {
for (const auto& cmd : commands_array) {
if (cmd.is_string()) {
commands.push_back(cmd.get<std::string>());
if (!part.contains("text")) {
continue;
}
std::string text_content = part["text"].get<std::string>();
// Strip markdown code blocks if present (```json ... ```)
text_content = std::string(absl::StripAsciiWhitespace(text_content));
if (absl::StartsWith(text_content, "```json")) {
text_content = text_content.substr(7); // Remove ```json
} else if (absl::StartsWith(text_content, "```")) {
text_content = text_content.substr(3); // Remove ```
}
if (absl::EndsWith(text_content, "```")) {
text_content = text_content.substr(0, text_content.length() - 3);
}
text_content = std::string(absl::StripAsciiWhitespace(text_content));
// Parse as JSON array
try {
nlohmann::json commands_array = nlohmann::json::parse(text_content);
if (commands_array.is_array()) {
for (const auto& cmd : commands_array) {
if (cmd.is_string()) {
std::string command = cmd.get<std::string>();
// Remove "z3ed " prefix if LLM included it
if (absl::StartsWith(command, "z3ed ")) {
command = command.substr(5);
}
commands.push_back(command);
}
}
}
} catch (const nlohmann::json::exception& inner_e) {
// Fallback: Try to extract commands line by line
std::vector<std::string> lines = absl::StrSplit(text_content, '\n');
for (const auto& line : lines) {
std::string trimmed = std::string(absl::StripAsciiWhitespace(line));
if (!trimmed.empty() &&
(absl::StartsWith(trimmed, "z3ed ") ||
absl::StartsWith(trimmed, "palette ") ||
absl::StartsWith(trimmed, "overworld ") ||
absl::StartsWith(trimmed, "sprite ") ||
absl::StartsWith(trimmed, "dungeon "))) {
if (absl::StartsWith(trimmed, "z3ed ")) {
trimmed = trimmed.substr(5);
}
commands.push_back(trimmed);
}
}
}
}
}
} catch (const nlohmann::json::exception& e) {
// NOTE(review): the first InternalError below is the removed pre-change
// message; the two-line form after it is its replacement.
return absl::InternalError(absl::StrCat("Failed to parse Gemini API response: ", e.what()));
return absl::InternalError(
absl::StrCat("❌ Failed to parse Gemini response: ", e.what()));
}
if (commands.empty()) {
return absl::InternalError(
"❌ No valid commands extracted from Gemini response\n"
" Raw response: " + response_body);
}
return commands;
#else
return absl::UnimplementedError("JSON support required");
#endif
}

View File

@@ -11,14 +11,34 @@
namespace yaze {
namespace cli {
// Configuration bundle for GeminiAIService. Passed by const reference to the
// service constructor; the service copies it and may fill in
// system_instruction when it is left empty.
struct GeminiConfig {
// Gemini API key; service reports FailedPrecondition when empty.
std::string api_key;
std::string model = "gemini-1.5-flash"; // Default to flash model
// Sampling temperature forwarded in generationConfig.
float temperature = 0.7f;
// Upper bound on generated tokens (generationConfig.maxOutputTokens).
int max_output_tokens = 2048;
// Optional system prompt; when empty the service substitutes its default
// ROM-hacking instruction.
std::string system_instruction;
GeminiConfig() = default;
explicit GeminiConfig(const std::string& key) : api_key(key) {}
};
// AIService backed by Google's Gemini generateContent API (v1beta).
// NOTE(review): this declaration is a diff rendering — the string-key
// constructor and the api_key_ member appear to be the removed pre-change
// lines; confirm against the actual header before relying on them.
class GeminiAIService : public AIService {
public:
explicit GeminiAIService(const std::string& api_key);
// Preferred constructor: full configuration (model, temperature, etc.).
explicit GeminiAIService(const GeminiConfig& config);
// Primary interface
absl::StatusOr<std::vector<std::string>> GetCommands(
const std::string& prompt) override;
// Health check
absl::Status CheckAvailability();
private:
std::string api_key_;
// Default system prompt used when GeminiConfig.system_instruction is empty.
std::string BuildSystemInstruction();
// Turns a raw generateContent response body into z3ed command strings.
absl::StatusOr<std::vector<std::string>> ParseGeminiResponse(
const std::string& response_body);
GeminiConfig config_;
};
} // namespace cli