feat: Update function schemas and system prompt for enhanced agent capabilities

- Revised function schemas in `function_schemas.json` to streamline resource listing and searching, including new parameters for dungeon and overworld queries.
- Introduced a new system prompt in `system_prompt_v3.txt` to improve the agent's proactive exploration and multi-tool chaining strategies.
- Updated `GeminiAIService` to support the new prompt version and enhanced function calling logic for better tool integration.
- Added tests for multimodal image analysis and error handling in `test_gemini_vision.cc` to ensure robust functionality.
This commit is contained in:
scawful
2025-10-04 22:14:04 -04:00
parent e83235ee1a
commit 61f1d6fbac
6 changed files with 656 additions and 302 deletions

View File

@@ -70,6 +70,7 @@ set(YAZE_AGENT_SOURCES
cli/service/agent/conversational_agent_service.cc
cli/service/agent/simple_chat_session.cc
cli/service/agent/tool_dispatcher.cc
cli/service/agent/learned_knowledge_service.cc
cli/service/ai/ai_service.cc
cli/service/ai/ollama_ai_service.cc
cli/service/ai/prompt_builder.cc

View File

@@ -77,9 +77,14 @@ GeminiAIService::GeminiAIService(const GeminiConfig& config)
}
// Try to load version-specific system prompt file
std::string prompt_file = config_.prompt_version == "v2"
? "assets/agent/system_prompt_v2.txt"
: "assets/agent/system_prompt.txt";
std::string prompt_file;
if (config_.prompt_version == "v3") {
prompt_file = "assets/agent/system_prompt_v3.txt";
} else if (config_.prompt_version == "v2") {
prompt_file = "assets/agent/system_prompt_v2.txt";
} else {
prompt_file = "assets/agent/system_prompt.txt";
}
std::vector<std::string> search_paths = {
prompt_file,
@@ -135,9 +140,15 @@ std::vector<std::string> GeminiAIService::GetAvailableTools() const {
std::string GeminiAIService::BuildFunctionCallSchemas() {
#ifndef YAZE_WITH_JSON
return "[]"; // Empty array if JSON not available
return "{}"; // Empty object if JSON not available
#else
// Search for function_schemas.json in multiple locations
// Use the prompt builder's schema generation which reads from prompt_catalogue.yaml
std::string schemas = prompt_builder_.BuildFunctionCallSchemas();
if (!schemas.empty() && schemas != "[]") {
return schemas;
}
// Fallback: Search for function_schemas.json
const std::vector<std::string> search_paths = {
"assets/agent/function_schemas.json",
"../assets/agent/function_schemas.json",
@@ -337,10 +348,30 @@ absl::StatusOr<AgentResponse> GeminiAIService::GenerateResponse(
// Add function calling tools if enabled
if (function_calling_enabled_) {
try {
nlohmann::json tools = nlohmann::json::parse(BuildFunctionCallSchemas());
request_body["tools"] = {{
{"function_declarations", tools}
}};
std::string schemas_str = BuildFunctionCallSchemas();
if (config_.verbose) {
std::cerr << "[DEBUG] Function calling schemas: " << schemas_str.substr(0, 200) << "..." << std::endl;
}
nlohmann::json schemas = nlohmann::json::parse(schemas_str);
// Build the tools array - schemas may be a bare array of function declarations, a wrapper object with a "function_declarations" key, or a single function declaration object
if (schemas.is_array()) {
// Bare array of function declarations: wrap it in a single tool entry
request_body["tools"] = {{
{"function_declarations", schemas}
}};
} else if (schemas.is_object() && schemas.contains("function_declarations")) {
// If it's a wrapper object with function_declarations
request_body["tools"] = {{
{"function_declarations", schemas["function_declarations"]}
}};
} else {
// Otherwise treat it as a single function declaration and wrap it in an array
request_body["tools"] = {{
{"function_declarations", nlohmann::json::array({schemas})}
}};
}
} catch (const nlohmann::json::exception& e) {
std::cerr << "⚠️ Failed to parse function schemas: " << e.what() << std::endl;
}

View File

@@ -19,8 +19,8 @@ struct GeminiConfig {
int max_output_tokens = 2048;
mutable std::string system_instruction; // Mutable to allow lazy initialization
bool use_enhanced_prompting = true; // Enable few-shot examples
bool use_function_calling = false; // Use native Gemini function calling
std::string prompt_version = "default"; // Which prompt file to use (default, v2, etc.)
bool use_function_calling = true; // Use native Gemini function calling (enabled by default for 2.0+)
std::string prompt_version = "v3"; // Which prompt file to use (default, v2, v3, etc.)
bool verbose = false; // Enable debug logging
GeminiConfig() = default;