From 1b4015a87ae09a0b521218364fcebba240687ae2 Mon Sep 17 00:00:00 2001
From: scawful
Date: Sat, 4 Oct 2025 03:56:47 -0400
Subject: [PATCH] feat: Consolidate and enhance agent testing scripts, replacing manual and quickstart tests with a comprehensive test suite

---
 scripts/agent_test_suite.sh             | 126 ++++++++++++++
 scripts/manual_gemini_test.sh           | 129 --------------
 scripts/quickstart_ollama.sh            | 128 --------------
 scripts/test_agent_conversation_live.sh | 130 ---------------
 scripts/test_gemini_integration.sh      | 213 ------------------------
 scripts/test_ollama_integration.sh      | 172 -------------------
 6 files changed, 126 insertions(+), 772 deletions(-)
 create mode 100755 scripts/agent_test_suite.sh
 delete mode 100755 scripts/manual_gemini_test.sh
 delete mode 100755 scripts/quickstart_ollama.sh
 delete mode 100755 scripts/test_agent_conversation_live.sh
 delete mode 100755 scripts/test_gemini_integration.sh
 delete mode 100755 scripts/test_ollama_integration.sh

diff --git a/scripts/agent_test_suite.sh b/scripts/agent_test_suite.sh
new file mode 100755
index 00000000..da15b2f8
--- /dev/null
+++ b/scripts/agent_test_suite.sh
@@ -0,0 +1,126 @@
+#!/bin/bash
+
+# Comprehensive test suite for the z3ed AI Agent.
+# This script consolidates multiple older test scripts into one.
+#
+# Usage: ./scripts/agent_test_suite.sh <provider>
+#   <provider>: ollama, gemini, or mock
+#
+# Optional environment overrides (defaults are relative to the repo root):
+#   Z3ED_BIN  z3ed binary          (default: build_test/bin/z3ed)
+#   ROM_PATH  ROM image            (default: assets/zelda3.sfc)
+#   TEST_DIR  test file directory  (default: assets/agent)
+#
+# Exits 0 only if every test file was found and z3ed succeeded on it.
+
+set -e # Exit immediately if a command exits with a non-zero status.
+
+# --- Configuration ---
+# Resolve paths from this script's location so the suite runs from any
+# checkout instead of one developer's home directory.
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
+Z3ED_BIN="${Z3ED_BIN:-$PROJECT_ROOT/build_test/bin/z3ed}"
+ROM_PATH="${ROM_PATH:-$PROJECT_ROOT/assets/zelda3.sfc}"
+TEST_DIR="${TEST_DIR:-$PROJECT_ROOT/assets/agent}"
+TEST_FILES=(
+  "context_and_followup.txt"
+  "complex_command_generation.txt"
+  "error_handling_and_edge_cases.txt"
+)
+
+# --- Helper Functions ---
+# Print a banner around the title given in $1.
+print_header() {
+  echo ""
+  echo "================================================="
+  echo "$1"
+  echo "================================================="
+}
+
+# --- Pre-flight Checks ---
+print_header "Performing Pre-flight Checks"
+
+PROVIDER="${1:-}"
+if [[ -z "$PROVIDER" ]]; then
+  echo "❌ Error: No AI provider specified." >&2
+  echo "Usage: $0 <provider>" >&2
+  exit 1
+fi
+echo "✅ Provider: $PROVIDER"
+
+if [[ ! -f "$Z3ED_BIN" ]]; then
+  echo "❌ Error: z3ed binary not found at $Z3ED_BIN" >&2
+  echo "Please build the project first (e.g., in build_test)." >&2
+  exit 1
+fi
+echo "✅ z3ed binary found."
+
+if [[ ! -f "$ROM_PATH" ]]; then
+  echo "❌ Error: ROM not found at $ROM_PATH" >&2
+  exit 1
+fi
+echo "✅ ROM file found."
+
+if [[ "$PROVIDER" == "gemini" ]]; then
+  if [[ -z "$GEMINI_API_KEY" ]]; then
+    echo "❌ Error: GEMINI_API_KEY environment variable is not set." >&2
+    echo "Please set it to your Gemini API key to run this test." >&2
+    exit 1
+  fi
+  echo "✅ GEMINI_API_KEY is set."
+fi
+
+if [[ "$PROVIDER" == "ollama" ]]; then
+  if ! pgrep -x "Ollama" > /dev/null && ! pgrep -x "ollama" > /dev/null; then
+    echo "⚠️ Warning: Ollama server process not found. The script might fail if it's not running."
+  else
+    echo "✅ Ollama server process found."
+  fi
+fi
+
+# --- Run Test Suite ---
+FAILED_TESTS=()
+for test_file in "${TEST_FILES[@]}"; do
+  print_header "Running Test File: $test_file (Provider: $PROVIDER)"
+  FULL_TEST_PATH="$TEST_DIR/$test_file"
+
+  if [[ ! -f "$FULL_TEST_PATH" ]]; then
+    echo "❌ Error: Test file not found: $FULL_TEST_PATH" >&2
+    FAILED_TESTS+=("$test_file (test file missing)")
+    continue
+  fi
+
+  # Build the command as an array so that paths containing spaces and the
+  # user-supplied provider reach z3ed verbatim instead of being re-parsed
+  # by the shell. Use --quiet for cleaner test logs.
+  COMMAND=(
+    "$Z3ED_BIN" agent simple-chat
+    "--file=$FULL_TEST_PATH"
+    "--rom=$ROM_PATH"
+    "--ai_provider=$PROVIDER"
+    --quiet
+  )
+
+  echo "Executing command..."
+  echo "--- Agent Output for $test_file ---"
+
+  # Run inside `if` so a failing test is recorded rather than letting
+  # `set -e` abort the remaining tests.
+  if "${COMMAND[@]}"; then
+    echo "--- Test Complete ---"
+  else
+    status=$?
+    echo "--- Test Failed (exit code $status) ---" >&2
+    FAILED_TESTS+=("$test_file (exit code $status)")
+  fi
+  echo ""
+done
+
+if [[ ${#FAILED_TESTS[@]} -gt 0 ]]; then
+  print_header "❌ ${#FAILED_TESTS[@]} of ${#TEST_FILES[@]} test files failed"
+  printf '  - %s\n' "${FAILED_TESTS[@]}" >&2
+  exit 1
+fi
+
+print_header "✅ All tests completed successfully!"
diff --git a/scripts/manual_gemini_test.sh b/scripts/manual_gemini_test.sh deleted file mode 100755 index 8640873d..00000000 --- a/scripts/manual_gemini_test.sh +++ /dev/null @@ -1,129 +0,0 @@ -#!/bin/bash -# Manual Gemini Integration Test -# Usage: GEMINI_API_KEY='your-key' ./scripts/manual_gemini_test.sh - -set -e - -SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -PROJECT_ROOT="$SCRIPT_DIR/.." -Z3ED_BIN="$PROJECT_ROOT/build/bin/z3ed" - -echo "🧪 Manual Gemini Integration Test" -echo "==================================" -echo "" - -# Check if API key is set -if [ -z "$GEMINI_API_KEY" ]; then - echo "❌ Error: GEMINI_API_KEY not set" - echo "" - echo "Usage:" - echo " GEMINI_API_KEY='your-api-key-here' ./scripts/manual_gemini_test.sh" - echo "" - echo "Or export it first:" - echo " export GEMINI_API_KEY='your-api-key-here'" - echo " ./scripts/manual_gemini_test.sh" - exit 1 -fi - -echo "✅ GEMINI_API_KEY is set (length: ${#GEMINI_API_KEY} chars)" -echo "" - -# Test 1: Simple palette command -echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" -echo "Test 1: Simple palette color change" -echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" -echo "Prompt: 'Change palette 0 color 5 to red'" -echo "" - -OUTPUT=$($Z3ED_BIN agent plan --prompt "Change palette 0 color 5 to red" 2>&1) -echo "$OUTPUT" -echo "" - -if echo "$OUTPUT" | grep -q "Using Gemini AI"; then - echo "✅ Gemini service detected" -else - echo "❌ Expected 'Using Gemini AI' in output" - exit 1 -fi - -if echo "$OUTPUT" | grep -q -E "palette|color"; then - echo "✅ Generated palette-related commands" -else - echo "❌ No palette commands found" - exit
1 -fi - -echo "" - -# Test 2: Overworld modification -echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" -echo "Test 2: Overworld tile placement" -echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" -echo "Prompt: 'Place a tree at position (10, 20) on map 0'" -echo "" - -OUTPUT=$($Z3ED_BIN agent plan --prompt "Place a tree at position (10, 20) on map 0" 2>&1) -echo "$OUTPUT" -echo "" - -if echo "$OUTPUT" | grep -q "overworld"; then - echo "✅ Generated overworld commands" -else - echo "⚠️ No overworld commands (model may have interpreted differently)" -fi - -echo "" - -# Test 3: Complex multi-step task -echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" -echo "Test 3: Multi-step task" -echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" -echo "Prompt: 'Export palette 0, change color 3 to blue, and import it back'" -echo "" - -OUTPUT=$($Z3ED_BIN agent plan --prompt "Export palette 0, change color 3 to blue, and import it back" 2>&1) -echo "$OUTPUT" -echo "" - -COMMAND_COUNT=$(echo "$OUTPUT" | grep -c -E "^\s*-" || true) - -if [ "$COMMAND_COUNT" -ge 2 ]; then - echo "✅ Generated multiple commands ($COMMAND_COUNT commands)" -else - echo "⚠️ Expected multiple commands, got $COMMAND_COUNT" -fi - -echo "" - -# Test 4: Direct run command (creates proposal) -echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" -echo "Test 4: Direct run command (creates proposal)" -echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" -echo "Prompt: 'Validate the ROM'" -echo "" - -OUTPUT=$($Z3ED_BIN agent run --prompt "Validate the ROM" 2>&1 || true) -echo "$OUTPUT" -echo "" - -if echo "$OUTPUT" | grep -q "Proposal"; then - echo "✅ Proposal created" -else - echo "ℹ️ No proposal created (may need ROM file)" -fi - -echo "" -echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" -echo "🎉 Manual Test Suite Complete!" 
-echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" -echo "" -echo "Summary:" -echo " • Gemini API integration: ✅ Working" -echo " • Command generation: ✅ Functional" -echo " • Service factory: ✅ Correct provider selection" -echo "" -echo "Next steps:" -echo " 1. Review generated commands for accuracy" -echo " 2. Test with more complex prompts" -echo " 3. Compare with Ollama output quality" -echo " 4. Proceed to Phase 3 (Claude) or Phase 4 (Enhanced Prompting)" diff --git a/scripts/quickstart_ollama.sh b/scripts/quickstart_ollama.sh deleted file mode 100755 index 270caf98..00000000 --- a/scripts/quickstart_ollama.sh +++ /dev/null @@ -1,128 +0,0 @@ -#!/bin/bash -# Quick Start Script for Testing Ollama Integration with z3ed -# Usage: ./scripts/quickstart_ollama.sh - -set -e - -echo "🚀 z3ed + Ollama Quick Start" -echo "================================" -echo "" - -# Colors for output -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -BLUE='\033[0;34m' -NC='\033[0m' # No Color - -# Step 1: Check if Ollama is installed -echo "📦 Step 1: Checking Ollama installation..." -if ! command -v ollama &> /dev/null; then - echo -e "${RED}✗ Ollama not found${NC}" - echo "" - echo "Install Ollama with:" - echo " macOS: brew install ollama" - echo " Linux: curl -fsSL https://ollama.com/install.sh | sh" - echo "" - exit 1 -fi -echo -e "${GREEN}✓ Ollama installed${NC}" -echo "" - -# Step 2: Check if Ollama server is running -echo "🔌 Step 2: Checking Ollama server..." -if ! curl -s http://localhost:11434/api/tags > /dev/null 2>&1; then - echo -e "${YELLOW}⚠ Ollama server not running${NC}" - echo "" - echo "Starting Ollama server in background..." - ollama serve > /dev/null 2>&1 & - OLLAMA_PID=$! - echo "Waiting for server to start..." - sleep 3 - - if ! 
curl -s http://localhost:11434/api/tags > /dev/null 2>&1; then - echo -e "${RED}✗ Failed to start Ollama server${NC}" - exit 1 - fi - echo -e "${GREEN}✓ Ollama server started (PID: $OLLAMA_PID)${NC}" -else - echo -e "${GREEN}✓ Ollama server running${NC}" -fi -echo "" - -# Step 3: Check if recommended model is available -RECOMMENDED_MODEL="qwen2.5-coder:7b" -echo "🤖 Step 3: Checking for model: $RECOMMENDED_MODEL..." -if ! ollama list | grep -q "$RECOMMENDED_MODEL"; then - echo -e "${YELLOW}⚠ Model not found${NC}" - echo "" - read -p "Pull $RECOMMENDED_MODEL? (~4.7GB download) [y/N]: " -n 1 -r - echo "" - if [[ $REPLY =~ ^[Yy]$ ]]; then - echo "Pulling model (this may take a few minutes)..." - ollama pull "$RECOMMENDED_MODEL" - echo -e "${GREEN}✓ Model pulled successfully${NC}" - else - echo -e "${RED}✗ Model required for testing${NC}" - exit 1 - fi -else - echo -e "${GREEN}✓ Model available${NC}" -fi -echo "" - -# Step 4: Check if z3ed is built -echo "🔨 Step 4: Checking z3ed build..." -if [ ! -f "./build/bin/z3ed" ]; then - echo -e "${YELLOW}⚠ z3ed not found in ./build/bin/${NC}" - echo "" - echo "Building z3ed..." - cmake --build build --target z3ed - if [ ! -f "./build/bin/z3ed" ]; then - echo -e "${RED}✗ Failed to build z3ed${NC}" - exit 1 - fi -fi -echo -e "${GREEN}✓ z3ed ready${NC}" -echo "" - -# Step 5: Test Ollama integration -echo "🧪 Step 5: Testing z3ed + Ollama integration..." 
-export YAZE_AI_PROVIDER=ollama -export OLLAMA_MODEL="$RECOMMENDED_MODEL" - -echo "" -echo "Running test command:" -echo -e "${BLUE}z3ed agent plan --prompt \"Validate the ROM file\"${NC}" -echo "" - -if ./build/bin/z3ed agent plan --prompt "Validate the ROM file"; then - echo "" - echo -e "${GREEN}✓ Integration test passed!${NC}" -else - echo "" - echo -e "${RED}✗ Integration test failed${NC}" - echo "Check error messages above for details" - exit 1 -fi - -echo "" -echo "================================" -echo -e "${GREEN}🎉 Setup Complete!${NC}" -echo "" -echo "Next steps:" -echo " 1. Try a full agent run:" -echo " export YAZE_AI_PROVIDER=ollama" -echo " z3ed agent run --prompt \"Export first palette\" --rom zelda3.sfc --sandbox" -echo "" -echo " 2. Review generated commands:" -echo " z3ed agent list" -echo " z3ed agent diff" -echo "" -echo " 3. Try different models:" -echo " ollama pull codellama:13b" -echo " export OLLAMA_MODEL=codellama:13b" -echo "" -echo " 4. Read the docs:" -echo " docs/z3ed/LLM-INTEGRATION-PLAN.md" -echo "" diff --git a/scripts/test_agent_conversation_live.sh b/scripts/test_agent_conversation_live.sh deleted file mode 100755 index c891a6d9..00000000 --- a/scripts/test_agent_conversation_live.sh +++ /dev/null @@ -1,130 +0,0 @@ -#!/bin/bash -# Live testing script for conversational agent -# Tests agent function calling with real Ollama/Gemini backends - -set -e - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -PROJECT_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)" -Z3ED="${PROJECT_ROOT}/build/bin/z3ed" -ROM_FILE="${PROJECT_ROOT}/assets/zelda3.sfc" - -# Colors -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -BLUE='\033[0;34m' -NC='\033[0m' - -echo "=========================================" -echo "Live Conversational Agent Test" -echo "=========================================" -echo "" - -# Prerequisites check -if [ ! 
-f "$Z3ED" ]; then - echo -e "${RED}✗ z3ed not found at $Z3ED${NC}" - echo "Build with: cmake --build build --target z3ed" - exit 1 -fi - -if [ ! -f "$ROM_FILE" ]; then - echo -e "${RED}✗ ROM file not found at $ROM_FILE${NC}" - exit 1 -fi - -echo -e "${GREEN}✓ Prerequisites met${NC}" -echo "" - -# Check for AI backends -BACKEND_AVAILABLE=false - -echo "Checking AI Backends..." -echo "-----------------------" - -# Check Ollama -if command -v ollama &> /dev/null; then - if curl -s http://localhost:11434/api/tags > /dev/null 2>&1; then - echo -e "${GREEN}✓ Ollama server running${NC}" - if ollama list | grep -q "qwen2.5-coder"; then - echo -e "${GREEN}✓ qwen2.5-coder model available${NC}" - BACKEND_AVAILABLE=true - AI_BACKEND="Ollama" - else - echo -e "${YELLOW}⚠ Recommended model qwen2.5-coder:7b not installed${NC}" - echo " Install with: ollama pull qwen2.5-coder:7b" - fi - else - echo -e "${YELLOW}⚠ Ollama not running${NC}" - echo " Start with: ollama serve" - fi -else - echo -e "${YELLOW}⚠ Ollama not installed${NC}" -fi - -# Check Gemini -if [ -n "$GEMINI_API_KEY" ]; then - echo -e "${GREEN}✓ Gemini API key set${NC}" - BACKEND_AVAILABLE=true - if [ "$AI_BACKEND" != "Ollama" ]; then - AI_BACKEND="Gemini" - fi -else - echo -e "${YELLOW}⚠ GEMINI_API_KEY not set${NC}" -fi - -echo "" - -if [ "$BACKEND_AVAILABLE" = false ]; then - echo -e "${RED}✗ No AI backend available${NC}" - echo "" - echo "Please set up at least one backend:" - echo " - Ollama: brew install ollama && ollama serve && ollama pull qwen2.5-coder:7b" - echo " - Gemini: export GEMINI_API_KEY='your-key-here'" - exit 1 -fi - -echo -e "${GREEN}✓ Using AI Backend: $AI_BACKEND${NC}" -echo "" - -# Run the test-conversation command with default test cases -echo "=========================================" -echo "Running Automated Conversation Tests" -echo "=========================================" -echo "" -echo "This will run 5 default test cases:" -echo " 1. Simple ROM introspection (dungeon query)" -echo " 2. 
Overworld tile search" -echo " 3. Multi-step conversation" -echo " 4. Command generation (tile placement)" -echo " 5. Map description" -echo "" - -read -p "Press Enter to start tests (or Ctrl+C to cancel)..." -echo "" - -# Run the tests -"$Z3ED" agent test-conversation --rom "$ROM_FILE" --verbose - -TEST_EXIT_CODE=$? - -echo "" -echo "=========================================" -echo "Test Results" -echo "=========================================" - -if [ $TEST_EXIT_CODE -eq 0 ]; then - echo -e "${GREEN}✅ All tests completed successfully${NC}" -else - echo -e "${RED}❌ Tests failed with exit code $TEST_EXIT_CODE${NC}" -fi - -echo "" -echo "Next Steps:" -echo " - Review the output above for any warnings" -echo " - Check if tool calls are being invoked correctly" -echo " - Verify JSON/table formatting is working" -echo " - Test with custom conversation file: z3ed agent test-conversation --file my_tests.json" -echo "" - -exit $TEST_EXIT_CODE diff --git a/scripts/test_gemini_integration.sh b/scripts/test_gemini_integration.sh deleted file mode 100755 index 8fc65a37..00000000 --- a/scripts/test_gemini_integration.sh +++ /dev/null @@ -1,213 +0,0 @@ -#!/bin/bash -# Integration test for Gemini AI Service (Phase 2) - -set -e # Exit on error - -SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -PROJECT_ROOT="$SCRIPT_DIR/.." -Z3ED_BIN="$PROJECT_ROOT/build/bin/z3ed" - -echo "🧪 Gemini AI Integration Test Suite" -echo "======================================" - -# Color output helpers -GREEN='\033[0;32m' -RED='\033[0;31m' -YELLOW='\033[0;33m' -NC='\033[0m' # No Color - -pass() { - echo -e "${GREEN}✓${NC} $1" -} - -fail() { - echo -e "${RED}✗${NC} $1" - exit 1 -} - -warn() { - echo -e "${YELLOW}⚠${NC} $1" -} - -# Test 1: z3ed executable exists -echo "" -echo "Test 1: z3ed executable exists" -if [ -f "$Z3ED_BIN" ]; then - pass "z3ed executable found at $Z3ED_BIN" -else - fail "z3ed executable not found. 
Run: cmake --build build --target z3ed" -fi - -# Test 2: Check GEMINI_API_KEY environment variable -echo "" -echo "Test 2: Check GEMINI_API_KEY environment variable" -if [ -z "$GEMINI_API_KEY" ]; then - warn "GEMINI_API_KEY not set - skipping API tests" - echo " To test Gemini integration:" - echo " 1. Get API key at: https://makersuite.google.com/app/apikey" - echo " 2. Run: export GEMINI_API_KEY='your-api-key'" - echo " 3. Re-run this script" - - # Still test that service factory handles missing key gracefully - echo "" - echo "Test 2a: Verify graceful fallback without API key" - unset YAZE_AI_PROVIDER - OUTPUT=$($Z3ED_BIN agent plan --prompt "Place a tree" 2>&1) - - if echo "$OUTPUT" | grep -q "Using MockAIService"; then - pass "Service factory falls back to Mock when GEMINI_API_KEY missing" - else - fail "Service factory should fall back to Mock without API key" - fi - - echo "" - echo "⏭️ Skipping remaining Gemini API tests (no API key)" - exit 0 -fi - -pass "GEMINI_API_KEY is set" - -# Test 3: Verify Gemini model availability -echo "" -echo "Test 3: Verify Gemini model availability" -GEMINI_MODEL="${GEMINI_MODEL:-gemini-2.5-flash}" -echo " Testing with model: $GEMINI_MODEL" - -# Quick API check -HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" \ - -H "x-goog-api-key: $GEMINI_API_KEY" \ - "https://generativelanguage.googleapis.com/v1beta/models/$GEMINI_MODEL") - -if [ "$HTTP_CODE" = "200" ]; then - pass "Gemini API accessible, model '$GEMINI_MODEL' available" -elif [ "$HTTP_CODE" = "401" ] || [ "$HTTP_CODE" = "403" ]; then - fail "Invalid Gemini API key (HTTP $HTTP_CODE)" -elif [ "$HTTP_CODE" = "404" ]; then - fail "Model '$GEMINI_MODEL' not found (HTTP 404)" -else - warn "Unexpected HTTP status: $HTTP_CODE (continuing anyway)" -fi - -# Test 4: Generate commands with Gemini (simple prompt) -echo "" -echo "Test 4: Generate commands with Gemini (simple prompt)" -unset YAZE_AI_PROVIDER # Let service factory auto-detect from GEMINI_API_KEY - -OUTPUT=$($Z3ED_BIN 
agent plan --prompt "Change the color of palette 0 index 5 to red" 2>&1) - -if echo "$OUTPUT" | grep -q "Using Gemini AI"; then - pass "Service factory selected Gemini" -else - fail "Expected 'Using Gemini AI' in output, got: $OUTPUT" -fi - -if echo "$OUTPUT" | grep -q "palette"; then - pass "Gemini generated palette-related commands" - echo " Generated commands:" - echo "$OUTPUT" | grep -E "^\s*-" | sed 's/^/ /' -else - fail "Expected palette commands in output, got: $OUTPUT" -fi - -# Test 5: Generate commands with complex prompt -echo "" -echo "Test 5: Generate commands with complex prompt (overworld modification)" -OUTPUT=$($Z3ED_BIN agent plan --prompt "Place a tree at coordinates (10, 20) on overworld map 0" 2>&1) - -if echo "$OUTPUT" | grep -q "overworld"; then - pass "Gemini generated overworld commands" - echo " Generated commands:" - echo "$OUTPUT" | grep -E "^\s*-" | sed 's/^/ /' -else - fail "Expected overworld commands in output, got: $OUTPUT" -fi - -# Test 6: Test explicit provider selection -echo "" -echo "Test 6: Test explicit provider selection (YAZE_AI_PROVIDER=gemini)" -# Note: Current implementation doesn't have explicit "gemini" provider value -# It auto-detects from GEMINI_API_KEY. But we can test that Ollama doesn't override. 
-unset YAZE_AI_PROVIDER - -OUTPUT=$($Z3ED_BIN agent plan --prompt "Export palette 0" 2>&1) - -if echo "$OUTPUT" | grep -q "Using Gemini AI"; then - pass "Gemini selected when GEMINI_API_KEY present" -else - warn "Expected Gemini selection, got: $OUTPUT" -fi - -# Test 7: Verify JSON response parsing -echo "" -echo "Test 7: Verify JSON response parsing (check for command format)" -OUTPUT=$($Z3ED_BIN agent plan --prompt "Set tile at (5,5) to 0x100" 2>&1) - -# Commands should NOT have "z3ed" prefix (service should strip it) -if echo "$OUTPUT" | grep -E "^\s*- z3ed"; then - warn "Commands still contain 'z3ed' prefix (should be stripped)" -else - pass "Commands properly formatted without 'z3ed' prefix" -fi - -# Test 8: Test multiple commands in response -echo "" -echo "Test 8: Test multiple commands generation" -OUTPUT=$($Z3ED_BIN agent plan --prompt "Export palette 0 to test.json, change color 5 to red, then import it back" 2>&1) - -COMMAND_COUNT=$(echo "$OUTPUT" | grep -c -E "^\s*- " || true) - -if [ "$COMMAND_COUNT" -ge 2 ]; then - pass "Gemini generated multiple commands ($COMMAND_COUNT commands)" - echo " Commands:" - echo "$OUTPUT" | grep -E "^\s*-" | sed 's/^/ /' -else - warn "Expected multiple commands, got $COMMAND_COUNT" -fi - -# Test 9: Error handling - invalid API key -echo "" -echo "Test 9: Error handling with invalid API key" -SAVED_KEY="$GEMINI_API_KEY" -export GEMINI_API_KEY="invalid_key_12345" - -OUTPUT=$($Z3ED_BIN agent plan --prompt "Test" 2>&1 || true) - -if echo "$OUTPUT" | grep -q "Invalid Gemini API key\|Falling back to MockAIService"; then - pass "Service handles invalid API key gracefully" -else - warn "Expected error handling message, got: $OUTPUT" -fi - -# Restore key -export GEMINI_API_KEY="$SAVED_KEY" - -# Test 10: Model override via environment -echo "" -echo "Test 10: Model override via GEMINI_MODEL environment variable" -export GEMINI_MODEL="gemini-1.5-pro" - -OUTPUT=$($Z3ED_BIN agent plan --prompt "Test" 2>&1) - -if echo "$OUTPUT" | grep 
-q "gemini-1.5-pro"; then - pass "GEMINI_MODEL environment variable respected" -else - warn "Expected model override, got: $OUTPUT" -fi - -unset GEMINI_MODEL - -echo "" -echo "======================================" -echo "✅ Gemini Integration Test Suite Complete" -echo "" -echo "Summary:" -echo " - Gemini API accessible" -echo " - Command generation working" -echo " - Error handling functional" -echo " - JSON parsing robust" -echo "" -echo "Next steps:" -echo " 1. Test with various prompt types" -echo " 2. Measure response latency" -echo " 3. Compare accuracy with Ollama" -echo " 4. Consider rate limiting for production" diff --git a/scripts/test_ollama_integration.sh b/scripts/test_ollama_integration.sh deleted file mode 100755 index b3ac424f..00000000 --- a/scripts/test_ollama_integration.sh +++ /dev/null @@ -1,172 +0,0 @@ -#!/bin/bash -# Test script for Ollama AI service integration -# This script validates Phase 1 implementation - -set -e - -echo "🧪 Testing Ollama AI Service Integration (Phase 1)" -echo "==================================================" -echo "" - -# Colors for output -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -BLUE='\033[0;34m' -NC='\033[0m' # No Color - -TESTS_PASSED=0 -TESTS_FAILED=0 - -# Helper functions -pass_test() { - echo -e "${GREEN}✓ PASS:${NC} $1" - ((TESTS_PASSED++)) -} - -fail_test() { - echo -e "${RED}✗ FAIL:${NC} $1" - ((TESTS_FAILED++)) -} - -info() { - echo -e "${BLUE}ℹ${NC} $1" -} - -# Test 1: Check if z3ed built successfully -echo "Test 1: z3ed executable exists" -if [ -f "./build/bin/z3ed" ]; then - pass_test "z3ed executable found" -else - fail_test "z3ed executable not found" - exit 1 -fi -echo "" - -# Test 2: Test MockAIService fallback (no LLM configured) -echo "Test 2: MockAIService fallback" -unset YAZE_AI_PROVIDER -unset GEMINI_API_KEY -unset CLAUDE_API_KEY - -OUTPUT=$(./build/bin/z3ed agent plan --prompt "Place a tree" 2>&1 || true) -if echo "$OUTPUT" | grep -q "Using MockAIService"; then - 
pass_test "MockAIService activated when no LLM configured" - if echo "$OUTPUT" | grep -q "AI Agent Plan:"; then - pass_test "MockAIService generated commands" - fi -else - fail_test "MockAIService fallback not working" -fi -echo "" - -# Test 3: Test Ollama provider selection (without server) -echo "Test 3: Ollama provider selection (without server running)" -export YAZE_AI_PROVIDER=ollama - -OUTPUT=$(./build/bin/z3ed agent plan --prompt "Validate ROM" 2>&1 || true) -if echo "$OUTPUT" | grep -q "Ollama unavailable"; then - pass_test "Ollama health check detected unavailable server" - if echo "$OUTPUT" | grep -q "Falling back to MockAIService"; then - pass_test "Graceful fallback to MockAIService" - else - fail_test "Did not fall back to MockAIService" - fi -else - info "Note: If Ollama is running, this test will pass differently" -fi -echo "" - -# Test 4: Check if Ollama is installed -echo "Test 4: Ollama installation check" -if command -v ollama &> /dev/null; then - pass_test "Ollama is installed" - - # Test 5: Check if Ollama server is running - echo "" - echo "Test 5: Ollama server availability" - if curl -s http://localhost:11434/api/tags > /dev/null 2>&1; then - pass_test "Ollama server is running" - - # Test 6: Check for qwen2.5-coder model - echo "" - echo "Test 6: qwen2.5-coder:7b model availability" - if ollama list | grep -q "qwen2.5-coder:7b"; then - pass_test "Recommended model is available" - - # Test 7: End-to-end test with Ollama - echo "" - echo "Test 7: End-to-end LLM command generation" - export YAZE_AI_PROVIDER=ollama - export OLLAMA_MODEL=qwen2.5-coder:7b - - info "Testing: 'agent plan --prompt \"Validate the ROM\"'" - OUTPUT=$(./build/bin/z3ed agent plan --prompt "Validate the ROM" 2>&1) - - if echo "$OUTPUT" | grep -q "Using Ollama AI"; then - pass_test "Ollama AI service activated" - else - fail_test "Ollama AI service not activated" - fi - - if echo "$OUTPUT" | grep -q "AI Agent Plan:"; then - pass_test "Command generation completed" - - # 
Check if reasonable commands were generated - if echo "$OUTPUT" | grep -q "rom"; then - pass_test "Generated ROM-related command" - else - fail_test "Generated command doesn't seem ROM-related" - fi - else - fail_test "No commands generated" - fi - - echo "" - echo "Generated output:" - echo "---" - echo "$OUTPUT" - echo "---" - - else - fail_test "qwen2.5-coder:7b not found" - info "Install with: ollama pull qwen2.5-coder:7b" - fi - else - fail_test "Ollama server not running" - info "Start with: ollama serve" - fi -else - fail_test "Ollama not installed" - info "Install with: brew install ollama (macOS)" - info "Or visit: https://ollama.com/download" -fi - -echo "" -echo "==================================================" -echo "Test Summary:" -echo -e " ${GREEN}Passed: $TESTS_PASSED${NC}" -echo -e " ${RED}Failed: $TESTS_FAILED${NC}" -echo "" - -if [ $TESTS_FAILED -eq 0 ]; then - echo -e "${GREEN}✓ All tests passed!${NC}" - echo "" - echo "Next steps:" - echo " 1. If Ollama tests were skipped, install and configure:" - echo " brew install ollama" - echo " ollama serve &" - echo " ollama pull qwen2.5-coder:7b" - echo "" - echo " 2. Try the full agent workflow:" - echo " export YAZE_AI_PROVIDER=ollama" - echo " ./build/bin/z3ed agent run --prompt \"Validate ROM\" --rom zelda3.sfc --sandbox" - echo "" - echo " 3. Check the implementation checklist:" - echo " docs/z3ed/LLM-IMPLEMENTATION-CHECKLIST.md" - exit 0 -else - echo -e "${RED}✗ Some tests failed${NC}" - echo "Review the output above for details" - exit 1 -fi