feat: Consolidate and enhance agent testing scripts, replacing manual and quickstart tests with a comprehensive test suite

This commit is contained in:
scawful
2025-10-04 03:56:47 -04:00
parent ed75bdc268
commit 1b4015a87a
6 changed files with 93 additions and 772 deletions

View File

@@ -0,0 +1,93 @@
#!/bin/bash
# Comprehensive test suite for the z3ed AI Agent.
# This script consolidates multiple older test scripts into one.
#
# Usage: ./scripts/agent_test_suite.sh <provider>
#   provider: ollama, gemini, or mock
#
# Optional environment overrides (each defaults to a path inside the repo):
#   Z3ED_BIN  path to the z3ed binary
#   ROM_PATH  path to the ROM file
#   TEST_DIR  directory that holds the test prompt files

set -e # Exit immediately if a command exits with a non-zero status.

# --- Configuration ---
# Paths are resolved from this script's own location (it lives in
# <repo>/scripts according to the usage line) rather than from one
# developer's home directory, so the suite runs from any checkout.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"

Z3ED_BIN="${Z3ED_BIN:-${PROJECT_ROOT}/build_test/bin/z3ed}"
ROM_PATH="${ROM_PATH:-${PROJECT_ROOT}/assets/zelda3.sfc}"
TEST_DIR="${TEST_DIR:-${PROJECT_ROOT}/assets/agent}"

# Prompt files (relative to TEST_DIR) executed in this order.
TEST_FILES=(
  "context_and_followup.txt"
  "complex_command_generation.txt"
  "error_handling_and_edge_cases.txt"
)
# --- Helper Functions ---

# Prints a blank line followed by the given title framed above and below by
# a rule of '=' characters.
#   $1 - title text
print_header() {
  local rule="================================================="
  printf '\n%s\n' "$rule"
  echo "$1"
  printf '%s\n' "$rule"
}
# --- Pre-flight Checks ---
# Verifies, before any test runs, that a provider was named, that the z3ed
# binary and the ROM exist, and that the selected provider's prerequisite is
# in place. Every hard failure prints a message and exits with status 1.
print_header "Performing Pre-flight Checks"

# The provider name is the single required positional argument.
[[ -n "$1" ]] || {
  echo "❌ Error: No AI provider specified."
  echo "Usage: $0 <ollama|gemini|mock>"
  exit 1
}
PROVIDER="$1"
echo "✅ Provider: $PROVIDER"

[[ -f "$Z3ED_BIN" ]] || {
  echo "❌ Error: z3ed binary not found at $Z3ED_BIN"
  echo "Please build the project first (e.g., in build_test)."
  exit 1
}
echo "✅ z3ed binary found."

[[ -f "$ROM_PATH" ]] || {
  echo "❌ Error: ROM not found at $ROM_PATH"
  exit 1
}
echo "✅ ROM file found."

# Provider-specific prerequisites; any other provider needs no extra check.
case "$PROVIDER" in
  gemini)
    if [[ -z "$GEMINI_API_KEY" ]]; then
      echo "❌ Error: GEMINI_API_KEY environment variable is not set."
      echo "Please set it to your Gemini API key to run this test."
      exit 1
    fi
    echo "✅ GEMINI_API_KEY is set."
    ;;
  ollama)
    # A missing server process only produces a warning; the run continues.
    if pgrep -x "Ollama" > /dev/null || pgrep -x "ollama" > /dev/null; then
      echo "✅ Ollama server process found."
    else
      echo "⚠️ Warning: Ollama server process not found. The script might fail if it's not running."
    fi
    ;;
esac
# --- Run Test Suite ---
# Feeds every file in TEST_FILES to `z3ed agent simple-chat` for the selected
# provider. Missing files and failing runs are recorded rather than ignored,
# and the script exits with status 1 if any were recorded.
failed_tests=()

for test_file in "${TEST_FILES[@]}"; do
  print_header "Running Test File: $test_file (Provider: $PROVIDER)"
  FULL_TEST_PATH="$TEST_DIR/$test_file"

  if [ ! -f "$FULL_TEST_PATH" ]; then
    echo "❌ Error: Test file not found: $FULL_TEST_PATH"
    failed_tests+=("$test_file (missing)")
    continue
  fi

  # Build the command as an array and run it directly. The previous
  # `eval` of a concatenated string split paths on whitespace and would
  # execute shell metacharacters contained in the provider argument.
  # --quiet keeps the test logs clean.
  COMMAND=(
    "$Z3ED_BIN" agent simple-chat
    "--file=$FULL_TEST_PATH"
    "--rom=$ROM_PATH"
    "--ai_provider=$PROVIDER"
    --quiet
  )

  echo "Executing command..."
  echo "--- Agent Output for $test_file ---"

  # Running inside `if` keeps `set -e` from aborting the whole suite on the
  # first failing file, so the remaining files still run.
  if "${COMMAND[@]}"; then
    echo "--- Test Complete ---"
  else
    echo "--- Test Failed (exit code $?) ---"
    failed_tests+=("$test_file")
  fi
  echo ""
done

if [ "${#failed_tests[@]}" -gt 0 ]; then
  print_header "❌ ${#failed_tests[@]} test file(s) failed"
  printf '  - %s\n' "${failed_tests[@]}"
  exit 1
fi

print_header "✅ All tests completed successfully!"

View File

@@ -1,129 +0,0 @@
#!/bin/bash
# Manual Gemini Integration Test
# Usage: GEMINI_API_KEY='your-key' ./scripts/manual_gemini_test.sh
#
# Sends four prompts through z3ed and greps the output for expected markers.
# Test 1 is the only one that fails the script on a mismatch; tests 2-4 only
# print a note. Requires GEMINI_API_KEY and a z3ed binary at
# <script dir>/../build/bin/z3ed.

set -e

SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
PROJECT_ROOT="$SCRIPT_DIR/.."
Z3ED_BIN="$PROJECT_ROOT/build/bin/z3ed"

RULE="━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

# Prints a section title framed by horizontal rules.
#   $1 - title text
section() {
  echo "$RULE"
  echo "$1"
  echo "$RULE"
}

echo "🧪 Manual Gemini Integration Test"
echo "=================================="
echo ""

# Check if API key is set
if [ -z "$GEMINI_API_KEY" ]; then
  echo "❌ Error: GEMINI_API_KEY not set"
  echo ""
  echo "Usage:"
  echo " GEMINI_API_KEY='your-api-key-here' ./scripts/manual_gemini_test.sh"
  echo ""
  echo "Or export it first:"
  echo " export GEMINI_API_KEY='your-api-key-here'"
  echo " ./scripts/manual_gemini_test.sh"
  exit 1
fi

# Only the key's length is printed, never the key itself.
echo "✅ GEMINI_API_KEY is set (length: ${#GEMINI_API_KEY} chars)"
echo ""

# Test 1: Simple palette command
section "Test 1: Simple palette color change"
echo "Prompt: 'Change palette 0 color 5 to red'"
echo ""

# "$Z3ED_BIN" is quoted at every call site: it is derived from the checkout
# location, which may contain spaces.
OUTPUT=$("$Z3ED_BIN" agent plan --prompt "Change palette 0 color 5 to red" 2>&1)
echo "$OUTPUT"
echo ""

if echo "$OUTPUT" | grep -q "Using Gemini AI"; then
  echo "✅ Gemini service detected"
else
  echo "❌ Expected 'Using Gemini AI' in output"
  exit 1
fi

if echo "$OUTPUT" | grep -q -E "palette|color"; then
  echo "✅ Generated palette-related commands"
else
  echo "❌ No palette commands found"
  exit 1
fi
echo ""

# Test 2: Overworld modification
section "Test 2: Overworld tile placement"
echo "Prompt: 'Place a tree at position (10, 20) on map 0'"
echo ""

OUTPUT=$("$Z3ED_BIN" agent plan --prompt "Place a tree at position (10, 20) on map 0" 2>&1)
echo "$OUTPUT"
echo ""

if echo "$OUTPUT" | grep -q "overworld"; then
  echo "✅ Generated overworld commands"
else
  echo "⚠️ No overworld commands (model may have interpreted differently)"
fi
echo ""

# Test 3: Complex multi-step task
section "Test 3: Multi-step task"
echo "Prompt: 'Export palette 0, change color 3 to blue, and import it back'"
echo ""

OUTPUT=$("$Z3ED_BIN" agent plan --prompt "Export palette 0, change color 3 to blue, and import it back" 2>&1)
echo "$OUTPUT"
echo ""

# grep -c exits non-zero when the count is 0; `|| true` keeps set -e quiet.
COMMAND_COUNT=$(echo "$OUTPUT" | grep -c -E "^\s*-" || true)
if [ "$COMMAND_COUNT" -ge 2 ]; then
  echo "✅ Generated multiple commands ($COMMAND_COUNT commands)"
else
  echo "⚠️ Expected multiple commands, got $COMMAND_COUNT"
fi
echo ""

# Test 4: Direct run command (creates proposal)
section "Test 4: Direct run command (creates proposal)"
echo "Prompt: 'Validate the ROM'"
echo ""

# A non-zero exit is tolerated here; the check below only looks at the output.
OUTPUT=$("$Z3ED_BIN" agent run --prompt "Validate the ROM" 2>&1 || true)
echo "$OUTPUT"
echo ""

if echo "$OUTPUT" | grep -q "Proposal"; then
  echo "✅ Proposal created"
else
  echo " No proposal created (may need ROM file)"
fi
echo ""

section "🎉 Manual Test Suite Complete!"
echo ""
echo "Summary:"
echo " • Gemini API integration: ✅ Working"
echo " • Command generation: ✅ Functional"
echo " • Service factory: ✅ Correct provider selection"
echo ""
echo "Next steps:"
echo " 1. Review generated commands for accuracy"
echo " 2. Test with more complex prompts"
echo " 3. Compare with Ollama output quality"
echo " 4. Proceed to Phase 3 (Claude) or Phase 4 (Enhanced Prompting)"

View File

@@ -1,128 +0,0 @@
#!/bin/bash
# Quick Start Script for Testing Ollama Integration with z3ed
# Usage: ./scripts/quickstart_ollama.sh
#
# Walks through five steps: Ollama installed, Ollama server reachable
# (started in the background if not), recommended model present (offers to
# pull it), z3ed built (builds it if not), and one `agent plan` smoke test.
# z3ed is looked up at ./build/bin/z3ed relative to the current directory.

set -e

echo "🚀 z3ed + Ollama Quick Start"
echo "================================"
echo ""

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Succeeds when the local Ollama HTTP API answers.
ollama_server_up() {
  curl -s http://localhost:11434/api/tags > /dev/null 2>&1
}

# Step 1: Check if Ollama is installed
echo "📦 Step 1: Checking Ollama installation..."
if ! command -v ollama &> /dev/null; then
  echo -e "${RED}✗ Ollama not found${NC}"
  echo ""
  echo "Install Ollama with:"
  echo " macOS: brew install ollama"
  echo " Linux: curl -fsSL https://ollama.com/install.sh | sh"
  echo ""
  exit 1
fi
echo -e "${GREEN}✓ Ollama installed${NC}"
echo ""

# Step 2: Check if Ollama server is running
echo "🔌 Step 2: Checking Ollama server..."
if ! ollama_server_up; then
  echo -e "${YELLOW}⚠ Ollama server not running${NC}"
  echo ""
  echo "Starting Ollama server in background..."
  ollama serve > /dev/null 2>&1 &
  OLLAMA_PID=$!
  echo "Waiting for server to start..."
  # Poll once per second (up to 10 s) instead of a single fixed sleep, so a
  # slow start is not reported as a failure and a fast one is not delayed.
  for _ in 1 2 3 4 5 6 7 8 9 10; do
    if ollama_server_up; then
      break
    fi
    sleep 1
  done
  if ! ollama_server_up; then
    echo -e "${RED}✗ Failed to start Ollama server${NC}"
    exit 1
  fi
  echo -e "${GREEN}✓ Ollama server started (PID: $OLLAMA_PID)${NC}"
else
  echo -e "${GREEN}✓ Ollama server running${NC}"
fi
echo ""

# Step 3: Check if recommended model is available
RECOMMENDED_MODEL="qwen2.5-coder:7b"
echo "🤖 Step 3: Checking for model: $RECOMMENDED_MODEL..."
# -F matches the model name literally; as a regex its '.' would match any
# character.
if ! ollama list | grep -qF -- "$RECOMMENDED_MODEL"; then
  echo -e "${YELLOW}⚠ Model not found${NC}"
  echo ""
  read -r -p "Pull $RECOMMENDED_MODEL? (~4.7GB download) [y/N]: " -n 1
  echo ""
  if [[ $REPLY =~ ^[Yy]$ ]]; then
    echo "Pulling model (this may take a few minutes)..."
    ollama pull "$RECOMMENDED_MODEL"
    echo -e "${GREEN}✓ Model pulled successfully${NC}"
  else
    echo -e "${RED}✗ Model required for testing${NC}"
    exit 1
  fi
else
  echo -e "${GREEN}✓ Model available${NC}"
fi
echo ""

# Step 4: Check if z3ed is built
echo "🔨 Step 4: Checking z3ed build..."
if [ ! -f "./build/bin/z3ed" ]; then
  echo -e "${YELLOW}⚠ z3ed not found in ./build/bin/${NC}"
  echo ""
  echo "Building z3ed..."
  cmake --build build --target z3ed
  if [ ! -f "./build/bin/z3ed" ]; then
    echo -e "${RED}✗ Failed to build z3ed${NC}"
    exit 1
  fi
fi
echo -e "${GREEN}✓ z3ed ready${NC}"
echo ""

# Step 5: Test Ollama integration
echo "🧪 Step 5: Testing z3ed + Ollama integration..."
export YAZE_AI_PROVIDER=ollama
export OLLAMA_MODEL="$RECOMMENDED_MODEL"
echo ""
echo "Running test command:"
echo -e "${BLUE}z3ed agent plan --prompt \"Validate the ROM file\"${NC}"
echo ""
if ./build/bin/z3ed agent plan --prompt "Validate the ROM file"; then
  echo ""
  echo -e "${GREEN}✓ Integration test passed!${NC}"
else
  echo ""
  echo -e "${RED}✗ Integration test failed${NC}"
  echo "Check error messages above for details"
  exit 1
fi

echo ""
echo "================================"
echo -e "${GREEN}🎉 Setup Complete!${NC}"
echo ""
echo "Next steps:"
echo " 1. Try a full agent run:"
echo " export YAZE_AI_PROVIDER=ollama"
echo " z3ed agent run --prompt \"Export first palette\" --rom zelda3.sfc --sandbox"
echo ""
echo " 2. Review generated commands:"
echo " z3ed agent list"
echo " z3ed agent diff"
echo ""
echo " 3. Try different models:"
echo " ollama pull codellama:13b"
echo " export OLLAMA_MODEL=codellama:13b"
echo ""
echo " 4. Read the docs:"
echo " docs/z3ed/LLM-INTEGRATION-PLAN.md"
echo ""

View File

@@ -1,130 +0,0 @@
#!/bin/bash
# Live testing script for conversational agent
# Tests agent function calling with real Ollama/Gemini backends
#
# Checks that z3ed and the ROM exist, detects an available backend (Ollama
# with a qwen2.5-coder model, or GEMINI_API_KEY), then runs
# `z3ed agent test-conversation` and exits with that command's status.

set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
Z3ED="${PROJECT_ROOT}/build/bin/z3ed"
ROM_FILE="${PROJECT_ROOT}/assets/zelda3.sfc"

# Colors
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

echo "========================================="
echo "Live Conversational Agent Test"
echo "========================================="
echo ""

# Prerequisites check
if [ ! -f "$Z3ED" ]; then
  echo -e "${RED}✗ z3ed not found at $Z3ED${NC}"
  echo "Build with: cmake --build build --target z3ed"
  exit 1
fi
if [ ! -f "$ROM_FILE" ]; then
  echo -e "${RED}✗ ROM file not found at $ROM_FILE${NC}"
  exit 1
fi
echo -e "${GREEN}✓ Prerequisites met${NC}"
echo ""

# Check for AI backends
BACKEND_AVAILABLE=false
AI_BACKEND=""
echo "Checking AI Backends..."
echo "-----------------------"

# Check Ollama
if command -v ollama &> /dev/null; then
  if curl -s http://localhost:11434/api/tags > /dev/null 2>&1; then
    echo -e "${GREEN}✓ Ollama server running${NC}"
    if ollama list | grep -q "qwen2.5-coder"; then
      echo -e "${GREEN}✓ qwen2.5-coder model available${NC}"
      BACKEND_AVAILABLE=true
      AI_BACKEND="Ollama"
    else
      echo -e "${YELLOW}⚠ Recommended model qwen2.5-coder:7b not installed${NC}"
      echo " Install with: ollama pull qwen2.5-coder:7b"
    fi
  else
    echo -e "${YELLOW}⚠ Ollama not running${NC}"
    echo " Start with: ollama serve"
  fi
else
  echo -e "${YELLOW}⚠ Ollama not installed${NC}"
fi

# Check Gemini (Ollama keeps precedence in the reported backend when both
# are available)
if [ -n "$GEMINI_API_KEY" ]; then
  echo -e "${GREEN}✓ Gemini API key set${NC}"
  BACKEND_AVAILABLE=true
  if [ "$AI_BACKEND" != "Ollama" ]; then
    AI_BACKEND="Gemini"
  fi
else
  echo -e "${YELLOW}⚠ GEMINI_API_KEY not set${NC}"
fi
echo ""

if [ "$BACKEND_AVAILABLE" = false ]; then
  echo -e "${RED}✗ No AI backend available${NC}"
  echo ""
  echo "Please set up at least one backend:"
  echo " - Ollama: brew install ollama && ollama serve && ollama pull qwen2.5-coder:7b"
  echo " - Gemini: export GEMINI_API_KEY='your-key-here'"
  exit 1
fi
echo -e "${GREEN}✓ Using AI Backend: $AI_BACKEND${NC}"
echo ""

# Run the test-conversation command with default test cases
echo "========================================="
echo "Running Automated Conversation Tests"
echo "========================================="
echo ""
echo "This will run 5 default test cases:"
echo " 1. Simple ROM introspection (dungeon query)"
echo " 2. Overworld tile search"
echo " 3. Multi-step conversation"
echo " 4. Command generation (tile placement)"
echo " 5. Map description"
echo ""
read -r -p "Press Enter to start tests (or Ctrl+C to cancel)..."
echo ""

# Run the tests. The status is captured with `||` because, under `set -e`, a
# bare failing command would abort the script right here and the results
# section below would never print its failure branch.
TEST_EXIT_CODE=0
"$Z3ED" agent test-conversation --rom "$ROM_FILE" --verbose || TEST_EXIT_CODE=$?

echo ""
echo "========================================="
echo "Test Results"
echo "========================================="
if [ "$TEST_EXIT_CODE" -eq 0 ]; then
  echo -e "${GREEN}✅ All tests completed successfully${NC}"
else
  echo -e "${RED}❌ Tests failed with exit code $TEST_EXIT_CODE${NC}"
fi
echo ""
echo "Next Steps:"
echo " - Review the output above for any warnings"
echo " - Check if tool calls are being invoked correctly"
echo " - Verify JSON/table formatting is working"
echo " - Test with custom conversation file: z3ed agent test-conversation --file my_tests.json"
echo ""
exit "$TEST_EXIT_CODE"

View File

@@ -1,213 +0,0 @@
#!/bin/bash
# Integration test for Gemini AI Service (Phase 2)

set -e # Exit on error

# Locate the z3ed binary relative to the directory containing this script.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="${SCRIPT_DIR}/.."
Z3ED_BIN="${PROJECT_ROOT}/build/bin/z3ed"

# ANSI escape sequences, stored unexpanded, for the colored output helpers.
NC='\033[0m' # No Color
YELLOW='\033[0;33m'
RED='\033[0;31m'
GREEN='\033[0;32m'

printf '%s\n' \
  "🧪 Gemini AI Integration Test Suite" \
  "======================================"
# Prints a message prefixed by the green/reset color pair.
#   $1 - message (backslash escapes are expanded)
pass() {
  printf '%b\n' "${GREEN}${NC} $1"
}

# Prints a message prefixed by the red/reset color pair, then terminates the
# script with status 1.
#   $1 - message (backslash escapes are expanded)
fail() {
  printf '%b\n' "${RED}${NC} $1"
  exit 1
}

# Prints a message prefixed by the yellow/reset color pair; does not exit.
#   $1 - message (backslash escapes are expanded)
warn() {
  printf '%b\n' "${YELLOW}${NC} $1"
}
# The tests below run in order. `fail` stops the script with status 1;
# `warn` only reports. "$Z3ED_BIN" is quoted at every call site because it is
# derived from the checkout location, which may contain spaces.

# Test 1: z3ed executable exists
echo ""
echo "Test 1: z3ed executable exists"
if [ -f "$Z3ED_BIN" ]; then
  pass "z3ed executable found at $Z3ED_BIN"
else
  fail "z3ed executable not found. Run: cmake --build build --target z3ed"
fi

# Test 2: Check GEMINI_API_KEY environment variable
echo ""
echo "Test 2: Check GEMINI_API_KEY environment variable"
if [ -z "$GEMINI_API_KEY" ]; then
  warn "GEMINI_API_KEY not set - skipping API tests"
  echo " To test Gemini integration:"
  echo " 1. Get API key at: https://makersuite.google.com/app/apikey"
  echo " 2. Run: export GEMINI_API_KEY='your-api-key'"
  echo " 3. Re-run this script"

  # Still test that service factory handles missing key gracefully
  echo ""
  echo "Test 2a: Verify graceful fallback without API key"
  unset YAZE_AI_PROVIDER
  OUTPUT=$("$Z3ED_BIN" agent plan --prompt "Place a tree" 2>&1)
  if echo "$OUTPUT" | grep -q "Using MockAIService"; then
    pass "Service factory falls back to Mock when GEMINI_API_KEY missing"
  else
    fail "Service factory should fall back to Mock without API key"
  fi
  echo ""
  echo "⏭️ Skipping remaining Gemini API tests (no API key)"
  exit 0
fi
pass "GEMINI_API_KEY is set"

# Test 3: Verify Gemini model availability
echo ""
echo "Test 3: Verify Gemini model availability"
GEMINI_MODEL="${GEMINI_MODEL:-gemini-2.5-flash}"
echo " Testing with model: $GEMINI_MODEL"

# Quick API check. `|| true` keeps a curl transport failure from aborting
# under `set -e`, so the "continuing anyway" branch below can handle it.
HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" \
  -H "x-goog-api-key: $GEMINI_API_KEY" \
  "https://generativelanguage.googleapis.com/v1beta/models/$GEMINI_MODEL") || true
if [ "$HTTP_CODE" = "200" ]; then
  pass "Gemini API accessible, model '$GEMINI_MODEL' available"
elif [ "$HTTP_CODE" = "401" ] || [ "$HTTP_CODE" = "403" ]; then
  fail "Invalid Gemini API key (HTTP $HTTP_CODE)"
elif [ "$HTTP_CODE" = "404" ]; then
  fail "Model '$GEMINI_MODEL' not found (HTTP 404)"
else
  warn "Unexpected HTTP status: $HTTP_CODE (continuing anyway)"
fi

# Test 4: Generate commands with Gemini (simple prompt)
echo ""
echo "Test 4: Generate commands with Gemini (simple prompt)"
unset YAZE_AI_PROVIDER # Let service factory auto-detect from GEMINI_API_KEY
OUTPUT=$("$Z3ED_BIN" agent plan --prompt "Change the color of palette 0 index 5 to red" 2>&1)
if echo "$OUTPUT" | grep -q "Using Gemini AI"; then
  pass "Service factory selected Gemini"
else
  fail "Expected 'Using Gemini AI' in output, got: $OUTPUT"
fi
if echo "$OUTPUT" | grep -q "palette"; then
  pass "Gemini generated palette-related commands"
  echo " Generated commands:"
  echo "$OUTPUT" | grep -E "^\s*-" | sed 's/^/ /'
else
  fail "Expected palette commands in output, got: $OUTPUT"
fi

# Test 5: Generate commands with complex prompt
echo ""
echo "Test 5: Generate commands with complex prompt (overworld modification)"
OUTPUT=$("$Z3ED_BIN" agent plan --prompt "Place a tree at coordinates (10, 20) on overworld map 0" 2>&1)
if echo "$OUTPUT" | grep -q "overworld"; then
  pass "Gemini generated overworld commands"
  echo " Generated commands:"
  echo "$OUTPUT" | grep -E "^\s*-" | sed 's/^/ /'
else
  fail "Expected overworld commands in output, got: $OUTPUT"
fi

# Test 6: Test explicit provider selection
echo ""
echo "Test 6: Test explicit provider selection (YAZE_AI_PROVIDER=gemini)"
# Note: Current implementation doesn't have explicit "gemini" provider value
# It auto-detects from GEMINI_API_KEY. But we can test that Ollama doesn't override.
unset YAZE_AI_PROVIDER
OUTPUT=$("$Z3ED_BIN" agent plan --prompt "Export palette 0" 2>&1)
if echo "$OUTPUT" | grep -q "Using Gemini AI"; then
  pass "Gemini selected when GEMINI_API_KEY present"
else
  warn "Expected Gemini selection, got: $OUTPUT"
fi

# Test 7: Verify JSON response parsing
echo ""
echo "Test 7: Verify JSON response parsing (check for command format)"
OUTPUT=$("$Z3ED_BIN" agent plan --prompt "Set tile at (5,5) to 0x100" 2>&1)
# Commands should NOT have "z3ed" prefix (service should strip it).
# grep runs without -q, so any offending lines are printed before the warning.
if echo "$OUTPUT" | grep -E "^\s*- z3ed"; then
  warn "Commands still contain 'z3ed' prefix (should be stripped)"
else
  pass "Commands properly formatted without 'z3ed' prefix"
fi

# Test 8: Test multiple commands in response
echo ""
echo "Test 8: Test multiple commands generation"
OUTPUT=$("$Z3ED_BIN" agent plan --prompt "Export palette 0 to test.json, change color 5 to red, then import it back" 2>&1)
# grep -c exits non-zero when the count is 0; `|| true` keeps set -e quiet.
COMMAND_COUNT=$(echo "$OUTPUT" | grep -c -E "^\s*- " || true)
if [ "$COMMAND_COUNT" -ge 2 ]; then
  pass "Gemini generated multiple commands ($COMMAND_COUNT commands)"
  echo " Commands:"
  echo "$OUTPUT" | grep -E "^\s*-" | sed 's/^/ /'
else
  warn "Expected multiple commands, got $COMMAND_COUNT"
fi

# Test 9: Error handling - invalid API key
echo ""
echo "Test 9: Error handling with invalid API key"
SAVED_KEY="$GEMINI_API_KEY"
export GEMINI_API_KEY="invalid_key_12345"
OUTPUT=$("$Z3ED_BIN" agent plan --prompt "Test" 2>&1 || true)
# -E alternation is used because `\|` inside a basic regex is a GNU
# extension and is not portable across grep implementations.
if echo "$OUTPUT" | grep -q -E "Invalid Gemini API key|Falling back to MockAIService"; then
  pass "Service handles invalid API key gracefully"
else
  warn "Expected error handling message, got: $OUTPUT"
fi
# Restore key
export GEMINI_API_KEY="$SAVED_KEY"

# Test 10: Model override via environment
echo ""
echo "Test 10: Model override via GEMINI_MODEL environment variable"
export GEMINI_MODEL="gemini-1.5-pro"
OUTPUT=$("$Z3ED_BIN" agent plan --prompt "Test" 2>&1)
if echo "$OUTPUT" | grep -q "gemini-1.5-pro"; then
  pass "GEMINI_MODEL environment variable respected"
else
  warn "Expected model override, got: $OUTPUT"
fi
unset GEMINI_MODEL

echo ""
echo "======================================"
echo "✅ Gemini Integration Test Suite Complete"
echo ""
echo "Summary:"
echo " - Gemini API accessible"
echo " - Command generation working"
echo " - Error handling functional"
echo " - JSON parsing robust"
echo ""
echo "Next steps:"
echo " 1. Test with various prompt types"
echo " 2. Measure response latency"
echo " 3. Compare accuracy with Ollama"
echo " 4. Consider rate limiting for production"

View File

@@ -1,172 +0,0 @@
#!/bin/bash
# Test script for Ollama AI service integration
# This script validates Phase 1 implementation

set -e

printf '%s\n' \
  "🧪 Testing Ollama AI Service Integration (Phase 1)" \
  "==================================================" \
  ""

# Running tallies, updated by the result helpers.
TESTS_FAILED=0
TESTS_PASSED=0

# Colors for output (ANSI escape sequences, stored unexpanded)
NC='\033[0m' # No Color
BLUE='\033[0;34m'
YELLOW='\033[1;33m'
GREEN='\033[0;32m'
RED='\033[0;31m'
# Helper functions

# Records a passing check: prints a green "✓ PASS:" line and increments
# TESTS_PASSED.
#   $1 - description of the check
# The counter uses an arithmetic expansion assignment, not ((var++)): the
# post-increment form returns status 1 when the old value is 0, which under
# `set -e` terminated the script on the first recorded result.
pass_test() {
  printf '%b\n' "${GREEN}✓ PASS:${NC} $1"
  TESTS_PASSED=$((TESTS_PASSED + 1))
}

# Records a failing check: prints a red "✗ FAIL:" line and increments
# TESTS_FAILED. Does not exit.
#   $1 - description of the check
fail_test() {
  printf '%b\n' "${RED}✗ FAIL:${NC} $1"
  TESTS_FAILED=$((TESTS_FAILED + 1))
}

# Prints an informational line; neither counter is touched.
#   $1 - message
info() {
  printf '%b\n' "${BLUE}${NC} $1"
}
# Tests 1-3 always run. Tests 4-7 are nested: each one runs only when the
# previous prerequisite (installed -> server up -> model present) passed.
# z3ed is looked up at ./build/bin/z3ed relative to the current directory.

# Test 1: Check if z3ed built successfully
echo "Test 1: z3ed executable exists"
if [ -f "./build/bin/z3ed" ]; then
  pass_test "z3ed executable found"
else
  fail_test "z3ed executable not found"
  exit 1
fi
echo ""

# Test 2: Test MockAIService fallback (no LLM configured)
echo "Test 2: MockAIService fallback"
unset YAZE_AI_PROVIDER
unset GEMINI_API_KEY
unset CLAUDE_API_KEY
OUTPUT=$(./build/bin/z3ed agent plan --prompt "Place a tree" 2>&1 || true)
if echo "$OUTPUT" | grep -q "Using MockAIService"; then
  pass_test "MockAIService activated when no LLM configured"
  if echo "$OUTPUT" | grep -q "AI Agent Plan:"; then
    pass_test "MockAIService generated commands"
  fi
else
  fail_test "MockAIService fallback not working"
fi
echo ""

# Test 3: Test Ollama provider selection (without server)
echo "Test 3: Ollama provider selection (without server running)"
export YAZE_AI_PROVIDER=ollama
OUTPUT=$(./build/bin/z3ed agent plan --prompt "Validate ROM" 2>&1 || true)
if echo "$OUTPUT" | grep -q "Ollama unavailable"; then
  pass_test "Ollama health check detected unavailable server"
  if echo "$OUTPUT" | grep -q "Falling back to MockAIService"; then
    pass_test "Graceful fallback to MockAIService"
  else
    fail_test "Did not fall back to MockAIService"
  fi
else
  info "Note: If Ollama is running, this test will pass differently"
fi
echo ""

# Test 4: Check if Ollama is installed
echo "Test 4: Ollama installation check"
if command -v ollama &> /dev/null; then
  pass_test "Ollama is installed"

  # Test 5: Check if Ollama server is running
  echo ""
  echo "Test 5: Ollama server availability"
  if curl -s http://localhost:11434/api/tags > /dev/null 2>&1; then
    pass_test "Ollama server is running"

    # Test 6: Check for qwen2.5-coder model
    echo ""
    echo "Test 6: qwen2.5-coder:7b model availability"
    # -F matches the model name literally; as a regex its '.' would match
    # any character.
    if ollama list | grep -qF "qwen2.5-coder:7b"; then
      pass_test "Recommended model is available"

      # Test 7: End-to-end test with Ollama
      echo ""
      echo "Test 7: End-to-end LLM command generation"
      export YAZE_AI_PROVIDER=ollama
      export OLLAMA_MODEL=qwen2.5-coder:7b
      info "Testing: 'agent plan --prompt \"Validate the ROM\"'"
      # `|| true` matches Tests 2 and 3: a non-zero exit must reach the
      # checks below and be tallied, not abort the script under `set -e`
      # before the summary prints.
      OUTPUT=$(./build/bin/z3ed agent plan --prompt "Validate the ROM" 2>&1 || true)
      if echo "$OUTPUT" | grep -q "Using Ollama AI"; then
        pass_test "Ollama AI service activated"
      else
        fail_test "Ollama AI service not activated"
      fi
      if echo "$OUTPUT" | grep -q "AI Agent Plan:"; then
        pass_test "Command generation completed"
        # Check if reasonable commands were generated
        if echo "$OUTPUT" | grep -q "rom"; then
          pass_test "Generated ROM-related command"
        else
          fail_test "Generated command doesn't seem ROM-related"
        fi
      else
        fail_test "No commands generated"
      fi
      echo ""
      echo "Generated output:"
      echo "---"
      echo "$OUTPUT"
      echo "---"
    else
      fail_test "qwen2.5-coder:7b not found"
      info "Install with: ollama pull qwen2.5-coder:7b"
    fi
  else
    fail_test "Ollama server not running"
    info "Start with: ollama serve"
  fi
else
  fail_test "Ollama not installed"
  info "Install with: brew install ollama (macOS)"
  info "Or visit: https://ollama.com/download"
fi

echo ""
echo "=================================================="
echo "Test Summary:"
echo -e " ${GREEN}Passed: $TESTS_PASSED${NC}"
echo -e " ${RED}Failed: $TESTS_FAILED${NC}"
echo ""

# The exit status mirrors the tally: 0 only when nothing was recorded as failed.
if [ "$TESTS_FAILED" -eq 0 ]; then
  echo -e "${GREEN}✓ All tests passed!${NC}"
  echo ""
  echo "Next steps:"
  echo " 1. If Ollama tests were skipped, install and configure:"
  echo " brew install ollama"
  echo " ollama serve &"
  echo " ollama pull qwen2.5-coder:7b"
  echo ""
  echo " 2. Try the full agent workflow:"
  echo " export YAZE_AI_PROVIDER=ollama"
  echo " ./build/bin/z3ed agent run --prompt \"Validate ROM\" --rom zelda3.sfc --sandbox"
  echo ""
  echo " 3. Check the implementation checklist:"
  echo " docs/z3ed/LLM-IMPLEMENTATION-CHECKLIST.md"
  exit 0
else
  echo -e "${RED}✗ Some tests failed${NC}"
  echo "Review the output above for details"
  exit 1
fi