269 lines
7.4 KiB
Bash
Executable File
269 lines
7.4 KiB
Bash
Executable File
#!/bin/bash
|
|
# Symbol Extraction Tool - Extract symbols from compiled object files
# Creates a JSON database of all symbols and their defining object files
#
# Usage: ./scripts/extract-symbols.sh [BUILD_DIR] [OUTPUT_FILE]
#   BUILD_DIR:   Path to CMake build directory (default: current directory)
#   OUTPUT_FILE: Path to output JSON file (default: BUILD_DIR/symbol_database.json)
|
|
|
|
# Strict mode: stop on a failing command, on expansion of an unset variable,
# and when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# ANSI escape sequences used to colorize output (rendered by `echo -e`)
NC='\033[0m' # No Color
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
|
|
|
|
# Configuration
# Positional arguments: an omitted or empty argument selects the default.
BUILD_DIR="."
if [[ -n "${1:-}" ]]; then
  BUILD_DIR="$1"
fi

if [[ -n "${2:-}" ]]; then
  OUTPUT_FILE="$2"
else
  OUTPUT_FILE="${BUILD_DIR}/symbol_database.json"
fi

# Scratch file that collects "symbol|object|type" records during extraction
TEMP_SYMBOLS="${BUILD_DIR}/.temp_symbols.txt"

# Absolute directory holding this script, and the directory above it
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(dirname "${SCRIPT_DIR}")"
|
|
|
|
# Platform detection
# Classify the kernel name reported by `uname -s`; at most one flag becomes
# true, and all three stay false for an unrecognized platform.
UNAME_S=$(uname -s)
IS_MACOS=false
IS_LINUX=false
IS_WINDOWS=false

if [[ "${UNAME_S}" == Darwin* ]]; then
  IS_MACOS=true
elif [[ "${UNAME_S}" == Linux* ]]; then
  IS_LINUX=true
elif [[ "${UNAME_S}" == MINGW* || "${UNAME_S}" == MSYS* || "${UNAME_S}" == CYGWIN* ]]; then
  IS_WINDOWS=true
fi
|
|
|
|
# Validation
# The build directory must exist before anything is scanned. The error is a
# diagnostic, so it goes to stderr. printf with %s keeps backslashes in a
# user-supplied path literal (echo -e would interpret them as escapes), while
# %b still renders the color escapes.
if [[ ! -d "${BUILD_DIR}" ]]; then
  printf '%bError: Build directory not found: %s%b\n' "${RED}" "${BUILD_DIR}" "${NC}" >&2
  exit 1
fi

# Banner describing this run
printf '%b=== Symbol Extraction Tool ===%b\n' "${BLUE}" "${NC}"
printf 'Build directory: %s\n' "${BUILD_DIR}"
printf 'Output file: %s\n' "${OUTPUT_FILE}"
echo ""
|
|
|
|
# Function to extract symbols using nm (Unix/macOS)
#
# Arguments: $1 - path to an object file
# Outputs:   one "symbol|object-basename|type" line on stdout per `nm -P`
#            record; records whose fields after the name contain "U"
#            (undefined references) and empty lines are dropped
# Returns:   1 when the nm pipeline reports failure, 0 otherwise
extract_symbols_unix() {
  local obj_file="$1"
  local obj_name="${obj_file##*/}"
  local sym rest sym_type

  if ! nm -P "${obj_file}" 2>/dev/null | while read -r sym rest; do
    # Filter out empty lines and undefined references
    [[ -n "${sym}" && "${rest}" != *"U"* ]] || continue

    # Symbol type (T=text, D=data, R=read-only, etc.) is the first field of
    # the remainder; parameter expansion avoids forking awk for every symbol.
    sym_type="${rest%%[[:space:]]*}"
    printf '%s|%s|%s\n' "${sym}" "${obj_name}" "${sym_type}"
  done; then
    return 1
  fi
  return 0
}
|
|
|
|
# Function to extract symbols using dumpbin (Windows)
#
# Arguments: $1 - path to an object file
# Outputs:   one "symbol|object-basename|?" line on stdout per matching
#            dumpbin line; the symbol is the last whitespace-separated field
# Returns:   0 when the dumpbin pipeline succeeds, 1 when it fails
#
# NOTE(review): under MSYS-style shells an argument such as "/symbols" may be
# subject to automatic path conversion - confirm on the target environment.
extract_symbols_windows() {
  local obj_file="$1"
  local obj_name="${obj_file##*/}"
  local line sym
  local sym_type="?" # Windows dumpbin doesn't clearly show type

  # The leading "!" matters: the previous version returned 1 when the pipeline
  # SUCCEEDED, so every good object file was counted as a failure.
  if ! dumpbin /symbols "${obj_file}" 2>/dev/null \
    | grep -E "^\s+[0-9A-F]+" \
    | while read -r line; do
      # `read` trimmed the surrounding whitespace, so whatever follows the
      # last whitespace character is the final field of the line.
      sym="${line##*[[:space:]]}"
      if [[ -n "${sym}" ]]; then
        printf '%s|%s|%s\n' "${sym}" "${obj_name}" "${sym_type}"
      fi
    done; then
    return 1
  fi
  return 0
}
|
|
|
|
# Function to collect all object files
#
# Globals:   BUILD_DIR (read), IS_WINDOWS (read)
# Outputs:   prints the path of a list file holding one object-file path per
#            line: *.o everywhere, plus *.obj when IS_WINDOWS is true
collect_object_files() {
  local list_path="${BUILD_DIR}/.object_files.tmp"
  local -a name_filter=(-name "*.o")

  if ${IS_WINDOWS}; then
    name_filter=('(' -name "*.obj" -o -name "*.o" ')')
  fi

  # Start from an empty list, then append whatever find reports; find errors
  # are ignored so an unreadable subdirectory does not abort the scan.
  : > "${list_path}"
  find "${BUILD_DIR}" -type f "${name_filter[@]}" 2>/dev/null >> "${list_path}" || true

  echo "${list_path}"
}
|
|
|
|
# Extract symbols from all object files
echo -e "${BLUE}Scanning for object files...${NC}"
OBJ_LIST=$(collect_object_files)
OBJ_COUNT=$(wc -l < "${OBJ_LIST}")
# Arithmetic expansion drops any padding wc puts around the number, so the
# count prints cleanly in the messages below.
OBJ_COUNT=$((OBJ_COUNT))

if [[ ${OBJ_COUNT} -eq 0 ]]; then
  echo -e "${YELLOW}Warning: No object files found in ${BUILD_DIR}${NC}"
  echo "Make sure to build the project first."
  exit 1
fi

echo -e "Found ${GREEN}${OBJ_COUNT}${NC} object files"
echo ""
echo -e "${BLUE}Extracting symbols (this may take a moment)...${NC}"

# Pick the platform-specific extractor once instead of on every iteration
if ${IS_WINDOWS}; then
  EXTRACT_SYMBOLS=extract_symbols_windows
else
  EXTRACT_SYMBOLS=extract_symbols_unix
fi

# Process object files and extract symbols
: > "${TEMP_SYMBOLS}"
PROCESSED=0
FAILED=0

# Counters are bumped with VAR=$((VAR + 1)) rather than ((VAR++)): the latter
# returns status 1 when the old value is 0, which under `set -e` terminated
# the script on the very first object file.
while IFS= read -r obj_file; do
  [[ -z "${obj_file}" ]] && continue

  if [[ ! -f "${obj_file}" ]]; then
    echo -e "${YELLOW}Skipping (not found): ${obj_file}${NC}"
    FAILED=$((FAILED + 1))
    continue
  fi

  # Append this object's records to the scratch file; a failing extractor
  # only counts as a failure and does not stop the scan.
  if "${EXTRACT_SYMBOLS}" "${obj_file}" >> "${TEMP_SYMBOLS}" 2>/dev/null; then
    PROCESSED=$((PROCESSED + 1))
  else
    FAILED=$((FAILED + 1))
  fi

  # Progress indicator
  if (( PROCESSED % 50 == 0 )); then
    echo -ne "\r Processed: ${PROCESSED}/${OBJ_COUNT} objects"
  fi
done < "${OBJ_LIST}"

echo -ne "\r Processed: ${GREEN}${PROCESSED}${NC}/${OBJ_COUNT} objects (${FAILED} failed) \n"
echo ""
|
|
|
|
# Generate JSON output
echo -e "${BLUE}Generating symbol database...${NC}"

# Use Python to generate proper JSON (more portable than jq).
#
# Shell values reach Python through argv and the heredoc delimiter is quoted,
# so nothing is spliced into the Python source: paths containing quotes or
# backslashes can no longer break or alter the program.
#
# No placeholder file is written beforehand. A placeholder made a failed run
# leave behind a well-formed database that reports zero symbols.
if ! python3 - "${TEMP_SYMBOLS}" "${OUTPUT_FILE}" "${UNAME_S}" "${BUILD_DIR}" "${PROCESSED}" << 'PYTHON_EOF'
import json
import sys
from collections import defaultdict
from datetime import datetime, timezone

symbols_path, output_path, platform_name, build_dir, processed = sys.argv[1:6]

# Read extracted symbols: one "symbol|object_file|type" record per line
symbol_dict = defaultdict(list)
total_symbols = 0

try:
    with open(symbols_path, "r") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            parts = line.split("|")
            if len(parts) >= 2:
                symbol = parts[0]
                obj_file = parts[1]
                sym_type = parts[2] if len(parts) > 2 else "?"

                symbol_dict[symbol].append({
                    "object_file": obj_file,
                    "type": sym_type
                })
                total_symbols += 1
except Exception as e:
    print(f"Error reading symbols: {e}", file=sys.stderr)
    sys.exit(1)

# Identify conflicts (symbols defined in multiple object files)
conflicts = [
    {
        "symbol": symbol,
        "count": len(definitions),
        "definitions": definitions
    }
    for symbol, definitions in symbol_dict.items()
    if len(definitions) > 1
]

# Sort conflicts by count (most duplicated first)
conflicts.sort(key=lambda x: x["count"], reverse=True)

# Same "<naive UTC ISO-8601>Z" text as before, without the deprecated
# datetime.utcnow()
timestamp = datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"

# Build output JSON; "symbols" only carries conflicted symbols to keep the
# file small
output = {
    "metadata": {
        "platform": platform_name,
        "build_dir": build_dir,
        "timestamp": timestamp,
        "object_files_scanned": int(processed),
        "total_symbols": total_symbols,
        "total_conflicts": len(conflicts)
    },
    "conflicts": conflicts,
    "symbols": {
        symbol: definitions
        for symbol, definitions in symbol_dict.items()
        if len(definitions) > 1
    }
}

# Write JSON
try:
    with open(output_path, "w") as f:
        json.dump(output, f, indent=2)
except Exception as e:
    print(f"Error writing JSON: {e}", file=sys.stderr)
    sys.exit(1)

print(f"Symbol database written to: {output_path}")
print(f"Total symbols: {total_symbols}")
print(f"Conflicts found: {len(conflicts)}")
PYTHON_EOF
then
  # Python already printed the specific reason on stderr; remove the scratch
  # files so a failed run does not leave them in the build directory.
  echo -e "${RED}Failed to generate symbol database${NC}" >&2
  rm -f "${TEMP_SYMBOLS}" "${OBJ_LIST}"
  exit 1
fi
|
|
|
|
# Cleanup
rm -f "${TEMP_SYMBOLS}" "${OBJ_LIST}"

# Report results
# Exit status: 0 when the database exists and lists no conflicts; 1 when it
# is missing, unreadable, or lists at least one conflict.
if [[ ! -f "${OUTPUT_FILE}" ]]; then
  echo -e "${RED}Failed to generate symbol database${NC}"
  exit 1
fi

echo -e "${GREEN}Success!${NC}"

# The path is handed to Python as an argument instead of being pasted into
# the source, so quotes in the path cannot break the snippet. A read failure
# is reported as an error: the old "?" fallback made the numeric test below
# fail and the script announce "No symbol conflicts" with exit status 0.
if ! CONFLICT_COUNT=$(python3 -c 'import json, sys; print(int(json.load(open(sys.argv[1]))["metadata"].get("total_conflicts", 0)))' "${OUTPUT_FILE}" 2>/dev/null); then
  echo -e "${RED}Error: could not read conflict count from ${OUTPUT_FILE}${NC}" >&2
  exit 1
fi

if [[ "${CONFLICT_COUNT}" -gt 0 ]]; then
  echo -e "${YELLOW}Found ${RED}${CONFLICT_COUNT}${YELLOW} symbol conflicts${NC}"
  exit 1 # Exit with error if conflicts found
else
  echo -e "${GREEN}No symbol conflicts detected!${NC}"
  exit 0
fi
|