This commit introduces major performance improvements and migrates from custom job scheduling to Celery Beat for better reliability and scalability.

### 🚀 Performance Optimizations

**CVE2CAPEC Client Performance (Fixed startup blocking)**
- Implement lazy loading with 24-hour cache for CVE2CAPEC mappings
- Add background task for CVE2CAPEC sync (data_sync_tasks.sync_cve2capec)
- Remove blocking data fetch during client initialization
- API endpoint: POST /api/sync-cve2capec

**ExploitDB Client Performance (Fixed webapp request blocking)**
- Implement global file index cache to prevent rebuilding on every request
- Add lazy loading with 24-hour cache expiry for 46K+ exploit index
- Background task for index building (data_sync_tasks.build_exploitdb_index)
- API endpoint: POST /api/build-exploitdb-index

### 🔄 Celery Migration & Scheduling

**Celery Beat Integration**
- Migrate from custom job scheduler to Celery Beat for reliability
- Remove 'finetuned' LLM provider (logic moved to ollama container)
- Optimized daily workflow with proper timing and dependencies

**New Celery Tasks Structure**
- tasks/bulk_tasks.py - NVD bulk processing and SIGMA generation
- tasks/data_sync_tasks.py - All data synchronization tasks
- tasks/maintenance_tasks.py - System maintenance and cleanup
- tasks/sigma_tasks.py - SIGMA rule generation tasks

**Daily Schedule (Optimized)**
```
1:00 AM → Weekly cleanup (Sundays)
1:30 AM → Daily result cleanup
2:00 AM → NVD incremental update
3:00 AM → CISA KEV sync
3:15 AM → Nomi-sec PoC sync
3:30 AM → GitHub PoC sync
3:45 AM → ExploitDB sync
4:00 AM → CVE2CAPEC MITRE ATT&CK sync
4:15 AM → ExploitDB index rebuild
5:00 AM → Reference content sync
8:00 AM → SIGMA rule generation
9:00 AM → LLM-enhanced SIGMA generation
Every 15min → Health checks
```

### 🐳 Docker & Infrastructure

**Enhanced Docker Setup**
- Ollama setup with integrated SIGMA model creation (setup_ollama_with_sigma.py)
- Initial database population check and trigger (initial_setup.py)
- Proper service dependencies and health checks
- Remove manual post-rebuild script requirements

**Service Architecture**
- Celery worker with 4-queue system (default, bulk_processing, sigma_generation, data_sync)
- Flower monitoring dashboard (localhost:5555)
- Redis as message broker and result backend

### 🎯 API Improvements

**Background Task Endpoints**
- GitHub PoC sync now uses Celery (was blocking backend)
- All sync operations return task IDs and monitoring URLs
- Consistent error handling and progress tracking

**New Endpoints**
- POST /api/sync-cve2capec - CVE2CAPEC mapping sync
- POST /api/build-exploitdb-index - ExploitDB index rebuild

### 📁 Cleanup

**Removed Files**
- fix_sigma_model.sh (replaced by setup_ollama_with_sigma.py)
- Various test_* and debug_* files no longer needed
- Old training scripts related to removed 'finetuned' provider
- Utility scripts replaced by Docker services

### 🔧 Configuration

**Key Files Added/Modified**
- backend/celery_config.py - Complete Celery configuration
- backend/initial_setup.py - First-boot database population
- backend/setup_ollama_with_sigma.py - Integrated Ollama setup
- CLAUDE.md - Project documentation and development guide

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
201 lines
5.8 KiB
YAML
201 lines
5.8 KiB
YAML
---
# Docker Compose stack: PostgreSQL, Redis, Ollama, the uvicorn-served backend,
# a React dev-server frontend, and the Celery worker / beat / Flower trio.
# Two one-shot jobs (ollama-setup, initial-setup) run to completion and are
# never restarted.

# Environment shared verbatim by backend, celery-worker and celery-beat.
# Kept in one fragment so the three services cannot drift apart.
# Every host-supplied variable uses the `:-` form so an unset variable
# resolves to its default (or empty) without a Compose warning.
x-app-environment: &app-environment
  # NOTE(review): credentials are hard-coded here and in the db service;
  # acceptable for local development only.
  DATABASE_URL: postgresql://cve_user:cve_password@db:5432/cve_sigma_db
  CELERY_BROKER_URL: redis://redis:6379/0
  CELERY_RESULT_BACKEND: redis://redis:6379/0
  NVD_API_KEY: ${NVD_API_KEY:-}
  GITHUB_TOKEN: ${GITHUB_TOKEN:-}
  OPENAI_API_KEY: ${OPENAI_API_KEY:-}
  ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY:-}
  OLLAMA_BASE_URL: ${OLLAMA_BASE_URL:-http://ollama:11434}
  LLM_PROVIDER: ${LLM_PROVIDER:-ollama}
  LLM_MODEL: ${LLM_MODEL:-llama3.2}
  LLM_ENABLED: ${LLM_ENABLED:-true}
  # NOTE(review): presumably only needed by a fine-tuned model provider;
  # confirm the backend still reads these two before removing them.
  FINETUNED_MODEL_PATH: ${FINETUNED_MODEL_PATH:-/app/models/sigma_llama_finetuned}
  HUGGING_FACE_TOKEN: ${HUGGING_FACE_TOKEN:-}

services:
  # PostgreSQL database; init.sql is mounted into the image's initdb directory.
  db:
    image: postgres:15
    environment:
      POSTGRES_DB: cve_sigma_db
      POSTGRES_USER: cve_user
      POSTGRES_PASSWORD: cve_password
    volumes:
      - postgres_data:/var/lib/postgresql/data
      - ./init.sql:/docker-entrypoint-initdb.d/init.sql
    ports:
      # NOTE(review): publishes Postgres on the host; drop if only
      # containers need access.
      - "5432:5432"
    healthcheck:
      # Other services gate on this via `condition: service_healthy`.
      test: ["CMD-SHELL", "pg_isready -U cve_user -d cve_sigma_db"]
      interval: 30s
      timeout: 10s
      retries: 3

  # HTTP API served by uvicorn with auto-reload against the bind-mounted source.
  backend:
    build: ./backend
    ports:
      - "8000:8000"
    environment: *app-environment
    depends_on:
      db:
        condition: service_healthy
      redis:
        condition: service_started
      # Waits for the one-shot Ollama setup job to exit with status 0.
      ollama-setup:
        condition: service_completed_successfully
    volumes:
      - ./backend:/app
      - ./github_poc_collector:/github_poc_collector
      - ./exploit-db-mirror:/app/exploit-db-mirror
      - ./models:/app/models
    command: uvicorn main:app --host 0.0.0.0 --port 8000 --reload

  # React development server.
  frontend:
    build: ./frontend
    ports:
      - "3000:3000"
    environment:
      # Points at the backend's published host port.
      REACT_APP_API_URL: http://localhost:8000
    volumes:
      - ./frontend:/app
      # Anonymous volume: masks ./frontend/node_modules from the bind mount.
      - /app/node_modules
    command: npm start

  # Redis with append-only persistence; used as Celery broker and result
  # backend (see CELERY_* variables).
  redis:
    image: redis:7-alpine
    ports:
      # NOTE(review): publishes Redis on the host without authentication.
      - "6379:6379"
    command: redis-server --appendonly yes
    volumes:
      - redis_data:/data

  # Ollama LLM server.
  ollama:
    image: ollama/ollama:latest
    ports:
      - "11434:11434"
    volumes:
      - ollama_data:/root/.ollama
    environment:
      # Mapping form, consistent with every other service in this file.
      OLLAMA_HOST: "0.0.0.0"
    restart: unless-stopped
    deploy:
      resources:
        limits:
          memory: 5G
        reservations:
          memory: 3G

  # One-shot job: runs setup_ollama_with_sigma.py against the ollama service.
  ollama-setup:
    build: ./backend
    depends_on:
      - ollama
    environment:
      OLLAMA_BASE_URL: http://ollama:11434
      LLM_MODEL: llama3.2
    volumes:
      - ./backend:/app
    command: python setup_ollama_with_sigma.py
    restart: "no"

  # One-shot job: runs initial_setup.py once db, redis and a healthy
  # Celery worker are available.
  initial-setup:
    build: ./backend
    depends_on:
      db:
        condition: service_healthy
      redis:
        condition: service_started
      celery-worker:
        condition: service_healthy
    environment:
      DATABASE_URL: postgresql://cve_user:cve_password@db:5432/cve_sigma_db
      CELERY_BROKER_URL: redis://redis:6379/0
      CELERY_RESULT_BACKEND: redis://redis:6379/0
    volumes:
      - ./backend:/app
    command: python initial_setup.py
    restart: "no"

  # Celery worker (4 concurrent processes).
  celery-worker:
    build: ./backend
    command: celery -A celery_config worker --loglevel=info --concurrency=4
    environment: *app-environment
    depends_on:
      db:
        condition: service_healthy
      redis:
        condition: service_started
      ollama-setup:
        condition: service_completed_successfully
    volumes:
      - ./backend:/app
      - ./github_poc_collector:/github_poc_collector
      - ./exploit-db-mirror:/app/exploit-db-mirror
      - ./models:/app/models
    restart: unless-stopped
    healthcheck:
      # initial-setup, celery-beat and flower wait for this to pass.
      test: ["CMD", "celery", "-A", "celery_config", "inspect", "ping"]
      interval: 30s
      timeout: 10s
      retries: 3

  # Celery Beat scheduler; pidfile is written under /tmp instead of the
  # bind-mounted /app.
  celery-beat:
    build: ./backend
    command: celery -A celery_config beat --loglevel=info --pidfile=/tmp/celerybeat.pid
    environment: *app-environment
    depends_on:
      db:
        condition: service_healthy
      redis:
        condition: service_started
      celery-worker:
        condition: service_healthy
    volumes:
      - ./backend:/app
      - ./github_poc_collector:/github_poc_collector
      - ./exploit-db-mirror:/app/exploit-db-mirror
      - ./models:/app/models
    restart: unless-stopped

  # Flower monitoring UI for Celery, published on port 5555.
  flower:
    build: ./backend
    command: celery -A celery_config flower --port=5555
    ports:
      - "5555:5555"
    environment:
      CELERY_BROKER_URL: redis://redis:6379/0
      CELERY_RESULT_BACKEND: redis://redis:6379/0
    depends_on:
      redis:
        condition: service_started
      celery-worker:
        condition: service_healthy
    restart: unless-stopped

# Named volumes referenced by db, redis and ollama above.
volumes:
  postgres_data:
  redis_data:
  ollama_data: