""" Celery configuration for the Auto SIGMA Rule Generator """ import os from celery import Celery from celery.schedules import crontab from kombu import Queue # Celery configuration broker_url = os.getenv('CELERY_BROKER_URL', 'redis://redis:6379/0') result_backend = os.getenv('CELERY_RESULT_BACKEND', 'redis://redis:6379/0') # Create Celery app celery_app = Celery( 'sigma_generator', broker=broker_url, backend=result_backend, include=[ 'tasks.bulk_tasks', 'tasks.sigma_tasks', 'tasks.data_sync_tasks', 'tasks.maintenance_tasks' ] ) # Celery configuration celery_app.conf.update( # Serialization task_serializer='json', accept_content=['json'], result_serializer='json', # Timezone timezone='UTC', enable_utc=True, # Task tracking task_track_started=True, task_send_sent_event=True, # Result backend settings result_expires=3600, # Results expire after 1 hour result_backend_transport_options={ 'master_name': 'mymaster', 'visibility_timeout': 3600, }, # Worker settings worker_prefetch_multiplier=1, task_acks_late=True, worker_max_tasks_per_child=1000, # Task routes - different queues for different types of tasks task_routes={ 'tasks.bulk_tasks.*': {'queue': 'bulk_processing'}, 'tasks.sigma_tasks.*': {'queue': 'sigma_generation'}, 'tasks.data_sync_tasks.*': {'queue': 'data_sync'}, }, # Queue definitions task_default_queue='default', task_queues=( Queue('default', routing_key='default'), Queue('bulk_processing', routing_key='bulk_processing'), Queue('sigma_generation', routing_key='sigma_generation'), Queue('data_sync', routing_key='data_sync'), ), # Retry settings task_default_retry_delay=60, # 1 minute task_max_retries=3, # Monitoring worker_send_task_events=True, # Optimized Beat schedule for daily workflow # WORKFLOW: NVD incremental -> Exploit syncs -> Reference sync -> SIGMA rules beat_schedule={ # STEP 1: NVD Incremental Update - Daily at 2:00 AM # This runs first to get the latest CVE data from NVD 'daily-nvd-incremental-update': { 'task': 'bulk_tasks.incremental_update_task', 'schedule': crontab(minute=0, hour=2), # Daily at 2:00 AM 'options': {'queue': 'bulk_processing'}, 'kwargs': {'batch_size': 100, 'skip_nvd': False, 'skip_nomi_sec': True} }, # STEP 2: Exploit Data Syncing - Daily starting at 3:00 AM # These run in parallel but start at different times to avoid conflicts # CISA KEV Sync - Daily at 3:00 AM (15 minutes after NVD) 'daily-cisa-kev-sync': { 'task': 'data_sync_tasks.sync_cisa_kev', 'schedule': crontab(minute=0, hour=3), # Daily at 3:00 AM 'options': {'queue': 'data_sync'}, 'kwargs': {'batch_size': 100} }, # Nomi-sec PoC Sync - Daily at 3:15 AM 'daily-nomi-sec-sync': { 'task': 'data_sync_tasks.sync_nomi_sec', 'schedule': crontab(minute=15, hour=3), # Daily at 3:15 AM 'options': {'queue': 'data_sync'}, 'kwargs': {'batch_size': 100} }, # GitHub PoC Sync - Daily at 3:30 AM 'daily-github-poc-sync': { 'task': 'data_sync_tasks.sync_github_poc', 'schedule': crontab(minute=30, hour=3), # Daily at 3:30 AM 'options': {'queue': 'data_sync'}, 'kwargs': {'batch_size': 50} }, # ExploitDB Sync - Daily at 3:45 AM 'daily-exploitdb-sync': { 'task': 'data_sync_tasks.sync_exploitdb', 'schedule': crontab(minute=45, hour=3), # Daily at 3:45 AM 'options': {'queue': 'data_sync'}, 'kwargs': {'batch_size': 30} }, # CVE2CAPEC MITRE ATT&CK Mapping Sync - Daily at 4:00 AM 'daily-cve2capec-sync': { 'task': 'data_sync_tasks.sync_cve2capec', 'schedule': crontab(minute=0, hour=4), # Daily at 4:00 AM 'options': {'queue': 'data_sync'}, 'kwargs': {'force_refresh': False} # Only refresh if cache is stale }, # 
ExploitDB Index Rebuild - Daily at 4:15 AM 'daily-exploitdb-index-build': { 'task': 'data_sync_tasks.build_exploitdb_index', 'schedule': crontab(minute=15, hour=4), # Daily at 4:15 AM 'options': {'queue': 'data_sync'} }, # STEP 3: Reference Content Sync - Daily at 5:00 AM # This is the longest-running task, starts after exploit syncs have time to complete 'daily-reference-content-sync': { 'task': 'data_sync_tasks.sync_reference_content', 'schedule': crontab(minute=0, hour=5), # Daily at 5:00 AM 'options': {'queue': 'data_sync'}, 'kwargs': {'batch_size': 30, 'max_cves': 200, 'force_resync': False} }, # STEP 4: SIGMA Rule Generation - Daily at 8:00 AM # This runs LAST after all other daily data sync jobs 'daily-sigma-rule-generation': { 'task': 'bulk_tasks.generate_enhanced_sigma_rules', 'schedule': crontab(minute=0, hour=8), # Daily at 8:00 AM 'options': {'queue': 'sigma_generation'} }, # LLM-Enhanced SIGMA Rule Generation - Daily at 9:00 AM # Additional LLM-based rule generation after standard rules 'daily-llm-sigma-generation': { 'task': 'sigma_tasks.generate_enhanced_rules', 'schedule': crontab(minute=0, hour=9), # Daily at 9:00 AM 'options': {'queue': 'sigma_generation'}, 'kwargs': {'cve_ids': None} # Process all CVEs with PoCs }, # MAINTENANCE TASKS # Database Cleanup - Weekly on Sunday at 1:00 AM (before daily workflow) 'weekly-database-cleanup': { 'task': 'tasks.maintenance_tasks.database_cleanup_comprehensive', 'schedule': crontab(minute=0, hour=1, day_of_week=0), # Sunday at 1:00 AM 'options': {'queue': 'default'}, 'kwargs': {'days_to_keep': 30, 'cleanup_failed_jobs': True, 'cleanup_logs': True} }, # Health Check - Every 15 minutes 'health-check-detailed': { 'task': 'tasks.maintenance_tasks.health_check_detailed', 'schedule': crontab(minute='*/15'), # Every 15 minutes 'options': {'queue': 'default'} }, # Celery result cleanup - Daily at 1:30 AM 'daily-cleanup-old-results': { 'task': 'tasks.maintenance_tasks.cleanup_old_results', 'schedule': crontab(minute=30, hour=1), # Daily at 1:30 AM 'options': {'queue': 'default'} }, }, ) # Configure logging celery_app.conf.update( worker_log_format='[%(asctime)s: %(levelname)s/%(processName)s] %(message)s', worker_task_log_format='[%(asctime)s: %(levelname)s/%(processName)s][%(task_name)s(%(task_id)s)] %(message)s', ) # Database session configuration for tasks from sqlalchemy import create_engine from sqlalchemy.orm import sessionmaker # Database configuration DATABASE_URL = os.getenv('DATABASE_URL', 'postgresql://cve_user:cve_password@db:5432/cve_sigma_db') # Create engine and session factory engine = create_engine(DATABASE_URL) SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine) def get_db_session(): """Get database session for tasks""" return SessionLocal() # Import all task modules to register them def register_tasks(): """Register all task modules""" try: from tasks import bulk_tasks, sigma_tasks, data_sync_tasks, maintenance_tasks print("All task modules registered successfully") except ImportError as e: print(f"Warning: Could not import some task modules: {e}") # Auto-register tasks when module is imported if __name__ != "__main__": register_tasks()
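

# --- Optional convenience sketch (illustrative addition; nothing else in this
# module depends on it) ---
# get_db_session() above returns a raw session that the calling task must
# close itself. One possible wrapper is a context manager so task code can
# write `with db_session() as db: ...` and have commit/rollback/close handled:
from contextlib import contextmanager


@contextmanager
def db_session():
    """Yield a SQLAlchemy session; commit on success, roll back on error, always close."""
    db = SessionLocal()
    try:
        yield db
        db.commit()
    except Exception:
        db.rollback()
        raise
    finally:
        db.close()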