#!/usr/bin/env python3
"""
Initial setup script that runs once on first boot to populate the database.
This script checks if initial data seeding is needed and triggers it via Celery.
"""

import os
import sys
import time
import logging
from contextlib import contextmanager
from datetime import datetime, timedelta
from typing import Optional

from sqlalchemy import create_engine, text
from sqlalchemy.orm import sessionmaker
from sqlalchemy.exc import OperationalError

# Add the current directory to path so we can import our modules
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Database configuration
DATABASE_URL = os.getenv('DATABASE_URL', 'postgresql://cve_user:cve_password@db:5432/cve_sigma_db')


@contextmanager
def _database_engine():
    """Yield a SQLAlchemy engine for DATABASE_URL and dispose of it on exit.

    Disposing releases the engine's connection pool, so short-lived engines
    created by this script do not leave connections open behind them.
    """
    engine = create_engine(DATABASE_URL)
    try:
        yield engine
    finally:
        engine.dispose()


def wait_for_database(max_retries: int = 30, delay: int = 5) -> bool:
    """Wait for database to be ready.

    Repeatedly runs ``SELECT 1`` against DATABASE_URL until it succeeds.

    Args:
        max_retries: Maximum number of connection attempts.
        delay: Seconds to sleep between failed attempts.

    Returns:
        True as soon as one attempt succeeds, False once every attempt failed.
    """
    logger.info("Waiting for database to be ready...")

    for attempt in range(max_retries):
        try:
            # Engine creation stays inside the try so that configuration
            # errors (e.g. a missing driver) are logged and retried rather
            # than crashing; the context manager disposes the engine after
            # every attempt instead of leaking one pool per retry.
            with _database_engine() as engine:
                with engine.connect() as conn:
                    conn.execute(text("SELECT 1"))
            logger.info("✅ Database is ready!")
            return True
        except OperationalError as e:
            logger.info(f"Attempt {attempt + 1}/{max_retries}: Database not ready yet ({e})")
        except Exception as e:
            logger.error(f"Unexpected error connecting to database: {e}")

        # No point sleeping after the final attempt.
        if attempt < max_retries - 1:
            time.sleep(delay)

    logger.error("❌ Database failed to become ready")
    return False


def check_initial_setup_needed() -> bool:
    """Check if initial setup is needed by examining the database state.

    Setup is considered needed when the ``cves`` table holds fewer than 1000
    rows AND no ``nvd_bulk_seed`` job with status ``completed`` was created
    in the last 30 days.

    Returns:
        True if setup is needed, or if the check itself failed (the script
        then errs on the side of seeding); False otherwise.
    """
    # NOTE(review): the 'initial_setup_marker' row written by
    # create_initial_setup_marker() is not consulted here, so a restart before
    # the seed finishes will trigger another seed - confirm this is intended.
    try:
        with _database_engine() as engine:
            SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)

            with SessionLocal() as session:
                # Check if we have any CVEs in the database
                result = session.execute(text("SELECT COUNT(*) FROM cves")).fetchone()
                cve_count = result[0] if result else 0

                logger.info(f"Current CVE count in database: {cve_count}")

                # Check if we have any bulk processing jobs that completed successfully
                bulk_jobs_result = session.execute(text("""
                    SELECT COUNT(*) FROM bulk_processing_jobs
                    WHERE job_type = 'nvd_bulk_seed'
                    AND status = 'completed'
                    AND created_at > NOW() - INTERVAL '30 days'
                """)).fetchone()

                recent_bulk_jobs = bulk_jobs_result[0] if bulk_jobs_result else 0

                logger.info(f"Recent successful bulk seed jobs: {recent_bulk_jobs}")

            # Initial setup needed if:
            # 1. Very few CVEs (less than 1000) AND
            # 2. No recent successful bulk seed jobs
            initial_setup_needed = cve_count < 1000 and recent_bulk_jobs == 0

            if initial_setup_needed:
                logger.info("🔄 Initial setup is needed - will trigger full NVD sync")
            else:
                logger.info("✅ Initial setup already completed - database has sufficient data")

            return initial_setup_needed

    except Exception as e:
        logger.error(f"Error checking initial setup status: {e}")
        # If we can't check, assume setup is needed
        return True


def trigger_initial_bulk_seed() -> Optional[str]:
    """Trigger initial bulk seed via Celery.

    Queues ``full_bulk_seed_task`` for NVD data from 2020 onwards, skipping
    the nomi-sec, exploitdb and CISA KEV sources.

    Returns:
        The ID of the queued Celery task, or None if queuing failed.
    """
    try:
        # Import here to avoid circular dependencies
        from celery_config import celery_app  # noqa: F401 - presumably imported to configure the Celery app; verify
        from tasks.bulk_tasks import full_bulk_seed_task

        logger.info("🚀 Triggering initial full NVD bulk seed...")

        # Start a comprehensive bulk seed job
        # Start from 2020 for faster initial setup, can be adjusted
        task_result = full_bulk_seed_task.delay(
            start_year=2020,       # Start from 2020 for faster initial setup
            end_year=None,         # Current year
            skip_nvd=False,
            skip_nomi_sec=True,    # Skip nomi-sec initially, will be done daily
            skip_exploitdb=True,   # Skip exploitdb initially, will be done daily
            skip_cisa_kev=True,    # Skip CISA KEV initially, will be done daily
        )

        logger.info(f"✅ Initial bulk seed task started with ID: {task_result.id}")
        logger.info(f"Monitor progress at: http://localhost:5555/task/{task_result.id}")

        return task_result.id

    except Exception as e:
        logger.error(f"❌ Failed to trigger initial bulk seed: {e}")
        return None


def create_initial_setup_marker() -> None:
    """Create a marker to indicate initial setup was attempted.

    Inserts an ``initial_setup_marker`` row into ``bulk_processing_jobs``.
    This is best-effort: any failure is logged and swallowed.
    """
    try:
        with _database_engine() as engine:
            SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)

            with SessionLocal() as session:
                # Insert a marker record
                session.execute(text("""
                    INSERT INTO bulk_processing_jobs (job_type, status, job_metadata, created_at, started_at)
                    VALUES ('initial_setup_marker', 'completed', '{"purpose": "initial_setup_marker"}', NOW(), NOW())
                    ON CONFLICT DO NOTHING
                """))
                session.commit()

        logger.info("✅ Created initial setup marker")

    except Exception as e:
        logger.error(f"Error creating initial setup marker: {e}")


def main() -> None:
    """Main initial setup function.

    Always terminates the process via ``sys.exit``: status 0 when setup is
    not needed or the seed task was queued, status 1 when the database never
    became available or the seed task could not be queued.
    """
    logger.info("🚀 Starting initial setup check...")

    # Step 1: Wait for database
    if not wait_for_database():
        logger.error("❌ Initial setup failed: Database not available")
        sys.exit(1)

    # Step 2: Check if initial setup is needed
    if not check_initial_setup_needed():
        logger.info("🎉 Initial setup not needed - database already populated")
        sys.exit(0)

    # Step 3: Wait for Celery to be ready
    logger.info("Waiting for Celery workers to be ready...")
    time.sleep(10)  # Give Celery workers time to start

    # Step 4: Trigger initial bulk seed
    task_id = trigger_initial_bulk_seed()

    if task_id:
        # Step 5: Create marker
        create_initial_setup_marker()

        logger.info("🎉 Initial setup triggered successfully!")
        logger.info(f"Task ID: {task_id}")
        logger.info("The system will begin daily scheduled tasks once initial setup completes.")
        sys.exit(0)
    else:
        logger.error("❌ Initial setup failed")
        sys.exit(1)


if __name__ == "__main__":
    main()