auto_sigma_rule_generator/backend/routers/bulk_operations.py
bpmcdevitt a6fb367ed4 refactor: modularize backend architecture for improved maintainability
- Extract database models from monolithic main.py (2,373 lines) into organized modules
- Implement service layer pattern with dedicated business logic classes
- Split API endpoints into modular FastAPI routers by functionality
- Add centralized configuration management with environment variable handling
- Create proper separation of concerns across data, service, and presentation layers

**Architecture Changes:**
- models/: SQLAlchemy database models (CVE, SigmaRule, RuleTemplate, BulkProcessingJob)
- config/: Centralized settings and database configuration
- services/: Business logic (CVEService, SigmaRuleService, GitHubExploitAnalyzer)
- routers/: Modular API endpoints (cves, sigma_rules, bulk_operations, llm_operations)
- schemas/: Pydantic request/response models

**Key Improvements:**
- 95% reduction in main.py size (2,373 → 120 lines)
- Updated 15+ backend files with proper import structure
- Eliminated circular dependencies and tight coupling
- Enhanced testability with isolated service components
- Better code organization for team collaboration

**Backward Compatibility:**
- All API endpoints maintain same URLs and behavior
- Zero breaking changes to existing functionality
- Database schema unchanged
- Environment variables preserved

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-07-14 17:51:23 -05:00

120 lines
No EOL
4.4 KiB
Python

from typing import List, Optional
from fastapi import APIRouter, HTTPException, Depends, BackgroundTasks
from sqlalchemy.orm import Session
from config.database import get_db
from models import BulkProcessingJob, CVE, SigmaRule
from schemas import BulkSeedRequest, NomiSecSyncRequest, GitHubPoCSyncRequest, ExploitDBSyncRequest, CISAKEVSyncRequest, ReferenceSyncRequest
from services import CVEService, SigmaRuleService
router = APIRouter(prefix="/api", tags=["bulk-operations"])
@router.post("/bulk-seed")
async def bulk_seed(request: BulkSeedRequest, background_tasks: BackgroundTasks, db: Session = Depends(get_db)):
"""Start bulk seeding operation"""
from bulk_seeder import BulkSeeder
async def run_bulk_seed():
try:
seeder = BulkSeeder(db)
result = await seeder.full_bulk_seed(
start_year=request.start_year,
end_year=request.end_year,
skip_nvd=request.skip_nvd,
skip_nomi_sec=request.skip_nomi_sec
)
print(f"Bulk seed completed: {result}")
except Exception as e:
print(f"Bulk seed failed: {str(e)}")
background_tasks.add_task(run_bulk_seed)
return {"message": "Bulk seeding started", "status": "running"}
@router.post("/incremental-update")
async def incremental_update(background_tasks: BackgroundTasks, db: Session = Depends(get_db)):
"""Start incremental update using NVD modified/recent feeds"""
from nvd_bulk_processor import NVDBulkProcessor
async def run_incremental_update():
try:
processor = NVDBulkProcessor(db)
result = await processor.incremental_update()
print(f"Incremental update completed: {result}")
except Exception as e:
print(f"Incremental update failed: {str(e)}")
background_tasks.add_task(run_incremental_update)
return {"message": "Incremental update started", "status": "running"}
@router.get("/bulk-jobs")
async def get_bulk_jobs(db: Session = Depends(get_db)):
"""Get all bulk processing jobs"""
jobs = db.query(BulkProcessingJob).order_by(BulkProcessingJob.created_at.desc()).limit(20).all()
result = []
for job in jobs:
job_dict = {
'id': str(job.id),
'job_type': job.job_type,
'status': job.status,
'year': job.year,
'total_items': job.total_items,
'processed_items': job.processed_items,
'failed_items': job.failed_items,
'error_message': job.error_message,
'job_metadata': job.job_metadata,
'started_at': job.started_at,
'completed_at': job.completed_at,
'cancelled_at': job.cancelled_at,
'created_at': job.created_at
}
result.append(job_dict)
return result
@router.get("/bulk-status")
async def get_bulk_status(db: Session = Depends(get_db)):
"""Get comprehensive bulk processing status"""
from bulk_seeder import BulkSeeder
seeder = BulkSeeder(db)
status = await seeder.get_seeding_status()
return status
@router.get("/poc-stats")
async def get_poc_stats(db: Session = Depends(get_db)):
"""Get PoC-related statistics"""
from sqlalchemy import func, text
total_cves = db.query(CVE).count()
cves_with_pocs = db.query(CVE).filter(CVE.poc_count > 0).count()
# Get PoC quality distribution
quality_distribution = db.execute(text("""
SELECT
COUNT(*) as total,
AVG(poc_count) as avg_poc_count,
MAX(poc_count) as max_poc_count
FROM cves
WHERE poc_count > 0
""")).fetchone()
# Get rules with PoC data
total_rules = db.query(SigmaRule).count()
exploit_based_rules = db.query(SigmaRule).filter(SigmaRule.exploit_based == True).count()
return {
"total_cves": total_cves,
"cves_with_pocs": cves_with_pocs,
"poc_coverage_percentage": round((cves_with_pocs / total_cves * 100), 2) if total_cves > 0 else 0,
"average_pocs_per_cve": round(quality_distribution.avg_poc_count, 2) if quality_distribution.avg_poc_count else 0,
"max_pocs_for_single_cve": quality_distribution.max_poc_count or 0,
"total_rules": total_rules,
"exploit_based_rules": exploit_based_rules,
"exploit_based_percentage": round((exploit_based_rules / total_rules * 100), 2) if total_rules > 0 else 0
}