Migrate task tracking from BulkProcessingJob to Celery-based monitoring

- Remove BulkProcessingJob model and related endpoints from main.py
- Update CLAUDE.md to reference Flower dashboard for task monitoring
- Simplify enhanced_sigma_generator.py to use unified LLM client
- Remove job tracking logic from mcdevitt_poc_client.py
- Enhance CVE API with search and pagination support
- Update setup_ollama_with_sigma.py with improved checkpoint handling

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
Brendan McDevitt 2025-07-21 09:23:26 -05:00
parent 49963338d3
commit d51f3ea402
9 changed files with 615 additions and 1215 deletions


@ -180,10 +180,10 @@ make setup # Initial setup (creates .env from .env.example)
- **BulkProcessingJob**: Job tracking for bulk operations
### Frontend Structure (Enhanced)
- **Four Main Tabs**: Dashboard, CVEs, SIGMA Rules, Bulk Jobs
- **Enhanced Dashboard**: PoC coverage statistics, bulk processing controls
- **Bulk Jobs Tab**: Real-time job monitoring and system status
- **Three Main Tabs**: Dashboard, CVEs, SIGMA Rules
- **Enhanced Dashboard**: PoC coverage statistics, data synchronization controls
- **Enhanced CVE/Rule Display**: PoC quality indicators, exploit-based tagging
- **Task Monitoring**: Via Flower dashboard (http://localhost:5555)
### Data Processing Flow
1. **Bulk Seeding**: NVD JSON downloads → Database storage → nomi-sec PoC sync → Enhanced rule generation
@ -245,7 +245,8 @@ The application now uses an advanced rule generation process:
- **Frontend tests**: `npm test` (in frontend directory)
- **Backend testing**: Use standalone scripts for bulk operations
- **API testing**: Use `/docs` endpoint for Swagger UI
- **Bulk Processing**: Monitor via `/api/bulk-jobs` and frontend Bulk Jobs tab
- **Task Monitoring**: Monitor via Flower dashboard at http://localhost:5555
- **Celery Tasks**: Use `celery -A celery_config worker --loglevel=info` for debugging
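- **Task Inspection**: For ad-hoc debugging, task state can also be checked from a Python shell. A minimal sketch, assuming `celery_config` exposes the app object as `celery_app` (matching the `-A celery_config` invocation above):

```python
# Inspect a Celery task by the task_id returned from the sync endpoints.
from celery.result import AsyncResult
from celery_config import celery_app  # assumed module-level app object

result = AsyncResult("your-task-id-here", app=celery_app)
print(result.state)   # PENDING / PROGRESS / SUCCESS / FAILURE
print(result.info)    # progress metadata set via update_state()
```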
### Security Considerations
- **API Keys**: Store NVD and GitHub tokens in environment variables


@ -12,7 +12,7 @@ from typing import Dict, List, Optional, Tuple
from sqlalchemy.orm import Session
import re
from llm_client import LLMClient
from enhanced_llm_client import EnhancedLLMClient
# Enhanced LLM client functionality is now integrated into LLMClient
from yaml_metadata_generator import YAMLMetadataGenerator
from cve2capec_client import CVE2CAPECClient
from poc_analyzer import PoCAnalyzer
@ -26,8 +26,7 @@ class EnhancedSigmaGenerator:
def __init__(self, db_session: Session, llm_provider: str = None, llm_model: str = None):
self.db_session = db_session
self.llm_client = LLMClient(provider=llm_provider, model=llm_model) # Keep for backward compatibility
self.enhanced_llm_client = EnhancedLLMClient(provider=llm_provider, model=llm_model)
self.llm_client = LLMClient(provider=llm_provider, model=llm_model)
self.yaml_generator = YAMLMetadataGenerator(db_session)
self.cve2capec_client = CVE2CAPECClient()
self.poc_analyzer = PoCAnalyzer()
@ -50,16 +49,16 @@ class EnhancedSigmaGenerator:
generation_method = "template"
template = None
if use_hybrid and self.enhanced_llm_client.is_available() and best_poc:
logger.info(f"Attempting hybrid rule generation for {cve.cve_id} using {self.enhanced_llm_client.provider}")
if use_hybrid and self.llm_client.is_available() and best_poc:
logger.info(f"Attempting hybrid rule generation for {cve.cve_id} using {self.llm_client.provider}")
rule_content = await self._generate_hybrid_rule(cve, best_poc, poc_data)
if rule_content:
generation_method = f"hybrid_{self.enhanced_llm_client.provider}"
generation_method = f"hybrid_{self.llm_client.provider}"
# Create a dummy template object for hybrid-generated rules
class HybridTemplate:
def __init__(self, provider_name):
self.template_name = f"Hybrid Generated ({provider_name})"
template = HybridTemplate(self.enhanced_llm_client.provider)
template = HybridTemplate(self.llm_client.provider)
# Fallback to original LLM-enhanced generation
elif use_llm and self.llm_client.is_available() and best_poc:
@ -161,20 +160,12 @@ class EnhancedSigmaGenerator:
poc_analysis = self.poc_analyzer.analyze_poc(poc_content, cve.cve_id)
# Step 3: Generate detection sections using LLM
logger.info(f"Generating detection sections for {cve.cve_id}")
detection_sections = await self.enhanced_llm_client.generate_detection_sections(
yaml_metadata, poc_analysis, cve.cve_id
)
if not detection_sections:
logger.warning(f"Failed to generate detection sections for {cve.cve_id}")
return None
# Step 4: Combine metadata with detection sections
logger.info(f"Combining YAML sections for {cve.cve_id}")
complete_rule = self.enhanced_llm_client.combine_yaml_sections(
yaml_metadata, detection_sections
# Step 3: Generate complete SIGMA rule using LLM
logger.info(f"Generating SIGMA rule for {cve.cve_id}")
complete_rule = await self.llm_client.generate_sigma_rule(
cve_id=cve.cve_id,
poc_content=poc_content,
cve_description=cve.description or ''
)
if complete_rule:
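For context, a minimal sketch of the simplified call path. The constructor and the `generate_sigma_rule` signature are taken from this diff; the session handling and provider choice are illustrative only:

```python
from enhanced_sigma_generator import EnhancedSigmaGenerator

async def generate_for(db_session, cve, poc_content: str):
    # Provider/model values are assumptions; any configured LLM works.
    gen = EnhancedSigmaGenerator(db_session, llm_provider="ollama", llm_model="llama3.2")
    if not gen.llm_client.is_available():
        return None
    # One unified call replaces the old generate_detection_sections +
    # combine_yaml_sections two-step flow.
    return await gen.llm_client.generate_sigma_rule(
        cve_id=cve.cve_id,
        poc_content=poc_content,
        cve_description=cve.description or "",
    )
```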


@ -1,7 +1,7 @@
from fastapi import FastAPI, HTTPException, BackgroundTasks, Depends
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from sqlalchemy import create_engine, Column, String, Text, DECIMAL, TIMESTAMP, Boolean, ARRAY, Integer, JSON, func
from sqlalchemy import create_engine, Column, String, Text, DECIMAL, TIMESTAMP, Boolean, ARRAY, Integer, JSON, func, or_
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker, Session
from sqlalchemy.dialects.postgresql import UUID
@ -28,9 +28,6 @@ from cve2capec_client import CVE2CAPECClient
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Global job tracking
running_jobs = {}
job_cancellation_flags = {}
# Database setup
DATABASE_URL = os.getenv("DATABASE_URL", "postgresql://cve_user:cve_password@localhost:5432/cve_sigma_db")
@ -96,22 +93,6 @@ class RuleTemplate(Base):
description = Column(Text)
created_at = Column(TIMESTAMP, default=datetime.utcnow)
class BulkProcessingJob(Base):
__tablename__ = "bulk_processing_jobs"
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
job_type = Column(String(50), nullable=False) # 'nvd_bulk_seed', 'nomi_sec_sync', 'incremental_update'
status = Column(String(20), default='pending') # 'pending', 'running', 'completed', 'failed', 'cancelled'
year = Column(Integer) # For year-based processing
total_items = Column(Integer, default=0)
processed_items = Column(Integer, default=0)
failed_items = Column(Integer, default=0)
error_message = Column(Text)
job_metadata = Column(JSON) # Additional job-specific data
started_at = Column(TIMESTAMP)
completed_at = Column(TIMESTAMP)
cancelled_at = Column(TIMESTAMP)
created_at = Column(TIMESTAMP, default=datetime.utcnow)
# Pydantic models
class CVEResponse(BaseModel):
@ -123,6 +104,8 @@ class CVEResponse(BaseModel):
published_date: Optional[datetime] = None
affected_products: Optional[List[str]] = None
reference_urls: Optional[List[str]] = None
poc_count: Optional[int] = 0
poc_data: Optional[dict] = {}
class Config:
from_attributes = True
@ -850,9 +833,38 @@ except ImportError as e:
except Exception as e:
logger.error(f"Error loading Celery job routes: {e}")
@app.get("/api/cves", response_model=List[CVEResponse])
async def get_cves(skip: int = 0, limit: int = 50, db: Session = Depends(get_db)):
cves = db.query(CVE).order_by(CVE.published_date.desc()).offset(skip).limit(limit).all()
@app.get("/api/cves", response_model=dict)
async def get_cves(
skip: int = 0,
limit: int = 50,
search: Optional[str] = None,
severity: Optional[str] = None,
db: Session = Depends(get_db)
):
# Build query with filters
query = db.query(CVE)
# Search filter
if search:
search_filter = f"%{search}%"
query = query.filter(
or_(
CVE.cve_id.ilike(search_filter),
CVE.description.ilike(search_filter),
# ARRAY.any() only checks exact element equality, so an ILIKE-style
# pattern would never match; flatten the array for substring search.
func.array_to_string(CVE.affected_products, ' ').ilike(search_filter)
)
)
# Severity filter
if severity:
query = query.filter(CVE.severity.ilike(severity))
# Get total count for pagination
total_count = query.count()
# Apply pagination and ordering
cves = query.order_by(CVE.published_date.desc()).offset(skip).limit(limit).all()
# Convert UUID to string for each CVE
result = []
for cve in cves:
@ -864,10 +876,19 @@ async def get_cves(skip: int = 0, limit: int = 50, db: Session = Depends(get_db)
'severity': cve.severity,
'published_date': cve.published_date,
'affected_products': cve.affected_products,
'reference_urls': cve.reference_urls
'reference_urls': cve.reference_urls,
'poc_count': cve.poc_count or 0,
'poc_data': cve.poc_data or {}
}
result.append(CVEResponse(**cve_dict))
return result
return {
'cves': result,
'total': total_count,
'skip': skip,
'limit': limit,
'has_more': skip + limit < total_count
}
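A small consumer of the new envelope, assuming only the fields shown above (`cves`, `total`, `skip`, `limit`, `has_more`); the helper name is illustrative:

```python
import requests

def iter_cves(base_url="http://localhost:8000", search=None, severity=None, page_size=50):
    """Yield every CVE, following the has_more flag page by page."""
    skip = 0
    while True:
        params = {"skip": skip, "limit": page_size}
        if search:
            params["search"] = search
        if severity:
            params["severity"] = severity
        page = requests.get(f"{base_url}/api/cves", params=params).json()
        yield from page["cves"]
        if not page["has_more"]:
            break
        skip += page_size
```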
@app.get("/api/cves/{cve_id}", response_model=CVEResponse)
async def get_cve(cve_id: str, db: Session = Depends(get_db)):
@ -883,7 +904,9 @@ async def get_cve(cve_id: str, db: Session = Depends(get_db)):
'severity': cve.severity,
'published_date': cve.published_date,
'affected_products': cve.affected_products,
'reference_urls': cve.reference_urls
'reference_urls': cve.reference_urls,
'poc_count': cve.poc_count or 0,
'poc_data': cve.poc_data or {}
}
return CVEResponse(**cve_dict)
@ -1171,186 +1194,48 @@ async def sync_github_pocs(request: GitHubPoCSyncRequest,
raise HTTPException(status_code=500, detail=f"Failed to start GitHub PoC sync: {e}")
@app.post("/api/sync-exploitdb")
async def sync_exploitdb(background_tasks: BackgroundTasks,
request: ExploitDBSyncRequest,
db: Session = Depends(get_db)):
async def sync_exploitdb(request: ExploitDBSyncRequest, db: Session = Depends(get_db)):
"""Synchronize ExploitDB data from git mirror"""
# Create job record
job = BulkProcessingJob(
job_type='exploitdb_sync',
status='pending',
job_metadata={
'cve_id': request.cve_id,
'batch_size': request.batch_size
try:
# Import Celery task
from tasks.data_sync_tasks import sync_exploitdb_task
# Start Celery task
task_result = sync_exploitdb_task.delay(batch_size=request.batch_size)
return {
"message": f"ExploitDB sync started" + (f" for {request.cve_id}" if request.cve_id else " for all CVEs"),
"status": "started",
"task_id": task_result.id,
"monitor_url": f"http://localhost:5555/task/{task_result.id}",
"batch_size": request.batch_size
}
)
db.add(job)
db.commit()
db.refresh(job)
job_id = str(job.id)
running_jobs[job_id] = job
job_cancellation_flags[job_id] = False
async def sync_task():
# Create a new database session for the background task
task_db = SessionLocal()
try:
# Get the job in the new session
task_job = task_db.query(BulkProcessingJob).filter(BulkProcessingJob.id == job.id).first()
if not task_job:
logger.error(f"Job {job_id} not found in task session")
return
task_job.status = 'running'
task_job.started_at = datetime.utcnow()
task_db.commit()
from exploitdb_client_local import ExploitDBLocalClient
client = ExploitDBLocalClient(task_db)
if request.cve_id:
# Sync specific CVE
if job_cancellation_flags.get(job_id, False):
logger.info(f"Job {job_id} cancelled before starting")
return
result = await client.sync_cve_exploits(request.cve_id)
logger.info(f"ExploitDB sync for {request.cve_id}: {result}")
else:
# Sync all CVEs with cancellation support
result = await client.bulk_sync_exploitdb(
batch_size=request.batch_size,
cancellation_flag=lambda: job_cancellation_flags.get(job_id, False)
)
logger.info(f"ExploitDB bulk sync completed: {result}")
# Update job status if not cancelled
if not job_cancellation_flags.get(job_id, False):
task_job.status = 'completed'
task_job.completed_at = datetime.utcnow()
task_db.commit()
except Exception as e:
if not job_cancellation_flags.get(job_id, False):
# Get the job again in case it was modified
task_job = task_db.query(BulkProcessingJob).filter(BulkProcessingJob.id == job.id).first()
if task_job:
task_job.status = 'failed'
task_job.error_message = str(e)
task_job.completed_at = datetime.utcnow()
task_db.commit()
logger.error(f"ExploitDB sync failed: {e}")
import traceback
traceback.print_exc()
finally:
# Clean up tracking and close the task session
running_jobs.pop(job_id, None)
job_cancellation_flags.pop(job_id, None)
task_db.close()
background_tasks.add_task(sync_task)
return {
"message": f"ExploitDB sync started" + (f" for {request.cve_id}" if request.cve_id else " for all CVEs"),
"status": "started",
"job_id": job_id,
"cve_id": request.cve_id,
"batch_size": request.batch_size
}
except Exception as e:
logger.error(f"Error starting ExploitDB sync via Celery: {e}")
raise HTTPException(status_code=500, detail=f"Failed to start ExploitDB sync: {e}")
@app.post("/api/sync-cisa-kev")
async def sync_cisa_kev(background_tasks: BackgroundTasks,
request: CISAKEVSyncRequest,
db: Session = Depends(get_db)):
async def sync_cisa_kev(request: CISAKEVSyncRequest, db: Session = Depends(get_db)):
"""Synchronize CISA Known Exploited Vulnerabilities data"""
# Create job record
job = BulkProcessingJob(
job_type='cisa_kev_sync',
status='pending',
job_metadata={
'cve_id': request.cve_id,
'batch_size': request.batch_size
try:
# Import Celery task
from tasks.data_sync_tasks import sync_cisa_kev_task
# Start Celery task
task_result = sync_cisa_kev_task.delay(batch_size=request.batch_size)
return {
"message": f"CISA KEV sync started" + (f" for {request.cve_id}" if request.cve_id else " for all CVEs"),
"status": "started",
"task_id": task_result.id,
"monitor_url": f"http://localhost:5555/task/{task_result.id}",
"batch_size": request.batch_size
}
)
db.add(job)
db.commit()
db.refresh(job)
job_id = str(job.id)
running_jobs[job_id] = job
job_cancellation_flags[job_id] = False
async def sync_task():
# Create a new database session for the background task
task_db = SessionLocal()
try:
# Get the job in the new session
task_job = task_db.query(BulkProcessingJob).filter(BulkProcessingJob.id == job.id).first()
if not task_job:
logger.error(f"Job {job_id} not found in task session")
return
task_job.status = 'running'
task_job.started_at = datetime.utcnow()
task_db.commit()
from cisa_kev_client import CISAKEVClient
client = CISAKEVClient(task_db)
if request.cve_id:
# Sync specific CVE
if job_cancellation_flags.get(job_id, False):
logger.info(f"Job {job_id} cancelled before starting")
return
result = await client.sync_cve_kev_data(request.cve_id)
logger.info(f"CISA KEV sync for {request.cve_id}: {result}")
else:
# Sync all CVEs with cancellation support
result = await client.bulk_sync_kev_data(
batch_size=request.batch_size,
cancellation_flag=lambda: job_cancellation_flags.get(job_id, False)
)
logger.info(f"CISA KEV bulk sync completed: {result}")
# Update job status if not cancelled
if not job_cancellation_flags.get(job_id, False):
task_job.status = 'completed'
task_job.completed_at = datetime.utcnow()
task_db.commit()
except Exception as e:
if not job_cancellation_flags.get(job_id, False):
# Get the job again in case it was modified
task_job = task_db.query(BulkProcessingJob).filter(BulkProcessingJob.id == job.id).first()
if task_job:
task_job.status = 'failed'
task_job.error_message = str(e)
task_job.completed_at = datetime.utcnow()
task_db.commit()
logger.error(f"CISA KEV sync failed: {e}")
import traceback
traceback.print_exc()
finally:
# Clean up tracking and close the task session
running_jobs.pop(job_id, None)
job_cancellation_flags.pop(job_id, None)
task_db.close()
background_tasks.add_task(sync_task)
return {
"message": f"CISA KEV sync started" + (f" for {request.cve_id}" if request.cve_id else " for all CVEs"),
"status": "started",
"job_id": job_id,
"cve_id": request.cve_id,
"batch_size": request.batch_size
}
except Exception as e:
logger.error(f"Error starting CISA KEV sync via Celery: {e}")
raise HTTPException(status_code=500, detail=f"Failed to start CISA KEV sync: {e}")
@app.post("/api/sync-references")
async def sync_references(request: ReferenceSyncRequest, background_tasks: BackgroundTasks, db: Session = Depends(get_db)):
@ -1672,46 +1557,7 @@ async def get_cisa_kev_stats(db: Session = Depends(get_db)):
logger.error(f"Error getting CISA KEV stats: {e}")
raise HTTPException(status_code=500, detail=str(e))
@app.get("/api/bulk-jobs")
async def get_bulk_jobs(limit: int = 10, db: Session = Depends(get_db)):
"""Get bulk processing job status"""
jobs = db.query(BulkProcessingJob).order_by(
BulkProcessingJob.created_at.desc()
).limit(limit).all()
result = []
for job in jobs:
job_dict = {
'id': str(job.id),
'job_type': job.job_type,
'status': job.status,
'year': job.year,
'total_items': job.total_items,
'processed_items': job.processed_items,
'failed_items': job.failed_items,
'error_message': job.error_message,
'metadata': job.job_metadata,
'started_at': job.started_at,
'completed_at': job.completed_at,
'created_at': job.created_at
}
result.append(job_dict)
return result
@app.get("/api/bulk-status")
async def get_bulk_status(db: Session = Depends(get_db)):
"""Get comprehensive bulk processing status"""
try:
from bulk_seeder import BulkSeeder
seeder = BulkSeeder(db)
status = await seeder.get_seeding_status()
return status
except Exception as e:
logger.error(f"Error getting bulk status: {e}")
return {"error": str(e)}
@app.get("/api/poc-stats")
async def get_poc_stats(db: Session = Depends(get_db)):
@ -2019,69 +1865,7 @@ async def switch_llm_provider(request: dict):
logger.error(f"Error switching LLM provider: {e}")
raise HTTPException(status_code=500, detail=str(e))
@app.post("/api/cancel-job/{job_id}")
async def cancel_job(job_id: str, db: Session = Depends(get_db)):
"""Cancel a running job"""
try:
# Find the job in the database
job = db.query(BulkProcessingJob).filter(BulkProcessingJob.id == job_id).first()
if not job:
raise HTTPException(status_code=404, detail="Job not found")
if job.status not in ['pending', 'running']:
raise HTTPException(status_code=400, detail=f"Cannot cancel job with status: {job.status}")
# Set cancellation flag
job_cancellation_flags[job_id] = True
# Update job status
job.status = 'cancelled'
job.cancelled_at = datetime.utcnow()
job.error_message = "Job cancelled by user"
db.commit()
logger.info(f"Job {job_id} cancellation requested")
return {
"message": f"Job {job_id} cancellation requested",
"status": "cancelled",
"job_id": job_id
}
except HTTPException:
raise
except Exception as e:
logger.error(f"Error cancelling job {job_id}: {e}")
raise HTTPException(status_code=500, detail=str(e))
@app.get("/api/running-jobs")
async def get_running_jobs(db: Session = Depends(get_db)):
"""Get all currently running jobs"""
try:
jobs = db.query(BulkProcessingJob).filter(
BulkProcessingJob.status.in_(['pending', 'running'])
).order_by(BulkProcessingJob.created_at.desc()).all()
result = []
for job in jobs:
result.append({
'id': str(job.id),
'job_type': job.job_type,
'status': job.status,
'year': job.year,
'total_items': job.total_items,
'processed_items': job.processed_items,
'failed_items': job.failed_items,
'error_message': job.error_message,
'started_at': job.started_at,
'created_at': job.created_at,
'can_cancel': job.status in ['pending', 'running']
})
return result
except Exception as e:
logger.error(f"Error getting running jobs: {e}")
raise HTTPException(status_code=500, detail=str(e))
@app.post("/api/ollama-pull-model")
async def pull_ollama_model(request: dict, background_tasks: BackgroundTasks):


@ -514,7 +514,7 @@ class GitHubPoCClient:
async def bulk_sync_all_cves(self, batch_size: int = 50) -> dict:
"""Bulk synchronize all CVEs with GitHub PoC data"""
from main import CVE, BulkProcessingJob
from main import CVE
# Load all GitHub PoC data first
github_poc_data = self.load_github_poc_data()
@ -522,16 +522,8 @@ class GitHubPoCClient:
if not github_poc_data:
return {"error": "No GitHub PoC data found"}
# Create bulk processing job
job = BulkProcessingJob(
job_type='github_poc_sync',
status='running',
started_at=datetime.utcnow(),
total_items=len(github_poc_data),
job_metadata={'batch_size': batch_size}
)
self.db_session.add(job)
self.db_session.commit()
# Note: Job tracking is now handled by Celery tasks, not BulkProcessingJob
logger.info(f"Starting GitHub PoC sync for {len(github_poc_data)} CVEs with batch size {batch_size}")
total_processed = 0
total_found = 0
@ -553,40 +545,27 @@ class GitHubPoCClient:
total_found += result["pocs_found"]
results.append(result)
job.processed_items += 1
# Small delay to avoid overwhelming GitHub API
await asyncio.sleep(1)
except Exception as e:
logger.error(f"Error syncing PoCs for {cve_id}: {e}")
job.failed_items += 1
# Commit after each batch
self.db_session.commit()
logger.info(f"Processed batch {i//batch_size + 1}/{(len(cve_ids) + batch_size - 1)//batch_size}")
# Update job status
job.status = 'completed'
job.completed_at = datetime.utcnow()
job.job_metadata.update({
'total_processed': total_processed,
'total_pocs_found': total_found,
'cves_with_pocs': len(results)
})
logger.info(f"GitHub PoC sync completed: {total_processed} processed, {total_found} PoCs found")
except Exception as e:
job.status = 'failed'
job.error_message = str(e)
job.completed_at = datetime.utcnow()
logger.error(f"Bulk McDevitt sync job failed: {e}")
logger.error(f"Bulk GitHub PoC sync failed: {e}")
raise
finally:
self.db_session.commit()
return {
'job_id': str(job.id),
'status': job.status,
'status': 'completed',
'total_processed': total_processed,
'total_pocs_found': total_found,
'cves_with_pocs': len(results)
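With BulkProcessingJob gone, a Celery task is the natural wrapper for this client. A sketch modeled on the `sync_github_poc_task` pattern in `tasks/data_sync_tasks.py`; the real task body may differ, and the `get_db_session` import location is assumed:

```python
import asyncio
from celery_config import celery_app, get_db_session  # get_db_session location assumed
from mcdevitt_poc_client import GitHubPoCClient

@celery_app.task(bind=True, name='data_sync_tasks.sync_github_poc_sketch')
def sync_github_poc_sketch(self, batch_size: int = 50):
    db_session = get_db_session()
    try:
        self.update_state(state='PROGRESS',
                          meta={'stage': 'github_poc_sync', 'progress': 0,
                                'message': 'Starting GitHub PoC sync'})
        client = GitHubPoCClient(db_session)
        # bulk_sync_all_cves is async, so drive it to completion here.
        return asyncio.run(client.bulk_sync_all_cves(batch_size=batch_size))
    finally:
        db_session.close()
```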


@ -10,7 +10,9 @@ import requests
import subprocess
import sys
import tempfile
import shutil
from typing import Dict, List, Optional
from pathlib import Path
OLLAMA_BASE_URL = os.getenv('OLLAMA_BASE_URL', 'http://ollama:11434')
DEFAULT_MODEL = os.getenv('LLM_MODEL', 'llama3.2')
@ -94,11 +96,102 @@ def pull_model(model_name: str) -> bool:
log(f"❌ Error pulling model {model_name}: {e}", "ERROR")
return False
def create_sigma_model() -> bool:
"""Create the sigma-llama model with specialized SIGMA generation configuration"""
log("🔄 Creating sigma-llama model...")
def get_best_checkpoint() -> str:
"""Get the best performing checkpoint (highest step count)"""
finetuned_model_path = "/app/models/sigma_llama_finetuned"
# First, remove any existing sigma-llama model
# Look for the highest checkpoint number
checkpoints = []
try:
for item in os.listdir(finetuned_model_path):
if item.startswith('checkpoint-') and os.path.isdir(os.path.join(finetuned_model_path, item)):
try:
checkpoint_num = int(item.split('-')[1])
checkpoints.append((checkpoint_num, item))
except ValueError:
continue
if checkpoints:
# Sort by checkpoint number and get the highest
checkpoints.sort(key=lambda x: x[0], reverse=True)
best_checkpoint = checkpoints[0][1]
log(f"Found best checkpoint: {best_checkpoint} (step {checkpoints[0][0]})")
return os.path.join(finetuned_model_path, best_checkpoint)
except Exception as e:
log(f"Error scanning checkpoints: {e}", "WARN")
# Fallback to root directory
return finetuned_model_path
def create_lora_modelfile(adapter_path: str) -> str:
"""Create a Modelfile that uses the LoRA adapter"""
# Create optimized Modelfile for LoRA adapter
modelfile_content = f"""FROM {DEFAULT_MODEL}:latest
# LoRA Adapter Configuration
# Note: Direct ADAPTER directive requires GGML format
# Using enhanced prompting optimized for fine-tuned model
TEMPLATE \"\"\"<|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are a cybersecurity expert specializing in SIGMA rule creation. You have been fine-tuned specifically for generating high-quality SIGMA detection rules.
Generate valid SIGMA rules in YAML format based on the provided CVE and exploit information. Output ONLY valid YAML starting with 'title:' and ending with the last YAML line.
Focus on:
- Accurate logsource identification
- Precise detection logic
- Relevant fields and values
- Proper YAML formatting
- Contextual understanding from CVE details<|eot_id|><|start_header_id|>user<|end_header_id|>
{{ .Prompt }}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
\"\"\"
# Fine-tuned parameters optimized for SIGMA rule generation
PARAMETER temperature 0.1
PARAMETER top_p 0.9
PARAMETER top_k 40
PARAMETER repeat_penalty 1.1
PARAMETER num_ctx 4096
PARAMETER stop "<|eot_id|>"
PARAMETER stop "<|end_of_text|>"
# System message optimized for fine-tuned model
SYSTEM \"\"\"You are a specialized SIGMA rule generation model. Your training has optimized you for creating accurate, contextual SIGMA detection rules. Generate only valid YAML format rules based on the provided context.\"\"\"
"""
return modelfile_content
def create_sigma_model() -> bool:
"""Create the sigma-llama model using the LoRA fine-tuned model"""
log("🔄 Creating sigma-llama model from LoRA fine-tuned model...")
# Check if fine-tuned model exists
finetuned_model_path = "/app/models/sigma_llama_finetuned"
if not os.path.exists(finetuned_model_path):
log(f"❌ Fine-tuned model not found at {finetuned_model_path}", "WARN")
log("Falling back to prompt-tuned base model...", "WARN")
return create_prompt_tuned_model()
log(f"✅ Found LoRA fine-tuned model at {finetuned_model_path}")
# Get the best checkpoint
best_checkpoint_path = get_best_checkpoint()
log(f"Using checkpoint: {best_checkpoint_path}")
# Check if adapter files exist
adapter_file = os.path.join(best_checkpoint_path, "adapter_model.safetensors")
if not os.path.exists(adapter_file):
log(f"❌ Adapter file not found at {adapter_file}", "ERROR")
return create_prompt_tuned_model()
log(f"✅ Found LoRA adapter: {adapter_file}")
# Remove any existing sigma-llama model
try:
response = requests.delete(f"{OLLAMA_BASE_URL}/api/delete",
json={"name": SIGMA_MODEL_NAME},
@ -108,7 +201,71 @@ def create_sigma_model() -> bool:
except Exception:
pass # Model might not exist, that's fine
# Create Modelfile content without the FROM line
# Create optimized Modelfile for LoRA fine-tuned model
modelfile_content = create_lora_modelfile(best_checkpoint_path)
try:
log("Creating LoRA-optimized sigma-llama model...")
log("Note: Using optimized prompting for LoRA fine-tuned model")
log("Note: Direct LoRA loading requires GGML format - using enhanced prompting approach")
# Create the model with LoRA-optimized configuration
# Use the from parameter approach since Modelfile with FROM is not working
payload = {
"name": SIGMA_MODEL_NAME,
"from": f"{DEFAULT_MODEL}:latest",
"modelfile": modelfile_content.replace(f"FROM {DEFAULT_MODEL}:latest\n\n", ""),
"stream": False
}
response = requests.post(
f"{OLLAMA_BASE_URL}/api/create",
json=payload,
timeout=600, # 10 minutes timeout
stream=True
)
if response.status_code == 200:
# Stream and log progress
for line in response.iter_lines():
if line:
try:
data = json.loads(line.decode('utf-8'))
status = data.get('status', '')
if status:
log(f"LoRA model creation: {status}", "DEBUG")
if data.get('error'):
log(f"Model creation error: {data.get('error')}", "ERROR")
return False
except json.JSONDecodeError:
continue
# Verify the model was created
models = get_available_models()
if any(SIGMA_MODEL_NAME in model for model in models):
log("✅ sigma-llama LoRA-optimized model created successfully!")
return True
else:
log("❌ sigma-llama model not found after creation", "ERROR")
return False
else:
log(f"❌ Failed to create sigma-llama model: HTTP {response.status_code}", "ERROR")
try:
error_data = response.json()
log(f"Error details: {error_data}", "ERROR")
except Exception:
log(f"Error response: {response.text}", "ERROR")
return False
except Exception as e:
log(f"❌ Error creating LoRA sigma-llama model: {e}", "ERROR")
return False
def create_prompt_tuned_model() -> bool:
"""Fallback: Create a prompt-tuned model using base llama model"""
log("🔄 Creating prompt-tuned sigma-llama model as fallback...")
# Create Modelfile content for prompt-tuned model
modelfile_content = """TEMPLATE \"\"\"### Instruction:
Generate SIGMA rule logsource and detection sections based on the provided context.
@ -128,7 +285,7 @@ SYSTEM \"\"\"You are a cybersecurity expert specializing in SIGMA rule creation.
"""
try:
# Create the model using the API with 'from' parameter
# Create the model using the base model
payload = {
"name": SIGMA_MODEL_NAME,
"from": f"{DEFAULT_MODEL}:latest",
@ -151,7 +308,7 @@ SYSTEM \"\"\"You are a cybersecurity expert specializing in SIGMA rule creation.
data = json.loads(line.decode('utf-8'))
status = data.get('status', '')
if status:
log(f"Model creation: {status}", "DEBUG")
log(f"Prompt-tuned model creation: {status}", "DEBUG")
if data.get('error'):
log(f"Model creation error: {data.get('error')}", "ERROR")
return False
@ -161,57 +318,68 @@ SYSTEM \"\"\"You are a cybersecurity expert specializing in SIGMA rule creation.
# Verify the model was created
models = get_available_models()
if any(SIGMA_MODEL_NAME in model for model in models):
log("✅ sigma-llama model created successfully!")
log("✅ sigma-llama prompt-tuned model created successfully!")
return True
else:
log("❌ sigma-llama model not found after creation", "ERROR")
return False
else:
log(f"❌ Failed to create sigma-llama model: HTTP {response.status_code}", "ERROR")
try:
error_data = response.json()
log(f"Error details: {error_data}", "ERROR")
except Exception:
log(f"Error response: {response.text}", "ERROR")
return False
except Exception as e:
log(f"❌ Error creating sigma-llama model: {e}", "ERROR")
log(f"❌ Error creating prompt-tuned model: {e}", "ERROR")
return False
def test_sigma_model() -> bool:
"""Test the sigma-llama model"""
log("🔄 Testing sigma-llama model...")
"""Test the sigma-llama LoRA model"""
log("🔄 Testing sigma-llama LoRA model...")
try:
# Use a more comprehensive test prompt
test_prompt = """Generate a SIGMA rule for CVE-2023-1234: PowerShell command execution vulnerability that allows remote code execution through malicious PowerShell scripts.
Vulnerability Details:
- Affects Windows PowerShell
- Remote code execution via script injection
- Commonly exploited through phishing emails
- Targets process execution and command line arguments"""
test_payload = {
"model": SIGMA_MODEL_NAME,
"prompt": "Title: Test PowerShell Rule",
"prompt": test_prompt,
"stream": False
}
response = requests.post(
f"{OLLAMA_BASE_URL}/api/generate",
json=test_payload,
timeout=60
timeout=120 # Longer timeout for LoRA model
)
if response.status_code == 200:
data = response.json()
test_response = data.get('response', '')[:100] # First 100 chars
log(f"✅ Model test successful! Response: {test_response}...")
return True
test_response = data.get('response', '')
# Check if response looks like a SIGMA rule
if 'title:' in test_response.lower() and ('logsource:' in test_response.lower() or 'detection:' in test_response.lower()):
log(f"✅ LoRA model test successful! Generated SIGMA rule structure detected.")
log(f"Response preview: {test_response[:300]}...")
return True
else:
log(f"⚠️ Model responded but output doesn't look like SIGMA rule: {test_response[:200]}...")
return False
else:
log(f"❌ Model test failed: HTTP {response.status_code}", "ERROR")
return False
except Exception as e:
log(f"❌ Error testing model: {e}", "ERROR")
log(f"❌ Error testing LoRA model: {e}", "ERROR")
return False
def main():
"""Main setup function"""
log("🚀 Starting enhanced Ollama setup with SIGMA model creation...")
log("🚀 Starting enhanced Ollama setup with fine-tuned SIGMA model creation...")
# Step 1: Wait for Ollama to be ready
if not wait_for_ollama():
@ -222,16 +390,16 @@ def main():
models = get_available_models()
log(f"Current models: {models}")
# Step 3: Pull default model if needed
# Step 3: Pull default model if needed (for fallback)
if not any(DEFAULT_MODEL in model for model in models):
log(f"Default model {DEFAULT_MODEL} not found, pulling...")
log(f"Default model {DEFAULT_MODEL} not found, pulling for fallback...")
if not pull_model(DEFAULT_MODEL):
log(f"❌ Setup failed: Could not pull {DEFAULT_MODEL}", "ERROR")
sys.exit(1)
else:
log(f"✅ Default model {DEFAULT_MODEL} already available")
# Step 4: Create SIGMA model
# Step 4: Create SIGMA model (will try fine-tuned first, then fallback)
if not create_sigma_model():
log("❌ Setup failed: Could not create sigma-llama model", "ERROR")
sys.exit(1)
@ -246,7 +414,9 @@ def main():
log(f"Final models available: {final_models}")
if any(SIGMA_MODEL_NAME in model for model in final_models):
log("🎉 Setup complete! sigma-llama model is ready for use.")
log("🎉 Setup complete! LoRA fine-tuned sigma-llama model is ready for use.")
log("📊 Model benefits: Enhanced SIGMA rule generation with domain-specific fine-tuning")
log("🔧 Monitor performance: Compare outputs with base model for quality improvements")
sys.exit(0)
else:
log("❌ Setup failed: sigma-llama model not available after setup", "ERROR")


@ -85,97 +85,6 @@ def sync_nomi_sec_task(self, batch_size: int = 50) -> Dict[str, Any]:
db_session.close()
@celery_app.task(bind=True, name='data_sync_tasks.sync_cve2capec')
def sync_cve2capec_task(self, force_refresh: bool = False) -> Dict[str, Any]:
"""
Celery task for CVE2CAPEC MITRE ATT&CK mapping synchronization
Args:
force_refresh: Whether to force refresh the cache regardless of expiry
Returns:
Dictionary containing sync results
"""
try:
# Update task progress
self.update_state(
state='PROGRESS',
meta={
'stage': 'sync_cve2capec',
'progress': 0,
'message': 'Starting CVE2CAPEC MITRE ATT&CK mapping synchronization'
}
)
logger.info(f"Starting CVE2CAPEC sync task with force_refresh: {force_refresh}")
# Import here to avoid circular dependencies
from cve2capec_client import CVE2CAPECClient
# Create client instance
client = CVE2CAPECClient()
# Update progress
self.update_state(
state='PROGRESS',
meta={
'stage': 'sync_cve2capec',
'progress': 10,
'message': 'Fetching MITRE ATT&CK mappings...'
}
)
# Force refresh if requested
if force_refresh:
client._fetch_fresh_data()
# Update progress
self.update_state(
state='PROGRESS',
meta={
'stage': 'sync_cve2capec',
'progress': 50,
'message': 'Processing CVE mappings...'
}
)
# Get statistics about the loaded data
stats = client.get_stats()
# Update progress to completion
self.update_state(
state='PROGRESS',
meta={
'stage': 'sync_cve2capec',
'progress': 100,
'message': 'CVE2CAPEC synchronization completed successfully'
}
)
result = {
'status': 'completed',
'total_mappings': stats.get('total_mappings', 0),
'total_techniques': stats.get('unique_techniques', 0),
'cache_updated': True
}
logger.info(f"CVE2CAPEC sync task completed: {result}")
return result
except Exception as e:
logger.error(f"CVE2CAPEC sync task failed: {e}")
self.update_state(
state='FAILURE',
meta={
'stage': 'error',
'progress': 0,
'message': f'Task failed: {str(e)}',
'error': str(e)
}
)
raise
@celery_app.task(bind=True, name='data_sync_tasks.sync_github_poc')
def sync_github_poc_task(self, batch_size: int = 50) -> Dict[str, Any]:
"""
@ -245,97 +154,6 @@ def sync_github_poc_task(self, batch_size: int = 50) -> Dict[str, Any]:
db_session.close()
# (duplicate sync_cve2capec_task definition removed here; identical to the copy shown above)
@celery_app.task(bind=True, name='data_sync_tasks.sync_reference_content')
def sync_reference_content_task(self, batch_size: int = 30, max_cves: int = 200,
force_resync: bool = False) -> Dict[str, Any]:
@ -475,97 +293,6 @@ def sync_reference_content_task(self, batch_size: int = 30, max_cves: int = 200,
finally:
db_session.close()
# (another identical duplicate of sync_cve2capec_task removed here)
@celery_app.task(bind=True, name='data_sync_tasks.sync_exploitdb')
def sync_exploitdb_task(self, batch_size: int = 30) -> Dict[str, Any]:
"""
@ -635,97 +362,6 @@ def sync_exploitdb_task(self, batch_size: int = 30) -> Dict[str, Any]:
db_session.close()
# (another identical duplicate of sync_cve2capec_task removed here)
@celery_app.task(bind=True, name='data_sync_tasks.sync_cisa_kev')
def sync_cisa_kev_task(self, batch_size: int = 100) -> Dict[str, Any]:
"""
@ -795,97 +431,6 @@ def sync_cisa_kev_task(self, batch_size: int = 100) -> Dict[str, Any]:
db_session.close()
# (fifth and final identical duplicate of sync_cve2capec_task removed here)
@celery_app.task(bind=True, name='data_sync_tasks.build_exploitdb_index')
def build_exploitdb_index_task(self) -> Dict[str, Any]:
"""


@ -45,7 +45,8 @@ def health_check():
# Check database connectivity
try:
db_session.execute("SELECT 1")
from sqlalchemy import text
db_session.execute(text("SELECT 1"))
db_status = "healthy"
except Exception as e:
db_status = f"unhealthy: {e}"
@ -54,7 +55,9 @@ def health_check():
# Check Redis connectivity
try:
celery_app.backend.ping()
import redis
redis_client = redis.Redis.from_url(celery_app.conf.broker_url)
redis_client.ping()
redis_status = "healthy"
except Exception as e:
redis_status = f"unhealthy: {e}"
@ -195,7 +198,8 @@ def database_cleanup_comprehensive(self, days_to_keep: int = 30, cleanup_failed_
try:
# Run VACUUM on PostgreSQL to reclaim space
db_session.execute("VACUUM;")
from sqlalchemy import text
db_session.execute(text("VACUUM;"))
cleanup_results['database_optimized'] = True
except Exception as e:
logger.warning(f"Could not vacuum database: {e}")
@ -277,13 +281,14 @@ def health_check_detailed(self) -> Dict[str, Any]:
db_session = get_db_session()
try:
from sqlalchemy import text
start_time = datetime.utcnow()
db_session.execute("SELECT 1")
db_session.execute(text("SELECT 1"))
db_response_time = (datetime.utcnow() - start_time).total_seconds()
# Check database size and connections
db_size_result = db_session.execute("SELECT pg_size_pretty(pg_database_size(current_database()));").fetchone()
db_connections_result = db_session.execute("SELECT count(*) FROM pg_stat_activity;").fetchone()
db_size_result = db_session.execute(text("SELECT pg_size_pretty(pg_database_size(current_database()));")).fetchone()
db_connections_result = db_session.execute(text("SELECT count(*) FROM pg_stat_activity;")).fetchone()
health_status['components']['database'] = {
'status': 'healthy',
@ -313,8 +318,10 @@ def health_check_detailed(self) -> Dict[str, Any]:
)
try:
import redis
start_time = datetime.utcnow()
celery_app.backend.ping()
redis_client = redis.Redis.from_url(celery_app.conf.broker_url)
redis_client.ping()
redis_response_time = (datetime.utcnow() - start_time).total_seconds()
# Get Redis info
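The `text()` wrapping added throughout this file reflects SQLAlchemy 2.x, where `execute()` no longer accepts bare SQL strings. A standalone illustration of the pattern, using the DATABASE_URL default from main.py:

```python
from sqlalchemy import create_engine, text

engine = create_engine(
    "postgresql://cve_user:cve_password@localhost:5432/cve_sigma_db")
with engine.connect() as conn:
    # A bare string here is rejected under SQLAlchemy 2.x; text() is required.
    row = conn.execute(text("SELECT count(*) FROM pg_stat_activity;")).fetchone()
    print(f"active connections: {row[0]}")
```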


@ -92,8 +92,10 @@ services:
LLM_MODEL: llama3.2
volumes:
- ./backend:/app
- ./models:/app/models
command: python setup_ollama_with_sigma.py
restart: "no"
user: root
initial-setup:
build: ./backend


@ -8,6 +8,10 @@ const API_BASE_URL = process.env.REACT_APP_API_URL || 'http://localhost:8000';
function App() {
const [cves, setCves] = useState([]);
const [cveSearch, setCveSearch] = useState('');
const [cveFilters, setCveFilters] = useState({ severity: '' });
const [cvePagination, setCvePagination] = useState({ skip: 0, limit: 20, total: 0, hasMore: false });
const [loadingCves, setLoadingCves] = useState(false);
const [sigmaRules, setSigmaRules] = useState([]);
const [selectedCve, setSelectedCve] = useState(null);
const [stats, setStats] = useState({});
@ -15,18 +19,40 @@ function App() {
const [activeTab, setActiveTab] = useState('dashboard');
const [fetchingCves, setFetchingCves] = useState(false);
const [testResult, setTestResult] = useState(null);
const [bulkJobs, setBulkJobs] = useState([]);
const [bulkStatus, setBulkStatus] = useState({});
const [pocStats, setPocStats] = useState({});
const [gitHubPocStats, setGitHubPocStats] = useState({});
const [exploitdbStats, setExploitdbStats] = useState({});
const [cisaKevStats, setCisaKevStats] = useState({});
const [bulkProcessing, setBulkProcessing] = useState(false);
const [hasRunningJobs, setHasRunningJobs] = useState(false);
const [runningJobTypes, setRunningJobTypes] = useState(new Set());
const [llmStatus, setLlmStatus] = useState({});
const [exploitSyncDropdownOpen, setExploitSyncDropdownOpen] = useState(false);
// Function to fetch CVEs with search and pagination
const fetchCves = async (search = '', filters = {}, pagination = { skip: 0, limit: 20 }) => {
setLoadingCves(true);
try {
const params = new URLSearchParams({
skip: pagination.skip.toString(),
limit: pagination.limit.toString(),
});
if (search) params.append('search', search);
if (filters.severity) params.append('severity', filters.severity);
const response = await axios.get(`${API_BASE_URL}/api/cves?${params}`);
setCves(response.data.cves || []);
setCvePagination({
skip: response.data.skip || 0,
limit: response.data.limit || 20,
total: response.data.total || 0,
hasMore: response.data.has_more || false
});
} catch (error) {
console.error('Error fetching CVEs:', error);
} finally {
setLoadingCves(false);
}
};
useEffect(() => {
fetchData();
}, []);
@ -45,42 +71,6 @@ function App() {
};
}, [exploitSyncDropdownOpen]);
// Helper functions to check if specific job types are running
const isJobTypeRunning = (jobType) => {
return runningJobTypes.has(jobType);
};
const isBulkSeedRunning = () => {
return isJobTypeRunning('nvd_bulk_seed') || isJobTypeRunning('bulk_seed');
};
const isIncrementalUpdateRunning = () => {
return isJobTypeRunning('incremental_update');
};
const isNomiSecSyncRunning = () => {
return isJobTypeRunning('nomi_sec_sync');
};
const isGitHubPocSyncRunning = () => {
return isJobTypeRunning('github_poc_sync');
};
const isExploitDBSyncRunning = () => {
return isJobTypeRunning('exploitdb_sync') || isJobTypeRunning('exploitdb_sync_local');
};
const isCISAKEVSyncRunning = () => {
return isJobTypeRunning('cisa_kev_sync');
};
const isRuleGenerationRunning = () => {
return isJobTypeRunning('rule_regeneration') || isJobTypeRunning('llm_rule_generation');
};
const areAnyExploitSyncsRunning = () => {
return isNomiSecSyncRunning() || isGitHubPocSyncRunning() || isExploitDBSyncRunning() || isCISAKEVSyncRunning();
};
// Note: Scheduler functionality removed - now handled by Celery Beat
// Monitoring available via Flower at http://localhost:5555
@ -88,12 +78,10 @@ function App() {
const fetchData = async () => {
try {
setLoading(true);
const [cvesRes, rulesRes, statsRes, bulkJobsRes, bulkStatusRes, pocStatsRes, githubPocStatsRes, exploitdbStatsRes, cisaKevStatsRes, llmStatusRes] = await Promise.all([
axios.get(`${API_BASE_URL}/api/cves`),
const [cvesRes, rulesRes, statsRes, pocStatsRes, githubPocStatsRes, exploitdbStatsRes, cisaKevStatsRes, llmStatusRes] = await Promise.all([
axios.get(`${API_BASE_URL}/api/cves?limit=20`),
axios.get(`${API_BASE_URL}/api/sigma-rules`),
axios.get(`${API_BASE_URL}/api/stats`),
axios.get(`${API_BASE_URL}/api/bulk-jobs`),
axios.get(`${API_BASE_URL}/api/bulk-status`),
axios.get(`${API_BASE_URL}/api/poc-stats`),
axios.get(`${API_BASE_URL}/api/github-poc-stats`).catch(err => ({ data: {} })),
axios.get(`${API_BASE_URL}/api/exploitdb-stats`).catch(err => ({ data: {} })),
@ -101,24 +89,20 @@ function App() {
axios.get(`${API_BASE_URL}/api/llm-status`).catch(err => ({ data: {} }))
]);
setCves(cvesRes.data);
setCves(cvesRes.data.cves || []);
setCvePagination({
skip: cvesRes.data.skip || 0,
limit: cvesRes.data.limit || 20,
total: cvesRes.data.total || 0,
hasMore: cvesRes.data.has_more || false
});
setSigmaRules(rulesRes.data);
setStats(statsRes.data);
setBulkJobs(bulkJobsRes.data);
setBulkStatus(bulkStatusRes.data);
setPocStats(pocStatsRes.data);
setGitHubPocStats(githubPocStatsRes.data);
setExploitdbStats(exploitdbStatsRes.data);
setCisaKevStats(cisaKevStatsRes.data);
setLlmStatus(llmStatusRes.data);
// Update running jobs state
const runningJobs = bulkJobsRes.data.filter(job => job.status === 'running' || job.status === 'pending');
setHasRunningJobs(runningJobs.length > 0);
// Update specific job types that are running
const activeJobTypes = new Set(runningJobs.map(job => job.job_type));
setRunningJobTypes(activeJobTypes);
} catch (error) {
console.error('Error fetching data:', error);
} finally {
@ -126,19 +110,6 @@ function App() {
}
};
const cancelJob = async (jobId) => {
try {
const response = await axios.post(`${API_BASE_URL}/api/cancel-job/${jobId}`);
console.log('Cancel job response:', response.data);
// Refresh data after cancelling
setTimeout(() => {
fetchData();
}, 1000);
} catch (error) {
console.error('Error cancelling job:', error);
alert('Failed to cancel job. Please try again.');
}
};
const handleFetchCves = async () => {
try {
@ -411,25 +382,15 @@ function App() {
<div className="grid grid-cols-1 md:grid-cols-2 gap-4">
<button
onClick={() => startBulkSeed(2002)}
disabled={isBulkSeedRunning()}
className={`px-4 py-2 rounded-md text-white ${
isBulkSeedRunning()
? 'bg-gray-400 cursor-not-allowed'
: 'bg-blue-600 hover:bg-blue-700'
}`}
className="px-4 py-2 rounded-md text-white bg-blue-600 hover:bg-blue-700"
>
{isBulkSeedRunning() ? 'Processing...' : 'Sync NVD CVEs'}
Sync NVD CVEs
</button>
<button
onClick={startIncrementalUpdate}
disabled={isIncrementalUpdateRunning()}
className={`px-4 py-2 rounded-md text-white ${
isIncrementalUpdateRunning()
? 'bg-gray-400 cursor-not-allowed'
: 'bg-green-600 hover:bg-green-700'
}`}
className="px-4 py-2 rounded-md text-white bg-green-600 hover:bg-green-700"
>
{isIncrementalUpdateRunning() ? 'Processing...' : 'Incremental Update'}
Incremental Update
</button>
</div>
</div>
@ -440,13 +401,9 @@ function App() {
<div className="relative inline-block text-left">
<button
onClick={() => setExploitSyncDropdownOpen(!exploitSyncDropdownOpen)}
className={`inline-flex items-center justify-center w-full rounded-md border shadow-sm px-4 py-2 text-sm font-medium ${
areAnyExploitSyncsRunning()
? 'border-blue-300 bg-blue-50 text-blue-700'
: 'border-gray-300 bg-white text-gray-700 hover:bg-gray-50'
}`}
className="inline-flex items-center justify-center w-full rounded-md border shadow-sm px-4 py-2 text-sm font-medium border-gray-300 bg-white text-gray-700 hover:bg-gray-50"
>
{areAnyExploitSyncsRunning() ? 'Exploit Syncs Running...' : 'Sync Exploit Data'}
Sync Exploit Data
<svg className="-mr-1 ml-2 h-5 w-5" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 20 20" fill="currentColor">
<path fillRule="evenodd" d="M5.293 7.293a1 1 0 011.414 0L10 10.586l3.293-3.293a1 1 0 111.414 1.414l-4 4a1 1 0 01-1.414 0l-4-4a1 1 0 010-1.414z" clipRule="evenodd" />
</svg>
@ -457,79 +414,43 @@ function App() {
<div className="py-1">
<button
onClick={() => {
if (!isNomiSecSyncRunning()) {
syncNomiSec();
setExploitSyncDropdownOpen(false);
}
syncNomiSec();
setExploitSyncDropdownOpen(false);
}}
disabled={isNomiSecSyncRunning()}
className={`block w-full text-left px-4 py-2 text-sm ${
isNomiSecSyncRunning()
? 'text-gray-400 cursor-not-allowed bg-gray-50'
: 'text-gray-700 hover:bg-purple-50 hover:text-purple-900'
}`}
className="block w-full text-left px-4 py-2 text-sm text-gray-700 hover:bg-purple-50 hover:text-purple-900"
>
<span className={`inline-block w-3 h-3 rounded-full mr-2 ${
isNomiSecSyncRunning() ? 'bg-gray-400' : 'bg-purple-600'
}`}></span>
{isNomiSecSyncRunning() ? 'Syncing nomi-sec PoCs...' : 'Sync nomi-sec PoCs'}
<span className="inline-block w-3 h-3 rounded-full mr-2 bg-purple-600"></span>
Sync nomi-sec PoCs
</button>
<button
onClick={() => {
if (!isGitHubPocSyncRunning()) {
syncGitHubPocs();
setExploitSyncDropdownOpen(false);
}
syncGitHubPocs();
setExploitSyncDropdownOpen(false);
}}
disabled={isGitHubPocSyncRunning()}
className={`block w-full text-left px-4 py-2 text-sm ${
isGitHubPocSyncRunning()
? 'text-gray-400 cursor-not-allowed bg-gray-50'
: 'text-gray-700 hover:bg-green-50 hover:text-green-900'
}`}
className="block w-full text-left px-4 py-2 text-sm text-gray-700 hover:bg-green-50 hover:text-green-900"
>
<span className={`inline-block w-3 h-3 rounded-full mr-2 ${
isGitHubPocSyncRunning() ? 'bg-gray-400' : 'bg-green-600'
}`}></span>
{isGitHubPocSyncRunning() ? 'Syncing GitHub PoCs...' : 'Sync GitHub PoCs'}
<span className="inline-block w-3 h-3 rounded-full mr-2 bg-green-600"></span>
Sync GitHub PoCs
</button>
<button
onClick={() => {
if (!isExploitDBSyncRunning()) {
syncExploitDB();
setExploitSyncDropdownOpen(false);
}
syncExploitDB();
setExploitSyncDropdownOpen(false);
}}
disabled={isExploitDBSyncRunning()}
className={`block w-full text-left px-4 py-2 text-sm ${
isExploitDBSyncRunning()
? 'text-gray-400 cursor-not-allowed bg-gray-50'
: 'text-gray-700 hover:bg-red-50 hover:text-red-900'
}`}
className="block w-full text-left px-4 py-2 text-sm text-gray-700 hover:bg-red-50 hover:text-red-900"
>
<span className={`inline-block w-3 h-3 rounded-full mr-2 ${
isExploitDBSyncRunning() ? 'bg-gray-400' : 'bg-red-600'
}`}></span>
{isExploitDBSyncRunning() ? 'Syncing ExploitDB...' : 'Sync ExploitDB'}
<span className="inline-block w-3 h-3 rounded-full mr-2 bg-red-600"></span>
Sync ExploitDB
</button>
<button
onClick={() => {
if (!isCISAKEVSyncRunning()) {
syncCISAKEV();
setExploitSyncDropdownOpen(false);
}
syncCISAKEV();
setExploitSyncDropdownOpen(false);
}}
disabled={isCISAKEVSyncRunning()}
className={`block w-full text-left px-4 py-2 text-sm ${
isCISAKEVSyncRunning()
? 'text-gray-400 cursor-not-allowed bg-gray-50'
: 'text-gray-700 hover:bg-yellow-50 hover:text-yellow-900'
}`}
className="block w-full text-left px-4 py-2 text-sm text-gray-700 hover:bg-yellow-50 hover:text-yellow-900"
>
<span className={`inline-block w-3 h-3 rounded-full mr-2 ${
isCISAKEVSyncRunning() ? 'bg-gray-400' : 'bg-yellow-600'
}`}></span>
{isCISAKEVSyncRunning() ? 'Syncing CISA KEV...' : 'Sync CISA KEV'}
<span className="inline-block w-3 h-3 rounded-full mr-2 bg-yellow-600"></span>
Sync CISA KEV
</button>
</div>
</div>
@ -542,14 +463,9 @@ function App() {
<h3 className="text-lg font-medium text-gray-900 mb-3">Phase 3: Reference Data Syncing</h3>
<button
onClick={syncReferences}
disabled={hasRunningJobs}
className={`px-4 py-2 rounded-md text-white ${
hasRunningJobs
? 'bg-gray-400 cursor-not-allowed'
: 'bg-orange-600 hover:bg-orange-700'
}`}
className="px-4 py-2 rounded-md text-white bg-orange-600 hover:bg-orange-700"
>
{hasRunningJobs ? 'Processing...' : 'Sync References'}
Sync References
</button>
</div>
@ -559,26 +475,21 @@ function App() {
<div className="grid grid-cols-1 md:grid-cols-2 gap-4">
<button
onClick={() => regenerateRules()}
className="px-4 py-2 rounded-md text-white bg-indigo-600 hover:bg-indigo-700"
>
Regenerate Rules
</button>
<button
onClick={() => generateLlmRules()}
disabled={llmStatus.status !== 'ready'}
className={`px-4 py-2 rounded-md text-white ${
llmStatus.status !== 'ready'
? 'bg-gray-400 cursor-not-allowed'
: 'bg-violet-600 hover:bg-violet-700'
}`}
title={llmStatus.status !== 'ready' ? 'LLM not configured' : ''}
>
Generate LLM Rules
</button>
</div>
</div>
@@ -734,44 +645,144 @@ function App() {
</div>
);
const CVEList = () => {
const handleSearch = (e) => {
e.preventDefault();
fetchCves(cveSearch, cveFilters, { skip: 0, limit: 20 });
};
const handleFilterChange = (filterName, value) => {
const newFilters = { ...cveFilters, [filterName]: value };
setCveFilters(newFilters);
fetchCves(cveSearch, newFilters, { skip: 0, limit: 20 });
};
const handlePageChange = (newSkip) => {
fetchCves(cveSearch, cveFilters, { skip: newSkip, limit: 20 });
};
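// The three handlers above delegate to a fetchCves(search, filters, { skip, limit })
// helper defined earlier in App(). A minimal sketch of the assumed implementation;
// the /api/cves query parameters (search, severity, skip, limit) and the response
// shape ({ cves, total }) are assumptions, not confirmed by this commit:
//
// const fetchCves = async (search = '', filters = {}, { skip = 0, limit = 20 } = {}) => {
//   setLoadingCves(true);
//   try {
//     const params = new URLSearchParams({ skip, limit });
//     if (search) params.set('search', search);
//     if (filters.severity) params.set('severity', filters.severity);
//     const response = await axios.get(`${API_BASE}/api/cves?${params}`);
//     setCves(response.data.cves);
//     setCvePagination({
//       skip,
//       limit,
//       total: response.data.total,
//       hasMore: skip + limit < response.data.total,
//     });
//   } finally {
//     setLoadingCves(false);
//   }
// };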
return (
<div className="bg-white rounded-lg shadow">
<div className="px-6 py-4 border-b border-gray-200">
<div className="flex justify-between items-center mb-4">
<h2 className="text-xl font-bold text-gray-900">All CVEs</h2>
<div className="text-sm text-gray-500">
{cvePagination.total} total CVEs
</div>
</div>
{/* Search and Filters */}
<div className="flex gap-4 mb-4">
<form onSubmit={handleSearch} className="flex-1">
<div className="relative">
<input
type="text"
value={cveSearch}
onChange={(e) => setCveSearch(e.target.value)}
placeholder="Search CVEs by ID, description, or affected products..."
className="w-full px-4 py-2 border border-gray-300 rounded-md focus:ring-2 focus:ring-blue-500 focus:border-transparent"
/>
<button
type="submit"
className="absolute right-2 top-2 p-1 text-gray-400 hover:text-gray-600"
>
<svg className="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M21 21l-6-6m2-5a7 7 0 11-14 0 7 7 0 0114 0z" />
</svg>
</button>
</div>
</form>
<select
value={cveFilters.severity}
onChange={(e) => handleFilterChange('severity', e.target.value)}
className="px-4 py-2 border border-gray-300 rounded-md focus:ring-2 focus:ring-blue-500 focus:border-transparent"
>
<option value="">All Severities</option>
<option value="CRITICAL">Critical</option>
<option value="HIGH">High</option>
<option value="MEDIUM">Medium</option>
<option value="LOW">Low</option>
</select>
</div>
</div>
{/* CVE List */}
<div className="divide-y divide-gray-200">
{loadingCves ? (
<div className="p-6 text-center">
<div className="animate-spin rounded-full h-8 w-8 border-b-2 border-blue-600 mx-auto"></div>
<p className="mt-2 text-gray-500">Loading CVEs...</p>
</div>
) : cves.length === 0 ? (
<div className="p-6 text-center text-gray-500">
No CVEs found matching your search criteria.
</div>
) : (
cves.map((cve) => (
<div key={cve.id} className="p-6 hover:bg-gray-50">
<div className="flex items-center justify-between">
<div className="flex-1">
<h3 className="text-lg font-medium text-gray-900">{cve.cve_id}</h3>
<p className="text-sm text-gray-600 mt-1 line-clamp-2">
{cve.description}
</p>
<div className="flex items-center mt-2 space-x-4">
<span className={`inline-flex px-2 py-1 text-xs font-semibold rounded-full ${getSeverityColor(cve.severity)}`}>
{cve.severity || 'N/A'}
</span>
<span className="text-sm text-gray-500">
CVSS: {cve.cvss_score || 'N/A'}
</span>
<span className="text-sm text-gray-500">
{cve.published_date ? formatDate(cve.published_date) : 'N/A'}
</span>
{cve.poc_count > 0 && (
<span className="inline-flex items-center px-2 py-1 text-xs font-semibold rounded-full bg-orange-100 text-orange-800">
🔍 {cve.poc_count} PoC{cve.poc_count > 1 ? 's' : ''}
</span>
)}
</div>
</div>
<button
onClick={() => setSelectedCve(cve)}
className="ml-4 bg-blue-600 hover:bg-blue-700 text-white px-4 py-2 rounded-md"
>
View Details
</button>
</div>
</div>
))
)}
</div>
{/* Pagination */}
{!loadingCves && cves.length > 0 && (
<div className="px-6 py-4 border-t border-gray-200 flex items-center justify-between">
<div className="text-sm text-gray-500">
Showing {cvePagination.skip + 1} to {Math.min(cvePagination.skip + cvePagination.limit, cvePagination.total)} of {cvePagination.total} CVEs
</div>
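{/* Example: with skip=20, limit=20, total=45 the line above renders "Showing 21 to 40 of 45 CVEs". */}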
<div className="flex space-x-2">
<button
onClick={() => handlePageChange(Math.max(0, cvePagination.skip - cvePagination.limit))}
disabled={cvePagination.skip === 0}
className="px-3 py-2 text-sm font-medium text-gray-500 bg-white border border-gray-300 rounded-md hover:bg-gray-50 disabled:opacity-50 disabled:cursor-not-allowed"
>
Previous
</button>
<button
onClick={() => handlePageChange(cvePagination.skip + cvePagination.limit)}
disabled={!cvePagination.hasMore}
className="px-3 py-2 text-sm font-medium text-gray-500 bg-white border border-gray-300 rounded-md hover:bg-gray-50 disabled:opacity-50 disabled:cursor-not-allowed"
>
Next
</button>
</div>
</div>
)}
</div>
);
};
const SigmaRulesList = () => (
<div className="bg-white rounded-lg shadow">
@@ -970,6 +981,98 @@ function App() {
</div>
)}
{/* References Section */}
{cve.reference_urls && cve.reference_urls.length > 0 && (
<div>
<h3 className="text-lg font-medium text-gray-900 mb-2">References</h3>
<div className="bg-gray-50 p-4 rounded-md">
<ul className="space-y-2">
{cve.reference_urls.map((url, index) => (
<li key={index}>
<a
href={url}
target="_blank"
rel="noopener noreferrer"
className="text-sm text-blue-600 hover:text-blue-800 underline break-all"
>
{url}
</a>
</li>
))}
</ul>
</div>
</div>
)}
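{/* The PoC section below assumes cve.poc_data is keyed by source name, with each
source holding an "exploits" or "pocs" array. Illustrative shape only — the
field names are inferred from the rendering code, not a documented schema:
{
"nomi_sec": { "pocs": [{ "name": "...", "html_url": "...",
"quality_analysis": { "quality_tier": "good" }, "stargazers_count": 12 }] },
"exploitdb": { "exploits": [{ "title": "...", "description": "..." }] }
} */}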
{/* Exploit/PoC Links Section */}
{cve.poc_data && Object.keys(cve.poc_data).length > 0 && (
<div>
<h3 className="text-lg font-medium text-gray-900 mb-2">Exploits & Proof of Concepts</h3>
<div className="space-y-4">
{Object.entries(cve.poc_data).map(([source, data]) => (
<div key={source} className="border border-gray-200 rounded-md p-4">
<div className="flex items-center justify-between mb-3">
<h4 className="font-medium text-gray-900 capitalize">{source}</h4>
<span className="inline-flex items-center px-2 py-1 text-xs font-semibold rounded-full bg-blue-100 text-blue-800">
{data.exploits?.length || data.pocs?.length || 0} items
</span>
</div>
{(data.exploits || data.pocs || []).slice(0, 5).map((item, index) => (
<div key={index} className="mb-3 p-3 bg-gray-50 rounded">
<div className="flex items-start justify-between">
<div className="flex-1">
<h5 className="text-sm font-medium text-gray-900">
{item.title || item.name || item.description || 'Untitled'}
</h5>
{item.description && (
<p className="text-xs text-gray-600 mt-1 line-clamp-2">
{item.description}
</p>
)}
<div className="flex items-center mt-2 space-x-3">
{item.html_url && (
<a
href={item.html_url}
target="_blank"
rel="noopener noreferrer"
className="text-xs text-blue-600 hover:text-blue-800 underline"
>
View Source
</a>
)}
{item.quality_analysis && (
<span className={`inline-flex px-2 py-1 text-xs font-semibold rounded-full ${
item.quality_analysis.quality_tier === 'excellent' ? 'bg-green-100 text-green-800' :
item.quality_analysis.quality_tier === 'good' ? 'bg-blue-100 text-blue-800' :
item.quality_analysis.quality_tier === 'fair' ? 'bg-yellow-100 text-yellow-800' :
'bg-gray-100 text-gray-800'
}`}>
{item.quality_analysis.quality_tier}
</span>
)}
{item.stargazers_count > 0 && (
<span className="text-xs text-gray-500">
⭐ {item.stargazers_count}
</span>
)}
</div>
</div>
</div>
</div>
))}
{(data.exploits || data.pocs || []).length > 5 && (
<p className="text-xs text-gray-500 mt-2">
... and {(data.exploits || data.pocs || []).length - 5} more items
</p>
)}
</div>
))}
</div>
</div>
)}
<div>
<h3 className="text-lg font-medium text-gray-900 mb-2">Generated SIGMA Rules ({cveRules.length})</h3>
{cveRules.length > 0 ? (
@@ -1044,177 +1147,6 @@ function App() {
);
};
// Note: SchedulerManager component removed - job scheduling now handled by Celery Beat
// Task monitoring available via Flower dashboard at http://localhost:5555
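// A possible future hook for surfacing Celery task state in this UI, assuming a
// hypothetical /api/task-status endpoint proxying the Celery result backend
// (purely illustrative — today, monitoring happens directly in Flower):
//
// const fetchTaskStatus = async (taskId) => {
//   const response = await axios.get(`${API_BASE}/api/task-status/${taskId}`);
//   return response.data; // e.g. { state: 'STARTED' | 'SUCCESS' | 'FAILURE', info: {...} }
// };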
@@ -1270,16 +1202,6 @@ function App() {
>
SIGMA Rules
</button>
</div>
</div>
</div>
@@ -1291,7 +1213,6 @@ function App() {
{activeTab === 'dashboard' && <Dashboard />}
{activeTab === 'cves' && <CVEList />}
{activeTab === 'rules' && <SigmaRulesList />}
</div>
</main>