more updates for bulk
Parent: 5a9ae34996
Commit: 790e4bd91f
11 changed files with 2500 additions and 22 deletions

README.md (35 lines changed)
@@ -1,20 +1,27 @@
|
|||
# CVE-SIGMA Auto Generator
|
||||
# CVE-SIGMA Auto Generator (Enhanced)
|
||||
|
||||
An automated platform that fetches CVE data and automatically generates SIGMA rules for threat detection.
|
||||
An advanced automated platform that processes comprehensive CVE data and generates enhanced SIGMA rules for threat detection using curated exploit intelligence.
|
||||
|
||||
## Features
|
||||
## 🚀 Enhanced Features
|
||||
|
||||
- **Automated CVE Fetching**: Regularly polls the NVD (National Vulnerability Database) for CVEs published since July 2025
|
||||
- **GitHub Exploit Analysis**: Automatically searches GitHub for exploit code related to each CVE
|
||||
- **Intelligent SIGMA Rule Generation**: Creates SIGMA rules based on CVE characteristics AND actual exploit code
|
||||
- **Exploit-Based Detection**: Enhanced rules using real indicators extracted from GitHub exploits
|
||||
- **Modern Web Interface**: React-based UI for browsing CVEs and managing SIGMA rules
|
||||
- **Real-time Updates**: Background tasks keep CVE data current with the latest 2025 vulnerabilities
|
||||
- **Rule Templates**: Configurable templates for different types of vulnerabilities
|
||||
- **MITRE ATT&CK Mapping**: Automatic mapping to MITRE ATT&CK techniques
|
||||
- **API Testing**: Built-in NVD API connectivity testing
|
||||
- **Enhanced Error Handling**: Robust fallback mechanisms and detailed logging
|
||||
- **Docker Compose**: Easy deployment and orchestration
|
||||
### Data Processing
|
||||
- **Bulk NVD Processing**: Downloads and processes complete NVD JSON datasets (2002-2025); see the API example after this list
|
||||
- **nomi-sec PoC Integration**: Uses curated PoC data from github.com/nomi-sec/PoC-in-GitHub
|
||||
- **Incremental Updates**: Efficient updates using NVD modified/recent feeds
|
||||
- **Quality Assessment**: Advanced PoC quality scoring with star count, recency, and relevance analysis
|
||||
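The bulk pipeline above is driven through the new FastAPI endpoints added in this commit (`POST /api/bulk-seed`, `GET /api/bulk-status`). A minimal client sketch, assuming the backend is reachable on `localhost:8000` (the port used by `uvicorn` in `backend/main.py`) and that `aiohttp` is installed on the client side:

```python
import asyncio
import aiohttp

API = "http://localhost:8000"  # assumption: local deployment on the default uvicorn port

async def kick_off_bulk_seed():
    async with aiohttp.ClientSession() as session:
        # Start a bulk seed limited to recent years (query params mirror the endpoint signature)
        async with session.post(f"{API}/api/bulk-seed",
                                params={"start_year": "2020", "skip_nomi_sec": "false"}) as resp:
            print(await resp.json())

        # Check overall seeding progress and PoC coverage
        async with session.get(f"{API}/api/bulk-status") as resp:
            print(await resp.json())

asyncio.run(kick_off_bulk_seed())
```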
|
||||
### Intelligence Generation
|
||||
- **Enhanced SIGMA Rules**: Creates rules using real exploit indicators from curated PoCs
|
||||
- **Quality Tiers**: Excellent, Good, Fair, Poor, Very Poor classification system
|
||||
- **Smart Template Selection**: AI-driven template matching based on PoC characteristics
|
||||
- **Advanced Indicator Extraction**: Processes, files, network, registry, and command patterns
|
||||
- **MITRE ATT&CK Mapping**: Automatic technique identification based on exploit analysis
|
||||
|
||||
### User Experience
|
||||
- **Modern Web Interface**: React-based UI with enhanced bulk processing controls
|
||||
- **Real-time Monitoring**: Live job tracking and progress monitoring
|
||||
- **Comprehensive Statistics**: PoC coverage, quality metrics, and processing status (see the example below)
|
||||
- **Bulk Operations Dashboard**: Centralized control for all data processing operations
|
||||
|
||||
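As referenced in the statistics item above, coverage metrics are exposed via `GET /api/stats` and `GET /api/poc-stats`. A small sketch under the same assumptions as the previous example:

```python
import asyncio
import aiohttp

API = "http://localhost:8000"  # assumption: local deployment

async def show_coverage():
    async with aiohttp.ClientSession() as session:
        async with session.get(f"{API}/api/stats") as resp:
            stats = await resp.json()
        # poc_coverage and nomi_sec_coverage are percentages computed by the backend
        print(f"CVEs: {stats['total_cves']}, PoC coverage: {stats['poc_coverage']:.1f}%")

        async with session.get(f"{API}/api/poc-stats") as resp:
            print(await resp.json())

asyncio.run(show_coverage())
```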
## Architecture
|
||||
|
||||
|
|
backend/bulk_seeder.py (new file, 340 lines)
@@ -0,0 +1,340 @@
|
|||
"""
|
||||
Bulk Data Seeding Coordinator
|
||||
Orchestrates the complete bulk seeding process using NVD JSON feeds and nomi-sec PoC data
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Optional
|
||||
from sqlalchemy.orm import Session
|
||||
from nvd_bulk_processor import NVDBulkProcessor
|
||||
from nomi_sec_client import NomiSecClient
|
||||
|
||||
# Configure logging
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class BulkSeeder:
|
||||
"""Coordinates bulk seeding operations"""
|
||||
|
||||
def __init__(self, db_session: Session):
|
||||
self.db_session = db_session
|
||||
self.nvd_processor = NVDBulkProcessor(db_session)
|
||||
self.nomi_sec_client = NomiSecClient(db_session)
|
||||
|
||||
async def full_bulk_seed(self, start_year: int = 2002,
|
||||
end_year: Optional[int] = None,
|
||||
skip_nvd: bool = False,
|
||||
skip_nomi_sec: bool = False) -> dict:
|
||||
"""
|
||||
Perform complete bulk seeding operation
|
||||
|
||||
Args:
|
||||
start_year: Starting year for NVD data (default: 2002)
|
||||
end_year: Ending year for NVD data (default: current year)
|
||||
skip_nvd: Skip NVD bulk processing (default: False)
|
||||
skip_nomi_sec: Skip nomi-sec PoC synchronization (default: False)
|
||||
|
||||
Returns:
|
||||
Dictionary containing operation results
|
||||
"""
|
||||
if end_year is None:
|
||||
end_year = datetime.now().year
|
||||
|
||||
results = {
|
||||
'start_time': datetime.utcnow(),
|
||||
'nvd_results': None,
|
||||
'nomi_sec_results': None,
|
||||
'total_time': None,
|
||||
'status': 'running'
|
||||
}
|
||||
|
||||
logger.info(f"Starting full bulk seed operation ({start_year}-{end_year})")
|
||||
|
||||
try:
|
||||
# Phase 1: NVD Bulk Processing
|
||||
if not skip_nvd:
|
||||
logger.info("Phase 1: Starting NVD bulk processing...")
|
||||
nvd_results = await self.nvd_processor.bulk_seed_database(
|
||||
start_year=start_year,
|
||||
end_year=end_year
|
||||
)
|
||||
results['nvd_results'] = nvd_results
|
||||
logger.info(f"Phase 1 complete: {nvd_results['total_processed']} CVEs processed")
|
||||
else:
|
||||
logger.info("Phase 1: Skipping NVD bulk processing")
|
||||
|
||||
# Phase 2: nomi-sec PoC Synchronization
|
||||
if not skip_nomi_sec:
|
||||
logger.info("Phase 2: Starting nomi-sec PoC synchronization...")
|
||||
nomi_sec_results = await self.nomi_sec_client.bulk_sync_all_cves(
|
||||
batch_size=50 # Smaller batches for API stability
|
||||
)
|
||||
results['nomi_sec_results'] = nomi_sec_results
|
||||
logger.info(f"Phase 2 complete: {nomi_sec_results['total_pocs_found']} PoCs found")
|
||||
else:
|
||||
logger.info("Phase 2: Skipping nomi-sec PoC synchronization")
|
||||
|
||||
# Phase 3: Generate Enhanced SIGMA Rules
|
||||
logger.info("Phase 3: Generating enhanced SIGMA rules...")
|
||||
sigma_results = await self.generate_enhanced_sigma_rules()
|
||||
results['sigma_results'] = sigma_results
|
||||
logger.info(f"Phase 3 complete: {sigma_results['rules_generated']} rules generated")
|
||||
|
||||
results['status'] = 'completed'
|
||||
results['end_time'] = datetime.utcnow()
|
||||
results['total_time'] = (results['end_time'] - results['start_time']).total_seconds()
|
||||
|
||||
logger.info(f"Full bulk seed operation completed in {results['total_time']:.2f} seconds")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Bulk seed operation failed: {e}")
|
||||
results['status'] = 'failed'
|
||||
results['error'] = str(e)
|
||||
results['end_time'] = datetime.utcnow()
|
||||
|
||||
return results
|
||||
|
||||
async def incremental_update(self) -> dict:
|
||||
"""
|
||||
Perform incremental update operation
|
||||
|
||||
Returns:
|
||||
Dictionary containing update results
|
||||
"""
|
||||
results = {
|
||||
'start_time': datetime.utcnow(),
|
||||
'nvd_update': None,
|
||||
'nomi_sec_update': None,
|
||||
'status': 'running'
|
||||
}
|
||||
|
||||
logger.info("Starting incremental update...")
|
||||
|
||||
try:
|
||||
# Update NVD data using modified/recent feeds
|
||||
logger.info("Updating NVD data...")
|
||||
nvd_update = await self.nvd_processor.incremental_update()
|
||||
results['nvd_update'] = nvd_update
|
||||
|
||||
# Update PoC data for newly added/modified CVEs
|
||||
if nvd_update['total_processed'] > 0:
|
||||
logger.info("Updating PoC data for modified CVEs...")
|
||||
# Get recently modified CVEs and sync their PoCs
|
||||
recent_cves = await self._get_recently_modified_cves()
|
||||
nomi_sec_update = await self._sync_specific_cves(recent_cves)
|
||||
results['nomi_sec_update'] = nomi_sec_update
|
||||
|
||||
results['status'] = 'completed'
|
||||
results['end_time'] = datetime.utcnow()
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Incremental update failed: {e}")
|
||||
results['status'] = 'failed'
|
||||
results['error'] = str(e)
|
||||
results['end_time'] = datetime.utcnow()
|
||||
|
||||
return results
|
||||
|
||||
async def generate_enhanced_sigma_rules(self) -> dict:
|
||||
"""Generate enhanced SIGMA rules using nomi-sec PoC data"""
|
||||
from main import CVE, SigmaRule
|
||||
|
||||
# Import the enhanced rule generator
|
||||
from enhanced_sigma_generator import EnhancedSigmaGenerator
|
||||
|
||||
generator = EnhancedSigmaGenerator(self.db_session)
|
||||
|
||||
# Get all CVEs that have PoC data but no enhanced rules
|
||||
cves_with_pocs = self.db_session.query(CVE).filter(
|
||||
CVE.poc_count > 0
|
||||
).all()
|
||||
|
||||
rules_generated = 0
|
||||
rules_updated = 0
|
||||
|
||||
for cve in cves_with_pocs:
|
||||
try:
|
||||
# Check if we need to generate/update the rule
|
||||
existing_rule = self.db_session.query(SigmaRule).filter(
|
||||
SigmaRule.cve_id == cve.cve_id
|
||||
).first()
|
||||
|
||||
if existing_rule and existing_rule.poc_source == 'nomi_sec':
|
||||
# Rule already exists and is up to date
|
||||
continue
|
||||
|
||||
# Generate enhanced rule
|
||||
rule_result = await generator.generate_enhanced_rule(cve)
|
||||
|
||||
if rule_result['success']:
|
||||
if existing_rule:
|
||||
rules_updated += 1
|
||||
else:
|
||||
rules_generated += 1
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error generating rule for {cve.cve_id}: {e}")
|
||||
continue
|
||||
|
||||
self.db_session.commit()
|
||||
|
||||
return {
|
||||
'rules_generated': rules_generated,
|
||||
'rules_updated': rules_updated,
|
||||
'total_processed': len(cves_with_pocs)
|
||||
}
|
||||
|
||||
async def _get_recently_modified_cves(self, hours: int = 24) -> list:
|
||||
"""Get CVEs modified within the last N hours"""
|
||||
from main import CVE
|
||||
|
||||
cutoff_time = datetime.utcnow() - timedelta(hours=hours)
|
||||
|
||||
recent_cves = self.db_session.query(CVE).filter(
|
||||
CVE.updated_at >= cutoff_time
|
||||
).all()
|
||||
|
||||
return [cve.cve_id for cve in recent_cves]
|
||||
|
||||
async def _sync_specific_cves(self, cve_ids: list) -> dict:
|
||||
"""Sync PoC data for specific CVEs"""
|
||||
total_processed = 0
|
||||
total_pocs_found = 0
|
||||
|
||||
for cve_id in cve_ids:
|
||||
try:
|
||||
result = await self.nomi_sec_client.sync_cve_pocs(cve_id)
|
||||
total_processed += 1
|
||||
total_pocs_found += result.get('pocs_found', 0)
|
||||
|
||||
# Small delay to avoid overwhelming the API
|
||||
await asyncio.sleep(0.5)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error syncing PoCs for {cve_id}: {e}")
|
||||
continue
|
||||
|
||||
return {
|
||||
'total_processed': total_processed,
|
||||
'total_pocs_found': total_pocs_found
|
||||
}
|
||||
|
||||
async def get_seeding_status(self) -> dict:
|
||||
"""Get current seeding status and statistics"""
|
||||
from main import CVE, SigmaRule, BulkProcessingJob
|
||||
|
||||
# Get database statistics
|
||||
total_cves = self.db_session.query(CVE).count()
|
||||
bulk_processed_cves = self.db_session.query(CVE).filter(
|
||||
CVE.bulk_processed == True
|
||||
).count()
|
||||
|
||||
cves_with_pocs = self.db_session.query(CVE).filter(
|
||||
CVE.poc_count > 0
|
||||
).count()
|
||||
|
||||
total_rules = self.db_session.query(SigmaRule).count()
|
||||
nomi_sec_rules = self.db_session.query(SigmaRule).filter(
|
||||
SigmaRule.poc_source == 'nomi_sec'
|
||||
).count()
|
||||
|
||||
# Get recent job status
|
||||
recent_jobs = self.db_session.query(BulkProcessingJob).order_by(
|
||||
BulkProcessingJob.created_at.desc()
|
||||
).limit(5).all()
|
||||
|
||||
job_status = []
|
||||
for job in recent_jobs:
|
||||
job_status.append({
|
||||
'id': str(job.id),
|
||||
'job_type': job.job_type,
|
||||
'status': job.status,
|
||||
'created_at': job.created_at,
|
||||
'completed_at': job.completed_at,
|
||||
'processed_items': job.processed_items,
|
||||
'total_items': job.total_items,
|
||||
'failed_items': job.failed_items
|
||||
})
|
||||
|
||||
return {
|
||||
'database_stats': {
|
||||
'total_cves': total_cves,
|
||||
'bulk_processed_cves': bulk_processed_cves,
|
||||
'cves_with_pocs': cves_with_pocs,
|
||||
'total_rules': total_rules,
|
||||
'nomi_sec_rules': nomi_sec_rules,
|
||||
'poc_coverage': (cves_with_pocs / total_cves * 100) if total_cves > 0 else 0,
|
||||
'nomi_sec_coverage': (nomi_sec_rules / total_rules * 100) if total_rules > 0 else 0
|
||||
},
|
||||
'recent_jobs': job_status,
|
||||
'nvd_data_status': await self._get_nvd_data_status(),
|
||||
'nomi_sec_status': await self.nomi_sec_client.get_sync_status()
|
||||
}
|
||||
|
||||
async def _get_nvd_data_status(self) -> dict:
|
||||
"""Get NVD data status"""
|
||||
from main import CVE
|
||||
|
||||
# Get year distribution
|
||||
year_counts = {}
|
||||
cves = self.db_session.query(CVE).all()
|
||||
|
||||
for cve in cves:
|
||||
if cve.published_date:
|
||||
year = cve.published_date.year
|
||||
year_counts[year] = year_counts.get(year, 0) + 1
|
||||
|
||||
# Get source distribution
|
||||
source_counts = {}
|
||||
for cve in cves:
|
||||
source = cve.data_source or 'unknown'
|
||||
source_counts[source] = source_counts.get(source, 0) + 1
|
||||
|
||||
return {
|
||||
'year_distribution': year_counts,
|
||||
'source_distribution': source_counts,
|
||||
'total_cves': len(cves),
|
||||
'date_range': {
|
||||
'earliest': min(cve.published_date for cve in cves if cve.published_date),
|
||||
'latest': max(cve.published_date for cve in cves if cve.published_date)
|
||||
} if cves else None
|
||||
}
|
||||
|
||||
|
||||
# Standalone script functionality
|
||||
async def main():
|
||||
"""Main function for standalone execution"""
|
||||
from main import SessionLocal, engine, Base
|
||||
|
||||
# Create tables
|
||||
Base.metadata.create_all(bind=engine)
|
||||
|
||||
# Create database session
|
||||
db_session = SessionLocal()
|
||||
|
||||
try:
|
||||
# Create bulk seeder
|
||||
seeder = BulkSeeder(db_session)
|
||||
|
||||
# Get current status
|
||||
status = await seeder.get_seeding_status()
|
||||
print(f"Current Status: {status['database_stats']['total_cves']} CVEs in database")
|
||||
|
||||
# Perform full bulk seed if database is empty
|
||||
if status['database_stats']['total_cves'] == 0:
|
||||
print("Database is empty. Starting full bulk seed...")
|
||||
results = await seeder.full_bulk_seed(start_year=2020) # Start from 2020 for faster testing
|
||||
print(f"Bulk seed completed: {results}")
|
||||
else:
|
||||
print("Database contains data. Running incremental update...")
|
||||
results = await seeder.incremental_update()
|
||||
print(f"Incremental update completed: {results}")
|
||||
|
||||
finally:
|
||||
db_session.close()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
backend/enhanced_sigma_generator.py (new file, 438 lines)
@@ -0,0 +1,438 @@
|
|||
"""
|
||||
Enhanced SIGMA Rule Generator
|
||||
Generates improved SIGMA rules using nomi-sec PoC data and traditional indicators
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
from sqlalchemy.orm import Session
|
||||
import re
|
||||
|
||||
# Configure logging
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class EnhancedSigmaGenerator:
|
||||
"""Enhanced SIGMA rule generator using nomi-sec PoC data"""
|
||||
|
||||
def __init__(self, db_session: Session):
|
||||
self.db_session = db_session
|
||||
|
||||
async def generate_enhanced_rule(self, cve) -> dict:
|
||||
"""Generate enhanced SIGMA rule for a CVE using PoC data"""
|
||||
from main import SigmaRule, RuleTemplate
|
||||
|
||||
try:
|
||||
# Get PoC data
|
||||
poc_data = cve.poc_data or []
|
||||
|
||||
# Find the best quality PoC
|
||||
best_poc = None
|
||||
if poc_data:
|
||||
best_poc = max(poc_data, key=lambda x: x.get('quality_analysis', {}).get('quality_score', 0))
|
||||
|
||||
# Select appropriate template based on PoC analysis
|
||||
template = await self._select_template(cve, best_poc)
|
||||
|
||||
if not template:
|
||||
logger.warning(f"No suitable template found for {cve.cve_id}")
|
||||
return {'success': False, 'error': 'No suitable template'}
|
||||
|
||||
# Generate rule content
|
||||
rule_content = await self._generate_rule_content(cve, template, poc_data)
|
||||
|
||||
# Calculate confidence level
|
||||
confidence_level = self._calculate_confidence_level(cve, poc_data)
|
||||
|
||||
# Store or update SIGMA rule
|
||||
existing_rule = self.db_session.query(SigmaRule).filter(
|
||||
SigmaRule.cve_id == cve.cve_id
|
||||
).first()
|
||||
|
||||
rule_data = {
|
||||
'cve_id': cve.cve_id,
|
||||
'rule_name': f"{cve.cve_id} Enhanced Detection",
|
||||
'rule_content': rule_content,
|
||||
'detection_type': template.template_name,
|
||||
'log_source': self._extract_log_source(template.template_name),
|
||||
'confidence_level': confidence_level,
|
||||
'auto_generated': True,
|
||||
'exploit_based': len(poc_data) > 0,
|
||||
'poc_source': 'nomi_sec',
|
||||
'poc_quality_score': best_poc.get('quality_analysis', {}).get('quality_score', 0) if best_poc else 0,
|
||||
'nomi_sec_data': {
|
||||
'total_pocs': len(poc_data),
|
||||
'best_poc_quality': best_poc.get('quality_analysis', {}).get('quality_score', 0) if best_poc else 0,
|
||||
'total_stars': sum(p.get('stargazers_count', 0) for p in poc_data),
|
||||
'avg_stars': sum(p.get('stargazers_count', 0) for p in poc_data) / len(poc_data) if poc_data else 0
|
||||
},
|
||||
'github_repos': [p.get('html_url', '') for p in poc_data],
|
||||
'exploit_indicators': json.dumps(self._combine_exploit_indicators(poc_data)),
|
||||
'updated_at': datetime.utcnow()
|
||||
}
|
||||
|
||||
if existing_rule:
|
||||
# Update existing rule
|
||||
for key, value in rule_data.items():
|
||||
setattr(existing_rule, key, value)
|
||||
logger.info(f"Updated SIGMA rule for {cve.cve_id}")
|
||||
else:
|
||||
# Create new rule
|
||||
new_rule = SigmaRule(**rule_data)
|
||||
self.db_session.add(new_rule)
|
||||
logger.info(f"Created new SIGMA rule for {cve.cve_id}")
|
||||
|
||||
self.db_session.commit()
|
||||
|
||||
return {
|
||||
'success': True,
|
||||
'cve_id': cve.cve_id,
|
||||
'template': template.template_name,
|
||||
'confidence_level': confidence_level,
|
||||
'poc_count': len(poc_data),
|
||||
'quality_score': best_poc.get('quality_analysis', {}).get('quality_score', 0) if best_poc else 0
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error generating enhanced rule for {cve.cve_id}: {e}")
|
||||
return {'success': False, 'error': str(e)}
|
||||
|
||||
async def _select_template(self, cve, best_poc: Optional[dict]) -> Optional[object]:
|
||||
"""Select the most appropriate template based on CVE and PoC analysis"""
|
||||
from main import RuleTemplate
|
||||
|
||||
templates = self.db_session.query(RuleTemplate).all()
|
||||
|
||||
if not templates:
|
||||
logger.warning("No rule templates found in database")
|
||||
return None
|
||||
|
||||
# Score templates based on relevance
|
||||
template_scores = {}
|
||||
|
||||
for template in templates:
|
||||
score = 0
|
||||
|
||||
# Score based on PoC indicators (highest priority)
|
||||
if best_poc:
|
||||
indicators = best_poc.get('exploit_indicators', {})
|
||||
score += self._score_template_poc_match(template, indicators)
|
||||
|
||||
# Score based on CVE description
|
||||
score += self._score_template_cve_match(template, cve)
|
||||
|
||||
# Score based on affected products
|
||||
if cve.affected_products:
|
||||
score += self._score_template_product_match(template, cve.affected_products)
|
||||
|
||||
template_scores[template] = score
|
||||
|
||||
# Return template with highest score
|
||||
if template_scores:
|
||||
best_template = max(template_scores, key=template_scores.get)
|
||||
logger.info(f"Selected template {best_template.template_name} with score {template_scores[best_template]}")
|
||||
return best_template
|
||||
|
||||
return None
|
||||
|
||||
def _score_template_poc_match(self, template: object, indicators: dict) -> int:
|
||||
"""Score template based on PoC indicators"""
|
||||
score = 0
|
||||
template_name = template.template_name.lower()
|
||||
|
||||
# Process-based templates
|
||||
if 'process' in template_name or 'execution' in template_name:
|
||||
if indicators.get('processes') or indicators.get('commands'):
|
||||
score += 30
|
||||
|
||||
# Network-based templates
|
||||
if 'network' in template_name or 'connection' in template_name:
|
||||
if indicators.get('network') or indicators.get('urls'):
|
||||
score += 30
|
||||
|
||||
# File-based templates
|
||||
if 'file' in template_name or 'modification' in template_name:
|
||||
if indicators.get('files'):
|
||||
score += 30
|
||||
|
||||
# PowerShell templates
|
||||
if 'powershell' in template_name:
|
||||
processes = indicators.get('processes', [])
|
||||
if any('powershell' in p.lower() for p in processes):
|
||||
score += 35
|
||||
|
||||
return score
|
||||
|
||||
def _score_template_cve_match(self, template: object, cve) -> int:
|
||||
"""Score template based on CVE description"""
|
||||
score = 0
|
||||
template_name = template.template_name.lower()
|
||||
description = (cve.description or '').lower()
|
||||
|
||||
# Keyword matching
|
||||
if 'remote' in description and 'execution' in description:
|
||||
if 'process' in template_name or 'execution' in template_name:
|
||||
score += 20
|
||||
|
||||
if 'powershell' in description:
|
||||
if 'powershell' in template_name:
|
||||
score += 25
|
||||
|
||||
if 'network' in description or 'http' in description:
|
||||
if 'network' in template_name:
|
||||
score += 20
|
||||
|
||||
if 'file' in description or 'upload' in description:
|
||||
if 'file' in template_name:
|
||||
score += 20
|
||||
|
||||
return score
|
||||
|
||||
def _score_template_product_match(self, template: object, affected_products: list) -> int:
|
||||
"""Score template based on affected products"""
|
||||
score = 0
|
||||
|
||||
if not template.applicable_product_patterns:
|
||||
return 0
|
||||
|
||||
for pattern in template.applicable_product_patterns:
|
||||
pattern_lower = pattern.lower()
|
||||
for product in affected_products:
|
||||
product_lower = product.lower()
|
||||
if pattern_lower in product_lower:
|
||||
score += 10
|
||||
break
|
||||
|
||||
return score
|
||||
|
||||
async def _generate_rule_content(self, cve, template: object, poc_data: list) -> str:
|
||||
"""Generate the actual SIGMA rule content"""
|
||||
# Combine all exploit indicators
|
||||
combined_indicators = self._combine_exploit_indicators(poc_data)
|
||||
|
||||
# Get base template content
|
||||
rule_content = template.template_content
|
||||
|
||||
# Replace template placeholders
|
||||
replacements = {
|
||||
'{{CVE_ID}}': cve.cve_id,
|
||||
'{{TITLE}}': f"{cve.cve_id} Enhanced Detection",
|
||||
'{{DESCRIPTION}}': self._generate_description(cve, poc_data),
|
||||
'{{LEVEL}}': self._calculate_confidence_level(cve, poc_data).lower(),
|
||||
'{{REFERENCES}}': self._generate_references(cve, poc_data),
|
||||
'{{TAGS}}': self._generate_tags(cve, poc_data),
|
||||
'{{PROCESSES}}': self._format_indicators(combined_indicators.get('processes', [])),
|
||||
'{{FILES}}': self._format_indicators(combined_indicators.get('files', [])),
|
||||
'{{COMMANDS}}': self._format_indicators(combined_indicators.get('commands', [])),
|
||||
'{{NETWORK}}': self._format_indicators(combined_indicators.get('network', [])),
|
||||
'{{URLS}}': self._format_indicators(combined_indicators.get('urls', [])),
|
||||
'{{REGISTRY}}': self._format_indicators(combined_indicators.get('registry', []))
|
||||
}
|
||||
|
||||
# Apply replacements
|
||||
for placeholder, value in replacements.items():
|
||||
rule_content = rule_content.replace(placeholder, value)
|
||||
|
||||
# Add enhanced detection based on PoC quality
|
||||
if poc_data:
|
||||
rule_content = self._enhance_detection_logic(rule_content, combined_indicators, poc_data)
|
||||
|
||||
return rule_content
|
||||
|
||||
def _combine_exploit_indicators(self, poc_data: list) -> dict:
|
||||
"""Combine exploit indicators from all PoCs"""
|
||||
combined = {
|
||||
'processes': [],
|
||||
'files': [],
|
||||
'commands': [],
|
||||
'network': [],
|
||||
'urls': [],
|
||||
'registry': []
|
||||
}
|
||||
|
||||
for poc in poc_data:
|
||||
indicators = poc.get('exploit_indicators', {})
|
||||
for key in combined.keys():
|
||||
if key in indicators:
|
||||
combined[key].extend(indicators[key])
|
||||
|
||||
# Deduplicate and filter
|
||||
for key in combined.keys():
|
||||
combined[key] = list(set(combined[key]))
|
||||
# Remove empty and invalid entries
|
||||
combined[key] = [item for item in combined[key] if item and len(item) > 2]
|
||||
|
||||
return combined
|
||||
|
||||
def _generate_description(self, cve, poc_data: list) -> str:
|
||||
"""Generate enhanced rule description"""
|
||||
base_desc = f"Detection for {cve.cve_id}"
|
||||
|
||||
if cve.description:
|
||||
# Extract key terms from CVE description
|
||||
desc_words = cve.description.lower().split()
|
||||
key_terms = [word for word in desc_words if word in [
|
||||
'remote', 'execution', 'injection', 'bypass', 'privilege', 'escalation',
|
||||
'overflow', 'disclosure', 'traversal', 'deserialization'
|
||||
]]
|
||||
|
||||
if key_terms:
|
||||
base_desc += f" involving {', '.join(set(key_terms[:3]))}"
|
||||
|
||||
if poc_data:
|
||||
total_pocs = len(poc_data)
|
||||
total_stars = sum(p.get('stargazers_count', 0) for p in poc_data)
|
||||
base_desc += f" [Enhanced with {total_pocs} PoC(s), {total_stars} stars]"
|
||||
|
||||
return base_desc
|
||||
|
||||
def _generate_references(self, cve, poc_data: list) -> str:
|
||||
"""Generate references section"""
|
||||
refs = []
|
||||
|
||||
# Add CVE reference
|
||||
refs.append(f"https://nvd.nist.gov/vuln/detail/{cve.cve_id}")
|
||||
|
||||
# Add top PoC references (max 3)
|
||||
if poc_data:
|
||||
sorted_pocs = sorted(poc_data, key=lambda x: x.get('stargazers_count', 0), reverse=True)
|
||||
for poc in sorted_pocs[:3]:
|
||||
if poc.get('html_url'):
|
||||
refs.append(poc['html_url'])
|
||||
|
||||
return '\\n'.join(f" - {ref}" for ref in refs)
|
||||
|
||||
def _generate_tags(self, cve, poc_data: list) -> str:
|
||||
"""Generate MITRE ATT&CK tags and other tags"""
|
||||
tags = []
|
||||
|
||||
# CVE tag
|
||||
tags.append(cve.cve_id.lower())
|
||||
|
||||
# Add technique tags based on indicators
|
||||
combined_indicators = self._combine_exploit_indicators(poc_data)
|
||||
|
||||
if combined_indicators.get('processes'):
|
||||
tags.append('attack.t1059') # Command and Scripting Interpreter
|
||||
|
||||
if combined_indicators.get('network'):
|
||||
tags.append('attack.t1071') # Application Layer Protocol
|
||||
|
||||
if combined_indicators.get('files'):
|
||||
tags.append('attack.t1105') # Ingress Tool Transfer
|
||||
|
||||
if any('powershell' in p.lower() for p in combined_indicators.get('processes', [])):
|
||||
tags.append('attack.t1059.001') # PowerShell
|
||||
|
||||
# Add PoC quality tags
|
||||
if poc_data:
|
||||
tags.append('exploit.poc')
|
||||
best_poc = max(poc_data, key=lambda x: x.get('quality_analysis', {}).get('quality_score', 0))
|
||||
quality_tier = best_poc.get('quality_analysis', {}).get('quality_tier', 'poor')
|
||||
tags.append(f'poc.quality.{quality_tier}')
|
||||
|
||||
return '\\n'.join(f" - {tag}" for tag in tags)
|
||||
|
||||
def _format_indicators(self, indicators: list) -> str:
|
||||
"""Format indicators for SIGMA rule"""
|
||||
if not indicators:
|
||||
return ''
|
||||
|
||||
# Limit indicators to avoid overly complex rules
|
||||
limited_indicators = indicators[:10]
|
||||
|
||||
formatted = []
|
||||
for indicator in limited_indicators:
|
||||
# Escape special characters for SIGMA
|
||||
escaped = indicator.replace('\\\\', '\\\\\\\\').replace('*', '\\\\*').replace('?', '\\\\?')
|
||||
formatted.append(f' - "{escaped}"')
|
||||
|
||||
return '\\n'.join(formatted)
|
||||
|
||||
def _enhance_detection_logic(self, rule_content: str, indicators: dict, poc_data: list) -> str:
|
||||
"""Enhance detection logic based on PoC quality and indicators"""
|
||||
|
||||
# If we have high-quality PoCs, add additional detection conditions
|
||||
best_poc = max(poc_data, key=lambda x: x.get('quality_analysis', {}).get('quality_score', 0))
|
||||
quality_score = best_poc.get('quality_analysis', {}).get('quality_score', 0)
|
||||
|
||||
if quality_score > 60: # High quality PoC
|
||||
# Add more specific detection conditions
|
||||
if indicators.get('processes') and indicators.get('commands'):
|
||||
additional_condition = """
|
||||
process_and_command:
|
||||
Image|contains: {{PROCESSES}}
|
||||
CommandLine|contains: {{COMMANDS}}"""
|
||||
|
||||
# Insert before the condition line
|
||||
rule_content = rule_content.replace(
|
||||
'condition: selection',
|
||||
additional_condition + '\\n condition: selection or process_and_command'
|
||||
)
|
||||
|
||||
return rule_content
|
||||
|
||||
def _calculate_confidence_level(self, cve, poc_data: list) -> str:
|
||||
"""Calculate confidence level based on CVE and PoC data"""
|
||||
score = 0
|
||||
|
||||
# CVSS score factor
|
||||
if cve.cvss_score:
|
||||
if cve.cvss_score >= 9.0:
|
||||
score += 40
|
||||
elif cve.cvss_score >= 7.0:
|
||||
score += 30
|
||||
elif cve.cvss_score >= 5.0:
|
||||
score += 20
|
||||
else:
|
||||
score += 10
|
||||
|
||||
# PoC quality factor
|
||||
if poc_data:
|
||||
total_stars = sum(p.get('stargazers_count', 0) for p in poc_data)
|
||||
poc_count = len(poc_data)
|
||||
|
||||
score += min(total_stars, 30) # Max 30 points for stars
|
||||
score += min(poc_count * 5, 20) # Max 20 points for PoC count
|
||||
|
||||
# Quality tier bonus
|
||||
best_poc = max(poc_data, key=lambda x: x.get('quality_analysis', {}).get('quality_score', 0))
|
||||
quality_tier = best_poc.get('quality_analysis', {}).get('quality_tier', 'poor')
|
||||
|
||||
tier_bonus = {
|
||||
'excellent': 20,
|
||||
'good': 15,
|
||||
'fair': 10,
|
||||
'poor': 5,
|
||||
'very_poor': 0
|
||||
}
|
||||
score += tier_bonus.get(quality_tier, 0)
|
||||
|
||||
# Determine confidence level
|
||||
if score >= 80:
|
||||
return 'HIGH'
|
||||
elif score >= 60:
|
||||
return 'MEDIUM'
|
||||
elif score >= 40:
|
||||
return 'LOW'
|
||||
else:
|
||||
return 'INFORMATIONAL'
|
||||
|
||||
def _extract_log_source(self, template_name: str) -> str:
|
||||
"""Extract log source from template name"""
|
||||
template_lower = template_name.lower()
|
||||
|
||||
if 'process' in template_lower or 'execution' in template_lower:
|
||||
return 'process_creation'
|
||||
elif 'network' in template_lower:
|
||||
return 'network_connection'
|
||||
elif 'file' in template_lower:
|
||||
return 'file_event'
|
||||
elif 'powershell' in template_lower:
|
||||
return 'powershell'
|
||||
elif 'registry' in template_lower:
|
||||
return 'registry_event'
|
||||
else:
|
||||
return 'generic'
|
backend/main.py (371 lines changed)
@@ -1,7 +1,7 @@
|
|||
from fastapi import FastAPI, HTTPException, BackgroundTasks, Depends
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import JSONResponse
|
||||
from sqlalchemy import create_engine, Column, String, Text, DECIMAL, TIMESTAMP, Boolean, ARRAY
|
||||
from sqlalchemy import create_engine, Column, String, Text, DECIMAL, TIMESTAMP, Boolean, ARRAY, Integer, JSON, func
|
||||
from sqlalchemy.ext.declarative import declarative_base
|
||||
from sqlalchemy.orm import sessionmaker, Session
|
||||
from sqlalchemy.dialects.postgresql import UUID
|
||||
|
@@ -19,6 +19,16 @@ import base64
|
|||
from github import Github
|
||||
from urllib.parse import urlparse
|
||||
import hashlib
|
||||
import logging
|
||||
import threading
|
||||
|
||||
# Setup logging
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Global job tracking
|
||||
running_jobs = {}
|
||||
job_cancellation_flags = {}
|
||||
|
||||
# Database setup
|
||||
DATABASE_URL = os.getenv("DATABASE_URL", "postgresql://cve_user:cve_password@localhost:5432/cve_sigma_db")
|
||||
|
@@ -39,6 +49,13 @@ class CVE(Base):
|
|||
modified_date = Column(TIMESTAMP)
|
||||
affected_products = Column(ARRAY(String))
|
||||
reference_urls = Column(ARRAY(String))
|
||||
# Bulk processing fields
|
||||
data_source = Column(String(20), default='nvd_api') # 'nvd_api', 'nvd_bulk', 'manual'
|
||||
nvd_json_version = Column(String(10), default='2.0')
|
||||
bulk_processed = Column(Boolean, default=False)
|
||||
# nomi-sec PoC fields
|
||||
poc_count = Column(Integer, default=0)
|
||||
poc_data = Column(JSON) # Store nomi-sec PoC metadata
|
||||
created_at = Column(TIMESTAMP, default=datetime.utcnow)
|
||||
updated_at = Column(TIMESTAMP, default=datetime.utcnow)
|
||||
|
||||
|
@@ -56,6 +73,10 @@ class SigmaRule(Base):
|
|||
exploit_based = Column(Boolean, default=False)
|
||||
github_repos = Column(ARRAY(String))
|
||||
exploit_indicators = Column(Text) # JSON string of extracted indicators
|
||||
# Enhanced fields for new data sources
|
||||
poc_source = Column(String(20), default='github_search') # 'github_search', 'nomi_sec', 'manual'
|
||||
poc_quality_score = Column(Integer, default=0) # Based on star count, activity, etc.
|
||||
nomi_sec_data = Column(JSON) # Store nomi-sec PoC metadata
|
||||
created_at = Column(TIMESTAMP, default=datetime.utcnow)
|
||||
updated_at = Column(TIMESTAMP, default=datetime.utcnow)
|
||||
|
||||
|
@@ -69,6 +90,23 @@ class RuleTemplate(Base):
|
|||
description = Column(Text)
|
||||
created_at = Column(TIMESTAMP, default=datetime.utcnow)
|
||||
|
||||
class BulkProcessingJob(Base):
|
||||
__tablename__ = "bulk_processing_jobs"
|
||||
|
||||
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
|
||||
job_type = Column(String(50), nullable=False) # 'nvd_bulk_seed', 'nomi_sec_sync', 'incremental_update'
|
||||
status = Column(String(20), default='pending') # 'pending', 'running', 'completed', 'failed', 'cancelled'
|
||||
year = Column(Integer) # For year-based processing
|
||||
total_items = Column(Integer, default=0)
|
||||
processed_items = Column(Integer, default=0)
|
||||
failed_items = Column(Integer, default=0)
|
||||
error_message = Column(Text)
|
||||
job_metadata = Column(JSON) # Additional job-specific data
|
||||
started_at = Column(TIMESTAMP)
|
||||
completed_at = Column(TIMESTAMP)
|
||||
cancelled_at = Column(TIMESTAMP)
|
||||
created_at = Column(TIMESTAMP, default=datetime.utcnow)
|
||||
|
||||
# Pydantic models
|
||||
class CVEResponse(BaseModel):
|
||||
id: str
|
||||
|
@@ -941,12 +979,341 @@ async def get_stats(db: Session = Depends(get_db)):
|
|||
total_rules = db.query(SigmaRule).count()
|
||||
recent_cves = db.query(CVE).filter(CVE.published_date >= datetime.utcnow() - timedelta(days=7)).count()
|
||||
|
||||
# Enhanced stats with bulk processing info
|
||||
bulk_processed_cves = db.query(CVE).filter(CVE.bulk_processed == True).count()
|
||||
cves_with_pocs = db.query(CVE).filter(CVE.poc_count > 0).count()
|
||||
nomi_sec_rules = db.query(SigmaRule).filter(SigmaRule.poc_source == 'nomi_sec').count()
|
||||
|
||||
return {
|
||||
"total_cves": total_cves,
|
||||
"total_sigma_rules": total_rules,
|
||||
"recent_cves_7_days": recent_cves
|
||||
"recent_cves_7_days": recent_cves,
|
||||
"bulk_processed_cves": bulk_processed_cves,
|
||||
"cves_with_pocs": cves_with_pocs,
|
||||
"nomi_sec_rules": nomi_sec_rules,
|
||||
"poc_coverage": (cves_with_pocs / total_cves * 100) if total_cves > 0 else 0,
|
||||
"nomi_sec_coverage": (nomi_sec_rules / total_rules * 100) if total_rules > 0 else 0
|
||||
}
|
||||
|
||||
# New bulk processing endpoints
|
||||
@app.post("/api/bulk-seed")
|
||||
async def start_bulk_seed(background_tasks: BackgroundTasks,
|
||||
start_year: int = 2002,
|
||||
end_year: Optional[int] = None,
|
||||
skip_nvd: bool = False,
|
||||
skip_nomi_sec: bool = False,
|
||||
db: Session = Depends(get_db)):
|
||||
"""Start bulk seeding process"""
|
||||
|
||||
async def bulk_seed_task():
|
||||
try:
|
||||
from bulk_seeder import BulkSeeder
|
||||
seeder = BulkSeeder(db)
|
||||
result = await seeder.full_bulk_seed(
|
||||
start_year=start_year,
|
||||
end_year=end_year,
|
||||
skip_nvd=skip_nvd,
|
||||
skip_nomi_sec=skip_nomi_sec
|
||||
)
|
||||
logger.info(f"Bulk seed completed: {result}")
|
||||
except Exception as e:
|
||||
logger.error(f"Bulk seed failed: {e}")
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
|
||||
background_tasks.add_task(bulk_seed_task)
|
||||
|
||||
return {
|
||||
"message": "Bulk seeding process started",
|
||||
"status": "started",
|
||||
"start_year": start_year,
|
||||
"end_year": end_year or datetime.now().year,
|
||||
"skip_nvd": skip_nvd,
|
||||
"skip_nomi_sec": skip_nomi_sec
|
||||
}
|
||||
|
||||
@app.post("/api/incremental-update")
|
||||
async def start_incremental_update(background_tasks: BackgroundTasks, db: Session = Depends(get_db)):
|
||||
"""Start incremental update process"""
|
||||
|
||||
async def incremental_update_task():
|
||||
try:
|
||||
from bulk_seeder import BulkSeeder
|
||||
seeder = BulkSeeder(db)
|
||||
result = await seeder.incremental_update()
|
||||
logger.info(f"Incremental update completed: {result}")
|
||||
except Exception as e:
|
||||
logger.error(f"Incremental update failed: {e}")
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
|
||||
background_tasks.add_task(incremental_update_task)
|
||||
|
||||
return {
|
||||
"message": "Incremental update process started",
|
||||
"status": "started"
|
||||
}
|
||||
|
||||
@app.post("/api/sync-nomi-sec")
|
||||
async def sync_nomi_sec(background_tasks: BackgroundTasks,
|
||||
cve_id: Optional[str] = None,
|
||||
batch_size: int = 50,
|
||||
db: Session = Depends(get_db)):
|
||||
"""Synchronize nomi-sec PoC data"""
|
||||
|
||||
# Create job record
|
||||
job = BulkProcessingJob(
|
||||
job_type='nomi_sec_sync',
|
||||
status='pending',
|
||||
job_metadata={
|
||||
'cve_id': cve_id,
|
||||
'batch_size': batch_size
|
||||
}
|
||||
)
|
||||
db.add(job)
|
||||
db.commit()
|
||||
db.refresh(job)
|
||||
|
||||
job_id = str(job.id)
|
||||
running_jobs[job_id] = job
|
||||
job_cancellation_flags[job_id] = False
|
||||
|
||||
async def sync_task():
|
||||
try:
|
||||
job.status = 'running'
|
||||
job.started_at = datetime.utcnow()
|
||||
db.commit()
|
||||
|
||||
from nomi_sec_client import NomiSecClient
|
||||
client = NomiSecClient(db)
|
||||
|
||||
if cve_id:
|
||||
# Sync specific CVE
|
||||
if job_cancellation_flags.get(job_id, False):
|
||||
logger.info(f"Job {job_id} cancelled before starting")
|
||||
return
|
||||
|
||||
result = await client.sync_cve_pocs(cve_id)
|
||||
logger.info(f"Nomi-sec sync for {cve_id}: {result}")
|
||||
else:
|
||||
# Sync all CVEs with cancellation support
|
||||
result = await client.bulk_sync_all_cves(
|
||||
batch_size=batch_size,
|
||||
cancellation_flag=lambda: job_cancellation_flags.get(job_id, False)
|
||||
)
|
||||
logger.info(f"Nomi-sec bulk sync completed: {result}")
|
||||
|
||||
# Update job status if not cancelled
|
||||
if not job_cancellation_flags.get(job_id, False):
|
||||
job.status = 'completed'
|
||||
job.completed_at = datetime.utcnow()
|
||||
db.commit()
|
||||
|
||||
except Exception as e:
|
||||
if not job_cancellation_flags.get(job_id, False):
|
||||
job.status = 'failed'
|
||||
job.error_message = str(e)
|
||||
job.completed_at = datetime.utcnow()
|
||||
db.commit()
|
||||
|
||||
logger.error(f"Nomi-sec sync failed: {e}")
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
finally:
|
||||
# Clean up tracking
|
||||
running_jobs.pop(job_id, None)
|
||||
job_cancellation_flags.pop(job_id, None)
|
||||
|
||||
background_tasks.add_task(sync_task)
|
||||
|
||||
return {
|
||||
"message": f"Nomi-sec sync started" + (f" for {cve_id}" if cve_id else " for all CVEs"),
|
||||
"status": "started",
|
||||
"job_id": job_id,
|
||||
"cve_id": cve_id,
|
||||
"batch_size": batch_size
|
||||
}
|
||||
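The sync endpoint above returns a `job_id` that can be tracked and cancelled through the job endpoints defined below. A usage sketch (client-side, not part of `main.py`), assuming a local deployment on port 8000 and `aiohttp` available to the caller:

```python
import asyncio
import aiohttp

API = "http://localhost:8000"  # assumption: local deployment

async def sync_and_monitor():
    async with aiohttp.ClientSession() as session:
        # Kick off a nomi-sec sync across all CVEs in the database
        async with session.post(f"{API}/api/sync-nomi-sec", params={"batch_size": "50"}) as resp:
            job_id = (await resp.json())["job_id"]

        # Poll the recent-jobs list until the job leaves the pending/running states
        while True:
            async with session.get(f"{API}/api/bulk-jobs", params={"limit": "5"}) as resp:
                jobs = {j["id"]: j for j in await resp.json()}
            status = jobs.get(job_id, {}).get("status", "unknown")
            print(f"{job_id}: {status}")
            if status not in ("pending", "running"):
                break
            await asyncio.sleep(10)

        # To abort instead: await session.post(f"{API}/api/cancel-job/{job_id}")

asyncio.run(sync_and_monitor())
```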
|
||||
@app.get("/api/bulk-jobs")
|
||||
async def get_bulk_jobs(limit: int = 10, db: Session = Depends(get_db)):
|
||||
"""Get bulk processing job status"""
|
||||
|
||||
jobs = db.query(BulkProcessingJob).order_by(
|
||||
BulkProcessingJob.created_at.desc()
|
||||
).limit(limit).all()
|
||||
|
||||
result = []
|
||||
for job in jobs:
|
||||
job_dict = {
|
||||
'id': str(job.id),
|
||||
'job_type': job.job_type,
|
||||
'status': job.status,
|
||||
'year': job.year,
|
||||
'total_items': job.total_items,
|
||||
'processed_items': job.processed_items,
|
||||
'failed_items': job.failed_items,
|
||||
'error_message': job.error_message,
|
||||
'metadata': job.job_metadata,
|
||||
'started_at': job.started_at,
|
||||
'completed_at': job.completed_at,
|
||||
'created_at': job.created_at
|
||||
}
|
||||
result.append(job_dict)
|
||||
|
||||
return result
|
||||
|
||||
@app.get("/api/bulk-status")
|
||||
async def get_bulk_status(db: Session = Depends(get_db)):
|
||||
"""Get comprehensive bulk processing status"""
|
||||
|
||||
try:
|
||||
from bulk_seeder import BulkSeeder
|
||||
seeder = BulkSeeder(db)
|
||||
status = await seeder.get_seeding_status()
|
||||
return status
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting bulk status: {e}")
|
||||
return {"error": str(e)}
|
||||
|
||||
@app.get("/api/poc-stats")
|
||||
async def get_poc_stats(db: Session = Depends(get_db)):
|
||||
"""Get PoC-related statistics"""
|
||||
|
||||
try:
|
||||
from nomi_sec_client import NomiSecClient
|
||||
client = NomiSecClient(db)
|
||||
stats = await client.get_sync_status()
|
||||
|
||||
# Additional PoC statistics
|
||||
high_quality_cves = db.query(CVE).filter(
|
||||
CVE.poc_count > 0,
|
||||
func.json_extract_path_text(CVE.poc_data, '0', 'quality_analysis', 'quality_score').cast(Integer) > 60
|
||||
).count()
|
||||
|
||||
stats.update({
|
||||
'high_quality_cves': high_quality_cves,
|
||||
'avg_poc_count': db.query(func.avg(CVE.poc_count)).filter(CVE.poc_count > 0).scalar() or 0
|
||||
})
|
||||
|
||||
return stats
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting PoC stats: {e}")
|
||||
return {"error": str(e)}
|
||||
|
||||
@app.post("/api/regenerate-rules")
|
||||
async def regenerate_sigma_rules(background_tasks: BackgroundTasks,
|
||||
force: bool = False,
|
||||
db: Session = Depends(get_db)):
|
||||
"""Regenerate SIGMA rules using enhanced nomi-sec data"""
|
||||
|
||||
async def regenerate_task():
|
||||
try:
|
||||
from enhanced_sigma_generator import EnhancedSigmaGenerator
|
||||
generator = EnhancedSigmaGenerator(db)
|
||||
|
||||
# Get CVEs with PoC data
|
||||
cves_with_pocs = db.query(CVE).filter(CVE.poc_count > 0).all()
|
||||
|
||||
rules_generated = 0
|
||||
rules_updated = 0
|
||||
|
||||
for cve in cves_with_pocs:
|
||||
# Check if we should regenerate
|
||||
existing_rule = db.query(SigmaRule).filter(
|
||||
SigmaRule.cve_id == cve.cve_id
|
||||
).first()
|
||||
|
||||
if existing_rule and existing_rule.poc_source == 'nomi_sec' and not force:
|
||||
continue
|
||||
|
||||
# Generate enhanced rule
|
||||
result = await generator.generate_enhanced_rule(cve)
|
||||
|
||||
if result['success']:
|
||||
if existing_rule:
|
||||
rules_updated += 1
|
||||
else:
|
||||
rules_generated += 1
|
||||
|
||||
logger.info(f"Rule regeneration completed: {rules_generated} new, {rules_updated} updated")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Rule regeneration failed: {e}")
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
|
||||
background_tasks.add_task(regenerate_task)
|
||||
|
||||
return {
|
||||
"message": "SIGMA rule regeneration started",
|
||||
"status": "started",
|
||||
"force": force
|
||||
}
|
||||
|
||||
@app.post("/api/cancel-job/{job_id}")
|
||||
async def cancel_job(job_id: str, db: Session = Depends(get_db)):
|
||||
"""Cancel a running job"""
|
||||
try:
|
||||
# Find the job in the database
|
||||
job = db.query(BulkProcessingJob).filter(BulkProcessingJob.id == job_id).first()
|
||||
if not job:
|
||||
raise HTTPException(status_code=404, detail="Job not found")
|
||||
|
||||
if job.status not in ['pending', 'running']:
|
||||
raise HTTPException(status_code=400, detail=f"Cannot cancel job with status: {job.status}")
|
||||
|
||||
# Set cancellation flag
|
||||
job_cancellation_flags[job_id] = True
|
||||
|
||||
# Update job status
|
||||
job.status = 'cancelled'
|
||||
job.cancelled_at = datetime.utcnow()
|
||||
job.error_message = "Job cancelled by user"
|
||||
|
||||
db.commit()
|
||||
|
||||
logger.info(f"Job {job_id} cancellation requested")
|
||||
|
||||
return {
|
||||
"message": f"Job {job_id} cancellation requested",
|
||||
"status": "cancelled",
|
||||
"job_id": job_id
|
||||
}
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Error cancelling job {job_id}: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
@app.get("/api/running-jobs")
|
||||
async def get_running_jobs(db: Session = Depends(get_db)):
|
||||
"""Get all currently running jobs"""
|
||||
try:
|
||||
jobs = db.query(BulkProcessingJob).filter(
|
||||
BulkProcessingJob.status.in_(['pending', 'running'])
|
||||
).order_by(BulkProcessingJob.created_at.desc()).all()
|
||||
|
||||
result = []
|
||||
for job in jobs:
|
||||
result.append({
|
||||
'id': str(job.id),
|
||||
'job_type': job.job_type,
|
||||
'status': job.status,
|
||||
'year': job.year,
|
||||
'total_items': job.total_items,
|
||||
'processed_items': job.processed_items,
|
||||
'failed_items': job.failed_items,
|
||||
'error_message': job.error_message,
|
||||
'started_at': job.started_at,
|
||||
'created_at': job.created_at,
|
||||
'can_cancel': job.status in ['pending', 'running']
|
||||
})
|
||||
|
||||
return result
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting running jobs: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
if __name__ == "__main__":
|
||||
import uvicorn
|
||||
uvicorn.run(app, host="0.0.0.0", port=8000)
|
||||
|
|
backend/nomi_sec_client.py (new file, 477 lines)
@@ -0,0 +1,477 @@
|
|||
"""
|
||||
Nomi-sec PoC-in-GitHub Integration Client
|
||||
Interfaces with the nomi-sec PoC-in-GitHub API for curated exploit data
|
||||
"""
|
||||
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
from sqlalchemy.orm import Session
|
||||
from sqlalchemy import and_, or_
|
||||
import time
|
||||
import re
|
||||
|
||||
# Configure logging
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class NomiSecClient:
|
||||
"""Client for interacting with nomi-sec PoC-in-GitHub API"""
|
||||
|
||||
def __init__(self, db_session: Session):
|
||||
self.db_session = db_session
|
||||
self.base_url = "https://poc-in-github.motikan2010.net/api/v1"
|
||||
self.rss_url = "https://poc-in-github.motikan2010.net/rss"
|
||||
|
||||
# Rate limiting
|
||||
self.rate_limit_delay = 1.0 # 1 second between requests
|
||||
self.last_request_time = 0
|
||||
|
||||
# Cache for recently fetched data
|
||||
self.cache = {}
|
||||
self.cache_ttl = 300 # 5 minutes
|
||||
|
||||
async def _make_request(self, session: aiohttp.ClientSession,
|
||||
url: str, params: dict = None) -> Optional[dict]:
|
||||
"""Make a rate-limited request to the API"""
|
||||
try:
|
||||
# Rate limiting
|
||||
current_time = time.time()
|
||||
time_since_last = current_time - self.last_request_time
|
||||
if time_since_last < self.rate_limit_delay:
|
||||
await asyncio.sleep(self.rate_limit_delay - time_since_last)
|
||||
|
||||
async with session.get(url, params=params, timeout=30) as response:
|
||||
self.last_request_time = time.time()
|
||||
|
||||
if response.status == 200:
|
||||
return await response.json()
|
||||
else:
|
||||
logger.warning(f"API request failed: {response.status} for {url}")
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error making request to {url}: {e}")
|
||||
return None
|
||||
|
||||
async def get_pocs_for_cve(self, cve_id: str) -> List[dict]:
|
||||
"""Get all PoC repositories for a specific CVE"""
|
||||
cache_key = f"cve_{cve_id}"
|
||||
|
||||
# Check cache
|
||||
if cache_key in self.cache:
|
||||
cached_data, timestamp = self.cache[cache_key]
|
||||
if time.time() - timestamp < self.cache_ttl:
|
||||
return cached_data
|
||||
|
||||
async with aiohttp.ClientSession() as session:
|
||||
params = {"cve_id": cve_id}
|
||||
data = await self._make_request(session, self.base_url, params)
|
||||
|
||||
if data and "pocs" in data:
|
||||
pocs = data["pocs"]
|
||||
# Cache the result
|
||||
self.cache[cache_key] = (pocs, time.time())
|
||||
logger.info(f"Found {len(pocs)} PoCs for {cve_id}")
|
||||
return pocs
|
||||
else:
|
||||
logger.info(f"No PoCs found for {cve_id}")
|
||||
return []
|
||||
|
||||
async def get_recent_pocs(self, limit: int = 100) -> List[dict]:
|
||||
"""Get recent PoCs from the API"""
|
||||
async with aiohttp.ClientSession() as session:
|
||||
params = {"limit": limit, "sort": "created_at"}
|
||||
data = await self._make_request(session, self.base_url, params)
|
||||
|
||||
if data and "pocs" in data:
|
||||
return data["pocs"]
|
||||
else:
|
||||
return []
|
||||
|
||||
async def get_high_quality_pocs(self, min_stars: int = 5, limit: int = 100) -> List[dict]:
|
||||
"""Get high-quality PoCs sorted by star count"""
|
||||
async with aiohttp.ClientSession() as session:
|
||||
params = {"limit": limit, "sort": "stargazers_count"}
|
||||
data = await self._make_request(session, self.base_url, params)
|
||||
|
||||
if data and "pocs" in data:
|
||||
# Filter by star count
|
||||
filtered_pocs = [
|
||||
poc for poc in data["pocs"]
|
||||
if int(poc.get("stargazers_count", "0")) >= min_stars
|
||||
]
|
||||
return filtered_pocs
|
||||
else:
|
||||
return []
|
||||
|
||||
async def search_pocs(self, query: str, limit: int = 50) -> List[dict]:
|
||||
"""Search for PoCs using a query string"""
|
||||
async with aiohttp.ClientSession() as session:
|
||||
params = {"limit": limit, "q": query}
|
||||
data = await self._make_request(session, self.base_url, params)
|
||||
|
||||
if data and "pocs" in data:
|
||||
return data["pocs"]
|
||||
else:
|
||||
return []
|
||||
|
||||
def analyze_poc_quality(self, poc: dict) -> dict:
|
||||
"""Analyze the quality of a PoC repository"""
|
||||
quality_score = 0
|
||||
factors = {}
|
||||
|
||||
# Star count factor (0-40 points)
|
||||
stars = int(poc.get("stargazers_count", "0"))
|
||||
star_score = min(stars * 2, 40) # 2 points per star, max 40
|
||||
quality_score += star_score
|
||||
factors["star_score"] = star_score
|
||||
|
||||
# Recency factor (0-20 points)
|
||||
try:
|
||||
updated_at = datetime.fromisoformat(poc.get("updated_at", "").replace('Z', '+00:00'))
|
||||
days_old = (datetime.now(updated_at.tzinfo) - updated_at).days
|
||||
recency_score = max(20 - (days_old // 30), 0) # Lose 1 point per month
|
||||
quality_score += recency_score
|
||||
factors["recency_score"] = recency_score
|
||||
except Exception:
|
||||
factors["recency_score"] = 0
|
||||
|
||||
# Description quality factor (0-15 points)
|
||||
description = poc.get("description", "")
|
||||
desc_score = 0
|
||||
if description:
|
||||
desc_score = min(len(description) // 10, 15) # 1 point per 10 chars, max 15
|
||||
quality_score += desc_score
|
||||
factors["description_score"] = desc_score
|
||||
|
||||
# Vulnerability description factor (0-15 points)
|
||||
vuln_desc = poc.get("vuln_description", "")
|
||||
vuln_score = 0
|
||||
if vuln_desc:
|
||||
vuln_score = min(len(vuln_desc) // 20, 15) # 1 point per 20 chars, max 15
|
||||
quality_score += vuln_score
|
||||
factors["vuln_description_score"] = vuln_score
|
||||
|
||||
# Repository name relevance factor (0-10 points)
|
||||
repo_name = poc.get("name", "").lower()
|
||||
cve_id = poc.get("cve_id", "").lower()
|
||||
name_score = 0
|
||||
if cve_id and cve_id.replace("-", "") in repo_name.replace("-", ""):
|
||||
name_score = 10
|
||||
elif any(keyword in repo_name for keyword in ["exploit", "poc", "cve", "vuln"]):
|
||||
name_score = 5
|
||||
quality_score += name_score
|
||||
factors["name_relevance_score"] = name_score
|
||||
|
||||
return {
|
||||
"quality_score": quality_score,
|
||||
"factors": factors,
|
||||
"quality_tier": self._get_quality_tier(quality_score)
|
||||
}
|
||||
|
||||
def _get_quality_tier(self, score: int) -> str:
|
||||
"""Get quality tier based on score"""
|
||||
if score >= 80:
|
||||
return "excellent"
|
||||
elif score >= 60:
|
||||
return "good"
|
||||
elif score >= 40:
|
||||
return "fair"
|
||||
elif score >= 20:
|
||||
return "poor"
|
||||
else:
|
||||
return "very_poor"
|
||||
|
||||
def extract_exploit_indicators(self, poc: dict) -> dict:
|
||||
"""Extract exploit indicators from PoC metadata"""
|
||||
indicators = {
|
||||
"processes": [],
|
||||
"files": [],
|
||||
"network": [],
|
||||
"registry": [],
|
||||
"commands": [],
|
||||
"urls": [],
|
||||
"techniques": []
|
||||
}
|
||||
|
||||
# Extract from description and vulnerability description
|
||||
text_sources = [
|
||||
poc.get("description", ""),
|
||||
poc.get("vuln_description", ""),
|
||||
poc.get("name", "")
|
||||
]
|
||||
|
||||
full_text = " ".join(text_sources).lower()
|
||||
|
||||
# Process patterns
|
||||
process_patterns = [
|
||||
r'\b(cmd\.exe|powershell\.exe|bash|sh|python\.exe|java\.exe)\b',
|
||||
r'\b(createprocess|shellexecute|system)\b',
|
||||
r'\b(reverse.?shell|bind.?shell)\b'
|
||||
]
|
||||
|
||||
for pattern in process_patterns:
|
||||
matches = re.findall(pattern, full_text, re.IGNORECASE)
|
||||
indicators["processes"].extend(matches)
|
||||
|
||||
# File patterns
|
||||
file_patterns = [
|
||||
r'\b([a-zA-Z]:\\[^\\]+\\[^\\]+\.[a-zA-Z0-9]+)\b', # Windows paths
|
||||
r'\b(/[^/\s]+/[^/\s]+\.[a-zA-Z0-9]+)\b', # Unix paths
|
||||
r'\b(\w+\.(exe|dll|bat|ps1|py|sh|jar))\b' # Common executable files
|
||||
]
|
||||
|
||||
for pattern in file_patterns:
|
||||
matches = re.findall(pattern, full_text, re.IGNORECASE)
|
||||
if matches and isinstance(matches[0], tuple):
|
||||
indicators["files"].extend([m[0] for m in matches])
|
||||
else:
|
||||
indicators["files"].extend(matches)
|
||||
|
||||
# Network patterns
|
||||
network_patterns = [
|
||||
r'\b(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\b', # IP addresses
|
||||
r'\b((?:\d{1,5})|(?:0x[a-fA-F0-9]{1,4}))\b', # Port-like numbers (broad: matches any 1-5 digit or short hex value)
|
||||
r'\b(http[s]?://[^\s]+)\b' # URLs
|
||||
]
|
||||
|
||||
for pattern in network_patterns:
|
||||
matches = re.findall(pattern, full_text, re.IGNORECASE)
|
||||
if pattern.startswith(r'\b(http'):
|
||||
indicators["urls"].extend(matches)
|
||||
else:
|
||||
indicators["network"].extend(matches)
|
||||
|
||||
# Command patterns
|
||||
command_patterns = [
|
||||
r'\b(curl|wget|nc|netcat|ncat)\b',
|
||||
r'\b(whoami|id|uname|systeminfo)\b',
|
||||
r'\b(cat|type|more|less)\b'
|
||||
]
|
||||
|
||||
for pattern in command_patterns:
|
||||
matches = re.findall(pattern, full_text, re.IGNORECASE)
|
||||
indicators["commands"].extend(matches)
|
||||
|
||||
# Clean up and deduplicate
|
||||
for key in indicators:
|
||||
indicators[key] = list(set(indicators[key]))
|
||||
|
||||
return indicators
|
||||
|
||||
    async def sync_cve_pocs(self, cve_id: str) -> dict:
        """Synchronize PoC data for a specific CVE"""
        from main import CVE, SigmaRule
        
        # Get existing CVE
        cve = self.db_session.query(CVE).filter(CVE.cve_id == cve_id).first()
        if not cve:
            logger.warning(f"CVE {cve_id} not found in database")
            return {"error": "CVE not found"}
        
        # Fetch PoCs from nomi-sec API
        pocs = await self.get_pocs_for_cve(cve_id)
        
        if not pocs:
            logger.info(f"No PoCs found for {cve_id}")
            return {"cve_id": cve_id, "pocs_found": 0}
        
        # Analyze and store PoC data
        poc_data = []
        github_repos = []
        total_quality_score = 0
        
        for poc in pocs:
            quality_analysis = self.analyze_poc_quality(poc)
            exploit_indicators = self.extract_exploit_indicators(poc)
            
            poc_entry = {
                "id": poc.get("id"),
                "name": poc.get("name"),
                "owner": poc.get("owner"),
                "full_name": poc.get("full_name"),
                "html_url": poc.get("html_url"),
                "description": poc.get("description"),
                "stargazers_count": int(poc.get("stargazers_count", "0")),
                "created_at": poc.get("created_at"),
                "updated_at": poc.get("updated_at"),
                "quality_analysis": quality_analysis,
                "exploit_indicators": exploit_indicators
            }
            
            poc_data.append(poc_entry)
            github_repos.append(poc.get("html_url", ""))
            total_quality_score += quality_analysis["quality_score"]
        
        # Update CVE with PoC data
        cve.poc_count = len(pocs)
        cve.poc_data = poc_data
        cve.updated_at = datetime.utcnow()
        
        # Update or create SIGMA rule with enhanced PoC data
        sigma_rule = self.db_session.query(SigmaRule).filter(
            SigmaRule.cve_id == cve_id
        ).first()
        
        if sigma_rule:
            sigma_rule.poc_source = 'nomi_sec'
            sigma_rule.poc_quality_score = total_quality_score // len(pocs) if pocs else 0
            sigma_rule.nomi_sec_data = {
                "total_pocs": len(pocs),
                "average_quality": total_quality_score // len(pocs) if pocs else 0,
                "best_poc": max(poc_data, key=lambda x: x["quality_analysis"]["quality_score"]) if poc_data else None,
                "total_stars": sum(p["stargazers_count"] for p in poc_data)
            }
            sigma_rule.github_repos = github_repos
            sigma_rule.updated_at = datetime.utcnow()
            
            # Extract best exploit indicators
            best_indicators = {}
            for poc in poc_data:
                for key, values in poc["exploit_indicators"].items():
                    if key not in best_indicators:
                        best_indicators[key] = []
                    best_indicators[key].extend(values)
            
            # Deduplicate and store
            for key in best_indicators:
                best_indicators[key] = list(set(best_indicators[key]))
            
            sigma_rule.exploit_indicators = json.dumps(best_indicators)
        
        self.db_session.commit()
        
        logger.info(f"Synchronized {len(pocs)} PoCs for {cve_id}")
        
        return {
            "cve_id": cve_id,
            "pocs_found": len(pocs),
            "total_quality_score": total_quality_score,
            "average_quality": total_quality_score // len(pocs) if pocs else 0,
            "github_repos": github_repos
        }
    
    async def bulk_sync_all_cves(self, batch_size: int = 100, cancellation_flag: Optional[callable] = None) -> dict:
        """Synchronize PoC data for all CVEs in database"""
        from main import CVE, BulkProcessingJob
        
        # Create bulk processing job
        job = BulkProcessingJob(
            job_type='nomi_sec_sync',
            status='running',
            started_at=datetime.utcnow(),
            job_metadata={'batch_size': batch_size}
        )
        self.db_session.add(job)
        self.db_session.commit()
        
        total_processed = 0
        total_found = 0
        results = []
        
        try:
            # Get all CVEs from database
            cves = self.db_session.query(CVE).all()
            job.total_items = len(cves)
            self.db_session.commit()
            
            # Process in batches
            for i in range(0, len(cves), batch_size):
                # Check for cancellation before each batch
                if cancellation_flag and cancellation_flag():
                    logger.info("Bulk sync cancelled by user")
                    job.status = 'cancelled'
                    job.cancelled_at = datetime.utcnow()
                    job.error_message = "Job cancelled by user"
                    break
                
                batch = cves[i:i + batch_size]
                
                for cve in batch:
                    # Check for cancellation before each CVE
                    if cancellation_flag and cancellation_flag():
                        logger.info("Bulk sync cancelled by user")
                        job.status = 'cancelled'
                        job.cancelled_at = datetime.utcnow()
                        job.error_message = "Job cancelled by user"
                        break
                    
                    try:
                        result = await self.sync_cve_pocs(cve.cve_id)
                        total_processed += 1
                        
                        if result.get("pocs_found", 0) > 0:
                            total_found += result["pocs_found"]
                            results.append(result)
                        
                        job.processed_items += 1
                        
                        # Small delay to avoid overwhelming the API
                        await asyncio.sleep(0.5)
                    
                    except Exception as e:
                        logger.error(f"Error syncing PoCs for {cve.cve_id}: {e}")
                        job.failed_items += 1
                
                # Break out of outer loop if cancelled
                if job.status == 'cancelled':
                    break
                
                # Commit after each batch
                self.db_session.commit()
                logger.info(f"Processed batch {i//batch_size + 1}/{(len(cves) + batch_size - 1)//batch_size}")
            
            # Update job status (only if not cancelled)
            if job.status != 'cancelled':
                job.status = 'completed'
                job.completed_at = datetime.utcnow()
            
            job.job_metadata.update({
                'total_processed': total_processed,
                'total_pocs_found': total_found,
                'cves_with_pocs': len(results)
            })
        
        except Exception as e:
            job.status = 'failed'
            job.error_message = str(e)
            job.completed_at = datetime.utcnow()
            logger.error(f"Bulk PoC sync job failed: {e}")
        
        finally:
            self.db_session.commit()
        
        return {
            'job_id': str(job.id),
            'status': job.status,
            'total_processed': total_processed,
            'total_pocs_found': total_found,
            'cves_with_pocs': len(results)
        }
    
    async def get_sync_status(self) -> dict:
        """Get synchronization status"""
        from main import CVE, SigmaRule
        
        # Count CVEs with PoC data
        total_cves = self.db_session.query(CVE).count()
        cves_with_pocs = self.db_session.query(CVE).filter(CVE.poc_count > 0).count()
        
        # Count SIGMA rules with nomi-sec data
        total_rules = self.db_session.query(SigmaRule).count()
        rules_with_nomi_sec = self.db_session.query(SigmaRule).filter(
            SigmaRule.poc_source == 'nomi_sec'
        ).count()
        
        return {
            'total_cves': total_cves,
            'cves_with_pocs': cves_with_pocs,
            'poc_coverage': (cves_with_pocs / total_cves * 100) if total_cves > 0 else 0,
            'total_rules': total_rules,
            'rules_with_nomi_sec': rules_with_nomi_sec,
            'nomi_sec_coverage': (rules_with_nomi_sec / total_rules * 100) if total_rules > 0 else 0
        }
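For reference, the three entry points above can be driven from a small standalone script. This is only an illustrative sketch, not part of this commit: it assumes the client class is exported as NomiSecClient and that main exposes a SessionLocal session factory, neither of which is shown in this diff.

# sync_pocs_example.py - illustrative sketch, not part of this commit
import asyncio
from main import SessionLocal                # assumption: session factory in main.py
from nomi_sec_client import NomiSecClient    # assumption: exported class name

async def run_sync():
    db = SessionLocal()
    try:
        client = NomiSecClient(db)
        # Sanity-check a single CVE before touching the whole database
        print(await client.sync_cve_pocs("CVE-2021-44228"))
        # Walk every CVE in batches; the flag allows cooperative cancellation
        summary = await client.bulk_sync_all_cves(batch_size=50,
                                                  cancellation_flag=lambda: False)
        print(summary)
        # Report coverage as computed by get_sync_status()
        print(await client.get_sync_status())
    finally:
        db.close()

if __name__ == "__main__":
    asyncio.run(run_sync())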
483
backend/nvd_bulk_processor.py
Normal file

@ -0,0 +1,483 @@
"""
NVD JSON Dataset Bulk Processor
Downloads and processes NVD JSON data feeds for comprehensive CVE seeding
"""

import requests
import json
import gzip
import zipfile
import os
import logging
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Tuple
from sqlalchemy.orm import Session
from sqlalchemy import and_, or_
import asyncio
import aiohttp
from pathlib import Path
import hashlib
import time

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class NVDBulkProcessor:
    """Handles bulk downloading and processing of NVD JSON data feeds"""
    
    def __init__(self, db_session: Session, data_dir: str = "./nvd_data"):
        self.db_session = db_session
        self.data_dir = Path(data_dir)
        self.data_dir.mkdir(exist_ok=True)
        self.api_key = os.getenv("NVD_API_KEY")
        
        # NVD JSON 1.1 feed URLs (legacy bulk feeds)
        self.base_url = "https://nvd.nist.gov/feeds/json/cve/1.1"
        self.feed_urls = {
            "modified": f"{self.base_url}/nvdcve-1.1-modified.json.gz",
            "recent": f"{self.base_url}/nvdcve-1.1-recent.json.gz"
        }
        
        # Rate limiting
        self.rate_limit_delay = 0.6  # 600ms between requests
        self.last_request_time = 0
    
    def get_year_feed_url(self, year: int) -> str:
        """Get the URL for a specific year's CVE feed"""
        return f"{self.base_url}/nvdcve-1.1-{year}.json.gz"
    
    def get_meta_url(self, feed_url: str) -> str:
        """Get the metadata URL for a feed"""
        return feed_url.replace(".json.gz", ".meta")
    
async def download_file(self, session: aiohttp.ClientSession, url: str,
|
||||
destination: Path, check_meta: bool = True) -> bool:
|
||||
"""Download a file with metadata checking"""
|
||||
try:
|
||||
# Check if we should download based on metadata
|
||||
if check_meta:
|
||||
meta_url = self.get_meta_url(url)
|
||||
should_download = await self._should_download_file(session, meta_url, destination)
|
||||
if not should_download:
|
||||
logger.info(f"Skipping {url} - file is up to date")
|
||||
return True
|
||||
|
||||
# Rate limiting
|
||||
current_time = time.time()
|
||||
time_since_last = current_time - self.last_request_time
|
||||
if time_since_last < self.rate_limit_delay:
|
||||
await asyncio.sleep(self.rate_limit_delay - time_since_last)
|
||||
|
||||
# Download the file
|
||||
headers = {}
|
||||
if self.api_key:
|
||||
headers["apiKey"] = self.api_key
|
||||
|
||||
async with session.get(url, headers=headers, timeout=30) as response:
|
||||
if response.status == 200:
|
||||
content = await response.read()
|
||||
destination.write_bytes(content)
|
||||
logger.info(f"Downloaded {url} -> {destination}")
|
||||
self.last_request_time = time.time()
|
||||
return True
|
||||
else:
|
||||
logger.error(f"Failed to download {url}: HTTP {response.status}")
|
||||
return False
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error downloading {url}: {e}")
|
||||
return False
|
||||
|
||||
async def _should_download_file(self, session: aiohttp.ClientSession,
|
||||
meta_url: str, destination: Path) -> bool:
|
||||
"""Check if file should be downloaded based on metadata"""
|
||||
try:
|
||||
# Download metadata
|
||||
async with session.get(meta_url, timeout=10) as response:
|
||||
if response.status != 200:
|
||||
return True # Download if we can't get metadata
|
||||
|
||||
meta_content = await response.text()
|
||||
|
||||
# Parse metadata
|
||||
meta_data = {}
|
||||
for line in meta_content.strip().split('\n'):
|
||||
if ':' in line:
|
||||
key, value = line.split(':', 1)
|
||||
meta_data[key.strip()] = value.strip()
|
||||
|
||||
# Check if local file exists and matches
|
||||
if destination.exists():
|
||||
local_size = destination.stat().st_size
|
||||
remote_size = int(meta_data.get('size', 0))
|
||||
remote_sha256 = meta_data.get('sha256', '')
|
||||
|
||||
if local_size == remote_size and remote_sha256:
|
||||
# Verify SHA256 if available
|
||||
local_sha256 = self._calculate_sha256(destination)
|
||||
if local_sha256 == remote_sha256:
|
||||
return False # File is up to date
|
||||
|
||||
return True # Download needed
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Error checking metadata for {meta_url}: {e}")
|
||||
return True # Download if metadata check fails
|
||||
|
||||
def _calculate_sha256(self, file_path: Path) -> str:
|
||||
"""Calculate SHA256 hash of a file"""
|
||||
sha256_hash = hashlib.sha256()
|
||||
with open(file_path, "rb") as f:
|
||||
for chunk in iter(lambda: f.read(4096), b""):
|
||||
sha256_hash.update(chunk)
|
||||
return sha256_hash.hexdigest()
|
||||
|
||||
async def download_all_feeds(self, start_year: int = 2002,
|
||||
end_year: Optional[int] = None) -> List[Path]:
|
||||
"""Download all NVD JSON feeds"""
|
||||
if end_year is None:
|
||||
end_year = datetime.now().year
|
||||
|
||||
downloaded_files = []
|
||||
|
||||
async with aiohttp.ClientSession() as session:
|
||||
# Download year-based feeds
|
||||
for year in range(start_year, end_year + 1):
|
||||
url = self.get_year_feed_url(year)
|
||||
filename = f"nvdcve-1.1-{year}.json.gz"
|
||||
destination = self.data_dir / filename
|
||||
|
||||
if await self.download_file(session, url, destination):
|
||||
downloaded_files.append(destination)
|
||||
|
||||
# Download modified and recent feeds
|
||||
for feed_name, url in self.feed_urls.items():
|
||||
filename = f"nvdcve-1.1-{feed_name}.json.gz"
|
||||
destination = self.data_dir / filename
|
||||
|
||||
if await self.download_file(session, url, destination):
|
||||
downloaded_files.append(destination)
|
||||
|
||||
return downloaded_files
|
||||
|
||||
def extract_json_file(self, compressed_file: Path) -> Path:
|
||||
"""Extract JSON from compressed file"""
|
||||
json_file = compressed_file.with_suffix('.json')
|
||||
|
||||
try:
|
||||
if compressed_file.suffix == '.gz':
|
||||
with gzip.open(compressed_file, 'rt', encoding='utf-8') as f_in:
|
||||
with open(json_file, 'w', encoding='utf-8') as f_out:
|
||||
f_out.write(f_in.read())
|
||||
elif compressed_file.suffix == '.zip':
|
||||
with zipfile.ZipFile(compressed_file, 'r') as zip_ref:
|
||||
zip_ref.extractall(self.data_dir)
|
||||
else:
|
||||
# File is already uncompressed
|
||||
return compressed_file
|
||||
|
||||
logger.info(f"Extracted {compressed_file} -> {json_file}")
|
||||
return json_file
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error extracting {compressed_file}: {e}")
|
||||
raise
|
||||
|
||||
def process_json_file(self, json_file: Path) -> Tuple[int, int]:
|
||||
"""Process a single JSON file and return (processed, failed) counts"""
|
||||
from main import CVE, BulkProcessingJob
|
||||
|
||||
processed_count = 0
|
||||
failed_count = 0
|
||||
|
||||
try:
|
||||
with open(json_file, 'r', encoding='utf-8') as f:
|
||||
data = json.load(f)
|
||||
|
||||
cve_items = data.get('CVE_Items', [])
|
||||
logger.info(f"Processing {len(cve_items)} CVEs from {json_file}")
|
||||
|
||||
for cve_item in cve_items:
|
||||
try:
|
||||
cve_data = self._extract_cve_data(cve_item)
|
||||
if cve_data:
|
||||
self._store_cve_data(cve_data)
|
||||
processed_count += 1
|
||||
else:
|
||||
failed_count += 1
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error processing CVE item: {e}")
|
||||
failed_count += 1
|
||||
|
||||
# Commit changes
|
||||
self.db_session.commit()
|
||||
logger.info(f"Processed {processed_count} CVEs, failed: {failed_count}")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error processing {json_file}: {e}")
|
||||
self.db_session.rollback()
|
||||
raise
|
||||
|
||||
return processed_count, failed_count
|
||||
|
||||
def _extract_cve_data(self, cve_item: dict) -> Optional[dict]:
|
||||
"""Extract CVE data from JSON item"""
|
||||
try:
|
||||
cve = cve_item.get('cve', {})
|
||||
impact = cve_item.get('impact', {})
|
||||
|
||||
cve_id = cve.get('CVE_data_meta', {}).get('ID', '')
|
||||
if not cve_id:
|
||||
return None
|
||||
|
||||
# Description
|
||||
description_data = cve.get('description', {}).get('description_data', [])
|
||||
description = ''
|
||||
if description_data:
|
||||
description = description_data[0].get('value', '')
|
||||
|
||||
# CVSS Score
|
||||
cvss_score = None
|
||||
severity = None
|
||||
if 'baseMetricV3' in impact:
|
||||
cvss_v3 = impact['baseMetricV3'].get('cvssV3', {})
|
||||
cvss_score = cvss_v3.get('baseScore')
|
||||
severity = cvss_v3.get('baseSeverity', '').lower()
|
||||
elif 'baseMetricV2' in impact:
|
||||
cvss_v2 = impact['baseMetricV2'].get('cvssV2', {})
|
||||
cvss_score = cvss_v2.get('baseScore')
|
||||
severity = impact['baseMetricV2'].get('severity', '').lower()
|
||||
|
||||
# Dates
|
||||
published_date = None
|
||||
modified_date = None
|
||||
if 'publishedDate' in cve_item:
|
||||
published_date = datetime.fromisoformat(
|
||||
cve_item['publishedDate'].replace('Z', '+00:00')
|
||||
)
|
||||
if 'lastModifiedDate' in cve_item:
|
||||
modified_date = datetime.fromisoformat(
|
||||
cve_item['lastModifiedDate'].replace('Z', '+00:00')
|
||||
)
|
||||
|
||||
# Affected products (from CPE data)
|
||||
affected_products = []
|
||||
configurations = cve_item.get('configurations', {})
|
||||
for node in configurations.get('nodes', []):
|
||||
for cpe_match in node.get('cpe_match', []):
|
||||
if cpe_match.get('vulnerable', False):
|
||||
cpe_uri = cpe_match.get('cpe23Uri', '')
|
||||
if cpe_uri:
|
||||
affected_products.append(cpe_uri)
|
||||
|
||||
# Reference URLs
|
||||
reference_urls = []
|
||||
references = cve.get('references', {}).get('reference_data', [])
|
||||
for ref in references:
|
||||
url = ref.get('url', '')
|
||||
if url:
|
||||
reference_urls.append(url)
|
||||
|
||||
return {
|
||||
'cve_id': cve_id,
|
||||
'description': description,
|
||||
'cvss_score': cvss_score,
|
||||
'severity': severity,
|
||||
'published_date': published_date,
|
||||
'modified_date': modified_date,
|
||||
'affected_products': affected_products,
|
||||
'reference_urls': reference_urls,
|
||||
'data_source': 'nvd_bulk',
|
||||
'nvd_json_version': '1.1',
|
||||
'bulk_processed': True
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error extracting CVE data: {e}")
|
||||
return None
|
||||
|
||||
def _store_cve_data(self, cve_data: dict):
|
||||
"""Store CVE data in database"""
|
||||
from main import CVE
|
||||
|
||||
# Check if CVE already exists
|
||||
existing_cve = self.db_session.query(CVE).filter(
|
||||
CVE.cve_id == cve_data['cve_id']
|
||||
).first()
|
||||
|
||||
if existing_cve:
|
||||
# Update existing CVE
|
||||
for key, value in cve_data.items():
|
||||
setattr(existing_cve, key, value)
|
||||
existing_cve.updated_at = datetime.utcnow()
|
||||
logger.debug(f"Updated CVE {cve_data['cve_id']}")
|
||||
else:
|
||||
# Create new CVE
|
||||
new_cve = CVE(**cve_data)
|
||||
self.db_session.add(new_cve)
|
||||
logger.debug(f"Created new CVE {cve_data['cve_id']}")
|
||||
|
||||
async def bulk_seed_database(self, start_year: int = 2002,
|
||||
end_year: Optional[int] = None) -> dict:
|
||||
"""Perform complete bulk seeding of the database"""
|
||||
from main import BulkProcessingJob
|
||||
|
||||
if end_year is None:
|
||||
end_year = datetime.now().year
|
||||
|
||||
# Create bulk processing job
|
||||
job = BulkProcessingJob(
|
||||
job_type='nvd_bulk_seed',
|
||||
status='running',
|
||||
started_at=datetime.utcnow(),
|
||||
job_metadata={
|
||||
'start_year': start_year,
|
||||
'end_year': end_year,
|
||||
'total_years': end_year - start_year + 1
|
||||
}
|
||||
)
|
||||
self.db_session.add(job)
|
||||
self.db_session.commit()
|
||||
|
||||
total_processed = 0
|
||||
total_failed = 0
|
||||
results = []
|
||||
|
||||
try:
|
||||
# Download all feeds
|
||||
logger.info(f"Starting bulk seed from {start_year} to {end_year}")
|
||||
downloaded_files = await self.download_all_feeds(start_year, end_year)
|
||||
|
||||
job.total_items = len(downloaded_files)
|
||||
self.db_session.commit()
|
||||
|
||||
# Process each file
|
||||
for file_path in downloaded_files:
|
||||
try:
|
||||
# Extract JSON file
|
||||
json_file = self.extract_json_file(file_path)
|
||||
|
||||
# Process the JSON file
|
||||
processed, failed = self.process_json_file(json_file)
|
||||
|
||||
total_processed += processed
|
||||
total_failed += failed
|
||||
job.processed_items += 1
|
||||
|
||||
results.append({
|
||||
'file': file_path.name,
|
||||
'processed': processed,
|
||||
'failed': failed
|
||||
})
|
||||
|
||||
# Clean up extracted file if it's different from original
|
||||
if json_file != file_path:
|
||||
json_file.unlink()
|
||||
|
||||
self.db_session.commit()
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error processing {file_path}: {e}")
|
||||
job.failed_items += 1
|
||||
total_failed += 1
|
||||
self.db_session.commit()
|
||||
|
||||
# Update job status
|
||||
job.status = 'completed'
|
||||
job.completed_at = datetime.utcnow()
|
||||
job.job_metadata.update({
|
||||
'total_processed': total_processed,
|
||||
'total_failed': total_failed,
|
||||
'results': results
|
||||
})
|
||||
|
||||
except Exception as e:
|
||||
job.status = 'failed'
|
||||
job.error_message = str(e)
|
||||
job.completed_at = datetime.utcnow()
|
||||
logger.error(f"Bulk seed job failed: {e}")
|
||||
|
||||
finally:
|
||||
self.db_session.commit()
|
||||
|
||||
return {
|
||||
'job_id': str(job.id),
|
||||
'status': job.status,
|
||||
'total_processed': total_processed,
|
||||
'total_failed': total_failed,
|
||||
'results': results
|
||||
}
|
||||
|
||||
async def incremental_update(self) -> dict:
|
||||
"""Perform incremental update using modified and recent feeds"""
|
||||
from main import BulkProcessingJob
|
||||
|
||||
# Create incremental update job
|
||||
job = BulkProcessingJob(
|
||||
job_type='incremental_update',
|
||||
status='running',
|
||||
started_at=datetime.utcnow(),
|
||||
job_metadata={'feeds': ['modified', 'recent']}
|
||||
)
|
||||
self.db_session.add(job)
|
||||
self.db_session.commit()
|
||||
|
||||
total_processed = 0
|
||||
total_failed = 0
|
||||
results = []
|
||||
|
||||
try:
|
||||
# Download modified and recent feeds
|
||||
async with aiohttp.ClientSession() as session:
|
||||
for feed_name, url in self.feed_urls.items():
|
||||
filename = f"nvdcve-1.1-{feed_name}.json.gz"
|
||||
destination = self.data_dir / filename
|
||||
|
||||
if await self.download_file(session, url, destination):
|
||||
try:
|
||||
json_file = self.extract_json_file(destination)
|
||||
processed, failed = self.process_json_file(json_file)
|
||||
|
||||
total_processed += processed
|
||||
total_failed += failed
|
||||
|
||||
results.append({
|
||||
'feed': feed_name,
|
||||
'processed': processed,
|
||||
'failed': failed
|
||||
})
|
||||
|
||||
# Clean up
|
||||
if json_file != destination:
|
||||
json_file.unlink()
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error processing {feed_name} feed: {e}")
|
||||
total_failed += 1
|
||||
|
||||
job.status = 'completed'
|
||||
job.completed_at = datetime.utcnow()
|
||||
job.job_metadata.update({
|
||||
'total_processed': total_processed,
|
||||
'total_failed': total_failed,
|
||||
'results': results
|
||||
})
|
||||
|
||||
except Exception as e:
|
||||
job.status = 'failed'
|
||||
job.error_message = str(e)
|
||||
job.completed_at = datetime.utcnow()
|
||||
logger.error(f"Incremental update job failed: {e}")
|
||||
|
||||
finally:
|
||||
self.db_session.commit()
|
||||
|
||||
return {
|
||||
'job_id': str(job.id),
|
||||
'status': job.status,
|
||||
'total_processed': total_processed,
|
||||
'total_failed': total_failed,
|
||||
'results': results
|
||||
}
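The processor above is likewise usable outside the API. A minimal sketch follows, not part of this commit; it assumes main exposes a SessionLocal session factory, while the NVDBulkProcessor constructor signature is taken from this file.

# seed_nvd_example.py - illustrative sketch, not part of this commit
import asyncio
from main import SessionLocal              # assumption: session factory in main.py
from nvd_bulk_processor import NVDBulkProcessor

async def seed():
    db = SessionLocal()
    try:
        processor = NVDBulkProcessor(db, data_dir="./nvd_data")
        # One-time historical seed: yearly feeds plus the modified/recent feeds
        result = await processor.bulk_seed_database(start_year=2020)
        print(result["status"], result["total_processed"], "CVEs processed")
        # Routine refresh afterwards uses only the small modified/recent feeds
        print(await processor.incremental_update())
    finally:
        db.close()

if __name__ == "__main__":
    asyncio.run(seed())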
@ -12,3 +12,5 @@ pygithub==2.1.1
gitpython==3.1.40
beautifulsoup4==4.12.2
lxml==4.9.3
aiohttp==3.9.1
aiofiles
@ -1,5 +1,3 @@
version: '3.8'

services:
  db:
    image: postgres:15

@ -25,6 +23,7 @@ services:
    environment:
      DATABASE_URL: postgresql://cve_user:cve_password@db:5432/cve_sigma_db
      NVD_API_KEY: ${NVD_API_KEY:-}
      GITHUB_TOKEN: ${GITHUB_TOKEN}
    depends_on:
      db:
        condition: service_healthy
@ -15,6 +15,10 @@ function App() {
|
|||
const [activeTab, setActiveTab] = useState('dashboard');
|
||||
const [fetchingCves, setFetchingCves] = useState(false);
|
||||
const [testResult, setTestResult] = useState(null);
|
||||
const [bulkJobs, setBulkJobs] = useState([]);
|
||||
const [bulkStatus, setBulkStatus] = useState({});
|
||||
const [pocStats, setPocStats] = useState({});
|
||||
const [bulkProcessing, setBulkProcessing] = useState(false);
|
||||
|
||||
useEffect(() => {
|
||||
fetchData();
|
||||
|
@ -23,15 +27,21 @@ function App() {
|
|||
const fetchData = async () => {
|
||||
try {
|
||||
setLoading(true);
|
||||
const [cvesRes, rulesRes, statsRes] = await Promise.all([
|
||||
const [cvesRes, rulesRes, statsRes, bulkJobsRes, bulkStatusRes, pocStatsRes] = await Promise.all([
|
||||
axios.get(`${API_BASE_URL}/api/cves`),
|
||||
axios.get(`${API_BASE_URL}/api/sigma-rules`),
|
||||
axios.get(`${API_BASE_URL}/api/stats`)
|
||||
axios.get(`${API_BASE_URL}/api/stats`),
|
||||
axios.get(`${API_BASE_URL}/api/bulk-jobs`),
|
||||
axios.get(`${API_BASE_URL}/api/bulk-status`),
|
||||
axios.get(`${API_BASE_URL}/api/poc-stats`)
|
||||
]);
|
||||
|
||||
setCves(cvesRes.data);
|
||||
setSigmaRules(rulesRes.data);
|
||||
setStats(statsRes.data);
|
||||
setBulkJobs(bulkJobsRes.data);
|
||||
setBulkStatus(bulkStatusRes.data);
|
||||
setPocStats(pocStatsRes.data);
|
||||
} catch (error) {
|
||||
console.error('Error fetching data:', error);
|
||||
} finally {
|
||||
|
@ -39,6 +49,20 @@ function App() {
|
|||
}
|
||||
};
|
||||
|
||||
const cancelJob = async (jobId) => {
|
||||
try {
|
||||
const response = await axios.post(`${API_BASE_URL}/api/cancel-job/${jobId}`);
|
||||
console.log('Cancel job response:', response.data);
|
||||
// Refresh data after cancelling
|
||||
setTimeout(() => {
|
||||
fetchData();
|
||||
}, 1000);
|
||||
} catch (error) {
|
||||
console.error('Error cancelling job:', error);
|
||||
alert('Failed to cancel job. Please try again.');
|
||||
}
|
||||
};
|
||||
|
||||
const handleFetchCves = async () => {
|
||||
try {
|
||||
setFetchingCves(true);
|
||||
|
@ -73,6 +97,73 @@ function App() {
|
|||
}
|
||||
};
|
||||
|
||||
const startBulkSeed = async (startYear = 2020, endYear = null) => {
|
||||
try {
|
||||
setBulkProcessing(true);
|
||||
const response = await axios.post(`${API_BASE_URL}/api/bulk-seed`, {
|
||||
start_year: startYear,
|
||||
end_year: endYear
|
||||
});
|
||||
console.log('Bulk seed response:', response.data);
|
||||
// Refresh data after starting
|
||||
setTimeout(() => {
|
||||
fetchData();
|
||||
}, 2000);
|
||||
} catch (error) {
|
||||
console.error('Error starting bulk seed:', error);
|
||||
setBulkProcessing(false);
|
||||
}
|
||||
};
|
||||
|
||||
const startIncrementalUpdate = async () => {
|
||||
try {
|
||||
setBulkProcessing(true);
|
||||
const response = await axios.post(`${API_BASE_URL}/api/incremental-update`);
|
||||
console.log('Incremental update response:', response.data);
|
||||
setTimeout(() => {
|
||||
fetchData();
|
||||
setBulkProcessing(false);
|
||||
}, 2000);
|
||||
} catch (error) {
|
||||
console.error('Error starting incremental update:', error);
|
||||
setBulkProcessing(false);
|
||||
}
|
||||
};
|
||||
|
||||
const syncNomiSec = async (cveId = null) => {
|
||||
try {
|
||||
setBulkProcessing(true);
|
||||
const response = await axios.post(`${API_BASE_URL}/api/sync-nomi-sec`, {
|
||||
cve_id: cveId
|
||||
});
|
||||
console.log('Nomi-sec sync response:', response.data);
|
||||
setTimeout(() => {
|
||||
fetchData();
|
||||
setBulkProcessing(false);
|
||||
}, 2000);
|
||||
} catch (error) {
|
||||
console.error('Error syncing nomi-sec:', error);
|
||||
setBulkProcessing(false);
|
||||
}
|
||||
};
|
||||
|
||||
const regenerateRules = async (force = false) => {
|
||||
try {
|
||||
setBulkProcessing(true);
|
||||
const response = await axios.post(`${API_BASE_URL}/api/regenerate-rules`, {
|
||||
force: force
|
||||
});
|
||||
console.log('Rule regeneration response:', response.data);
|
||||
setTimeout(() => {
|
||||
fetchData();
|
||||
setBulkProcessing(false);
|
||||
}, 2000);
|
||||
} catch (error) {
|
||||
console.error('Error regenerating rules:', error);
|
||||
setBulkProcessing(false);
|
||||
}
|
||||
};
|
||||
|
||||
const getSeverityColor = (severity) => {
|
||||
switch (severity?.toLowerCase()) {
|
||||
case 'critical': return 'bg-red-100 text-red-800';
|
||||
|
@ -93,18 +184,81 @@ function App() {
|
|||
|
||||
const Dashboard = () => (
|
||||
<div className="space-y-6">
|
||||
<div className="grid grid-cols-1 md:grid-cols-3 gap-6">
|
||||
<div className="grid grid-cols-1 md:grid-cols-5 gap-6">
|
||||
<div className="bg-white p-6 rounded-lg shadow">
|
||||
<h3 className="text-lg font-medium text-gray-900">Total CVEs</h3>
|
||||
<p className="text-3xl font-bold text-blue-600">{stats.total_cves || 0}</p>
|
||||
<p className="text-sm text-gray-500">Bulk: {stats.bulk_processed_cves || 0}</p>
|
||||
</div>
|
||||
<div className="bg-white p-6 rounded-lg shadow">
|
||||
<h3 className="text-lg font-medium text-gray-900">SIGMA Rules</h3>
|
||||
<p className="text-3xl font-bold text-green-600">{stats.total_sigma_rules || 0}</p>
|
||||
<p className="text-sm text-gray-500">Nomi-sec: {stats.nomi_sec_rules || 0}</p>
|
||||
</div>
|
||||
<div className="bg-white p-6 rounded-lg shadow">
|
||||
<h3 className="text-lg font-medium text-gray-900">CVEs with PoCs</h3>
|
||||
<p className="text-3xl font-bold text-purple-600">{stats.cves_with_pocs || 0}</p>
|
||||
<p className="text-sm text-gray-500">{(stats.poc_coverage || 0).toFixed(1)}% coverage</p>
|
||||
</div>
|
||||
<div className="bg-white p-6 rounded-lg shadow">
|
||||
<h3 className="text-lg font-medium text-gray-900">Recent CVEs (7d)</h3>
|
||||
<p className="text-3xl font-bold text-purple-600">{stats.recent_cves_7_days || 0}</p>
|
||||
<p className="text-3xl font-bold text-orange-600">{stats.recent_cves_7_days || 0}</p>
|
||||
</div>
|
||||
<div className="bg-white p-6 rounded-lg shadow">
|
||||
<h3 className="text-lg font-medium text-gray-900">High Quality PoCs</h3>
|
||||
<p className="text-3xl font-bold text-indigo-600">{pocStats.high_quality_cves || 0}</p>
|
||||
<p className="text-sm text-gray-500">Avg: {(pocStats.avg_poc_count || 0).toFixed(1)}</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Bulk Processing Controls */}
|
||||
<div className="bg-white rounded-lg shadow p-6">
|
||||
<h2 className="text-xl font-bold text-gray-900 mb-4">Bulk Processing</h2>
|
||||
<div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-4 gap-4">
|
||||
<button
|
||||
onClick={() => startBulkSeed(2020)}
|
||||
disabled={bulkProcessing}
|
||||
className={`px-4 py-2 rounded-md text-white ${
|
||||
bulkProcessing
|
||||
? 'bg-gray-400 cursor-not-allowed'
|
||||
: 'bg-blue-600 hover:bg-blue-700'
|
||||
}`}
|
||||
>
|
||||
{bulkProcessing ? 'Processing...' : 'Bulk Seed (2020+)'}
|
||||
</button>
|
||||
<button
|
||||
onClick={startIncrementalUpdate}
|
||||
disabled={bulkProcessing}
|
||||
className={`px-4 py-2 rounded-md text-white ${
|
||||
bulkProcessing
|
||||
? 'bg-gray-400 cursor-not-allowed'
|
||||
: 'bg-green-600 hover:bg-green-700'
|
||||
}`}
|
||||
>
|
||||
{bulkProcessing ? 'Processing...' : 'Incremental Update'}
|
||||
</button>
|
||||
<button
|
||||
onClick={() => syncNomiSec()}
|
||||
disabled={bulkProcessing}
|
||||
className={`px-4 py-2 rounded-md text-white ${
|
||||
bulkProcessing
|
||||
? 'bg-gray-400 cursor-not-allowed'
|
||||
: 'bg-purple-600 hover:bg-purple-700'
|
||||
}`}
|
||||
>
|
||||
{bulkProcessing ? 'Processing...' : 'Sync nomi-sec PoCs'}
|
||||
</button>
|
||||
<button
|
||||
onClick={() => regenerateRules()}
|
||||
disabled={bulkProcessing}
|
||||
className={`px-4 py-2 rounded-md text-white ${
|
||||
bulkProcessing
|
||||
? 'bg-gray-400 cursor-not-allowed'
|
||||
: 'bg-indigo-600 hover:bg-indigo-700'
|
||||
}`}
|
||||
>
|
||||
{bulkProcessing ? 'Processing...' : 'Regenerate Rules'}
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
@ -522,6 +676,178 @@ function App() {
|
|||
);
|
||||
};
|
||||
|
||||
const BulkJobsList = () => (
|
||||
<div className="space-y-6">
|
||||
<div className="flex justify-between items-center">
|
||||
<h1 className="text-2xl font-bold text-gray-900">Bulk Processing Jobs</h1>
|
||||
<button
|
||||
onClick={fetchData}
|
||||
className="bg-blue-600 hover:bg-blue-700 text-white px-4 py-2 rounded-md text-sm"
|
||||
>
|
||||
Refresh
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{/* Bulk Status Overview */}
|
||||
<div className="bg-white rounded-lg shadow p-6">
|
||||
<h2 className="text-lg font-bold text-gray-900 mb-4">System Status</h2>
|
||||
{bulkStatus.database_stats && (
|
||||
<div className="grid grid-cols-2 md:grid-cols-4 gap-4">
|
||||
<div className="text-center">
|
||||
<div className="text-2xl font-bold text-blue-600">{bulkStatus.database_stats.total_cves}</div>
|
||||
<div className="text-sm text-gray-500">Total CVEs</div>
|
||||
</div>
|
||||
<div className="text-center">
|
||||
<div className="text-2xl font-bold text-green-600">{bulkStatus.database_stats.bulk_processed_cves}</div>
|
||||
<div className="text-sm text-gray-500">Bulk Processed</div>
|
||||
</div>
|
||||
<div className="text-center">
|
||||
<div className="text-2xl font-bold text-purple-600">{bulkStatus.database_stats.cves_with_pocs}</div>
|
||||
<div className="text-sm text-gray-500">With PoCs</div>
|
||||
</div>
|
||||
<div className="text-center">
|
||||
<div className="text-2xl font-bold text-indigo-600">{bulkStatus.database_stats.nomi_sec_rules}</div>
|
||||
<div className="text-sm text-gray-500">Enhanced Rules</div>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Running Jobs */}
|
||||
{bulkJobs.some(job => job.status === 'running' || job.status === 'pending') && (
|
||||
<div className="bg-white rounded-lg shadow">
|
||||
<div className="px-6 py-4 border-b border-gray-200">
|
||||
<h2 className="text-lg font-bold text-gray-900">Running Jobs</h2>
|
||||
</div>
|
||||
<div className="divide-y divide-gray-200">
|
||||
{bulkJobs
|
||||
.filter(job => job.status === 'running' || job.status === 'pending')
|
||||
.map((job) => (
|
||||
<div key={job.id} className="px-6 py-4 bg-blue-50">
|
||||
<div className="flex items-center justify-between">
|
||||
<div className="flex-1">
|
||||
<div className="flex items-center space-x-3">
|
||||
<h3 className="text-lg font-medium text-gray-900">{job.job_type}</h3>
|
||||
<span className={`inline-flex px-2 py-1 text-xs font-semibold rounded-full ${
|
||||
job.status === 'running' ? 'bg-blue-100 text-blue-800' :
|
||||
'bg-gray-100 text-gray-800'
|
||||
}`}>
|
||||
{job.status}
|
||||
</span>
|
||||
</div>
|
||||
<div className="mt-2 flex items-center space-x-6 text-sm text-gray-500">
|
||||
<span>Started: {formatDate(job.started_at)}</span>
|
||||
{job.year && <span>Year: {job.year}</span>}
|
||||
</div>
|
||||
{job.total_items > 0 && (
|
||||
<div className="mt-2">
|
||||
<div className="flex items-center space-x-4 text-sm text-gray-600">
|
||||
<span>Progress: {job.processed_items}/{job.total_items}</span>
|
||||
{job.failed_items > 0 && (
|
||||
<span className="text-red-600">Failed: {job.failed_items}</span>
|
||||
)}
|
||||
</div>
|
||||
<div className="mt-1 w-full bg-gray-200 rounded-full h-2">
|
||||
<div
|
||||
className="bg-blue-600 h-2 rounded-full"
|
||||
style={{ width: `${(job.processed_items / job.total_items) * 100}%` }}
|
||||
></div>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
<div className="flex-shrink-0 ml-4">
|
||||
<button
|
||||
onClick={() => cancelJob(job.id)}
|
||||
className="bg-red-600 hover:bg-red-700 text-white px-3 py-1 rounded-md text-sm font-medium"
|
||||
>
|
||||
Cancel
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Recent Jobs */}
|
||||
<div className="bg-white rounded-lg shadow">
|
||||
<div className="px-6 py-4 border-b border-gray-200">
|
||||
<h2 className="text-lg font-bold text-gray-900">Recent Jobs</h2>
|
||||
</div>
|
||||
<div className="divide-y divide-gray-200">
|
||||
{bulkJobs.length === 0 ? (
|
||||
<div className="px-6 py-8 text-center text-gray-500">
|
||||
No bulk processing jobs found
|
||||
</div>
|
||||
) : (
|
||||
bulkJobs.map((job) => (
|
||||
<div key={job.id} className="px-6 py-4">
|
||||
<div className="flex items-center justify-between">
|
||||
<div className="flex-1">
|
||||
<div className="flex items-center space-x-3">
|
||||
<h3 className="text-lg font-medium text-gray-900">{job.job_type}</h3>
|
||||
<span className={`inline-flex px-2 py-1 text-xs font-semibold rounded-full ${
|
||||
job.status === 'completed' ? 'bg-green-100 text-green-800' :
|
||||
job.status === 'running' ? 'bg-blue-100 text-blue-800' :
|
||||
job.status === 'failed' ? 'bg-red-100 text-red-800' :
|
||||
job.status === 'cancelled' ? 'bg-orange-100 text-orange-800' :
|
||||
'bg-gray-100 text-gray-800'
|
||||
}`}>
|
||||
{job.status}
|
||||
</span>
|
||||
</div>
|
||||
<div className="mt-2 flex items-center space-x-6 text-sm text-gray-500">
|
||||
<span>Started: {formatDate(job.started_at)}</span>
|
||||
{job.completed_at && (
|
||||
<span>Completed: {formatDate(job.completed_at)}</span>
|
||||
)}
|
||||
{job.year && (
|
||||
<span>Year: {job.year}</span>
|
||||
)}
|
||||
</div>
|
||||
{job.total_items > 0 && (
|
||||
<div className="mt-2">
|
||||
<div className="flex items-center space-x-4 text-sm text-gray-600">
|
||||
<span>Progress: {job.processed_items}/{job.total_items}</span>
|
||||
{job.failed_items > 0 && (
|
||||
<span className="text-red-600">Failed: {job.failed_items}</span>
|
||||
)}
|
||||
</div>
|
||||
<div className="mt-1 w-full bg-gray-200 rounded-full h-2">
|
||||
<div
|
||||
className="bg-blue-600 h-2 rounded-full"
|
||||
style={{ width: `${(job.processed_items / job.total_items) * 100}%` }}
|
||||
></div>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
{job.error_message && (
|
||||
<div className="mt-2 p-2 bg-red-50 border border-red-200 rounded text-sm text-red-700">
|
||||
{job.error_message}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
<div className="flex-shrink-0 ml-4">
|
||||
{(job.status === 'running' || job.status === 'pending') && (
|
||||
<button
|
||||
onClick={() => cancelJob(job.id)}
|
||||
className="bg-red-600 hover:bg-red-700 text-white px-3 py-1 rounded-md text-sm font-medium"
|
||||
>
|
||||
Cancel
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
))
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
|
||||
if (loading) {
|
||||
return (
|
||||
<div className="min-h-screen bg-gray-100 flex items-center justify-center">
|
||||
|
@ -573,6 +899,16 @@ function App() {
|
|||
>
|
||||
SIGMA Rules
|
||||
</button>
|
||||
<button
|
||||
onClick={() => setActiveTab('bulk-jobs')}
|
||||
className={`inline-flex items-center px-1 pt-1 border-b-2 text-sm font-medium ${
|
||||
activeTab === 'bulk-jobs'
|
||||
? 'border-blue-500 text-gray-900'
|
||||
: 'border-transparent text-gray-500 hover:text-gray-700 hover:border-gray-300'
|
||||
}`}
|
||||
>
|
||||
Bulk Jobs
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
@ -584,6 +920,7 @@ function App() {
|
|||
{activeTab === 'dashboard' && <Dashboard />}
|
||||
{activeTab === 'cves' && <CVEList />}
|
||||
{activeTab === 'rules' && <SigmaRulesList />}
|
||||
{activeTab === 'bulk-jobs' && <BulkJobsList />}
|
||||
</div>
|
||||
</main>
|
||||
|
||||
|
|
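The new dashboard buttons are thin wrappers around backend endpoints, so the same bulk operations can be triggered from a script. A sketch using the endpoints referenced above; the base URL is an assumption and depends on how the backend port is published.

# trigger_bulk_jobs_example.py - illustrative sketch, not part of this commit
import requests

API = "http://localhost:8000"  # assumption: adjust to your deployment

# Start a bulk seed from 2020 onward (mirrors the "Bulk Seed (2020+)" button)
print(requests.post(f"{API}/api/bulk-seed",
                    json={"start_year": 2020, "end_year": None}).json())

# Sync nomi-sec PoCs for all CVEs (pass a specific cve_id to limit the sync)
requests.post(f"{API}/api/sync-nomi-sec", json={"cve_id": None})

# Poll job progress the same way the Bulk Jobs tab does
for job in requests.get(f"{API}/api/bulk-jobs").json():
    print(job["job_type"], job["status"],
          f'{job["processed_items"]}/{job["total_items"]}')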
28
init.sql
@ -13,6 +13,13 @@ CREATE TABLE cves (
    modified_date TIMESTAMP,
    affected_products TEXT[],
    reference_urls TEXT[],
    -- Bulk processing fields
    data_source VARCHAR(20) DEFAULT 'nvd_api',
    nvd_json_version VARCHAR(10) DEFAULT '2.0',
    bulk_processed BOOLEAN DEFAULT FALSE,
    -- nomi-sec PoC fields
    poc_count INTEGER DEFAULT 0,
    poc_data JSON,
    created_at TIMESTAMP DEFAULT NOW(),
    updated_at TIMESTAMP DEFAULT NOW()
);

@ -30,6 +37,10 @@ CREATE TABLE sigma_rules (
    exploit_based BOOLEAN DEFAULT FALSE,
    github_repos TEXT[],
    exploit_indicators TEXT,
    -- Enhanced fields for new data sources
    poc_source VARCHAR(20) DEFAULT 'github_search',
    poc_quality_score INTEGER DEFAULT 0,
    nomi_sec_data JSON,
    created_at TIMESTAMP DEFAULT NOW(),
    updated_at TIMESTAMP DEFAULT NOW()
);

@ -44,6 +55,23 @@ CREATE TABLE rule_templates (
    created_at TIMESTAMP DEFAULT NOW()
);

-- Bulk processing jobs table
CREATE TABLE bulk_processing_jobs (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    job_type VARCHAR(50) NOT NULL,
    status VARCHAR(20) DEFAULT 'pending',
    year INTEGER,
    total_items INTEGER DEFAULT 0,
    processed_items INTEGER DEFAULT 0,
    failed_items INTEGER DEFAULT 0,
    error_message TEXT,
    job_metadata JSON,
    started_at TIMESTAMP,
    completed_at TIMESTAMP,
    cancelled_at TIMESTAMP,
    created_at TIMESTAMP DEFAULT NOW()
);

-- Insert some basic rule templates
INSERT INTO rule_templates (template_name, template_content, applicable_product_patterns, description) VALUES
(
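The new columns also make PoC coverage easy to inspect directly in Postgres. A short sketch of such a query, assuming the database port from the compose file is published locally and that SQLAlchemy plus a Postgres driver are available to the script.

# poc_coverage_example.py - illustrative sketch, not part of this commit
from sqlalchemy import create_engine, text

# assumption: credentials from docker-compose, host/port adjusted for local access
engine = create_engine("postgresql://cve_user:cve_password@localhost:5432/cve_sigma_db")

query = text("""
    SELECT COUNT(*)                               AS total_cves,
           COUNT(*) FILTER (WHERE bulk_processed) AS bulk_processed,
           COUNT(*) FILTER (WHERE poc_count > 0)  AS with_pocs,
           ROUND(AVG(poc_count), 2)               AS avg_poc_count
    FROM cves
""")

with engine.connect() as conn:
    row = conn.execute(query).one()
    print(dict(row._mapping))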
0
start.sh
Normal file → Executable file