Commit 790e4bd91f: more updates for bulk
Parent: 5a9ae34996
11 changed files with 2500 additions and 22 deletions
README.md (35 changes)

@@ -1,20 +1,27 @@
-# CVE-SIGMA Auto Generator
+# CVE-SIGMA Auto Generator (Enhanced)

-An automated platform that fetches CVE data and automatically generates SIGMA rules for threat detection.
+An advanced automated platform that processes comprehensive CVE data and generates enhanced SIGMA rules for threat detection using curated exploit intelligence.

-## Features
+## 🚀 Enhanced Features

-- **Automated CVE Fetching**: Regularly polls the NVD (National Vulnerability Database) for CVEs from July 2025
-- **GitHub Exploit Analysis**: Automatically searches GitHub for exploit code related to each CVE
-- **Intelligent SIGMA Rule Generation**: Creates SIGMA rules based on CVE characteristics AND actual exploit code
-- **Exploit-Based Detection**: Enhanced rules using real indicators extracted from GitHub exploits
-- **Modern Web Interface**: React-based UI for browsing CVEs and managing SIGMA rules
-- **Real-time Updates**: Background tasks keep CVE data current with current 2025 vulnerabilities
-- **Rule Templates**: Configurable templates for different types of vulnerabilities
-- **MITRE ATT&CK Mapping**: Automatic mapping to MITRE ATT&CK techniques
-- **API Testing**: Built-in NVD API connectivity testing
-- **Enhanced Error Handling**: Robust fallback mechanisms and detailed logging
-- **Docker Compose**: Easy deployment and orchestration
+### Data Processing
+- **Bulk NVD Processing**: Downloads and processes complete NVD JSON datasets (2002-2025)
+- **nomi-sec PoC Integration**: Uses curated PoC data from github.com/nomi-sec/PoC-in-GitHub
+- **Incremental Updates**: Efficient updates using NVD modified/recent feeds
+- **Quality Assessment**: Advanced PoC quality scoring with star count, recency, and relevance analysis
+
+### Intelligence Generation
+- **Enhanced SIGMA Rules**: Creates rules using real exploit indicators from curated PoCs
+- **Quality Tiers**: Excellent, Good, Fair, Poor, Very Poor classification system
+- **Smart Template Selection**: AI-driven template matching based on PoC characteristics
+- **Advanced Indicator Extraction**: Processes, files, network, registry, and command patterns
+- **MITRE ATT&CK Mapping**: Automatic technique identification based on exploit analysis
+
+### User Experience
+- **Modern Web Interface**: React-based UI with enhanced bulk processing controls
+- **Real-time Monitoring**: Live job tracking and progress monitoring
+- **Comprehensive Statistics**: PoC coverage, quality metrics, and processing status
+- **Bulk Operations Dashboard**: Centralized control for all data processing operations

 ## Architecture
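The bulk operations described above are exposed through new API endpoints added to backend/main.py in this commit (/api/bulk-seed, /api/bulk-status, and related routes). A minimal client-side sketch of driving them, assuming the backend runs on localhost:8000 (the uvicorn default used below) and that the third-party requests package is available:

# Sketch only: host, port, and the requests dependency are assumptions.
import time
import requests

BASE = "http://localhost:8000"

# Kick off a full seed; a narrow year range keeps the first run short.
resp = requests.post(f"{BASE}/api/bulk-seed", params={"start_year": 2020})
print(resp.json())  # {"message": "Bulk seeding process started", ...}

# Poll the aggregate status served by BulkSeeder.get_seeding_status().
for _ in range(10):
    status = requests.get(f"{BASE}/api/bulk-status").json()
    stats = status.get("database_stats", {})
    print(f"CVEs: {stats.get('total_cves', 0)}, PoC coverage: {stats.get('poc_coverage', 0):.1f}%")
    time.sleep(60)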
backend/bulk_seeder.py (new file, 340 lines)

@@ -0,0 +1,340 @@
"""
Bulk Data Seeding Coordinator
Orchestrates the complete bulk seeding process using NVD JSON feeds and nomi-sec PoC data
"""

import asyncio
import logging
from datetime import datetime, timedelta
from typing import Optional
from sqlalchemy.orm import Session
from nvd_bulk_processor import NVDBulkProcessor
from nomi_sec_client import NomiSecClient

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class BulkSeeder:
    """Coordinates bulk seeding operations"""

    def __init__(self, db_session: Session):
        self.db_session = db_session
        self.nvd_processor = NVDBulkProcessor(db_session)
        self.nomi_sec_client = NomiSecClient(db_session)

    async def full_bulk_seed(self, start_year: int = 2002,
                             end_year: Optional[int] = None,
                             skip_nvd: bool = False,
                             skip_nomi_sec: bool = False) -> dict:
        """
        Perform complete bulk seeding operation

        Args:
            start_year: Starting year for NVD data (default: 2002)
            end_year: Ending year for NVD data (default: current year)
            skip_nvd: Skip NVD bulk processing (default: False)
            skip_nomi_sec: Skip nomi-sec PoC synchronization (default: False)

        Returns:
            Dictionary containing operation results
        """
        if end_year is None:
            end_year = datetime.now().year

        results = {
            'start_time': datetime.utcnow(),
            'nvd_results': None,
            'nomi_sec_results': None,
            'total_time': None,
            'status': 'running'
        }

        logger.info(f"Starting full bulk seed operation ({start_year}-{end_year})")

        try:
            # Phase 1: NVD Bulk Processing
            if not skip_nvd:
                logger.info("Phase 1: Starting NVD bulk processing...")
                nvd_results = await self.nvd_processor.bulk_seed_database(
                    start_year=start_year,
                    end_year=end_year
                )
                results['nvd_results'] = nvd_results
                logger.info(f"Phase 1 complete: {nvd_results['total_processed']} CVEs processed")
            else:
                logger.info("Phase 1: Skipping NVD bulk processing")

            # Phase 2: nomi-sec PoC Synchronization
            if not skip_nomi_sec:
                logger.info("Phase 2: Starting nomi-sec PoC synchronization...")
                nomi_sec_results = await self.nomi_sec_client.bulk_sync_all_cves(
                    batch_size=50  # Smaller batches for API stability
                )
                results['nomi_sec_results'] = nomi_sec_results
                logger.info(f"Phase 2 complete: {nomi_sec_results['total_pocs_found']} PoCs found")
            else:
                logger.info("Phase 2: Skipping nomi-sec PoC synchronization")

            # Phase 3: Generate Enhanced SIGMA Rules
            logger.info("Phase 3: Generating enhanced SIGMA rules...")
            sigma_results = await self.generate_enhanced_sigma_rules()
            results['sigma_results'] = sigma_results
            logger.info(f"Phase 3 complete: {sigma_results['rules_generated']} rules generated")

            results['status'] = 'completed'
            results['end_time'] = datetime.utcnow()
            results['total_time'] = (results['end_time'] - results['start_time']).total_seconds()

            logger.info(f"Full bulk seed operation completed in {results['total_time']:.2f} seconds")

        except Exception as e:
            logger.error(f"Bulk seed operation failed: {e}")
            results['status'] = 'failed'
            results['error'] = str(e)
            results['end_time'] = datetime.utcnow()

        return results

    async def incremental_update(self) -> dict:
        """
        Perform incremental update operation

        Returns:
            Dictionary containing update results
        """
        results = {
            'start_time': datetime.utcnow(),
            'nvd_update': None,
            'nomi_sec_update': None,
            'status': 'running'
        }

        logger.info("Starting incremental update...")

        try:
            # Update NVD data using modified/recent feeds
            logger.info("Updating NVD data...")
            nvd_update = await self.nvd_processor.incremental_update()
            results['nvd_update'] = nvd_update

            # Update PoC data for newly added/modified CVEs
            if nvd_update['total_processed'] > 0:
                logger.info("Updating PoC data for modified CVEs...")
                # Get recently modified CVEs and sync their PoCs
                recent_cves = await self._get_recently_modified_cves()
                nomi_sec_update = await self._sync_specific_cves(recent_cves)
                results['nomi_sec_update'] = nomi_sec_update

            results['status'] = 'completed'
            results['end_time'] = datetime.utcnow()

        except Exception as e:
            logger.error(f"Incremental update failed: {e}")
            results['status'] = 'failed'
            results['error'] = str(e)
            results['end_time'] = datetime.utcnow()

        return results

    async def generate_enhanced_sigma_rules(self) -> dict:
        """Generate enhanced SIGMA rules using nomi-sec PoC data"""
        from main import CVE, SigmaRule

        # Import the enhanced rule generator
        from enhanced_sigma_generator import EnhancedSigmaGenerator

        generator = EnhancedSigmaGenerator(self.db_session)

        # Get all CVEs that have PoC data but no enhanced rules
        cves_with_pocs = self.db_session.query(CVE).filter(
            CVE.poc_count > 0
        ).all()

        rules_generated = 0
        rules_updated = 0

        for cve in cves_with_pocs:
            try:
                # Check if we need to generate/update the rule
                existing_rule = self.db_session.query(SigmaRule).filter(
                    SigmaRule.cve_id == cve.cve_id
                ).first()

                if existing_rule and existing_rule.poc_source == 'nomi_sec':
                    # Rule already exists and is up to date
                    continue

                # Generate enhanced rule
                rule_result = await generator.generate_enhanced_rule(cve)

                if rule_result['success']:
                    if existing_rule:
                        rules_updated += 1
                    else:
                        rules_generated += 1

            except Exception as e:
                logger.error(f"Error generating rule for {cve.cve_id}: {e}")
                continue

        self.db_session.commit()

        return {
            'rules_generated': rules_generated,
            'rules_updated': rules_updated,
            'total_processed': len(cves_with_pocs)
        }

    async def _get_recently_modified_cves(self, hours: int = 24) -> list:
        """Get CVEs modified within the last N hours"""
        from main import CVE

        cutoff_time = datetime.utcnow() - timedelta(hours=hours)

        recent_cves = self.db_session.query(CVE).filter(
            CVE.updated_at >= cutoff_time
        ).all()

        return [cve.cve_id for cve in recent_cves]

    async def _sync_specific_cves(self, cve_ids: list) -> dict:
        """Sync PoC data for specific CVEs"""
        total_processed = 0
        total_pocs_found = 0

        for cve_id in cve_ids:
            try:
                result = await self.nomi_sec_client.sync_cve_pocs(cve_id)
                total_processed += 1
                total_pocs_found += result.get('pocs_found', 0)

                # Small delay to avoid overwhelming the API
                await asyncio.sleep(0.5)

            except Exception as e:
                logger.error(f"Error syncing PoCs for {cve_id}: {e}")
                continue

        return {
            'total_processed': total_processed,
            'total_pocs_found': total_pocs_found
        }

    async def get_seeding_status(self) -> dict:
        """Get current seeding status and statistics"""
        from main import CVE, SigmaRule, BulkProcessingJob

        # Get database statistics
        total_cves = self.db_session.query(CVE).count()
        bulk_processed_cves = self.db_session.query(CVE).filter(
            CVE.bulk_processed == True
        ).count()

        cves_with_pocs = self.db_session.query(CVE).filter(
            CVE.poc_count > 0
        ).count()

        total_rules = self.db_session.query(SigmaRule).count()
        nomi_sec_rules = self.db_session.query(SigmaRule).filter(
            SigmaRule.poc_source == 'nomi_sec'
        ).count()

        # Get recent job status
        recent_jobs = self.db_session.query(BulkProcessingJob).order_by(
            BulkProcessingJob.created_at.desc()
        ).limit(5).all()

        job_status = []
        for job in recent_jobs:
            job_status.append({
                'id': str(job.id),
                'job_type': job.job_type,
                'status': job.status,
                'created_at': job.created_at,
                'completed_at': job.completed_at,
                'processed_items': job.processed_items,
                'total_items': job.total_items,
                'failed_items': job.failed_items
            })

        return {
            'database_stats': {
                'total_cves': total_cves,
                'bulk_processed_cves': bulk_processed_cves,
                'cves_with_pocs': cves_with_pocs,
                'total_rules': total_rules,
                'nomi_sec_rules': nomi_sec_rules,
                'poc_coverage': (cves_with_pocs / total_cves * 100) if total_cves > 0 else 0,
                'nomi_sec_coverage': (nomi_sec_rules / total_rules * 100) if total_rules > 0 else 0
            },
            'recent_jobs': job_status,
            'nvd_data_status': await self._get_nvd_data_status(),
            'nomi_sec_status': await self.nomi_sec_client.get_sync_status()
        }

    async def _get_nvd_data_status(self) -> dict:
        """Get NVD data status"""
        from main import CVE

        # Get year distribution
        year_counts = {}
        cves = self.db_session.query(CVE).all()

        for cve in cves:
            if cve.published_date:
                year = cve.published_date.year
                year_counts[year] = year_counts.get(year, 0) + 1

        # Get source distribution
        source_counts = {}
        for cve in cves:
            source = cve.data_source or 'unknown'
            source_counts[source] = source_counts.get(source, 0) + 1

        return {
            'year_distribution': year_counts,
            'source_distribution': source_counts,
            'total_cves': len(cves),
            'date_range': {
                'earliest': min(cve.published_date for cve in cves if cve.published_date),
                'latest': max(cve.published_date for cve in cves if cve.published_date)
            } if cves else None
        }


# Standalone script functionality
async def main():
    """Main function for standalone execution"""
    from main import SessionLocal, engine, Base

    # Create tables
    Base.metadata.create_all(bind=engine)

    # Create database session
    db_session = SessionLocal()

    try:
        # Create bulk seeder
        seeder = BulkSeeder(db_session)

        # Get current status
        status = await seeder.get_seeding_status()
        print(f"Current Status: {status['database_stats']['total_cves']} CVEs in database")

        # Perform full bulk seed if database is empty
        if status['database_stats']['total_cves'] == 0:
            print("Database is empty. Starting full bulk seed...")
            results = await seeder.full_bulk_seed(start_year=2020)  # Start from 2020 for faster testing
            print(f"Bulk seed completed: {results}")
        else:
            print("Database contains data. Running incremental update...")
            results = await seeder.incremental_update()
            print(f"Incremental update completed: {results}")

    finally:
        db_session.close()


if __name__ == "__main__":
    asyncio.run(main())
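Nothing in this commit schedules incremental_update() on a timer; the endpoint in backend/main.py only runs it on demand. If periodic execution is wanted, a minimal driver sketch could look like the following (the interval, the standalone entry point, and the reuse of SessionLocal from backend/main.py are assumptions, not part of this commit):

# Sketch only: interval and entry point are assumptions.
import asyncio
from bulk_seeder import BulkSeeder
from main import SessionLocal

async def periodic_incremental_update(interval_hours: float = 6.0):
    while True:
        db_session = SessionLocal()
        try:
            seeder = BulkSeeder(db_session)
            results = await seeder.incremental_update()
            print(f"Incremental update finished with status: {results['status']}")
        finally:
            db_session.close()
        # Sleep until the next cycle
        await asyncio.sleep(interval_hours * 3600)

if __name__ == "__main__":
    asyncio.run(periodic_incremental_update())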
backend/enhanced_sigma_generator.py (new file, 438 lines)

@@ -0,0 +1,438 @@
"""
Enhanced SIGMA Rule Generator
Generates improved SIGMA rules using nomi-sec PoC data and traditional indicators
"""

import json
import logging
from datetime import datetime
from typing import Dict, List, Optional, Tuple
from sqlalchemy.orm import Session
import re

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class EnhancedSigmaGenerator:
    """Enhanced SIGMA rule generator using nomi-sec PoC data"""

    def __init__(self, db_session: Session):
        self.db_session = db_session

    async def generate_enhanced_rule(self, cve) -> dict:
        """Generate enhanced SIGMA rule for a CVE using PoC data"""
        from main import SigmaRule, RuleTemplate

        try:
            # Get PoC data
            poc_data = cve.poc_data or []

            # Find the best quality PoC
            best_poc = None
            if poc_data:
                best_poc = max(poc_data, key=lambda x: x.get('quality_analysis', {}).get('quality_score', 0))

            # Select appropriate template based on PoC analysis
            template = await self._select_template(cve, best_poc)

            if not template:
                logger.warning(f"No suitable template found for {cve.cve_id}")
                return {'success': False, 'error': 'No suitable template'}

            # Generate rule content
            rule_content = await self._generate_rule_content(cve, template, poc_data)

            # Calculate confidence level
            confidence_level = self._calculate_confidence_level(cve, poc_data)

            # Store or update SIGMA rule
            existing_rule = self.db_session.query(SigmaRule).filter(
                SigmaRule.cve_id == cve.cve_id
            ).first()

            rule_data = {
                'cve_id': cve.cve_id,
                'rule_name': f"{cve.cve_id} Enhanced Detection",
                'rule_content': rule_content,
                'detection_type': template.template_name,
                'log_source': self._extract_log_source(template.template_name),
                'confidence_level': confidence_level,
                'auto_generated': True,
                'exploit_based': len(poc_data) > 0,
                'poc_source': 'nomi_sec',
                'poc_quality_score': best_poc.get('quality_analysis', {}).get('quality_score', 0) if best_poc else 0,
                'nomi_sec_data': {
                    'total_pocs': len(poc_data),
                    'best_poc_quality': best_poc.get('quality_analysis', {}).get('quality_score', 0) if best_poc else 0,
                    'total_stars': sum(p.get('stargazers_count', 0) for p in poc_data),
                    'avg_stars': sum(p.get('stargazers_count', 0) for p in poc_data) / len(poc_data) if poc_data else 0
                },
                'github_repos': [p.get('html_url', '') for p in poc_data],
                'exploit_indicators': json.dumps(self._combine_exploit_indicators(poc_data)),
                'updated_at': datetime.utcnow()
            }

            if existing_rule:
                # Update existing rule
                for key, value in rule_data.items():
                    setattr(existing_rule, key, value)
                logger.info(f"Updated SIGMA rule for {cve.cve_id}")
            else:
                # Create new rule
                new_rule = SigmaRule(**rule_data)
                self.db_session.add(new_rule)
                logger.info(f"Created new SIGMA rule for {cve.cve_id}")

            self.db_session.commit()

            return {
                'success': True,
                'cve_id': cve.cve_id,
                'template': template.template_name,
                'confidence_level': confidence_level,
                'poc_count': len(poc_data),
                'quality_score': best_poc.get('quality_analysis', {}).get('quality_score', 0) if best_poc else 0
            }

        except Exception as e:
            logger.error(f"Error generating enhanced rule for {cve.cve_id}: {e}")
            return {'success': False, 'error': str(e)}

    async def _select_template(self, cve, best_poc: Optional[dict]) -> Optional[object]:
        """Select the most appropriate template based on CVE and PoC analysis"""
        from main import RuleTemplate

        templates = self.db_session.query(RuleTemplate).all()

        if not templates:
            logger.warning("No rule templates found in database")
            return None

        # Score templates based on relevance
        template_scores = {}

        for template in templates:
            score = 0

            # Score based on PoC indicators (highest priority)
            if best_poc:
                indicators = best_poc.get('exploit_indicators', {})
                score += self._score_template_poc_match(template, indicators)

            # Score based on CVE description
            score += self._score_template_cve_match(template, cve)

            # Score based on affected products
            if cve.affected_products:
                score += self._score_template_product_match(template, cve.affected_products)

            template_scores[template] = score

        # Return template with highest score
        if template_scores:
            best_template = max(template_scores, key=template_scores.get)
            logger.info(f"Selected template {best_template.template_name} with score {template_scores[best_template]}")
            return best_template

        return None

    def _score_template_poc_match(self, template: object, indicators: dict) -> int:
        """Score template based on PoC indicators"""
        score = 0
        template_name = template.template_name.lower()

        # Process-based templates
        if 'process' in template_name or 'execution' in template_name:
            if indicators.get('processes') or indicators.get('commands'):
                score += 30

        # Network-based templates
        if 'network' in template_name or 'connection' in template_name:
            if indicators.get('network') or indicators.get('urls'):
                score += 30

        # File-based templates
        if 'file' in template_name or 'modification' in template_name:
            if indicators.get('files'):
                score += 30

        # PowerShell templates
        if 'powershell' in template_name:
            processes = indicators.get('processes', [])
            if any('powershell' in p.lower() for p in processes):
                score += 35

        return score

    def _score_template_cve_match(self, template: object, cve) -> int:
        """Score template based on CVE description"""
        score = 0
        template_name = template.template_name.lower()
        description = (cve.description or '').lower()

        # Keyword matching
        if 'remote' in description and 'execution' in description:
            if 'process' in template_name or 'execution' in template_name:
                score += 20

        if 'powershell' in description:
            if 'powershell' in template_name:
                score += 25

        if 'network' in description or 'http' in description:
            if 'network' in template_name:
                score += 20

        if 'file' in description or 'upload' in description:
            if 'file' in template_name:
                score += 20

        return score

    def _score_template_product_match(self, template: object, affected_products: list) -> int:
        """Score template based on affected products"""
        score = 0

        if not template.applicable_product_patterns:
            return 0

        for pattern in template.applicable_product_patterns:
            pattern_lower = pattern.lower()
            for product in affected_products:
                product_lower = product.lower()
                if pattern_lower in product_lower:
                    score += 10
                    break

        return score

    async def _generate_rule_content(self, cve, template: object, poc_data: list) -> str:
        """Generate the actual SIGMA rule content"""
        # Combine all exploit indicators
        combined_indicators = self._combine_exploit_indicators(poc_data)

        # Get base template content
        rule_content = template.template_content

        # Replace template placeholders
        replacements = {
            '{{CVE_ID}}': cve.cve_id,
            '{{TITLE}}': f"{cve.cve_id} Enhanced Detection",
            '{{DESCRIPTION}}': self._generate_description(cve, poc_data),
            '{{LEVEL}}': self._calculate_confidence_level(cve, poc_data).lower(),
            '{{REFERENCES}}': self._generate_references(cve, poc_data),
            '{{TAGS}}': self._generate_tags(cve, poc_data),
            '{{PROCESSES}}': self._format_indicators(combined_indicators.get('processes', [])),
            '{{FILES}}': self._format_indicators(combined_indicators.get('files', [])),
            '{{COMMANDS}}': self._format_indicators(combined_indicators.get('commands', [])),
            '{{NETWORK}}': self._format_indicators(combined_indicators.get('network', [])),
            '{{URLS}}': self._format_indicators(combined_indicators.get('urls', [])),
            '{{REGISTRY}}': self._format_indicators(combined_indicators.get('registry', []))
        }

        # Apply replacements
        for placeholder, value in replacements.items():
            rule_content = rule_content.replace(placeholder, value)

        # Add enhanced detection based on PoC quality
        if poc_data:
            rule_content = self._enhance_detection_logic(rule_content, combined_indicators, poc_data)

        return rule_content

    def _combine_exploit_indicators(self, poc_data: list) -> dict:
        """Combine exploit indicators from all PoCs"""
        combined = {
            'processes': [],
            'files': [],
            'commands': [],
            'network': [],
            'urls': [],
            'registry': []
        }

        for poc in poc_data:
            indicators = poc.get('exploit_indicators', {})
            for key in combined.keys():
                if key in indicators:
                    combined[key].extend(indicators[key])

        # Deduplicate and filter
        for key in combined.keys():
            combined[key] = list(set(combined[key]))
            # Remove empty and invalid entries
            combined[key] = [item for item in combined[key] if item and len(item) > 2]

        return combined

    def _generate_description(self, cve, poc_data: list) -> str:
        """Generate enhanced rule description"""
        base_desc = f"Detection for {cve.cve_id}"

        if cve.description:
            # Extract key terms from CVE description
            desc_words = cve.description.lower().split()
            key_terms = [word for word in desc_words if word in [
                'remote', 'execution', 'injection', 'bypass', 'privilege', 'escalation',
                'overflow', 'disclosure', 'traversal', 'deserialization'
            ]]

            if key_terms:
                base_desc += f" involving {', '.join(set(key_terms[:3]))}"

        if poc_data:
            total_pocs = len(poc_data)
            total_stars = sum(p.get('stargazers_count', 0) for p in poc_data)
            base_desc += f" [Enhanced with {total_pocs} PoC(s), {total_stars} stars]"

        return base_desc

    def _generate_references(self, cve, poc_data: list) -> str:
        """Generate references section"""
        refs = []

        # Add CVE reference
        refs.append(f"https://nvd.nist.gov/vuln/detail/{cve.cve_id}")

        # Add top PoC references (max 3)
        if poc_data:
            sorted_pocs = sorted(poc_data, key=lambda x: x.get('stargazers_count', 0), reverse=True)
            for poc in sorted_pocs[:3]:
                if poc.get('html_url'):
                    refs.append(poc['html_url'])

        return '\n'.join(f" - {ref}" for ref in refs)

    def _generate_tags(self, cve, poc_data: list) -> str:
        """Generate MITRE ATT&CK tags and other tags"""
        tags = []

        # CVE tag
        tags.append(cve.cve_id.lower())

        # Add technique tags based on indicators
        combined_indicators = self._combine_exploit_indicators(poc_data)

        if combined_indicators.get('processes'):
            tags.append('attack.t1059')  # Command and Scripting Interpreter

        if combined_indicators.get('network'):
            tags.append('attack.t1071')  # Application Layer Protocol

        if combined_indicators.get('files'):
            tags.append('attack.t1105')  # Ingress Tool Transfer

        if any('powershell' in p.lower() for p in combined_indicators.get('processes', [])):
            tags.append('attack.t1059.001')  # PowerShell

        # Add PoC quality tags
        if poc_data:
            tags.append('exploit.poc')
            best_poc = max(poc_data, key=lambda x: x.get('quality_analysis', {}).get('quality_score', 0))
            quality_tier = best_poc.get('quality_analysis', {}).get('quality_tier', 'poor')
            tags.append(f'poc.quality.{quality_tier}')

        return '\n'.join(f" - {tag}" for tag in tags)

    def _format_indicators(self, indicators: list) -> str:
        """Format indicators for SIGMA rule"""
        if not indicators:
            return ''

        # Limit indicators to avoid overly complex rules
        limited_indicators = indicators[:10]

        formatted = []
        for indicator in limited_indicators:
            # Escape special characters for SIGMA
            escaped = indicator.replace('\\', '\\\\').replace('*', '\\*').replace('?', '\\?')
            formatted.append(f' - "{escaped}"')

        return '\n'.join(formatted)

    def _enhance_detection_logic(self, rule_content: str, indicators: dict, poc_data: list) -> str:
        """Enhance detection logic based on PoC quality and indicators"""

        # If we have high-quality PoCs, add additional detection conditions
        best_poc = max(poc_data, key=lambda x: x.get('quality_analysis', {}).get('quality_score', 0))
        quality_score = best_poc.get('quality_analysis', {}).get('quality_score', 0)

        if quality_score > 60:  # High quality PoC
            # Add more specific detection conditions
            if indicators.get('processes') and indicators.get('commands'):
                additional_condition = """
    process_and_command:
        Image|contains: {{PROCESSES}}
        CommandLine|contains: {{COMMANDS}}"""

                # Insert before the condition line
                rule_content = rule_content.replace(
                    'condition: selection',
                    additional_condition + '\n    condition: selection or process_and_command'
                )

        return rule_content

    def _calculate_confidence_level(self, cve, poc_data: list) -> str:
        """Calculate confidence level based on CVE and PoC data"""
        score = 0

        # CVSS score factor
        if cve.cvss_score:
            if cve.cvss_score >= 9.0:
                score += 40
            elif cve.cvss_score >= 7.0:
                score += 30
            elif cve.cvss_score >= 5.0:
                score += 20
            else:
                score += 10

        # PoC quality factor
        if poc_data:
            total_stars = sum(p.get('stargazers_count', 0) for p in poc_data)
            poc_count = len(poc_data)

            score += min(total_stars, 30)  # Max 30 points for stars
            score += min(poc_count * 5, 20)  # Max 20 points for PoC count

            # Quality tier bonus
            best_poc = max(poc_data, key=lambda x: x.get('quality_analysis', {}).get('quality_score', 0))
            quality_tier = best_poc.get('quality_analysis', {}).get('quality_tier', 'poor')

            tier_bonus = {
                'excellent': 20,
                'good': 15,
                'fair': 10,
                'poor': 5,
                'very_poor': 0
            }
            score += tier_bonus.get(quality_tier, 0)

        # Determine confidence level
        if score >= 80:
            return 'HIGH'
        elif score >= 60:
            return 'MEDIUM'
        elif score >= 40:
            return 'LOW'
        else:
            return 'INFORMATIONAL'

    def _extract_log_source(self, template_name: str) -> str:
        """Extract log source from template name"""
        template_lower = template_name.lower()

        if 'process' in template_lower or 'execution' in template_lower:
            return 'process_creation'
        elif 'network' in template_lower:
            return 'network_connection'
        elif 'file' in template_lower:
            return 'file_event'
        elif 'powershell' in template_lower:
            return 'powershell'
        elif 'registry' in template_lower:
            return 'registry_event'
        else:
            return 'generic'
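The generator above fills the {{...}} placeholders in RuleTemplate.template_content and later rewrites the 'condition: selection' line in _enhance_detection_logic. The actual templates live in the rule_templates table and are not part of this diff, so the following Python constant is only an illustrative sketch of a process-creation template that would be compatible with those placeholders (other placeholders such as {{CVE_ID}}, {{FILES}}, {{NETWORK}}, {{URLS}}, and {{REGISTRY}} are also available):

# Hypothetical example only: the real templates are seeded elsewhere and are not shown in this commit.
EXAMPLE_TEMPLATE_CONTENT = """title: {{TITLE}}
description: {{DESCRIPTION}}
references:
{{REFERENCES}}
tags:
{{TAGS}}
logsource:
    category: process_creation
detection:
    selection:
        CommandLine|contains:
{{COMMANDS}}
    condition: selection
level: {{LEVEL}}
"""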
backend/main.py (371 changes)

@@ -1,7 +1,7 @@
 from fastapi import FastAPI, HTTPException, BackgroundTasks, Depends
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse
-from sqlalchemy import create_engine, Column, String, Text, DECIMAL, TIMESTAMP, Boolean, ARRAY
+from sqlalchemy import create_engine, Column, String, Text, DECIMAL, TIMESTAMP, Boolean, ARRAY, Integer, JSON, func
 from sqlalchemy.ext.declarative import declarative_base
 from sqlalchemy.orm import sessionmaker, Session
 from sqlalchemy.dialects.postgresql import UUID

@@ -19,6 +19,16 @@ import base64
 from github import Github
 from urllib.parse import urlparse
 import hashlib
+import logging
+import threading
+
+# Setup logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+# Global job tracking
+running_jobs = {}
+job_cancellation_flags = {}
+
 # Database setup
 DATABASE_URL = os.getenv("DATABASE_URL", "postgresql://cve_user:cve_password@localhost:5432/cve_sigma_db")

@@ -39,6 +49,13 @@ class CVE(Base):
     modified_date = Column(TIMESTAMP)
     affected_products = Column(ARRAY(String))
     reference_urls = Column(ARRAY(String))
+    # Bulk processing fields
+    data_source = Column(String(20), default='nvd_api')  # 'nvd_api', 'nvd_bulk', 'manual'
+    nvd_json_version = Column(String(10), default='2.0')
+    bulk_processed = Column(Boolean, default=False)
+    # nomi-sec PoC fields
+    poc_count = Column(Integer, default=0)
+    poc_data = Column(JSON)  # Store nomi-sec PoC metadata
     created_at = Column(TIMESTAMP, default=datetime.utcnow)
     updated_at = Column(TIMESTAMP, default=datetime.utcnow)

@@ -56,6 +73,10 @@ class SigmaRule(Base):
     exploit_based = Column(Boolean, default=False)
     github_repos = Column(ARRAY(String))
     exploit_indicators = Column(Text)  # JSON string of extracted indicators
+    # Enhanced fields for new data sources
+    poc_source = Column(String(20), default='github_search')  # 'github_search', 'nomi_sec', 'manual'
+    poc_quality_score = Column(Integer, default=0)  # Based on star count, activity, etc.
+    nomi_sec_data = Column(JSON)  # Store nomi-sec PoC metadata
     created_at = Column(TIMESTAMP, default=datetime.utcnow)
     updated_at = Column(TIMESTAMP, default=datetime.utcnow)

@@ -69,6 +90,23 @@ class RuleTemplate(Base):
     description = Column(Text)
     created_at = Column(TIMESTAMP, default=datetime.utcnow)
+
+class BulkProcessingJob(Base):
+    __tablename__ = "bulk_processing_jobs"
+
+    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
+    job_type = Column(String(50), nullable=False)  # 'nvd_bulk_seed', 'nomi_sec_sync', 'incremental_update'
+    status = Column(String(20), default='pending')  # 'pending', 'running', 'completed', 'failed', 'cancelled'
+    year = Column(Integer)  # For year-based processing
+    total_items = Column(Integer, default=0)
+    processed_items = Column(Integer, default=0)
+    failed_items = Column(Integer, default=0)
+    error_message = Column(Text)
+    job_metadata = Column(JSON)  # Additional job-specific data
+    started_at = Column(TIMESTAMP)
+    completed_at = Column(TIMESTAMP)
+    cancelled_at = Column(TIMESTAMP)
+    created_at = Column(TIMESTAMP, default=datetime.utcnow)
+
 # Pydantic models
 class CVEResponse(BaseModel):
     id: str

@@ -941,12 +979,341 @@ async def get_stats(db: Session = Depends(get_db)):
     total_rules = db.query(SigmaRule).count()
     recent_cves = db.query(CVE).filter(CVE.published_date >= datetime.utcnow() - timedelta(days=7)).count()
+
+    # Enhanced stats with bulk processing info
+    bulk_processed_cves = db.query(CVE).filter(CVE.bulk_processed == True).count()
+    cves_with_pocs = db.query(CVE).filter(CVE.poc_count > 0).count()
+    nomi_sec_rules = db.query(SigmaRule).filter(SigmaRule.poc_source == 'nomi_sec').count()
+
     return {
         "total_cves": total_cves,
         "total_sigma_rules": total_rules,
-        "recent_cves_7_days": recent_cves
+        "recent_cves_7_days": recent_cves,
+        "bulk_processed_cves": bulk_processed_cves,
+        "cves_with_pocs": cves_with_pocs,
+        "nomi_sec_rules": nomi_sec_rules,
+        "poc_coverage": (cves_with_pocs / total_cves * 100) if total_cves > 0 else 0,
+        "nomi_sec_coverage": (nomi_sec_rules / total_rules * 100) if total_rules > 0 else 0
     }
+
+# New bulk processing endpoints
+@app.post("/api/bulk-seed")
+async def start_bulk_seed(background_tasks: BackgroundTasks,
+                          start_year: int = 2002,
+                          end_year: Optional[int] = None,
+                          skip_nvd: bool = False,
+                          skip_nomi_sec: bool = False,
+                          db: Session = Depends(get_db)):
+    """Start bulk seeding process"""
+
+    async def bulk_seed_task():
+        try:
+            from bulk_seeder import BulkSeeder
+            seeder = BulkSeeder(db)
+            result = await seeder.full_bulk_seed(
+                start_year=start_year,
+                end_year=end_year,
+                skip_nvd=skip_nvd,
+                skip_nomi_sec=skip_nomi_sec
+            )
+            logger.info(f"Bulk seed completed: {result}")
+        except Exception as e:
+            logger.error(f"Bulk seed failed: {e}")
+            import traceback
+            traceback.print_exc()
+
+    background_tasks.add_task(bulk_seed_task)
+
+    return {
+        "message": "Bulk seeding process started",
+        "status": "started",
+        "start_year": start_year,
+        "end_year": end_year or datetime.now().year,
+        "skip_nvd": skip_nvd,
+        "skip_nomi_sec": skip_nomi_sec
+    }
+
+@app.post("/api/incremental-update")
+async def start_incremental_update(background_tasks: BackgroundTasks, db: Session = Depends(get_db)):
+    """Start incremental update process"""
+
+    async def incremental_update_task():
+        try:
+            from bulk_seeder import BulkSeeder
+            seeder = BulkSeeder(db)
+            result = await seeder.incremental_update()
+            logger.info(f"Incremental update completed: {result}")
+        except Exception as e:
+            logger.error(f"Incremental update failed: {e}")
+            import traceback
+            traceback.print_exc()
+
+    background_tasks.add_task(incremental_update_task)
+
+    return {
+        "message": "Incremental update process started",
+        "status": "started"
+    }
+
+@app.post("/api/sync-nomi-sec")
+async def sync_nomi_sec(background_tasks: BackgroundTasks,
+                        cve_id: Optional[str] = None,
+                        batch_size: int = 50,
+                        db: Session = Depends(get_db)):
+    """Synchronize nomi-sec PoC data"""
+
+    # Create job record
+    job = BulkProcessingJob(
+        job_type='nomi_sec_sync',
+        status='pending',
+        job_metadata={
+            'cve_id': cve_id,
+            'batch_size': batch_size
+        }
+    )
+    db.add(job)
+    db.commit()
+    db.refresh(job)
+
+    job_id = str(job.id)
+    running_jobs[job_id] = job
+    job_cancellation_flags[job_id] = False
+
+    async def sync_task():
+        try:
+            job.status = 'running'
+            job.started_at = datetime.utcnow()
+            db.commit()
+
+            from nomi_sec_client import NomiSecClient
+            client = NomiSecClient(db)
+
+            if cve_id:
+                # Sync specific CVE
+                if job_cancellation_flags.get(job_id, False):
+                    logger.info(f"Job {job_id} cancelled before starting")
+                    return
+
+                result = await client.sync_cve_pocs(cve_id)
+                logger.info(f"Nomi-sec sync for {cve_id}: {result}")
+            else:
+                # Sync all CVEs with cancellation support
+                result = await client.bulk_sync_all_cves(
+                    batch_size=batch_size,
+                    cancellation_flag=lambda: job_cancellation_flags.get(job_id, False)
+                )
+                logger.info(f"Nomi-sec bulk sync completed: {result}")
+
+            # Update job status if not cancelled
+            if not job_cancellation_flags.get(job_id, False):
+                job.status = 'completed'
+                job.completed_at = datetime.utcnow()
+                db.commit()
+
+        except Exception as e:
+            if not job_cancellation_flags.get(job_id, False):
+                job.status = 'failed'
+                job.error_message = str(e)
+                job.completed_at = datetime.utcnow()
+                db.commit()
+
+            logger.error(f"Nomi-sec sync failed: {e}")
+            import traceback
+            traceback.print_exc()
+        finally:
+            # Clean up tracking
+            running_jobs.pop(job_id, None)
+            job_cancellation_flags.pop(job_id, None)
+
+    background_tasks.add_task(sync_task)
+
+    return {
+        "message": "Nomi-sec sync started" + (f" for {cve_id}" if cve_id else " for all CVEs"),
+        "status": "started",
+        "job_id": job_id,
+        "cve_id": cve_id,
+        "batch_size": batch_size
+    }
+
+@app.get("/api/bulk-jobs")
+async def get_bulk_jobs(limit: int = 10, db: Session = Depends(get_db)):
+    """Get bulk processing job status"""
+
+    jobs = db.query(BulkProcessingJob).order_by(
+        BulkProcessingJob.created_at.desc()
+    ).limit(limit).all()
+
+    result = []
+    for job in jobs:
+        job_dict = {
+            'id': str(job.id),
+            'job_type': job.job_type,
+            'status': job.status,
+            'year': job.year,
+            'total_items': job.total_items,
+            'processed_items': job.processed_items,
+            'failed_items': job.failed_items,
+            'error_message': job.error_message,
+            'metadata': job.job_metadata,
+            'started_at': job.started_at,
+            'completed_at': job.completed_at,
+            'created_at': job.created_at
+        }
+        result.append(job_dict)
+
+    return result
+
+@app.get("/api/bulk-status")
+async def get_bulk_status(db: Session = Depends(get_db)):
+    """Get comprehensive bulk processing status"""
+
+    try:
+        from bulk_seeder import BulkSeeder
+        seeder = BulkSeeder(db)
+        status = await seeder.get_seeding_status()
+        return status
+    except Exception as e:
+        logger.error(f"Error getting bulk status: {e}")
+        return {"error": str(e)}
+
+@app.get("/api/poc-stats")
+async def get_poc_stats(db: Session = Depends(get_db)):
+    """Get PoC-related statistics"""
+
+    try:
+        from nomi_sec_client import NomiSecClient
+        client = NomiSecClient(db)
+        stats = await client.get_sync_status()
+
+        # Additional PoC statistics
+        high_quality_cves = db.query(CVE).filter(
+            CVE.poc_count > 0,
+            func.json_extract_path_text(CVE.poc_data, '0', 'quality_analysis', 'quality_score').cast(Integer) > 60
+        ).count()
+
+        stats.update({
+            'high_quality_cves': high_quality_cves,
+            'avg_poc_count': db.query(func.avg(CVE.poc_count)).filter(CVE.poc_count > 0).scalar() or 0
+        })
+
+        return stats
+    except Exception as e:
+        logger.error(f"Error getting PoC stats: {e}")
+        return {"error": str(e)}
+
+@app.post("/api/regenerate-rules")
+async def regenerate_sigma_rules(background_tasks: BackgroundTasks,
+                                 force: bool = False,
+                                 db: Session = Depends(get_db)):
+    """Regenerate SIGMA rules using enhanced nomi-sec data"""
+
+    async def regenerate_task():
+        try:
+            from enhanced_sigma_generator import EnhancedSigmaGenerator
+            generator = EnhancedSigmaGenerator(db)
+
+            # Get CVEs with PoC data
+            cves_with_pocs = db.query(CVE).filter(CVE.poc_count > 0).all()
+
+            rules_generated = 0
+            rules_updated = 0
+
+            for cve in cves_with_pocs:
+                # Check if we should regenerate
+                existing_rule = db.query(SigmaRule).filter(
+                    SigmaRule.cve_id == cve.cve_id
+                ).first()
+
+                if existing_rule and existing_rule.poc_source == 'nomi_sec' and not force:
+                    continue
+
+                # Generate enhanced rule
+                result = await generator.generate_enhanced_rule(cve)
+
+                if result['success']:
+                    if existing_rule:
+                        rules_updated += 1
+                    else:
+                        rules_generated += 1
+
+            logger.info(f"Rule regeneration completed: {rules_generated} new, {rules_updated} updated")
+
+        except Exception as e:
+            logger.error(f"Rule regeneration failed: {e}")
+            import traceback
+            traceback.print_exc()
+
+    background_tasks.add_task(regenerate_task)
+
+    return {
+        "message": "SIGMA rule regeneration started",
+        "status": "started",
+        "force": force
+    }
+
+@app.post("/api/cancel-job/{job_id}")
+async def cancel_job(job_id: str, db: Session = Depends(get_db)):
+    """Cancel a running job"""
+    try:
+        # Find the job in the database
+        job = db.query(BulkProcessingJob).filter(BulkProcessingJob.id == job_id).first()
+        if not job:
+            raise HTTPException(status_code=404, detail="Job not found")
+
+        if job.status not in ['pending', 'running']:
+            raise HTTPException(status_code=400, detail=f"Cannot cancel job with status: {job.status}")
+
+        # Set cancellation flag
+        job_cancellation_flags[job_id] = True
+
+        # Update job status
+        job.status = 'cancelled'
+        job.cancelled_at = datetime.utcnow()
+        job.error_message = "Job cancelled by user"
+
+        db.commit()
+
+        logger.info(f"Job {job_id} cancellation requested")
+
+        return {
+            "message": f"Job {job_id} cancellation requested",
+            "status": "cancelled",
+            "job_id": job_id
+        }
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.error(f"Error cancelling job {job_id}: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+
+@app.get("/api/running-jobs")
+async def get_running_jobs(db: Session = Depends(get_db)):
+    """Get all currently running jobs"""
+    try:
+        jobs = db.query(BulkProcessingJob).filter(
+            BulkProcessingJob.status.in_(['pending', 'running'])
+        ).order_by(BulkProcessingJob.created_at.desc()).all()
+
+        result = []
+        for job in jobs:
+            result.append({
+                'id': str(job.id),
+                'job_type': job.job_type,
+                'status': job.status,
+                'year': job.year,
+                'total_items': job.total_items,
+                'processed_items': job.processed_items,
+                'failed_items': job.failed_items,
+                'error_message': job.error_message,
+                'started_at': job.started_at,
+                'created_at': job.created_at,
+                'can_cancel': job.status in ['pending', 'running']
+            })
+
+        return result
+    except Exception as e:
+        logger.error(f"Error getting running jobs: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+
 if __name__ == "__main__":
     import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=8000)
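The sync endpoint above returns a job_id that is tracked through the new running_jobs and job_cancellation_flags dictionaries and the BulkProcessingJob table. A minimal client-side sketch of starting, inspecting, and cancelling such a job (host, port, and the requests dependency are assumptions):

# Sketch only: start a nomi-sec sync job, list running jobs, then request cancellation.
import requests

BASE = "http://localhost:8000"

job = requests.post(f"{BASE}/api/sync-nomi-sec", params={"batch_size": 50}).json()
job_id = job["job_id"]
print(f"Started job {job_id}")

# /api/running-jobs lists jobs still in 'pending' or 'running' state
for j in requests.get(f"{BASE}/api/running-jobs").json():
    print(j["id"], j["job_type"], j["status"], j["processed_items"], "/", j["total_items"])

# Flip the cancellation flag checked by the background task
cancel = requests.post(f"{BASE}/api/cancel-job/{job_id}").json()
print(cancel["message"])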
backend/nomi_sec_client.py (new file, 477 lines)

@@ -0,0 +1,477 @@
"""
Nomi-sec PoC-in-GitHub Integration Client
Interfaces with the nomi-sec PoC-in-GitHub API for curated exploit data
"""

import aiohttp
import asyncio
import json
import logging
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Tuple
from sqlalchemy.orm import Session
from sqlalchemy import and_, or_
import time
import re

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class NomiSecClient:
    """Client for interacting with nomi-sec PoC-in-GitHub API"""

    def __init__(self, db_session: Session):
        self.db_session = db_session
        self.base_url = "https://poc-in-github.motikan2010.net/api/v1"
        self.rss_url = "https://poc-in-github.motikan2010.net/rss"

        # Rate limiting
        self.rate_limit_delay = 1.0  # 1 second between requests
        self.last_request_time = 0

        # Cache for recently fetched data
        self.cache = {}
        self.cache_ttl = 300  # 5 minutes

    async def _make_request(self, session: aiohttp.ClientSession,
                            url: str, params: dict = None) -> Optional[dict]:
        """Make a rate-limited request to the API"""
        try:
            # Rate limiting
            current_time = time.time()
            time_since_last = current_time - self.last_request_time
            if time_since_last < self.rate_limit_delay:
                await asyncio.sleep(self.rate_limit_delay - time_since_last)

            async with session.get(url, params=params, timeout=30) as response:
                self.last_request_time = time.time()

                if response.status == 200:
                    return await response.json()
                else:
                    logger.warning(f"API request failed: {response.status} for {url}")
                    return None

        except Exception as e:
            logger.error(f"Error making request to {url}: {e}")
            return None

    async def get_pocs_for_cve(self, cve_id: str) -> List[dict]:
        """Get all PoC repositories for a specific CVE"""
        cache_key = f"cve_{cve_id}"

        # Check cache
        if cache_key in self.cache:
            cached_data, timestamp = self.cache[cache_key]
            if time.time() - timestamp < self.cache_ttl:
                return cached_data

        async with aiohttp.ClientSession() as session:
            params = {"cve_id": cve_id}
            data = await self._make_request(session, self.base_url, params)

            if data and "pocs" in data:
                pocs = data["pocs"]
                # Cache the result
                self.cache[cache_key] = (pocs, time.time())
                logger.info(f"Found {len(pocs)} PoCs for {cve_id}")
                return pocs
            else:
                logger.info(f"No PoCs found for {cve_id}")
                return []

    async def get_recent_pocs(self, limit: int = 100) -> List[dict]:
        """Get recent PoCs from the API"""
        async with aiohttp.ClientSession() as session:
            params = {"limit": limit, "sort": "created_at"}
            data = await self._make_request(session, self.base_url, params)

            if data and "pocs" in data:
                return data["pocs"]
            else:
                return []

    async def get_high_quality_pocs(self, min_stars: int = 5, limit: int = 100) -> List[dict]:
        """Get high-quality PoCs sorted by star count"""
        async with aiohttp.ClientSession() as session:
            params = {"limit": limit, "sort": "stargazers_count"}
            data = await self._make_request(session, self.base_url, params)

            if data and "pocs" in data:
                # Filter by star count
                filtered_pocs = [
                    poc for poc in data["pocs"]
                    if int(poc.get("stargazers_count", "0")) >= min_stars
                ]
                return filtered_pocs
            else:
                return []

    async def search_pocs(self, query: str, limit: int = 50) -> List[dict]:
        """Search for PoCs using a query string"""
        async with aiohttp.ClientSession() as session:
            params = {"limit": limit, "q": query}
            data = await self._make_request(session, self.base_url, params)

            if data and "pocs" in data:
                return data["pocs"]
            else:
                return []

    def analyze_poc_quality(self, poc: dict) -> dict:
        """Analyze the quality of a PoC repository"""
        quality_score = 0
        factors = {}

        # Star count factor (0-40 points)
        stars = int(poc.get("stargazers_count", "0"))
        star_score = min(stars * 2, 40)  # 2 points per star, max 40
        quality_score += star_score
        factors["star_score"] = star_score

        # Recency factor (0-20 points)
        try:
            updated_at = datetime.fromisoformat(poc.get("updated_at", "").replace('Z', '+00:00'))
            days_old = (datetime.now(updated_at.tzinfo) - updated_at).days
            recency_score = max(20 - (days_old // 30), 0)  # Lose 1 point per month
            quality_score += recency_score
            factors["recency_score"] = recency_score
        except:
            factors["recency_score"] = 0

        # Description quality factor (0-15 points)
        description = poc.get("description", "")
        desc_score = 0
        if description:
            desc_score = min(len(description) // 10, 15)  # 1 point per 10 chars, max 15
        quality_score += desc_score
        factors["description_score"] = desc_score

        # Vulnerability description factor (0-15 points)
        vuln_desc = poc.get("vuln_description", "")
        vuln_score = 0
        if vuln_desc:
            vuln_score = min(len(vuln_desc) // 20, 15)  # 1 point per 20 chars, max 15
        quality_score += vuln_score
        factors["vuln_description_score"] = vuln_score

        # Repository name relevance factor (0-10 points)
        repo_name = poc.get("name", "").lower()
        cve_id = poc.get("cve_id", "").lower()
        name_score = 0
        if cve_id and cve_id.replace("-", "") in repo_name.replace("-", ""):
            name_score = 10
        elif any(keyword in repo_name for keyword in ["exploit", "poc", "cve", "vuln"]):
            name_score = 5
        quality_score += name_score
        factors["name_relevance_score"] = name_score

        return {
            "quality_score": quality_score,
            "factors": factors,
            "quality_tier": self._get_quality_tier(quality_score)
        }

    def _get_quality_tier(self, score: int) -> str:
        """Get quality tier based on score"""
        if score >= 80:
            return "excellent"
        elif score >= 60:
            return "good"
        elif score >= 40:
            return "fair"
        elif score >= 20:
            return "poor"
        else:
            return "very_poor"

    def extract_exploit_indicators(self, poc: dict) -> dict:
        """Extract exploit indicators from PoC metadata"""
        indicators = {
            "processes": [],
            "files": [],
            "network": [],
            "registry": [],
            "commands": [],
            "urls": [],
            "techniques": []
        }

        # Extract from description and vulnerability description
        text_sources = [
poc.get("description", ""),
|
||||||
|
poc.get("vuln_description", ""),
|
||||||
|
poc.get("name", "")
|
||||||
|
]
|
||||||
|
|
||||||
|
full_text = " ".join(text_sources).lower()
|
||||||
|
|
||||||
|
# Process patterns
|
||||||
|
process_patterns = [
|
||||||
|
r'\b(cmd\.exe|powershell\.exe|bash|sh|python\.exe|java\.exe)\b',
|
||||||
|
r'\b(createprocess|shellexecute|system)\b',
|
||||||
|
r'\b(reverse.?shell|bind.?shell)\b'
|
||||||
|
]
|
||||||
|
|
||||||
|
for pattern in process_patterns:
|
||||||
|
matches = re.findall(pattern, full_text, re.IGNORECASE)
|
||||||
|
indicators["processes"].extend(matches)
|
||||||
|
|
||||||
|
# File patterns
|
||||||
|
file_patterns = [
|
||||||
|
r'\b([a-zA-Z]:\\[^\\]+\\[^\\]+\.[a-zA-Z0-9]+)\b', # Windows paths
|
||||||
|
r'\b(/[^/\s]+/[^/\s]+\.[a-zA-Z0-9]+)\b', # Unix paths
|
||||||
|
r'\b(\w+\.(exe|dll|bat|ps1|py|sh|jar))\b' # Common executable files
|
||||||
|
]
|
||||||
|
|
||||||
|
for pattern in file_patterns:
|
||||||
|
matches = re.findall(pattern, full_text, re.IGNORECASE)
|
||||||
|
if isinstance(matches[0], tuple) if matches else False:
|
||||||
|
indicators["files"].extend([m[0] for m in matches])
|
||||||
|
else:
|
||||||
|
indicators["files"].extend(matches)
|
||||||
|
|
||||||
|
# Network patterns
|
||||||
|
network_patterns = [
|
||||||
|
r'\b(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\b', # IP addresses
|
||||||
|
r'\b((?:\d{1,5})|(?:0x[a-fA-F0-9]{1,4}))\b', # Ports
|
||||||
|
r'\b(http[s]?://[^\s]+)\b' # URLs
|
||||||
|
]
|
||||||
|
|
||||||
|
for pattern in network_patterns:
|
||||||
|
matches = re.findall(pattern, full_text, re.IGNORECASE)
|
||||||
|
if pattern.startswith(r'\b(http'):
|
||||||
|
indicators["urls"].extend(matches)
|
||||||
|
else:
|
||||||
|
indicators["network"].extend(matches)
|
||||||
|
|
||||||
|
# Command patterns
|
||||||
|
command_patterns = [
|
||||||
|
r'\b(curl|wget|nc|netcat|ncat)\b',
|
||||||
|
r'\b(whoami|id|uname|systeminfo)\b',
|
||||||
|
r'\b(cat|type|more|less)\b'
|
||||||
|
]
|
||||||
|
|
||||||
|
for pattern in command_patterns:
|
||||||
|
matches = re.findall(pattern, full_text, re.IGNORECASE)
|
||||||
|
indicators["commands"].extend(matches)
|
||||||
|
|
||||||
|
# Clean up and deduplicate
|
||||||
|
for key in indicators:
|
||||||
|
indicators[key] = list(set(indicators[key]))
|
||||||
|
|
||||||
|
return indicators
|
||||||
|
|
||||||
|
async def sync_cve_pocs(self, cve_id: str) -> dict:
|
||||||
|
"""Synchronize PoC data for a specific CVE"""
|
||||||
|
from main import CVE, SigmaRule
|
||||||
|
|
||||||
|
# Get existing CVE
|
||||||
|
cve = self.db_session.query(CVE).filter(CVE.cve_id == cve_id).first()
|
||||||
|
if not cve:
|
||||||
|
logger.warning(f"CVE {cve_id} not found in database")
|
||||||
|
return {"error": "CVE not found"}
|
||||||
|
|
||||||
|
# Fetch PoCs from nomi-sec API
|
||||||
|
pocs = await self.get_pocs_for_cve(cve_id)
|
||||||
|
|
||||||
|
if not pocs:
|
||||||
|
logger.info(f"No PoCs found for {cve_id}")
|
||||||
|
return {"cve_id": cve_id, "pocs_found": 0}
|
||||||
|
|
||||||
|
# Analyze and store PoC data
|
||||||
|
poc_data = []
|
||||||
|
github_repos = []
|
||||||
|
total_quality_score = 0
|
||||||
|
|
||||||
|
for poc in pocs:
|
||||||
|
quality_analysis = self.analyze_poc_quality(poc)
|
||||||
|
exploit_indicators = self.extract_exploit_indicators(poc)
|
||||||
|
|
||||||
|
poc_entry = {
|
||||||
|
"id": poc.get("id"),
|
||||||
|
"name": poc.get("name"),
|
||||||
|
"owner": poc.get("owner"),
|
||||||
|
"full_name": poc.get("full_name"),
|
||||||
|
"html_url": poc.get("html_url"),
|
||||||
|
"description": poc.get("description"),
|
||||||
|
"stargazers_count": int(poc.get("stargazers_count", "0")),
|
||||||
|
"created_at": poc.get("created_at"),
|
||||||
|
"updated_at": poc.get("updated_at"),
|
||||||
|
"quality_analysis": quality_analysis,
|
||||||
|
"exploit_indicators": exploit_indicators
|
||||||
|
}
|
||||||
|
|
||||||
|
poc_data.append(poc_entry)
|
||||||
|
github_repos.append(poc.get("html_url", ""))
|
||||||
|
total_quality_score += quality_analysis["quality_score"]
|
||||||
|
|
||||||
|
# Update CVE with PoC data
|
||||||
|
cve.poc_count = len(pocs)
|
||||||
|
cve.poc_data = poc_data
|
||||||
|
cve.updated_at = datetime.utcnow()
|
||||||
|
|
||||||
|
# Update or create SIGMA rule with enhanced PoC data
|
||||||
|
sigma_rule = self.db_session.query(SigmaRule).filter(
|
||||||
|
SigmaRule.cve_id == cve_id
|
||||||
|
).first()
|
||||||
|
|
||||||
|
if sigma_rule:
|
||||||
|
sigma_rule.poc_source = 'nomi_sec'
|
||||||
|
sigma_rule.poc_quality_score = total_quality_score // len(pocs) if pocs else 0
|
||||||
|
sigma_rule.nomi_sec_data = {
|
||||||
|
"total_pocs": len(pocs),
|
||||||
|
"average_quality": total_quality_score // len(pocs) if pocs else 0,
|
||||||
|
"best_poc": max(poc_data, key=lambda x: x["quality_analysis"]["quality_score"]) if poc_data else None,
|
||||||
|
"total_stars": sum(p["stargazers_count"] for p in poc_data)
|
||||||
|
}
|
||||||
|
sigma_rule.github_repos = github_repos
|
||||||
|
sigma_rule.updated_at = datetime.utcnow()
|
||||||
|
|
||||||
|
# Extract best exploit indicators
|
||||||
|
best_indicators = {}
|
||||||
|
for poc in poc_data:
|
||||||
|
for key, values in poc["exploit_indicators"].items():
|
||||||
|
if key not in best_indicators:
|
||||||
|
best_indicators[key] = []
|
||||||
|
best_indicators[key].extend(values)
|
||||||
|
|
||||||
|
# Deduplicate and store
|
||||||
|
for key in best_indicators:
|
||||||
|
best_indicators[key] = list(set(best_indicators[key]))
|
||||||
|
|
||||||
|
sigma_rule.exploit_indicators = json.dumps(best_indicators)
|
||||||
|
|
||||||
|
self.db_session.commit()
|
||||||
|
|
||||||
|
logger.info(f"Synchronized {len(pocs)} PoCs for {cve_id}")
|
||||||
|
|
||||||
|
return {
|
||||||
|
"cve_id": cve_id,
|
||||||
|
"pocs_found": len(pocs),
|
||||||
|
"total_quality_score": total_quality_score,
|
||||||
|
"average_quality": total_quality_score // len(pocs) if pocs else 0,
|
||||||
|
"github_repos": github_repos
|
||||||
|
}
|
||||||
|
|
||||||
|
async def bulk_sync_all_cves(self, batch_size: int = 100, cancellation_flag: Optional[callable] = None) -> dict:
|
||||||
|
"""Synchronize PoC data for all CVEs in database"""
|
||||||
|
from main import CVE, BulkProcessingJob
|
||||||
|
|
||||||
|
# Create bulk processing job
|
||||||
|
job = BulkProcessingJob(
|
||||||
|
job_type='nomi_sec_sync',
|
||||||
|
status='running',
|
||||||
|
started_at=datetime.utcnow(),
|
||||||
|
job_metadata={'batch_size': batch_size}
|
||||||
|
)
|
||||||
|
self.db_session.add(job)
|
||||||
|
self.db_session.commit()
|
||||||
|
|
||||||
|
total_processed = 0
|
||||||
|
total_found = 0
|
||||||
|
results = []
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Get all CVEs from database
|
||||||
|
cves = self.db_session.query(CVE).all()
|
||||||
|
job.total_items = len(cves)
|
||||||
|
self.db_session.commit()
|
||||||
|
|
||||||
|
# Process in batches
|
||||||
|
for i in range(0, len(cves), batch_size):
|
||||||
|
# Check for cancellation before each batch
|
||||||
|
if cancellation_flag and cancellation_flag():
|
||||||
|
logger.info("Bulk sync cancelled by user")
|
||||||
|
job.status = 'cancelled'
|
||||||
|
job.cancelled_at = datetime.utcnow()
|
||||||
|
job.error_message = "Job cancelled by user"
|
||||||
|
break
|
||||||
|
|
||||||
|
batch = cves[i:i + batch_size]
|
||||||
|
|
||||||
|
for cve in batch:
|
||||||
|
# Check for cancellation before each CVE
|
||||||
|
if cancellation_flag and cancellation_flag():
|
||||||
|
logger.info("Bulk sync cancelled by user")
|
||||||
|
job.status = 'cancelled'
|
||||||
|
job.cancelled_at = datetime.utcnow()
|
||||||
|
job.error_message = "Job cancelled by user"
|
||||||
|
break
|
||||||
|
|
||||||
|
try:
|
||||||
|
result = await self.sync_cve_pocs(cve.cve_id)
|
||||||
|
total_processed += 1
|
||||||
|
|
||||||
|
if result.get("pocs_found", 0) > 0:
|
||||||
|
total_found += result["pocs_found"]
|
||||||
|
results.append(result)
|
||||||
|
|
||||||
|
job.processed_items += 1
|
||||||
|
|
||||||
|
# Small delay to avoid overwhelming the API
|
||||||
|
await asyncio.sleep(0.5)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error syncing PoCs for {cve.cve_id}: {e}")
|
||||||
|
job.failed_items += 1
|
||||||
|
|
||||||
|
# Break out of outer loop if cancelled
|
||||||
|
if job.status == 'cancelled':
|
||||||
|
break
|
||||||
|
|
||||||
|
# Commit after each batch
|
||||||
|
self.db_session.commit()
|
||||||
|
logger.info(f"Processed batch {i//batch_size + 1}/{(len(cves) + batch_size - 1)//batch_size}")
|
||||||
|
|
||||||
|
# Update job status (only if not cancelled)
|
||||||
|
if job.status != 'cancelled':
|
||||||
|
job.status = 'completed'
|
||||||
|
job.completed_at = datetime.utcnow()
|
||||||
|
|
||||||
|
job.job_metadata.update({
|
||||||
|
'total_processed': total_processed,
|
||||||
|
'total_pocs_found': total_found,
|
||||||
|
'cves_with_pocs': len(results)
|
||||||
|
})
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
job.status = 'failed'
|
||||||
|
job.error_message = str(e)
|
||||||
|
job.completed_at = datetime.utcnow()
|
||||||
|
logger.error(f"Bulk PoC sync job failed: {e}")
|
||||||
|
|
||||||
|
finally:
|
||||||
|
self.db_session.commit()
|
||||||
|
|
||||||
|
return {
|
||||||
|
'job_id': str(job.id),
|
||||||
|
'status': job.status,
|
||||||
|
'total_processed': total_processed,
|
||||||
|
'total_pocs_found': total_found,
|
||||||
|
'cves_with_pocs': len(results)
|
||||||
|
}
|
||||||
|
|
||||||
|
async def get_sync_status(self) -> dict:
|
||||||
|
"""Get synchronization status"""
|
||||||
|
from main import CVE, SigmaRule
|
||||||
|
|
||||||
|
# Count CVEs with PoC data
|
||||||
|
total_cves = self.db_session.query(CVE).count()
|
||||||
|
cves_with_pocs = self.db_session.query(CVE).filter(CVE.poc_count > 0).count()
|
||||||
|
|
||||||
|
# Count SIGMA rules with nomi-sec data
|
||||||
|
total_rules = self.db_session.query(SigmaRule).count()
|
||||||
|
rules_with_nomi_sec = self.db_session.query(SigmaRule).filter(
|
||||||
|
SigmaRule.poc_source == 'nomi_sec'
|
||||||
|
).count()
|
||||||
|
|
||||||
|
return {
|
||||||
|
'total_cves': total_cves,
|
||||||
|
'cves_with_pocs': cves_with_pocs,
|
||||||
|
'poc_coverage': (cves_with_pocs / total_cves * 100) if total_cves > 0 else 0,
|
||||||
|
'total_rules': total_rules,
|
||||||
|
'rules_with_nomi_sec': rules_with_nomi_sec,
|
||||||
|
'nomi_sec_coverage': (rules_with_nomi_sec / total_rules * 100) if total_rules > 0 else 0
|
||||||
|
}
|
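Taken together, the methods above form an async client for the nomi-sec PoC-in-GitHub API. As a rough illustration only, the following sketch shows how such a client might be driven for a single CVE. The module and class names (`nomi_sec_client`, `NomiSecClient`), the `SessionLocal` factory, and the local connection string are assumptions for this sketch, not part of the commit; in the project itself this is triggered through the backend API instead.

# Hypothetical driver script -- module, class, and session-factory names are assumed.
import asyncio

from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

from nomi_sec_client import NomiSecClient  # assumed module/class name

engine = create_engine("postgresql://cve_user:cve_password@localhost:5432/cve_sigma_db")
SessionLocal = sessionmaker(bind=engine)

async def main():
    db = SessionLocal()
    try:
        client = NomiSecClient(db)
        # Fetch curated PoCs for one CVE and score each repository
        pocs = await client.get_pocs_for_cve("CVE-2021-44228")
        for poc in pocs:
            quality = client.analyze_poc_quality(poc)
            print(poc.get("full_name"), quality["quality_tier"], quality["quality_score"])
        # Persist PoC data and enrich the matching SIGMA rule (requires the CVE to be seeded already)
        result = await client.sync_cve_pocs("CVE-2021-44228")
        print(result)
    finally:
        db.close()

if __name__ == "__main__":
    asyncio.run(main())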
483  backend/nvd_bulk_processor.py  Normal file
@@ -0,0 +1,483 @@
"""
|
||||||
|
NVD JSON Dataset Bulk Processor
|
||||||
|
Downloads and processes NVD JSON data feeds for comprehensive CVE seeding
|
||||||
|
"""
|
||||||
|
|
||||||
|
import requests
|
||||||
|
import json
|
||||||
|
import gzip
|
||||||
|
import zipfile
|
||||||
|
import os
|
||||||
|
import logging
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
from typing import Dict, List, Optional, Tuple
|
||||||
|
from sqlalchemy.orm import Session
|
||||||
|
from sqlalchemy import and_, or_
|
||||||
|
import asyncio
|
||||||
|
import aiohttp
|
||||||
|
from pathlib import Path
|
||||||
|
import hashlib
|
||||||
|
import time
|
||||||
|
|
||||||
|
# Configure logging
|
||||||
|
logging.basicConfig(level=logging.INFO)
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
class NVDBulkProcessor:
|
||||||
|
"""Handles bulk downloading and processing of NVD JSON data feeds"""
|
||||||
|
|
||||||
|
def __init__(self, db_session: Session, data_dir: str = "./nvd_data"):
|
||||||
|
self.db_session = db_session
|
||||||
|
self.data_dir = Path(data_dir)
|
||||||
|
self.data_dir.mkdir(exist_ok=True)
|
||||||
|
self.api_key = os.getenv("NVD_API_KEY")
|
||||||
|
|
||||||
|
# NVD JSON 2.0 feed URLs
|
||||||
|
self.base_url = "https://nvd.nist.gov/feeds/json/cve/1.1"
|
||||||
|
self.feed_urls = {
|
||||||
|
"modified": f"{self.base_url}/nvdcve-1.1-modified.json.gz",
|
||||||
|
"recent": f"{self.base_url}/nvdcve-1.1-recent.json.gz"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Rate limiting
|
||||||
|
self.rate_limit_delay = 0.6 # 600ms between requests
|
||||||
|
self.last_request_time = 0
|
||||||
|
|
||||||
|
def get_year_feed_url(self, year: int) -> str:
|
||||||
|
"""Get the URL for a specific year's CVE feed"""
|
||||||
|
return f"{self.base_url}/nvdcve-1.1-{year}.json.gz"
|
||||||
|
|
||||||
|
def get_meta_url(self, feed_url: str) -> str:
|
||||||
|
"""Get the metadata URL for a feed"""
|
||||||
|
return feed_url.replace(".json.gz", ".meta")
|
||||||
|
|
||||||
|
async def download_file(self, session: aiohttp.ClientSession, url: str,
|
||||||
|
destination: Path, check_meta: bool = True) -> bool:
|
||||||
|
"""Download a file with metadata checking"""
|
||||||
|
try:
|
||||||
|
# Check if we should download based on metadata
|
||||||
|
if check_meta:
|
||||||
|
meta_url = self.get_meta_url(url)
|
||||||
|
should_download = await self._should_download_file(session, meta_url, destination)
|
||||||
|
if not should_download:
|
||||||
|
logger.info(f"Skipping {url} - file is up to date")
|
||||||
|
return True
|
||||||
|
|
||||||
|
# Rate limiting
|
||||||
|
current_time = time.time()
|
||||||
|
time_since_last = current_time - self.last_request_time
|
||||||
|
if time_since_last < self.rate_limit_delay:
|
||||||
|
await asyncio.sleep(self.rate_limit_delay - time_since_last)
|
||||||
|
|
||||||
|
# Download the file
|
||||||
|
headers = {}
|
||||||
|
if self.api_key:
|
||||||
|
headers["apiKey"] = self.api_key
|
||||||
|
|
||||||
|
async with session.get(url, headers=headers, timeout=30) as response:
|
||||||
|
if response.status == 200:
|
||||||
|
content = await response.read()
|
||||||
|
destination.write_bytes(content)
|
||||||
|
logger.info(f"Downloaded {url} -> {destination}")
|
||||||
|
self.last_request_time = time.time()
|
||||||
|
return True
|
||||||
|
else:
|
||||||
|
logger.error(f"Failed to download {url}: HTTP {response.status}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error downloading {url}: {e}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
async def _should_download_file(self, session: aiohttp.ClientSession,
|
||||||
|
meta_url: str, destination: Path) -> bool:
|
||||||
|
"""Check if file should be downloaded based on metadata"""
|
||||||
|
try:
|
||||||
|
# Download metadata
|
||||||
|
async with session.get(meta_url, timeout=10) as response:
|
||||||
|
if response.status != 200:
|
||||||
|
return True # Download if we can't get metadata
|
||||||
|
|
||||||
|
meta_content = await response.text()
|
||||||
|
|
||||||
|
# Parse metadata
|
||||||
|
meta_data = {}
|
||||||
|
for line in meta_content.strip().split('\n'):
|
||||||
|
if ':' in line:
|
||||||
|
key, value = line.split(':', 1)
|
||||||
|
meta_data[key.strip()] = value.strip()
|
||||||
|
|
||||||
|
# Check if local file exists and matches
|
||||||
|
if destination.exists():
|
||||||
|
local_size = destination.stat().st_size
|
||||||
|
remote_size = int(meta_data.get('size', 0))
|
||||||
|
remote_sha256 = meta_data.get('sha256', '')
|
||||||
|
|
||||||
|
if local_size == remote_size and remote_sha256:
|
||||||
|
# Verify SHA256 if available
|
||||||
|
local_sha256 = self._calculate_sha256(destination)
|
||||||
|
if local_sha256 == remote_sha256:
|
||||||
|
return False # File is up to date
|
||||||
|
|
||||||
|
return True # Download needed
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Error checking metadata for {meta_url}: {e}")
|
||||||
|
return True # Download if metadata check fails
|
||||||
|
|
||||||
|
def _calculate_sha256(self, file_path: Path) -> str:
|
||||||
|
"""Calculate SHA256 hash of a file"""
|
||||||
|
sha256_hash = hashlib.sha256()
|
||||||
|
with open(file_path, "rb") as f:
|
||||||
|
for chunk in iter(lambda: f.read(4096), b""):
|
||||||
|
sha256_hash.update(chunk)
|
||||||
|
return sha256_hash.hexdigest()
|
||||||
|
|
||||||
|
async def download_all_feeds(self, start_year: int = 2002,
|
||||||
|
end_year: Optional[int] = None) -> List[Path]:
|
||||||
|
"""Download all NVD JSON feeds"""
|
||||||
|
if end_year is None:
|
||||||
|
end_year = datetime.now().year
|
||||||
|
|
||||||
|
downloaded_files = []
|
||||||
|
|
||||||
|
async with aiohttp.ClientSession() as session:
|
||||||
|
# Download year-based feeds
|
||||||
|
for year in range(start_year, end_year + 1):
|
||||||
|
url = self.get_year_feed_url(year)
|
||||||
|
filename = f"nvdcve-1.1-{year}.json.gz"
|
||||||
|
destination = self.data_dir / filename
|
||||||
|
|
||||||
|
if await self.download_file(session, url, destination):
|
||||||
|
downloaded_files.append(destination)
|
||||||
|
|
||||||
|
# Download modified and recent feeds
|
||||||
|
for feed_name, url in self.feed_urls.items():
|
||||||
|
filename = f"nvdcve-1.1-{feed_name}.json.gz"
|
||||||
|
destination = self.data_dir / filename
|
||||||
|
|
||||||
|
if await self.download_file(session, url, destination):
|
||||||
|
downloaded_files.append(destination)
|
||||||
|
|
||||||
|
return downloaded_files
|
||||||
|
|
||||||
|
def extract_json_file(self, compressed_file: Path) -> Path:
|
||||||
|
"""Extract JSON from compressed file"""
|
||||||
|
json_file = compressed_file.with_suffix('.json')
|
||||||
|
|
||||||
|
try:
|
||||||
|
if compressed_file.suffix == '.gz':
|
||||||
|
with gzip.open(compressed_file, 'rt', encoding='utf-8') as f_in:
|
||||||
|
with open(json_file, 'w', encoding='utf-8') as f_out:
|
||||||
|
f_out.write(f_in.read())
|
||||||
|
elif compressed_file.suffix == '.zip':
|
||||||
|
with zipfile.ZipFile(compressed_file, 'r') as zip_ref:
|
||||||
|
zip_ref.extractall(self.data_dir)
|
||||||
|
else:
|
||||||
|
# File is already uncompressed
|
||||||
|
return compressed_file
|
||||||
|
|
||||||
|
logger.info(f"Extracted {compressed_file} -> {json_file}")
|
||||||
|
return json_file
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error extracting {compressed_file}: {e}")
|
||||||
|
raise
|
||||||
|
|
||||||
|
def process_json_file(self, json_file: Path) -> Tuple[int, int]:
|
||||||
|
"""Process a single JSON file and return (processed, failed) counts"""
|
||||||
|
from main import CVE, BulkProcessingJob
|
||||||
|
|
||||||
|
processed_count = 0
|
||||||
|
failed_count = 0
|
||||||
|
|
||||||
|
try:
|
||||||
|
with open(json_file, 'r', encoding='utf-8') as f:
|
||||||
|
data = json.load(f)
|
||||||
|
|
||||||
|
cve_items = data.get('CVE_Items', [])
|
||||||
|
logger.info(f"Processing {len(cve_items)} CVEs from {json_file}")
|
||||||
|
|
||||||
|
for cve_item in cve_items:
|
||||||
|
try:
|
||||||
|
cve_data = self._extract_cve_data(cve_item)
|
||||||
|
if cve_data:
|
||||||
|
self._store_cve_data(cve_data)
|
||||||
|
processed_count += 1
|
||||||
|
else:
|
||||||
|
failed_count += 1
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error processing CVE item: {e}")
|
||||||
|
failed_count += 1
|
||||||
|
|
||||||
|
# Commit changes
|
||||||
|
self.db_session.commit()
|
||||||
|
logger.info(f"Processed {processed_count} CVEs, failed: {failed_count}")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error processing {json_file}: {e}")
|
||||||
|
self.db_session.rollback()
|
||||||
|
raise
|
||||||
|
|
||||||
|
return processed_count, failed_count
|
||||||
|
|
||||||
|
def _extract_cve_data(self, cve_item: dict) -> Optional[dict]:
|
||||||
|
"""Extract CVE data from JSON item"""
|
||||||
|
try:
|
||||||
|
cve = cve_item.get('cve', {})
|
||||||
|
impact = cve_item.get('impact', {})
|
||||||
|
|
||||||
|
cve_id = cve.get('CVE_data_meta', {}).get('ID', '')
|
||||||
|
if not cve_id:
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Description
|
||||||
|
description_data = cve.get('description', {}).get('description_data', [])
|
||||||
|
description = ''
|
||||||
|
if description_data:
|
||||||
|
description = description_data[0].get('value', '')
|
||||||
|
|
||||||
|
# CVSS Score
|
||||||
|
cvss_score = None
|
||||||
|
severity = None
|
||||||
|
if 'baseMetricV3' in impact:
|
||||||
|
cvss_v3 = impact['baseMetricV3'].get('cvssV3', {})
|
||||||
|
cvss_score = cvss_v3.get('baseScore')
|
||||||
|
severity = cvss_v3.get('baseSeverity', '').lower()
|
||||||
|
elif 'baseMetricV2' in impact:
|
||||||
|
cvss_v2 = impact['baseMetricV2'].get('cvssV2', {})
|
||||||
|
cvss_score = cvss_v2.get('baseScore')
|
||||||
|
severity = impact['baseMetricV2'].get('severity', '').lower()
|
||||||
|
|
||||||
|
# Dates
|
||||||
|
published_date = None
|
||||||
|
modified_date = None
|
||||||
|
if 'publishedDate' in cve_item:
|
||||||
|
published_date = datetime.fromisoformat(
|
||||||
|
cve_item['publishedDate'].replace('Z', '+00:00')
|
||||||
|
)
|
||||||
|
if 'lastModifiedDate' in cve_item:
|
||||||
|
modified_date = datetime.fromisoformat(
|
||||||
|
cve_item['lastModifiedDate'].replace('Z', '+00:00')
|
||||||
|
)
|
||||||
|
|
||||||
|
# Affected products (from CPE data)
|
||||||
|
affected_products = []
|
||||||
|
configurations = cve_item.get('configurations', {})
|
||||||
|
for node in configurations.get('nodes', []):
|
||||||
|
for cpe_match in node.get('cpe_match', []):
|
||||||
|
if cpe_match.get('vulnerable', False):
|
||||||
|
cpe_uri = cpe_match.get('cpe23Uri', '')
|
||||||
|
if cpe_uri:
|
||||||
|
affected_products.append(cpe_uri)
|
||||||
|
|
||||||
|
# Reference URLs
|
||||||
|
reference_urls = []
|
||||||
|
references = cve.get('references', {}).get('reference_data', [])
|
||||||
|
for ref in references:
|
||||||
|
url = ref.get('url', '')
|
||||||
|
if url:
|
||||||
|
reference_urls.append(url)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'cve_id': cve_id,
|
||||||
|
'description': description,
|
||||||
|
'cvss_score': cvss_score,
|
||||||
|
'severity': severity,
|
||||||
|
'published_date': published_date,
|
||||||
|
'modified_date': modified_date,
|
||||||
|
'affected_products': affected_products,
|
||||||
|
'reference_urls': reference_urls,
|
||||||
|
'data_source': 'nvd_bulk',
|
||||||
|
'nvd_json_version': '1.1',
|
||||||
|
'bulk_processed': True
|
||||||
|
}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error extracting CVE data: {e}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
def _store_cve_data(self, cve_data: dict):
|
||||||
|
"""Store CVE data in database"""
|
||||||
|
from main import CVE
|
||||||
|
|
||||||
|
# Check if CVE already exists
|
||||||
|
existing_cve = self.db_session.query(CVE).filter(
|
||||||
|
CVE.cve_id == cve_data['cve_id']
|
||||||
|
).first()
|
||||||
|
|
||||||
|
if existing_cve:
|
||||||
|
# Update existing CVE
|
||||||
|
for key, value in cve_data.items():
|
||||||
|
setattr(existing_cve, key, value)
|
||||||
|
existing_cve.updated_at = datetime.utcnow()
|
||||||
|
logger.debug(f"Updated CVE {cve_data['cve_id']}")
|
||||||
|
else:
|
||||||
|
# Create new CVE
|
||||||
|
new_cve = CVE(**cve_data)
|
||||||
|
self.db_session.add(new_cve)
|
||||||
|
logger.debug(f"Created new CVE {cve_data['cve_id']}")
|
||||||
|
|
||||||
|
async def bulk_seed_database(self, start_year: int = 2002,
|
||||||
|
end_year: Optional[int] = None) -> dict:
|
||||||
|
"""Perform complete bulk seeding of the database"""
|
||||||
|
from main import BulkProcessingJob
|
||||||
|
|
||||||
|
if end_year is None:
|
||||||
|
end_year = datetime.now().year
|
||||||
|
|
||||||
|
# Create bulk processing job
|
||||||
|
job = BulkProcessingJob(
|
||||||
|
job_type='nvd_bulk_seed',
|
||||||
|
status='running',
|
||||||
|
started_at=datetime.utcnow(),
|
||||||
|
job_metadata={
|
||||||
|
'start_year': start_year,
|
||||||
|
'end_year': end_year,
|
||||||
|
'total_years': end_year - start_year + 1
|
||||||
|
}
|
||||||
|
)
|
||||||
|
self.db_session.add(job)
|
||||||
|
self.db_session.commit()
|
||||||
|
|
||||||
|
total_processed = 0
|
||||||
|
total_failed = 0
|
||||||
|
results = []
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Download all feeds
|
||||||
|
logger.info(f"Starting bulk seed from {start_year} to {end_year}")
|
||||||
|
downloaded_files = await self.download_all_feeds(start_year, end_year)
|
||||||
|
|
||||||
|
job.total_items = len(downloaded_files)
|
||||||
|
self.db_session.commit()
|
||||||
|
|
||||||
|
# Process each file
|
||||||
|
for file_path in downloaded_files:
|
||||||
|
try:
|
||||||
|
# Extract JSON file
|
||||||
|
json_file = self.extract_json_file(file_path)
|
||||||
|
|
||||||
|
# Process the JSON file
|
||||||
|
processed, failed = self.process_json_file(json_file)
|
||||||
|
|
||||||
|
total_processed += processed
|
||||||
|
total_failed += failed
|
||||||
|
job.processed_items += 1
|
||||||
|
|
||||||
|
results.append({
|
||||||
|
'file': file_path.name,
|
||||||
|
'processed': processed,
|
||||||
|
'failed': failed
|
||||||
|
})
|
||||||
|
|
||||||
|
# Clean up extracted file if it's different from original
|
||||||
|
if json_file != file_path:
|
||||||
|
json_file.unlink()
|
||||||
|
|
||||||
|
self.db_session.commit()
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error processing {file_path}: {e}")
|
||||||
|
job.failed_items += 1
|
||||||
|
total_failed += 1
|
||||||
|
self.db_session.commit()
|
||||||
|
|
||||||
|
# Update job status
|
||||||
|
job.status = 'completed'
|
||||||
|
job.completed_at = datetime.utcnow()
|
||||||
|
job.job_metadata.update({
|
||||||
|
'total_processed': total_processed,
|
||||||
|
'total_failed': total_failed,
|
||||||
|
'results': results
|
||||||
|
})
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
job.status = 'failed'
|
||||||
|
job.error_message = str(e)
|
||||||
|
job.completed_at = datetime.utcnow()
|
||||||
|
logger.error(f"Bulk seed job failed: {e}")
|
||||||
|
|
||||||
|
finally:
|
||||||
|
self.db_session.commit()
|
||||||
|
|
||||||
|
return {
|
||||||
|
'job_id': str(job.id),
|
||||||
|
'status': job.status,
|
||||||
|
'total_processed': total_processed,
|
||||||
|
'total_failed': total_failed,
|
||||||
|
'results': results
|
||||||
|
}
|
||||||
|
|
||||||
|
async def incremental_update(self) -> dict:
|
||||||
|
"""Perform incremental update using modified and recent feeds"""
|
||||||
|
from main import BulkProcessingJob
|
||||||
|
|
||||||
|
# Create incremental update job
|
||||||
|
job = BulkProcessingJob(
|
||||||
|
job_type='incremental_update',
|
||||||
|
status='running',
|
||||||
|
started_at=datetime.utcnow(),
|
||||||
|
job_metadata={'feeds': ['modified', 'recent']}
|
||||||
|
)
|
||||||
|
self.db_session.add(job)
|
||||||
|
self.db_session.commit()
|
||||||
|
|
||||||
|
total_processed = 0
|
||||||
|
total_failed = 0
|
||||||
|
results = []
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Download modified and recent feeds
|
||||||
|
async with aiohttp.ClientSession() as session:
|
||||||
|
for feed_name, url in self.feed_urls.items():
|
||||||
|
filename = f"nvdcve-1.1-{feed_name}.json.gz"
|
||||||
|
destination = self.data_dir / filename
|
||||||
|
|
||||||
|
if await self.download_file(session, url, destination):
|
||||||
|
try:
|
||||||
|
json_file = self.extract_json_file(destination)
|
||||||
|
processed, failed = self.process_json_file(json_file)
|
||||||
|
|
||||||
|
total_processed += processed
|
||||||
|
total_failed += failed
|
||||||
|
|
||||||
|
results.append({
|
||||||
|
'feed': feed_name,
|
||||||
|
'processed': processed,
|
||||||
|
'failed': failed
|
||||||
|
})
|
||||||
|
|
||||||
|
# Clean up
|
||||||
|
if json_file != destination:
|
||||||
|
json_file.unlink()
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error processing {feed_name} feed: {e}")
|
||||||
|
total_failed += 1
|
||||||
|
|
||||||
|
job.status = 'completed'
|
||||||
|
job.completed_at = datetime.utcnow()
|
||||||
|
job.job_metadata.update({
|
||||||
|
'total_processed': total_processed,
|
||||||
|
'total_failed': total_failed,
|
||||||
|
'results': results
|
||||||
|
})
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
job.status = 'failed'
|
||||||
|
job.error_message = str(e)
|
||||||
|
job.completed_at = datetime.utcnow()
|
||||||
|
logger.error(f"Incremental update job failed: {e}")
|
||||||
|
|
||||||
|
finally:
|
||||||
|
self.db_session.commit()
|
||||||
|
|
||||||
|
return {
|
||||||
|
'job_id': str(job.id),
|
||||||
|
'status': job.status,
|
||||||
|
'total_processed': total_processed,
|
||||||
|
'total_failed': total_failed,
|
||||||
|
'results': results
|
||||||
|
}
|
|
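For orientation, here is one way the processor above might be invoked directly: a full historical seed followed by a cheap delta pass over the modified/recent feeds. This is a sketch under assumptions (a standalone script, a `SessionLocal` factory, a local connection string); in the application these operations are started through the backend's bulk-processing endpoints instead.

# Hypothetical one-off seeding script -- names outside this commit are assumed.
import asyncio

from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

from nvd_bulk_processor import NVDBulkProcessor

engine = create_engine("postgresql://cve_user:cve_password@localhost:5432/cve_sigma_db")
SessionLocal = sessionmaker(bind=engine)

async def main():
    db = SessionLocal()
    try:
        processor = NVDBulkProcessor(db, data_dir="./nvd_data")
        # Full seed: downloads each yearly 1.1 feed and upserts every CVE it contains
        seed_result = await processor.bulk_seed_database(start_year=2020)
        print("seed:", seed_result["status"], seed_result["total_processed"])
        # Later, a routine delta pass using only the modified/recent feeds
        update_result = await processor.incremental_update()
        print("update:", update_result["status"], update_result["total_processed"])
    finally:
        db.close()

if __name__ == "__main__":
    asyncio.run(main())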
@@ -12,3 +12,5 @@ pygithub==2.1.1
 gitpython==3.1.40
 beautifulsoup4==4.12.2
 lxml==4.9.3
+aiohttp==3.9.1
+aiofiles
@@ -1,5 +1,3 @@
-version: '3.8'
-
 services:
   db:
     image: postgres:15
@@ -25,6 +23,7 @@ services:
     environment:
       DATABASE_URL: postgresql://cve_user:cve_password@db:5432/cve_sigma_db
       NVD_API_KEY: ${NVD_API_KEY:-}
+      GITHUB_TOKEN: ${GITHUB_TOKEN}
     depends_on:
       db:
         condition: service_healthy
@@ -15,6 +15,10 @@ function App() {
   const [activeTab, setActiveTab] = useState('dashboard');
   const [fetchingCves, setFetchingCves] = useState(false);
   const [testResult, setTestResult] = useState(null);
+  const [bulkJobs, setBulkJobs] = useState([]);
+  const [bulkStatus, setBulkStatus] = useState({});
+  const [pocStats, setPocStats] = useState({});
+  const [bulkProcessing, setBulkProcessing] = useState(false);
 
   useEffect(() => {
     fetchData();
@@ -23,15 +27,21 @@ function App() {
   const fetchData = async () => {
     try {
       setLoading(true);
-      const [cvesRes, rulesRes, statsRes] = await Promise.all([
+      const [cvesRes, rulesRes, statsRes, bulkJobsRes, bulkStatusRes, pocStatsRes] = await Promise.all([
         axios.get(`${API_BASE_URL}/api/cves`),
         axios.get(`${API_BASE_URL}/api/sigma-rules`),
-        axios.get(`${API_BASE_URL}/api/stats`)
+        axios.get(`${API_BASE_URL}/api/stats`),
+        axios.get(`${API_BASE_URL}/api/bulk-jobs`),
+        axios.get(`${API_BASE_URL}/api/bulk-status`),
+        axios.get(`${API_BASE_URL}/api/poc-stats`)
       ]);
 
       setCves(cvesRes.data);
       setSigmaRules(rulesRes.data);
       setStats(statsRes.data);
+      setBulkJobs(bulkJobsRes.data);
+      setBulkStatus(bulkStatusRes.data);
+      setPocStats(pocStatsRes.data);
     } catch (error) {
       console.error('Error fetching data:', error);
     } finally {
@@ -39,6 +49,20 @@ function App() {
     }
   };
 
+  const cancelJob = async (jobId) => {
+    try {
+      const response = await axios.post(`${API_BASE_URL}/api/cancel-job/${jobId}`);
+      console.log('Cancel job response:', response.data);
+      // Refresh data after cancelling
+      setTimeout(() => {
+        fetchData();
+      }, 1000);
+    } catch (error) {
+      console.error('Error cancelling job:', error);
+      alert('Failed to cancel job. Please try again.');
+    }
+  };
+
   const handleFetchCves = async () => {
     try {
       setFetchingCves(true);
@@ -73,6 +97,73 @@ function App() {
     }
   };
 
+  const startBulkSeed = async (startYear = 2020, endYear = null) => {
+    try {
+      setBulkProcessing(true);
+      const response = await axios.post(`${API_BASE_URL}/api/bulk-seed`, {
+        start_year: startYear,
+        end_year: endYear
+      });
+      console.log('Bulk seed response:', response.data);
+      // Refresh data after starting
+      setTimeout(() => {
+        fetchData();
+      }, 2000);
+    } catch (error) {
+      console.error('Error starting bulk seed:', error);
+      setBulkProcessing(false);
+    }
+  };
+
+  const startIncrementalUpdate = async () => {
+    try {
+      setBulkProcessing(true);
+      const response = await axios.post(`${API_BASE_URL}/api/incremental-update`);
+      console.log('Incremental update response:', response.data);
+      setTimeout(() => {
+        fetchData();
+        setBulkProcessing(false);
+      }, 2000);
+    } catch (error) {
+      console.error('Error starting incremental update:', error);
+      setBulkProcessing(false);
+    }
+  };
+
+  const syncNomiSec = async (cveId = null) => {
+    try {
+      setBulkProcessing(true);
+      const response = await axios.post(`${API_BASE_URL}/api/sync-nomi-sec`, {
+        cve_id: cveId
+      });
+      console.log('Nomi-sec sync response:', response.data);
+      setTimeout(() => {
+        fetchData();
+        setBulkProcessing(false);
+      }, 2000);
+    } catch (error) {
+      console.error('Error syncing nomi-sec:', error);
+      setBulkProcessing(false);
+    }
+  };
+
+  const regenerateRules = async (force = false) => {
+    try {
+      setBulkProcessing(true);
+      const response = await axios.post(`${API_BASE_URL}/api/regenerate-rules`, {
+        force: force
+      });
+      console.log('Rule regeneration response:', response.data);
+      setTimeout(() => {
+        fetchData();
+        setBulkProcessing(false);
+      }, 2000);
+    } catch (error) {
+      console.error('Error regenerating rules:', error);
+      setBulkProcessing(false);
+    }
+  };
+
   const getSeverityColor = (severity) => {
     switch (severity?.toLowerCase()) {
       case 'critical': return 'bg-red-100 text-red-800';
@@ -93,18 +184,81 @@ function App() {
 
   const Dashboard = () => (
     <div className="space-y-6">
-      <div className="grid grid-cols-1 md:grid-cols-3 gap-6">
+      <div className="grid grid-cols-1 md:grid-cols-5 gap-6">
         <div className="bg-white p-6 rounded-lg shadow">
           <h3 className="text-lg font-medium text-gray-900">Total CVEs</h3>
           <p className="text-3xl font-bold text-blue-600">{stats.total_cves || 0}</p>
+          <p className="text-sm text-gray-500">Bulk: {stats.bulk_processed_cves || 0}</p>
         </div>
         <div className="bg-white p-6 rounded-lg shadow">
           <h3 className="text-lg font-medium text-gray-900">SIGMA Rules</h3>
           <p className="text-3xl font-bold text-green-600">{stats.total_sigma_rules || 0}</p>
+          <p className="text-sm text-gray-500">Nomi-sec: {stats.nomi_sec_rules || 0}</p>
+        </div>
+        <div className="bg-white p-6 rounded-lg shadow">
+          <h3 className="text-lg font-medium text-gray-900">CVEs with PoCs</h3>
+          <p className="text-3xl font-bold text-purple-600">{stats.cves_with_pocs || 0}</p>
+          <p className="text-sm text-gray-500">{(stats.poc_coverage || 0).toFixed(1)}% coverage</p>
         </div>
         <div className="bg-white p-6 rounded-lg shadow">
           <h3 className="text-lg font-medium text-gray-900">Recent CVEs (7d)</h3>
-          <p className="text-3xl font-bold text-purple-600">{stats.recent_cves_7_days || 0}</p>
+          <p className="text-3xl font-bold text-orange-600">{stats.recent_cves_7_days || 0}</p>
+        </div>
+        <div className="bg-white p-6 rounded-lg shadow">
+          <h3 className="text-lg font-medium text-gray-900">High Quality PoCs</h3>
+          <p className="text-3xl font-bold text-indigo-600">{pocStats.high_quality_cves || 0}</p>
+          <p className="text-sm text-gray-500">Avg: {(pocStats.avg_poc_count || 0).toFixed(1)}</p>
+        </div>
+      </div>
+
+      {/* Bulk Processing Controls */}
+      <div className="bg-white rounded-lg shadow p-6">
+        <h2 className="text-xl font-bold text-gray-900 mb-4">Bulk Processing</h2>
+        <div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-4 gap-4">
+          <button
+            onClick={() => startBulkSeed(2020)}
+            disabled={bulkProcessing}
+            className={`px-4 py-2 rounded-md text-white ${
+              bulkProcessing
+                ? 'bg-gray-400 cursor-not-allowed'
+                : 'bg-blue-600 hover:bg-blue-700'
+            }`}
+          >
+            {bulkProcessing ? 'Processing...' : 'Bulk Seed (2020+)'}
+          </button>
+          <button
+            onClick={startIncrementalUpdate}
+            disabled={bulkProcessing}
+            className={`px-4 py-2 rounded-md text-white ${
+              bulkProcessing
+                ? 'bg-gray-400 cursor-not-allowed'
+                : 'bg-green-600 hover:bg-green-700'
+            }`}
+          >
+            {bulkProcessing ? 'Processing...' : 'Incremental Update'}
+          </button>
+          <button
+            onClick={() => syncNomiSec()}
+            disabled={bulkProcessing}
+            className={`px-4 py-2 rounded-md text-white ${
+              bulkProcessing
+                ? 'bg-gray-400 cursor-not-allowed'
+                : 'bg-purple-600 hover:bg-purple-700'
+            }`}
+          >
+            {bulkProcessing ? 'Processing...' : 'Sync nomi-sec PoCs'}
+          </button>
+          <button
+            onClick={() => regenerateRules()}
+            disabled={bulkProcessing}
+            className={`px-4 py-2 rounded-md text-white ${
+              bulkProcessing
+                ? 'bg-gray-400 cursor-not-allowed'
+                : 'bg-indigo-600 hover:bg-indigo-700'
+            }`}
+          >
+            {bulkProcessing ? 'Processing...' : 'Regenerate Rules'}
+          </button>
         </div>
       </div>
@@ -522,6 +676,178 @@ function App() {
     );
   };
 
+  const BulkJobsList = () => (
+    <div className="space-y-6">
+      <div className="flex justify-between items-center">
+        <h1 className="text-2xl font-bold text-gray-900">Bulk Processing Jobs</h1>
+        <button
+          onClick={fetchData}
+          className="bg-blue-600 hover:bg-blue-700 text-white px-4 py-2 rounded-md text-sm"
+        >
+          Refresh
+        </button>
+      </div>
+
+      {/* Bulk Status Overview */}
+      <div className="bg-white rounded-lg shadow p-6">
+        <h2 className="text-lg font-bold text-gray-900 mb-4">System Status</h2>
+        {bulkStatus.database_stats && (
+          <div className="grid grid-cols-2 md:grid-cols-4 gap-4">
+            <div className="text-center">
+              <div className="text-2xl font-bold text-blue-600">{bulkStatus.database_stats.total_cves}</div>
+              <div className="text-sm text-gray-500">Total CVEs</div>
+            </div>
+            <div className="text-center">
+              <div className="text-2xl font-bold text-green-600">{bulkStatus.database_stats.bulk_processed_cves}</div>
+              <div className="text-sm text-gray-500">Bulk Processed</div>
+            </div>
+            <div className="text-center">
+              <div className="text-2xl font-bold text-purple-600">{bulkStatus.database_stats.cves_with_pocs}</div>
+              <div className="text-sm text-gray-500">With PoCs</div>
+            </div>
+            <div className="text-center">
+              <div className="text-2xl font-bold text-indigo-600">{bulkStatus.database_stats.nomi_sec_rules}</div>
+              <div className="text-sm text-gray-500">Enhanced Rules</div>
+            </div>
+          </div>
+        )}
+      </div>
+
+      {/* Running Jobs */}
+      {bulkJobs.some(job => job.status === 'running' || job.status === 'pending') && (
+        <div className="bg-white rounded-lg shadow">
+          <div className="px-6 py-4 border-b border-gray-200">
+            <h2 className="text-lg font-bold text-gray-900">Running Jobs</h2>
+          </div>
+          <div className="divide-y divide-gray-200">
+            {bulkJobs
+              .filter(job => job.status === 'running' || job.status === 'pending')
+              .map((job) => (
+                <div key={job.id} className="px-6 py-4 bg-blue-50">
+                  <div className="flex items-center justify-between">
+                    <div className="flex-1">
+                      <div className="flex items-center space-x-3">
+                        <h3 className="text-lg font-medium text-gray-900">{job.job_type}</h3>
+                        <span className={`inline-flex px-2 py-1 text-xs font-semibold rounded-full ${
+                          job.status === 'running' ? 'bg-blue-100 text-blue-800' :
+                          'bg-gray-100 text-gray-800'
+                        }`}>
+                          {job.status}
+                        </span>
+                      </div>
+                      <div className="mt-2 flex items-center space-x-6 text-sm text-gray-500">
+                        <span>Started: {formatDate(job.started_at)}</span>
+                        {job.year && <span>Year: {job.year}</span>}
+                      </div>
+                      {job.total_items > 0 && (
+                        <div className="mt-2">
+                          <div className="flex items-center space-x-4 text-sm text-gray-600">
+                            <span>Progress: {job.processed_items}/{job.total_items}</span>
+                            {job.failed_items > 0 && (
+                              <span className="text-red-600">Failed: {job.failed_items}</span>
+                            )}
+                          </div>
+                          <div className="mt-1 w-full bg-gray-200 rounded-full h-2">
+                            <div
+                              className="bg-blue-600 h-2 rounded-full"
+                              style={{ width: `${(job.processed_items / job.total_items) * 100}%` }}
+                            ></div>
+                          </div>
+                        </div>
+                      )}
+                    </div>
+                    <div className="flex-shrink-0 ml-4">
+                      <button
+                        onClick={() => cancelJob(job.id)}
+                        className="bg-red-600 hover:bg-red-700 text-white px-3 py-1 rounded-md text-sm font-medium"
+                      >
+                        Cancel
+                      </button>
+                    </div>
+                  </div>
+                </div>
+              ))}
+          </div>
+        </div>
+      )}
+
+      {/* Recent Jobs */}
+      <div className="bg-white rounded-lg shadow">
+        <div className="px-6 py-4 border-b border-gray-200">
+          <h2 className="text-lg font-bold text-gray-900">Recent Jobs</h2>
+        </div>
+        <div className="divide-y divide-gray-200">
+          {bulkJobs.length === 0 ? (
+            <div className="px-6 py-8 text-center text-gray-500">
+              No bulk processing jobs found
+            </div>
+          ) : (
+            bulkJobs.map((job) => (
+              <div key={job.id} className="px-6 py-4">
+                <div className="flex items-center justify-between">
+                  <div className="flex-1">
+                    <div className="flex items-center space-x-3">
+                      <h3 className="text-lg font-medium text-gray-900">{job.job_type}</h3>
+                      <span className={`inline-flex px-2 py-1 text-xs font-semibold rounded-full ${
+                        job.status === 'completed' ? 'bg-green-100 text-green-800' :
+                        job.status === 'running' ? 'bg-blue-100 text-blue-800' :
+                        job.status === 'failed' ? 'bg-red-100 text-red-800' :
+                        job.status === 'cancelled' ? 'bg-orange-100 text-orange-800' :
+                        'bg-gray-100 text-gray-800'
+                      }`}>
+                        {job.status}
+                      </span>
+                    </div>
+                    <div className="mt-2 flex items-center space-x-6 text-sm text-gray-500">
+                      <span>Started: {formatDate(job.started_at)}</span>
+                      {job.completed_at && (
+                        <span>Completed: {formatDate(job.completed_at)}</span>
+                      )}
+                      {job.year && (
+                        <span>Year: {job.year}</span>
+                      )}
+                    </div>
+                    {job.total_items > 0 && (
+                      <div className="mt-2">
+                        <div className="flex items-center space-x-4 text-sm text-gray-600">
+                          <span>Progress: {job.processed_items}/{job.total_items}</span>
+                          {job.failed_items > 0 && (
+                            <span className="text-red-600">Failed: {job.failed_items}</span>
+                          )}
+                        </div>
+                        <div className="mt-1 w-full bg-gray-200 rounded-full h-2">
+                          <div
+                            className="bg-blue-600 h-2 rounded-full"
+                            style={{ width: `${(job.processed_items / job.total_items) * 100}%` }}
+                          ></div>
+                        </div>
+                      </div>
+                    )}
+                    {job.error_message && (
+                      <div className="mt-2 p-2 bg-red-50 border border-red-200 rounded text-sm text-red-700">
+                        {job.error_message}
+                      </div>
+                    )}
+                  </div>
+                  <div className="flex-shrink-0 ml-4">
+                    {(job.status === 'running' || job.status === 'pending') && (
+                      <button
+                        onClick={() => cancelJob(job.id)}
+                        className="bg-red-600 hover:bg-red-700 text-white px-3 py-1 rounded-md text-sm font-medium"
+                      >
+                        Cancel
+                      </button>
+                    )}
+                  </div>
+                </div>
+              </div>
+            ))
+          )}
+        </div>
+      </div>
+    </div>
+  );
+
   if (loading) {
     return (
       <div className="min-h-screen bg-gray-100 flex items-center justify-center">
@@ -573,6 +899,16 @@ function App() {
             >
               SIGMA Rules
             </button>
+            <button
+              onClick={() => setActiveTab('bulk-jobs')}
+              className={`inline-flex items-center px-1 pt-1 border-b-2 text-sm font-medium ${
+                activeTab === 'bulk-jobs'
+                  ? 'border-blue-500 text-gray-900'
+                  : 'border-transparent text-gray-500 hover:text-gray-700 hover:border-gray-300'
+              }`}
+            >
+              Bulk Jobs
+            </button>
           </div>
         </div>
       </div>
@@ -584,6 +920,7 @@ function App() {
         {activeTab === 'dashboard' && <Dashboard />}
         {activeTab === 'cves' && <CVEList />}
         {activeTab === 'rules' && <SigmaRulesList />}
+        {activeTab === 'bulk-jobs' && <BulkJobsList />}
       </div>
     </main>
 
28  init.sql
@@ -13,6 +13,13 @@ CREATE TABLE cves (
     modified_date TIMESTAMP,
     affected_products TEXT[],
     reference_urls TEXT[],
+    -- Bulk processing fields
+    data_source VARCHAR(20) DEFAULT 'nvd_api',
+    nvd_json_version VARCHAR(10) DEFAULT '2.0',
+    bulk_processed BOOLEAN DEFAULT FALSE,
+    -- nomi-sec PoC fields
+    poc_count INTEGER DEFAULT 0,
+    poc_data JSON,
     created_at TIMESTAMP DEFAULT NOW(),
     updated_at TIMESTAMP DEFAULT NOW()
 );
@@ -30,6 +37,10 @@ CREATE TABLE sigma_rules (
     exploit_based BOOLEAN DEFAULT FALSE,
     github_repos TEXT[],
     exploit_indicators TEXT,
+    -- Enhanced fields for new data sources
+    poc_source VARCHAR(20) DEFAULT 'github_search',
+    poc_quality_score INTEGER DEFAULT 0,
+    nomi_sec_data JSON,
     created_at TIMESTAMP DEFAULT NOW(),
     updated_at TIMESTAMP DEFAULT NOW()
 );
@@ -44,6 +55,23 @@ CREATE TABLE rule_templates (
     created_at TIMESTAMP DEFAULT NOW()
 );
 
+-- Bulk processing jobs table
+CREATE TABLE bulk_processing_jobs (
+    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
+    job_type VARCHAR(50) NOT NULL,
+    status VARCHAR(20) DEFAULT 'pending',
+    year INTEGER,
+    total_items INTEGER DEFAULT 0,
+    processed_items INTEGER DEFAULT 0,
+    failed_items INTEGER DEFAULT 0,
+    error_message TEXT,
+    job_metadata JSON,
+    started_at TIMESTAMP,
+    completed_at TIMESTAMP,
+    cancelled_at TIMESTAMP,
+    created_at TIMESTAMP DEFAULT NOW()
+);
+
 -- Insert some basic rule templates
 INSERT INTO rule_templates (template_name, template_content, applicable_product_patterns, description) VALUES
 (
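The backend modules above import a `BulkProcessingJob` model from `main`, which presumably maps onto this new `bulk_processing_jobs` table; that model is not part of this diff. The SQLAlchemy sketch below is only an assumption of what such a mapping might look like, included to show how the job-tracking columns used by the processors (status, counters, metadata, timestamps) line up with the DDL.

# Assumed SQLAlchemy mapping for bulk_processing_jobs -- the real model lives in main.py
# and is not shown in this commit; column names follow the DDL above.
import uuid

from sqlalchemy import JSON, Column, DateTime, Integer, String, Text, func
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.orm import declarative_base

Base = declarative_base()

class BulkProcessingJob(Base):
    __tablename__ = "bulk_processing_jobs"

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    job_type = Column(String(50), nullable=False)    # e.g. 'nvd_bulk_seed', 'nomi_sec_sync'
    status = Column(String(20), default="pending")   # pending / running / completed / failed / cancelled
    year = Column(Integer)                           # optional year scope for yearly feeds
    total_items = Column(Integer, default=0)
    processed_items = Column(Integer, default=0)
    failed_items = Column(Integer, default=0)
    error_message = Column(Text)
    job_metadata = Column(JSON)
    started_at = Column(DateTime)
    completed_at = Column(DateTime)
    cancelled_at = Column(DateTime)
    created_at = Column(DateTime, server_default=func.now())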
0  start.sh  Normal file → Executable file