diff --git a/README.md b/README.md index 0c8b25d..590fe1a 100644 --- a/README.md +++ b/README.md @@ -1,20 +1,27 @@ -# CVE-SIGMA Auto Generator +# CVE-SIGMA Auto Generator (Enhanced) -An automated platform that fetches CVE data and automatically generates SIGMA rules for threat detection. +An advanced automated platform that processes comprehensive CVE data and generates enhanced SIGMA rules for threat detection using curated exploit intelligence. -## Features +## 🚀 Enhanced Features -- **Automated CVE Fetching**: Regularly polls the NVD (National Vulnerability Database) for CVEs from July 2025 -- **GitHub Exploit Analysis**: Automatically searches GitHub for exploit code related to each CVE -- **Intelligent SIGMA Rule Generation**: Creates SIGMA rules based on CVE characteristics AND actual exploit code -- **Exploit-Based Detection**: Enhanced rules using real indicators extracted from GitHub exploits -- **Modern Web Interface**: React-based UI for browsing CVEs and managing SIGMA rules -- **Real-time Updates**: Background tasks keep CVE data current with current 2025 vulnerabilities -- **Rule Templates**: Configurable templates for different types of vulnerabilities -- **MITRE ATT&CK Mapping**: Automatic mapping to MITRE ATT&CK techniques -- **API Testing**: Built-in NVD API connectivity testing -- **Enhanced Error Handling**: Robust fallback mechanisms and detailed logging -- **Docker Compose**: Easy deployment and orchestration +### Data Processing +- **Bulk NVD Processing**: Downloads and processes complete NVD JSON datasets (2002-2025) +- **nomi-sec PoC Integration**: Uses curated PoC data from github.com/nomi-sec/PoC-in-GitHub +- **Incremental Updates**: Efficient updates using NVD modified/recent feeds +- **Quality Assessment**: Advanced PoC quality scoring with star count, recency, and relevance analysis + +### Intelligence Generation +- **Enhanced SIGMA Rules**: Creates rules using real exploit indicators from curated PoCs +- **Quality Tiers**: Excellent, Good, Fair, Poor, Very Poor classification system +- **Smart Template Selection**: AI-driven template matching based on PoC characteristics +- **Advanced Indicator Extraction**: Processes, files, network, registry, and command patterns +- **MITRE ATT&CK Mapping**: Automatic technique identification based on exploit analysis + +### User Experience +- **Modern Web Interface**: React-based UI with enhanced bulk processing controls +- **Real-time Monitoring**: Live job tracking and progress monitoring +- **Comprehensive Statistics**: PoC coverage, quality metrics, and processing status +- **Bulk Operations Dashboard**: Centralized control for all data processing operations ## Architecture diff --git a/backend/bulk_seeder.py b/backend/bulk_seeder.py new file mode 100644 index 0000000..89e78ed --- /dev/null +++ b/backend/bulk_seeder.py @@ -0,0 +1,340 @@ +""" +Bulk Data Seeding Coordinator +Orchestrates the complete bulk seeding process using NVD JSON feeds and nomi-sec PoC data +""" + +import asyncio +import logging +from datetime import datetime +from typing import Optional +from sqlalchemy.orm import Session +from nvd_bulk_processor import NVDBulkProcessor +from nomi_sec_client import NomiSecClient + +# Configure logging +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +class BulkSeeder: + """Coordinates bulk seeding operations""" + + def __init__(self, db_session: Session): + self.db_session = db_session + self.nvd_processor = NVDBulkProcessor(db_session) + self.nomi_sec_client = 
NomiSecClient(db_session) + + async def full_bulk_seed(self, start_year: int = 2002, + end_year: Optional[int] = None, + skip_nvd: bool = False, + skip_nomi_sec: bool = False) -> dict: + """ + Perform complete bulk seeding operation + + Args: + start_year: Starting year for NVD data (default: 2002) + end_year: Ending year for NVD data (default: current year) + skip_nvd: Skip NVD bulk processing (default: False) + skip_nomi_sec: Skip nomi-sec PoC synchronization (default: False) + + Returns: + Dictionary containing operation results + """ + if end_year is None: + end_year = datetime.now().year + + results = { + 'start_time': datetime.utcnow(), + 'nvd_results': None, + 'nomi_sec_results': None, + 'total_time': None, + 'status': 'running' + } + + logger.info(f"Starting full bulk seed operation ({start_year}-{end_year})") + + try: + # Phase 1: NVD Bulk Processing + if not skip_nvd: + logger.info("Phase 1: Starting NVD bulk processing...") + nvd_results = await self.nvd_processor.bulk_seed_database( + start_year=start_year, + end_year=end_year + ) + results['nvd_results'] = nvd_results + logger.info(f"Phase 1 complete: {nvd_results['total_processed']} CVEs processed") + else: + logger.info("Phase 1: Skipping NVD bulk processing") + + # Phase 2: nomi-sec PoC Synchronization + if not skip_nomi_sec: + logger.info("Phase 2: Starting nomi-sec PoC synchronization...") + nomi_sec_results = await self.nomi_sec_client.bulk_sync_all_cves( + batch_size=50 # Smaller batches for API stability + ) + results['nomi_sec_results'] = nomi_sec_results + logger.info(f"Phase 2 complete: {nomi_sec_results['total_pocs_found']} PoCs found") + else: + logger.info("Phase 2: Skipping nomi-sec PoC synchronization") + + # Phase 3: Generate Enhanced SIGMA Rules + logger.info("Phase 3: Generating enhanced SIGMA rules...") + sigma_results = await self.generate_enhanced_sigma_rules() + results['sigma_results'] = sigma_results + logger.info(f"Phase 3 complete: {sigma_results['rules_generated']} rules generated") + + results['status'] = 'completed' + results['end_time'] = datetime.utcnow() + results['total_time'] = (results['end_time'] - results['start_time']).total_seconds() + + logger.info(f"Full bulk seed operation completed in {results['total_time']:.2f} seconds") + + except Exception as e: + logger.error(f"Bulk seed operation failed: {e}") + results['status'] = 'failed' + results['error'] = str(e) + results['end_time'] = datetime.utcnow() + + return results + + async def incremental_update(self) -> dict: + """ + Perform incremental update operation + + Returns: + Dictionary containing update results + """ + results = { + 'start_time': datetime.utcnow(), + 'nvd_update': None, + 'nomi_sec_update': None, + 'status': 'running' + } + + logger.info("Starting incremental update...") + + try: + # Update NVD data using modified/recent feeds + logger.info("Updating NVD data...") + nvd_update = await self.nvd_processor.incremental_update() + results['nvd_update'] = nvd_update + + # Update PoC data for newly added/modified CVEs + if nvd_update['total_processed'] > 0: + logger.info("Updating PoC data for modified CVEs...") + # Get recently modified CVEs and sync their PoCs + recent_cves = await self._get_recently_modified_cves() + nomi_sec_update = await self._sync_specific_cves(recent_cves) + results['nomi_sec_update'] = nomi_sec_update + + results['status'] = 'completed' + results['end_time'] = datetime.utcnow() + + except Exception as e: + logger.error(f"Incremental update failed: {e}") + results['status'] = 'failed' + 
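# --- Illustrative usage sketch: running incremental updates on a schedule ---
# This is an example pattern only, not part of the patch. It assumes the modules
# introduced here (bulk_seeder.BulkSeeder, main.SessionLocal) are importable and
# that a periodic asyncio loop is an acceptable scheduler for the deployment.
import asyncio
from bulk_seeder import BulkSeeder
from main import SessionLocal

async def run_incremental_updates(interval_hours: float = 6.0):
    """Call BulkSeeder.incremental_update() every interval_hours."""
    while True:
        db = SessionLocal()
        try:
            seeder = BulkSeeder(db)
            results = await seeder.incremental_update()
            print(f"Incremental update finished with status: {results['status']}")
        finally:
            db.close()
        await asyncio.sleep(interval_hours * 3600)

# asyncio.run(run_incremental_updates())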
results['error'] = str(e) + results['end_time'] = datetime.utcnow() + + return results + + async def generate_enhanced_sigma_rules(self) -> dict: + """Generate enhanced SIGMA rules using nomi-sec PoC data""" + from main import CVE, SigmaRule + + # Import the enhanced rule generator + from enhanced_sigma_generator import EnhancedSigmaGenerator + + generator = EnhancedSigmaGenerator(self.db_session) + + # Get all CVEs that have PoC data but no enhanced rules + cves_with_pocs = self.db_session.query(CVE).filter( + CVE.poc_count > 0 + ).all() + + rules_generated = 0 + rules_updated = 0 + + for cve in cves_with_pocs: + try: + # Check if we need to generate/update the rule + existing_rule = self.db_session.query(SigmaRule).filter( + SigmaRule.cve_id == cve.cve_id + ).first() + + if existing_rule and existing_rule.poc_source == 'nomi_sec': + # Rule already exists and is up to date + continue + + # Generate enhanced rule + rule_result = await generator.generate_enhanced_rule(cve) + + if rule_result['success']: + if existing_rule: + rules_updated += 1 + else: + rules_generated += 1 + + except Exception as e: + logger.error(f"Error generating rule for {cve.cve_id}: {e}") + continue + + self.db_session.commit() + + return { + 'rules_generated': rules_generated, + 'rules_updated': rules_updated, + 'total_processed': len(cves_with_pocs) + } + + async def _get_recently_modified_cves(self, hours: int = 24) -> list: + """Get CVEs modified within the last N hours""" + from main import CVE + + cutoff_time = datetime.utcnow() - timedelta(hours=hours) + + recent_cves = self.db_session.query(CVE).filter( + CVE.updated_at >= cutoff_time + ).all() + + return [cve.cve_id for cve in recent_cves] + + async def _sync_specific_cves(self, cve_ids: list) -> dict: + """Sync PoC data for specific CVEs""" + total_processed = 0 + total_pocs_found = 0 + + for cve_id in cve_ids: + try: + result = await self.nomi_sec_client.sync_cve_pocs(cve_id) + total_processed += 1 + total_pocs_found += result.get('pocs_found', 0) + + # Small delay to avoid overwhelming the API + await asyncio.sleep(0.5) + + except Exception as e: + logger.error(f"Error syncing PoCs for {cve_id}: {e}") + continue + + return { + 'total_processed': total_processed, + 'total_pocs_found': total_pocs_found + } + + async def get_seeding_status(self) -> dict: + """Get current seeding status and statistics""" + from main import CVE, SigmaRule, BulkProcessingJob + + # Get database statistics + total_cves = self.db_session.query(CVE).count() + bulk_processed_cves = self.db_session.query(CVE).filter( + CVE.bulk_processed == True + ).count() + + cves_with_pocs = self.db_session.query(CVE).filter( + CVE.poc_count > 0 + ).count() + + total_rules = self.db_session.query(SigmaRule).count() + nomi_sec_rules = self.db_session.query(SigmaRule).filter( + SigmaRule.poc_source == 'nomi_sec' + ).count() + + # Get recent job status + recent_jobs = self.db_session.query(BulkProcessingJob).order_by( + BulkProcessingJob.created_at.desc() + ).limit(5).all() + + job_status = [] + for job in recent_jobs: + job_status.append({ + 'id': str(job.id), + 'job_type': job.job_type, + 'status': job.status, + 'created_at': job.created_at, + 'completed_at': job.completed_at, + 'processed_items': job.processed_items, + 'total_items': job.total_items, + 'failed_items': job.failed_items + }) + + return { + 'database_stats': { + 'total_cves': total_cves, + 'bulk_processed_cves': bulk_processed_cves, + 'cves_with_pocs': cves_with_pocs, + 'total_rules': total_rules, + 'nomi_sec_rules': nomi_sec_rules, 
+ 'poc_coverage': (cves_with_pocs / total_cves * 100) if total_cves > 0 else 0, + 'nomi_sec_coverage': (nomi_sec_rules / total_rules * 100) if total_rules > 0 else 0 + }, + 'recent_jobs': job_status, + 'nvd_data_status': await self._get_nvd_data_status(), + 'nomi_sec_status': await self.nomi_sec_client.get_sync_status() + } + + async def _get_nvd_data_status(self) -> dict: + """Get NVD data status""" + from main import CVE + + # Get year distribution + year_counts = {} + cves = self.db_session.query(CVE).all() + + for cve in cves: + if cve.published_date: + year = cve.published_date.year + year_counts[year] = year_counts.get(year, 0) + 1 + + # Get source distribution + source_counts = {} + for cve in cves: + source = cve.data_source or 'unknown' + source_counts[source] = source_counts.get(source, 0) + 1 + + return { + 'year_distribution': year_counts, + 'source_distribution': source_counts, + 'total_cves': len(cves), + 'date_range': { + 'earliest': min(cve.published_date for cve in cves if cve.published_date), + 'latest': max(cve.published_date for cve in cves if cve.published_date) + } if cves else None + } + + +# Standalone script functionality +async def main(): + """Main function for standalone execution""" + from main import SessionLocal, engine, Base + + # Create tables + Base.metadata.create_all(bind=engine) + + # Create database session + db_session = SessionLocal() + + try: + # Create bulk seeder + seeder = BulkSeeder(db_session) + + # Get current status + status = await seeder.get_seeding_status() + print(f"Current Status: {status['database_stats']['total_cves']} CVEs in database") + + # Perform full bulk seed if database is empty + if status['database_stats']['total_cves'] == 0: + print("Database is empty. Starting full bulk seed...") + results = await seeder.full_bulk_seed(start_year=2020) # Start from 2020 for faster testing + print(f"Bulk seed completed: {results}") + else: + print("Database contains data. 
Running incremental update...") + results = await seeder.incremental_update() + print(f"Incremental update completed: {results}") + + finally: + db_session.close() + + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file diff --git a/backend/enhanced_sigma_generator.py b/backend/enhanced_sigma_generator.py new file mode 100644 index 0000000..c1907fe --- /dev/null +++ b/backend/enhanced_sigma_generator.py @@ -0,0 +1,438 @@ +""" +Enhanced SIGMA Rule Generator +Generates improved SIGMA rules using nomi-sec PoC data and traditional indicators +""" + +import json +import logging +from datetime import datetime +from typing import Dict, List, Optional, Tuple +from sqlalchemy.orm import Session +import re + +# Configure logging +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +class EnhancedSigmaGenerator: + """Enhanced SIGMA rule generator using nomi-sec PoC data""" + + def __init__(self, db_session: Session): + self.db_session = db_session + + async def generate_enhanced_rule(self, cve) -> dict: + """Generate enhanced SIGMA rule for a CVE using PoC data""" + from main import SigmaRule, RuleTemplate + + try: + # Get PoC data + poc_data = cve.poc_data or [] + + # Find the best quality PoC + best_poc = None + if poc_data: + best_poc = max(poc_data, key=lambda x: x.get('quality_analysis', {}).get('quality_score', 0)) + + # Select appropriate template based on PoC analysis + template = await self._select_template(cve, best_poc) + + if not template: + logger.warning(f"No suitable template found for {cve.cve_id}") + return {'success': False, 'error': 'No suitable template'} + + # Generate rule content + rule_content = await self._generate_rule_content(cve, template, poc_data) + + # Calculate confidence level + confidence_level = self._calculate_confidence_level(cve, poc_data) + + # Store or update SIGMA rule + existing_rule = self.db_session.query(SigmaRule).filter( + SigmaRule.cve_id == cve.cve_id + ).first() + + rule_data = { + 'cve_id': cve.cve_id, + 'rule_name': f"{cve.cve_id} Enhanced Detection", + 'rule_content': rule_content, + 'detection_type': template.template_name, + 'log_source': self._extract_log_source(template.template_name), + 'confidence_level': confidence_level, + 'auto_generated': True, + 'exploit_based': len(poc_data) > 0, + 'poc_source': 'nomi_sec', + 'poc_quality_score': best_poc.get('quality_analysis', {}).get('quality_score', 0) if best_poc else 0, + 'nomi_sec_data': { + 'total_pocs': len(poc_data), + 'best_poc_quality': best_poc.get('quality_analysis', {}).get('quality_score', 0) if best_poc else 0, + 'total_stars': sum(p.get('stargazers_count', 0) for p in poc_data), + 'avg_stars': sum(p.get('stargazers_count', 0) for p in poc_data) / len(poc_data) if poc_data else 0 + }, + 'github_repos': [p.get('html_url', '') for p in poc_data], + 'exploit_indicators': json.dumps(self._combine_exploit_indicators(poc_data)), + 'updated_at': datetime.utcnow() + } + + if existing_rule: + # Update existing rule + for key, value in rule_data.items(): + setattr(existing_rule, key, value) + logger.info(f"Updated SIGMA rule for {cve.cve_id}") + else: + # Create new rule + new_rule = SigmaRule(**rule_data) + self.db_session.add(new_rule) + logger.info(f"Created new SIGMA rule for {cve.cve_id}") + + self.db_session.commit() + + return { + 'success': True, + 'cve_id': cve.cve_id, + 'template': template.template_name, + 'confidence_level': confidence_level, + 'poc_count': len(poc_data), + 'quality_score': best_poc.get('quality_analysis', 
{}).get('quality_score', 0) if best_poc else 0 + } + + except Exception as e: + logger.error(f"Error generating enhanced rule for {cve.cve_id}: {e}") + return {'success': False, 'error': str(e)} + + async def _select_template(self, cve, best_poc: Optional[dict]) -> Optional[object]: + """Select the most appropriate template based on CVE and PoC analysis""" + from main import RuleTemplate + + templates = self.db_session.query(RuleTemplate).all() + + if not templates: + logger.warning("No rule templates found in database") + return None + + # Score templates based on relevance + template_scores = {} + + for template in templates: + score = 0 + + # Score based on PoC indicators (highest priority) + if best_poc: + indicators = best_poc.get('exploit_indicators', {}) + score += self._score_template_poc_match(template, indicators) + + # Score based on CVE description + score += self._score_template_cve_match(template, cve) + + # Score based on affected products + if cve.affected_products: + score += self._score_template_product_match(template, cve.affected_products) + + template_scores[template] = score + + # Return template with highest score + if template_scores: + best_template = max(template_scores, key=template_scores.get) + logger.info(f"Selected template {best_template.template_name} with score {template_scores[best_template]}") + return best_template + + return None + + def _score_template_poc_match(self, template: object, indicators: dict) -> int: + """Score template based on PoC indicators""" + score = 0 + template_name = template.template_name.lower() + + # Process-based templates + if 'process' in template_name or 'execution' in template_name: + if indicators.get('processes') or indicators.get('commands'): + score += 30 + + # Network-based templates + if 'network' in template_name or 'connection' in template_name: + if indicators.get('network') or indicators.get('urls'): + score += 30 + + # File-based templates + if 'file' in template_name or 'modification' in template_name: + if indicators.get('files'): + score += 30 + + # PowerShell templates + if 'powershell' in template_name: + processes = indicators.get('processes', []) + if any('powershell' in p.lower() for p in processes): + score += 35 + + return score + + def _score_template_cve_match(self, template: object, cve) -> int: + """Score template based on CVE description""" + score = 0 + template_name = template.template_name.lower() + description = (cve.description or '').lower() + + # Keyword matching + if 'remote' in description and 'execution' in description: + if 'process' in template_name or 'execution' in template_name: + score += 20 + + if 'powershell' in description: + if 'powershell' in template_name: + score += 25 + + if 'network' in description or 'http' in description: + if 'network' in template_name: + score += 20 + + if 'file' in description or 'upload' in description: + if 'file' in template_name: + score += 20 + + return score + + def _score_template_product_match(self, template: object, affected_products: list) -> int: + """Score template based on affected products""" + score = 0 + + if not template.applicable_product_patterns: + return 0 + + for pattern in template.applicable_product_patterns: + pattern_lower = pattern.lower() + for product in affected_products: + product_lower = product.lower() + if pattern_lower in product_lower: + score += 10 + break + + return score + + async def _generate_rule_content(self, cve, template: object, poc_data: list) -> str: + """Generate the actual SIGMA rule content""" + # 
Combine all exploit indicators + combined_indicators = self._combine_exploit_indicators(poc_data) + + # Get base template content + rule_content = template.template_content + + # Replace template placeholders + replacements = { + '{{CVE_ID}}': cve.cve_id, + '{{TITLE}}': f"{cve.cve_id} Enhanced Detection", + '{{DESCRIPTION}}': self._generate_description(cve, poc_data), + '{{LEVEL}}': self._calculate_confidence_level(cve, poc_data).lower(), + '{{REFERENCES}}': self._generate_references(cve, poc_data), + '{{TAGS}}': self._generate_tags(cve, poc_data), + '{{PROCESSES}}': self._format_indicators(combined_indicators.get('processes', [])), + '{{FILES}}': self._format_indicators(combined_indicators.get('files', [])), + '{{COMMANDS}}': self._format_indicators(combined_indicators.get('commands', [])), + '{{NETWORK}}': self._format_indicators(combined_indicators.get('network', [])), + '{{URLS}}': self._format_indicators(combined_indicators.get('urls', [])), + '{{REGISTRY}}': self._format_indicators(combined_indicators.get('registry', [])) + } + + # Apply replacements + for placeholder, value in replacements.items(): + rule_content = rule_content.replace(placeholder, value) + + # Add enhanced detection based on PoC quality + if poc_data: + rule_content = self._enhance_detection_logic(rule_content, combined_indicators, poc_data) + + return rule_content + + def _combine_exploit_indicators(self, poc_data: list) -> dict: + """Combine exploit indicators from all PoCs""" + combined = { + 'processes': [], + 'files': [], + 'commands': [], + 'network': [], + 'urls': [], + 'registry': [] + } + + for poc in poc_data: + indicators = poc.get('exploit_indicators', {}) + for key in combined.keys(): + if key in indicators: + combined[key].extend(indicators[key]) + + # Deduplicate and filter + for key in combined.keys(): + combined[key] = list(set(combined[key])) + # Remove empty and invalid entries + combined[key] = [item for item in combined[key] if item and len(item) > 2] + + return combined + + def _generate_description(self, cve, poc_data: list) -> str: + """Generate enhanced rule description""" + base_desc = f"Detection for {cve.cve_id}" + + if cve.description: + # Extract key terms from CVE description + desc_words = cve.description.lower().split() + key_terms = [word for word in desc_words if word in [ + 'remote', 'execution', 'injection', 'bypass', 'privilege', 'escalation', + 'overflow', 'disclosure', 'traversal', 'deserialization' + ]] + + if key_terms: + base_desc += f" involving {', '.join(set(key_terms[:3]))}" + + if poc_data: + total_pocs = len(poc_data) + total_stars = sum(p.get('stargazers_count', 0) for p in poc_data) + base_desc += f" [Enhanced with {total_pocs} PoC(s), {total_stars} stars]" + + return base_desc + + def _generate_references(self, cve, poc_data: list) -> str: + """Generate references section""" + refs = [] + + # Add CVE reference + refs.append(f"https://nvd.nist.gov/vuln/detail/{cve.cve_id}") + + # Add top PoC references (max 3) + if poc_data: + sorted_pocs = sorted(poc_data, key=lambda x: x.get('stargazers_count', 0), reverse=True) + for poc in sorted_pocs[:3]: + if poc.get('html_url'): + refs.append(poc['html_url']) + + return '\\n'.join(f" - {ref}" for ref in refs) + + def _generate_tags(self, cve, poc_data: list) -> str: + """Generate MITRE ATT&CK tags and other tags""" + tags = [] + + # CVE tag + tags.append(cve.cve_id.lower()) + + # Add technique tags based on indicators + combined_indicators = self._combine_exploit_indicators(poc_data) + + if 
combined_indicators.get('processes'): + tags.append('attack.t1059') # Command and Scripting Interpreter + + if combined_indicators.get('network'): + tags.append('attack.t1071') # Application Layer Protocol + + if combined_indicators.get('files'): + tags.append('attack.t1105') # Ingress Tool Transfer + + if any('powershell' in p.lower() for p in combined_indicators.get('processes', [])): + tags.append('attack.t1059.001') # PowerShell + + # Add PoC quality tags + if poc_data: + tags.append('exploit.poc') + best_poc = max(poc_data, key=lambda x: x.get('quality_analysis', {}).get('quality_score', 0)) + quality_tier = best_poc.get('quality_analysis', {}).get('quality_tier', 'poor') + tags.append(f'poc.quality.{quality_tier}') + + return '\\n'.join(f" - {tag}" for tag in tags) + + def _format_indicators(self, indicators: list) -> str: + """Format indicators for SIGMA rule""" + if not indicators: + return '' + + # Limit indicators to avoid overly complex rules + limited_indicators = indicators[:10] + + formatted = [] + for indicator in limited_indicators: + # Escape special characters for SIGMA + escaped = indicator.replace('\\\\', '\\\\\\\\').replace('*', '\\\\*').replace('?', '\\\\?') + formatted.append(f' - "{escaped}"') + + return '\\n'.join(formatted) + + def _enhance_detection_logic(self, rule_content: str, indicators: dict, poc_data: list) -> str: + """Enhance detection logic based on PoC quality and indicators""" + + # If we have high-quality PoCs, add additional detection conditions + best_poc = max(poc_data, key=lambda x: x.get('quality_analysis', {}).get('quality_score', 0)) + quality_score = best_poc.get('quality_analysis', {}).get('quality_score', 0) + + if quality_score > 60: # High quality PoC + # Add more specific detection conditions + if indicators.get('processes') and indicators.get('commands'): + additional_condition = """ + process_and_command: + Image|contains: {{PROCESSES}} + CommandLine|contains: {{COMMANDS}}""" + + # Insert before the condition line + rule_content = rule_content.replace( + 'condition: selection', + additional_condition + '\\n condition: selection or process_and_command' + ) + + return rule_content + + def _calculate_confidence_level(self, cve, poc_data: list) -> str: + """Calculate confidence level based on CVE and PoC data""" + score = 0 + + # CVSS score factor + if cve.cvss_score: + if cve.cvss_score >= 9.0: + score += 40 + elif cve.cvss_score >= 7.0: + score += 30 + elif cve.cvss_score >= 5.0: + score += 20 + else: + score += 10 + + # PoC quality factor + if poc_data: + total_stars = sum(p.get('stargazers_count', 0) for p in poc_data) + poc_count = len(poc_data) + + score += min(total_stars, 30) # Max 30 points for stars + score += min(poc_count * 5, 20) # Max 20 points for PoC count + + # Quality tier bonus + best_poc = max(poc_data, key=lambda x: x.get('quality_analysis', {}).get('quality_score', 0)) + quality_tier = best_poc.get('quality_analysis', {}).get('quality_tier', 'poor') + + tier_bonus = { + 'excellent': 20, + 'good': 15, + 'fair': 10, + 'poor': 5, + 'very_poor': 0 + } + score += tier_bonus.get(quality_tier, 0) + + # Determine confidence level + if score >= 80: + return 'HIGH' + elif score >= 60: + return 'MEDIUM' + elif score >= 40: + return 'LOW' + else: + return 'INFORMATIONAL' + + def _extract_log_source(self, template_name: str) -> str: + """Extract log source from template name""" + template_lower = template_name.lower() + + if 'process' in template_lower or 'execution' in template_lower: + return 'process_creation' + elif 'network' 
in template_lower: + return 'network_connection' + elif 'file' in template_lower: + return 'file_event' + elif 'powershell' in template_lower: + return 'powershell' + elif 'registry' in template_lower: + return 'registry_event' + else: + return 'generic' \ No newline at end of file diff --git a/backend/main.py b/backend/main.py index d1ff519..caf5acd 100644 --- a/backend/main.py +++ b/backend/main.py @@ -1,7 +1,7 @@ from fastapi import FastAPI, HTTPException, BackgroundTasks, Depends from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import JSONResponse -from sqlalchemy import create_engine, Column, String, Text, DECIMAL, TIMESTAMP, Boolean, ARRAY +from sqlalchemy import create_engine, Column, String, Text, DECIMAL, TIMESTAMP, Boolean, ARRAY, Integer, JSON, func from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.orm import sessionmaker, Session from sqlalchemy.dialects.postgresql import UUID @@ -19,6 +19,16 @@ import base64 from github import Github from urllib.parse import urlparse import hashlib +import logging +import threading + +# Setup logging +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +# Global job tracking +running_jobs = {} +job_cancellation_flags = {} # Database setup DATABASE_URL = os.getenv("DATABASE_URL", "postgresql://cve_user:cve_password@localhost:5432/cve_sigma_db") @@ -39,6 +49,13 @@ class CVE(Base): modified_date = Column(TIMESTAMP) affected_products = Column(ARRAY(String)) reference_urls = Column(ARRAY(String)) + # Bulk processing fields + data_source = Column(String(20), default='nvd_api') # 'nvd_api', 'nvd_bulk', 'manual' + nvd_json_version = Column(String(10), default='2.0') + bulk_processed = Column(Boolean, default=False) + # nomi-sec PoC fields + poc_count = Column(Integer, default=0) + poc_data = Column(JSON) # Store nomi-sec PoC metadata created_at = Column(TIMESTAMP, default=datetime.utcnow) updated_at = Column(TIMESTAMP, default=datetime.utcnow) @@ -56,6 +73,10 @@ class SigmaRule(Base): exploit_based = Column(Boolean, default=False) github_repos = Column(ARRAY(String)) exploit_indicators = Column(Text) # JSON string of extracted indicators + # Enhanced fields for new data sources + poc_source = Column(String(20), default='github_search') # 'github_search', 'nomi_sec', 'manual' + poc_quality_score = Column(Integer, default=0) # Based on star count, activity, etc. 
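# --- Illustrative template sketch ---
# EnhancedSigmaGenerator._generate_rule_content() above fills {{...}} placeholders
# found in RuleTemplate.template_content. The real templates live in the
# rule_templates table and are not included in this diff, so the layout below is
# only an assumed example of a process_creation template compatible with those
# placeholders (note the literal "condition: selection" line, which
# _enhance_detection_logic() looks for when appending extra conditions).
EXAMPLE_PROCESS_CREATION_TEMPLATE = """title: {{TITLE}}
status: experimental
description: {{DESCRIPTION}}
references:
{{REFERENCES}}
tags:
{{TAGS}}
logsource:
    category: process_creation
    product: windows
detection:
    selection:
        Image|contains:
{{PROCESSES}}
        CommandLine|contains:
{{COMMANDS}}
    condition: selection
level: {{LEVEL}}
"""

# The generator performs plain string replacement over these markers, e.g.:
rendered = EXAMPLE_PROCESS_CREATION_TEMPLATE.replace(
    "{{TITLE}}", "CVE-2025-0001 Enhanced Detection"
)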
+ nomi_sec_data = Column(JSON) # Store nomi-sec PoC metadata created_at = Column(TIMESTAMP, default=datetime.utcnow) updated_at = Column(TIMESTAMP, default=datetime.utcnow) @@ -69,6 +90,23 @@ class RuleTemplate(Base): description = Column(Text) created_at = Column(TIMESTAMP, default=datetime.utcnow) +class BulkProcessingJob(Base): + __tablename__ = "bulk_processing_jobs" + + id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) + job_type = Column(String(50), nullable=False) # 'nvd_bulk_seed', 'nomi_sec_sync', 'incremental_update' + status = Column(String(20), default='pending') # 'pending', 'running', 'completed', 'failed', 'cancelled' + year = Column(Integer) # For year-based processing + total_items = Column(Integer, default=0) + processed_items = Column(Integer, default=0) + failed_items = Column(Integer, default=0) + error_message = Column(Text) + job_metadata = Column(JSON) # Additional job-specific data + started_at = Column(TIMESTAMP) + completed_at = Column(TIMESTAMP) + cancelled_at = Column(TIMESTAMP) + created_at = Column(TIMESTAMP, default=datetime.utcnow) + # Pydantic models class CVEResponse(BaseModel): id: str @@ -941,12 +979,341 @@ async def get_stats(db: Session = Depends(get_db)): total_rules = db.query(SigmaRule).count() recent_cves = db.query(CVE).filter(CVE.published_date >= datetime.utcnow() - timedelta(days=7)).count() + # Enhanced stats with bulk processing info + bulk_processed_cves = db.query(CVE).filter(CVE.bulk_processed == True).count() + cves_with_pocs = db.query(CVE).filter(CVE.poc_count > 0).count() + nomi_sec_rules = db.query(SigmaRule).filter(SigmaRule.poc_source == 'nomi_sec').count() + return { "total_cves": total_cves, "total_sigma_rules": total_rules, - "recent_cves_7_days": recent_cves + "recent_cves_7_days": recent_cves, + "bulk_processed_cves": bulk_processed_cves, + "cves_with_pocs": cves_with_pocs, + "nomi_sec_rules": nomi_sec_rules, + "poc_coverage": (cves_with_pocs / total_cves * 100) if total_cves > 0 else 0, + "nomi_sec_coverage": (nomi_sec_rules / total_rules * 100) if total_rules > 0 else 0 } +# New bulk processing endpoints +@app.post("/api/bulk-seed") +async def start_bulk_seed(background_tasks: BackgroundTasks, + start_year: int = 2002, + end_year: Optional[int] = None, + skip_nvd: bool = False, + skip_nomi_sec: bool = False, + db: Session = Depends(get_db)): + """Start bulk seeding process""" + + async def bulk_seed_task(): + try: + from bulk_seeder import BulkSeeder + seeder = BulkSeeder(db) + result = await seeder.full_bulk_seed( + start_year=start_year, + end_year=end_year, + skip_nvd=skip_nvd, + skip_nomi_sec=skip_nomi_sec + ) + logger.info(f"Bulk seed completed: {result}") + except Exception as e: + logger.error(f"Bulk seed failed: {e}") + import traceback + traceback.print_exc() + + background_tasks.add_task(bulk_seed_task) + + return { + "message": "Bulk seeding process started", + "status": "started", + "start_year": start_year, + "end_year": end_year or datetime.now().year, + "skip_nvd": skip_nvd, + "skip_nomi_sec": skip_nomi_sec + } + +@app.post("/api/incremental-update") +async def start_incremental_update(background_tasks: BackgroundTasks, db: Session = Depends(get_db)): + """Start incremental update process""" + + async def incremental_update_task(): + try: + from bulk_seeder import BulkSeeder + seeder = BulkSeeder(db) + result = await seeder.incremental_update() + logger.info(f"Incremental update completed: {result}") + except Exception as e: + logger.error(f"Incremental update failed: {e}") + import 
traceback + traceback.print_exc() + + background_tasks.add_task(incremental_update_task) + + return { + "message": "Incremental update process started", + "status": "started" + } + +@app.post("/api/sync-nomi-sec") +async def sync_nomi_sec(background_tasks: BackgroundTasks, + cve_id: Optional[str] = None, + batch_size: int = 50, + db: Session = Depends(get_db)): + """Synchronize nomi-sec PoC data""" + + # Create job record + job = BulkProcessingJob( + job_type='nomi_sec_sync', + status='pending', + job_metadata={ + 'cve_id': cve_id, + 'batch_size': batch_size + } + ) + db.add(job) + db.commit() + db.refresh(job) + + job_id = str(job.id) + running_jobs[job_id] = job + job_cancellation_flags[job_id] = False + + async def sync_task(): + try: + job.status = 'running' + job.started_at = datetime.utcnow() + db.commit() + + from nomi_sec_client import NomiSecClient + client = NomiSecClient(db) + + if cve_id: + # Sync specific CVE + if job_cancellation_flags.get(job_id, False): + logger.info(f"Job {job_id} cancelled before starting") + return + + result = await client.sync_cve_pocs(cve_id) + logger.info(f"Nomi-sec sync for {cve_id}: {result}") + else: + # Sync all CVEs with cancellation support + result = await client.bulk_sync_all_cves( + batch_size=batch_size, + cancellation_flag=lambda: job_cancellation_flags.get(job_id, False) + ) + logger.info(f"Nomi-sec bulk sync completed: {result}") + + # Update job status if not cancelled + if not job_cancellation_flags.get(job_id, False): + job.status = 'completed' + job.completed_at = datetime.utcnow() + db.commit() + + except Exception as e: + if not job_cancellation_flags.get(job_id, False): + job.status = 'failed' + job.error_message = str(e) + job.completed_at = datetime.utcnow() + db.commit() + + logger.error(f"Nomi-sec sync failed: {e}") + import traceback + traceback.print_exc() + finally: + # Clean up tracking + running_jobs.pop(job_id, None) + job_cancellation_flags.pop(job_id, None) + + background_tasks.add_task(sync_task) + + return { + "message": f"Nomi-sec sync started" + (f" for {cve_id}" if cve_id else " for all CVEs"), + "status": "started", + "job_id": job_id, + "cve_id": cve_id, + "batch_size": batch_size + } + +@app.get("/api/bulk-jobs") +async def get_bulk_jobs(limit: int = 10, db: Session = Depends(get_db)): + """Get bulk processing job status""" + + jobs = db.query(BulkProcessingJob).order_by( + BulkProcessingJob.created_at.desc() + ).limit(limit).all() + + result = [] + for job in jobs: + job_dict = { + 'id': str(job.id), + 'job_type': job.job_type, + 'status': job.status, + 'year': job.year, + 'total_items': job.total_items, + 'processed_items': job.processed_items, + 'failed_items': job.failed_items, + 'error_message': job.error_message, + 'metadata': job.job_metadata, + 'started_at': job.started_at, + 'completed_at': job.completed_at, + 'created_at': job.created_at + } + result.append(job_dict) + + return result + +@app.get("/api/bulk-status") +async def get_bulk_status(db: Session = Depends(get_db)): + """Get comprehensive bulk processing status""" + + try: + from bulk_seeder import BulkSeeder + seeder = BulkSeeder(db) + status = await seeder.get_seeding_status() + return status + except Exception as e: + logger.error(f"Error getting bulk status: {e}") + return {"error": str(e)} + +@app.get("/api/poc-stats") +async def get_poc_stats(db: Session = Depends(get_db)): + """Get PoC-related statistics""" + + try: + from nomi_sec_client import NomiSecClient + client = NomiSecClient(db) + stats = await client.get_sync_status() + + # 
Additional PoC statistics + high_quality_cves = db.query(CVE).filter( + CVE.poc_count > 0, + func.json_extract_path_text(CVE.poc_data, '0', 'quality_analysis', 'quality_score').cast(Integer) > 60 + ).count() + + stats.update({ + 'high_quality_cves': high_quality_cves, + 'avg_poc_count': db.query(func.avg(CVE.poc_count)).filter(CVE.poc_count > 0).scalar() or 0 + }) + + return stats + except Exception as e: + logger.error(f"Error getting PoC stats: {e}") + return {"error": str(e)} + +@app.post("/api/regenerate-rules") +async def regenerate_sigma_rules(background_tasks: BackgroundTasks, + force: bool = False, + db: Session = Depends(get_db)): + """Regenerate SIGMA rules using enhanced nomi-sec data""" + + async def regenerate_task(): + try: + from enhanced_sigma_generator import EnhancedSigmaGenerator + generator = EnhancedSigmaGenerator(db) + + # Get CVEs with PoC data + cves_with_pocs = db.query(CVE).filter(CVE.poc_count > 0).all() + + rules_generated = 0 + rules_updated = 0 + + for cve in cves_with_pocs: + # Check if we should regenerate + existing_rule = db.query(SigmaRule).filter( + SigmaRule.cve_id == cve.cve_id + ).first() + + if existing_rule and existing_rule.poc_source == 'nomi_sec' and not force: + continue + + # Generate enhanced rule + result = await generator.generate_enhanced_rule(cve) + + if result['success']: + if existing_rule: + rules_updated += 1 + else: + rules_generated += 1 + + logger.info(f"Rule regeneration completed: {rules_generated} new, {rules_updated} updated") + + except Exception as e: + logger.error(f"Rule regeneration failed: {e}") + import traceback + traceback.print_exc() + + background_tasks.add_task(regenerate_task) + + return { + "message": "SIGMA rule regeneration started", + "status": "started", + "force": force + } + +@app.post("/api/cancel-job/{job_id}") +async def cancel_job(job_id: str, db: Session = Depends(get_db)): + """Cancel a running job""" + try: + # Find the job in the database + job = db.query(BulkProcessingJob).filter(BulkProcessingJob.id == job_id).first() + if not job: + raise HTTPException(status_code=404, detail="Job not found") + + if job.status not in ['pending', 'running']: + raise HTTPException(status_code=400, detail=f"Cannot cancel job with status: {job.status}") + + # Set cancellation flag + job_cancellation_flags[job_id] = True + + # Update job status + job.status = 'cancelled' + job.cancelled_at = datetime.utcnow() + job.error_message = "Job cancelled by user" + + db.commit() + + logger.info(f"Job {job_id} cancellation requested") + + return { + "message": f"Job {job_id} cancellation requested", + "status": "cancelled", + "job_id": job_id + } + except HTTPException: + raise + except Exception as e: + logger.error(f"Error cancelling job {job_id}: {e}") + raise HTTPException(status_code=500, detail=str(e)) + +@app.get("/api/running-jobs") +async def get_running_jobs(db: Session = Depends(get_db)): + """Get all currently running jobs""" + try: + jobs = db.query(BulkProcessingJob).filter( + BulkProcessingJob.status.in_(['pending', 'running']) + ).order_by(BulkProcessingJob.created_at.desc()).all() + + result = [] + for job in jobs: + result.append({ + 'id': str(job.id), + 'job_type': job.job_type, + 'status': job.status, + 'year': job.year, + 'total_items': job.total_items, + 'processed_items': job.processed_items, + 'failed_items': job.failed_items, + 'error_message': job.error_message, + 'started_at': job.started_at, + 'created_at': job.created_at, + 'can_cancel': job.status in ['pending', 'running'] + }) + + return result + 
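# --- Illustrative client sketch for the job endpoints above ---
# Assumes the API is reachable on localhost:8000 (the uvicorn settings used in
# main.py); the 'requests' dependency and the polling loop are example choices,
# not requirements of the patch.
import time
import requests

BASE = "http://localhost:8000"

# Kick off a nomi-sec PoC sync for a single CVE of interest
resp = requests.post(f"{BASE}/api/sync-nomi-sec", params={"cve_id": "CVE-2024-3400"})
job_id = resp.json()["job_id"]

# Poll recent jobs until ours finishes; it could also be stopped early via
# POST /api/cancel-job/{job_id}
while True:
    jobs = requests.get(f"{BASE}/api/bulk-jobs", params={"limit": 10}).json()
    job = next((j for j in jobs if j["id"] == job_id), None)
    if job and job["status"] in ("completed", "failed", "cancelled"):
        print(f"Job {job_id} finished with status {job['status']}")
        break
    time.sleep(5)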
except Exception as e: + logger.error(f"Error getting running jobs: {e}") + raise HTTPException(status_code=500, detail=str(e)) + if __name__ == "__main__": import uvicorn uvicorn.run(app, host="0.0.0.0", port=8000) diff --git a/backend/nomi_sec_client.py b/backend/nomi_sec_client.py new file mode 100644 index 0000000..27c8731 --- /dev/null +++ b/backend/nomi_sec_client.py @@ -0,0 +1,477 @@ +""" +Nomi-sec PoC-in-GitHub Integration Client +Interfaces with the nomi-sec PoC-in-GitHub API for curated exploit data +""" + +import aiohttp +import asyncio +import json +import logging +from datetime import datetime, timedelta +from typing import Dict, List, Optional, Tuple +from sqlalchemy.orm import Session +from sqlalchemy import and_, or_ +import time +import re + +# Configure logging +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +class NomiSecClient: + """Client for interacting with nomi-sec PoC-in-GitHub API""" + + def __init__(self, db_session: Session): + self.db_session = db_session + self.base_url = "https://poc-in-github.motikan2010.net/api/v1" + self.rss_url = "https://poc-in-github.motikan2010.net/rss" + + # Rate limiting + self.rate_limit_delay = 1.0 # 1 second between requests + self.last_request_time = 0 + + # Cache for recently fetched data + self.cache = {} + self.cache_ttl = 300 # 5 minutes + + async def _make_request(self, session: aiohttp.ClientSession, + url: str, params: dict = None) -> Optional[dict]: + """Make a rate-limited request to the API""" + try: + # Rate limiting + current_time = time.time() + time_since_last = current_time - self.last_request_time + if time_since_last < self.rate_limit_delay: + await asyncio.sleep(self.rate_limit_delay - time_since_last) + + async with session.get(url, params=params, timeout=30) as response: + self.last_request_time = time.time() + + if response.status == 200: + return await response.json() + else: + logger.warning(f"API request failed: {response.status} for {url}") + return None + + except Exception as e: + logger.error(f"Error making request to {url}: {e}") + return None + + async def get_pocs_for_cve(self, cve_id: str) -> List[dict]: + """Get all PoC repositories for a specific CVE""" + cache_key = f"cve_{cve_id}" + + # Check cache + if cache_key in self.cache: + cached_data, timestamp = self.cache[cache_key] + if time.time() - timestamp < self.cache_ttl: + return cached_data + + async with aiohttp.ClientSession() as session: + params = {"cve_id": cve_id} + data = await self._make_request(session, self.base_url, params) + + if data and "pocs" in data: + pocs = data["pocs"] + # Cache the result + self.cache[cache_key] = (pocs, time.time()) + logger.info(f"Found {len(pocs)} PoCs for {cve_id}") + return pocs + else: + logger.info(f"No PoCs found for {cve_id}") + return [] + + async def get_recent_pocs(self, limit: int = 100) -> List[dict]: + """Get recent PoCs from the API""" + async with aiohttp.ClientSession() as session: + params = {"limit": limit, "sort": "created_at"} + data = await self._make_request(session, self.base_url, params) + + if data and "pocs" in data: + return data["pocs"] + else: + return [] + + async def get_high_quality_pocs(self, min_stars: int = 5, limit: int = 100) -> List[dict]: + """Get high-quality PoCs sorted by star count""" + async with aiohttp.ClientSession() as session: + params = {"limit": limit, "sort": "stargazers_count"} + data = await self._make_request(session, self.base_url, params) + + if data and "pocs" in data: + # Filter by star count + filtered_pocs = [ + 
poc for poc in data["pocs"] + if int(poc.get("stargazers_count", "0")) >= min_stars + ] + return filtered_pocs + else: + return [] + + async def search_pocs(self, query: str, limit: int = 50) -> List[dict]: + """Search for PoCs using a query string""" + async with aiohttp.ClientSession() as session: + params = {"limit": limit, "q": query} + data = await self._make_request(session, self.base_url, params) + + if data and "pocs" in data: + return data["pocs"] + else: + return [] + + def analyze_poc_quality(self, poc: dict) -> dict: + """Analyze the quality of a PoC repository""" + quality_score = 0 + factors = {} + + # Star count factor (0-40 points) + stars = int(poc.get("stargazers_count", "0")) + star_score = min(stars * 2, 40) # 2 points per star, max 40 + quality_score += star_score + factors["star_score"] = star_score + + # Recency factor (0-20 points) + try: + updated_at = datetime.fromisoformat(poc.get("updated_at", "").replace('Z', '+00:00')) + days_old = (datetime.now(updated_at.tzinfo) - updated_at).days + recency_score = max(20 - (days_old // 30), 0) # Lose 1 point per month + quality_score += recency_score + factors["recency_score"] = recency_score + except: + factors["recency_score"] = 0 + + # Description quality factor (0-15 points) + description = poc.get("description", "") + desc_score = 0 + if description: + desc_score = min(len(description) // 10, 15) # 1 point per 10 chars, max 15 + quality_score += desc_score + factors["description_score"] = desc_score + + # Vulnerability description factor (0-15 points) + vuln_desc = poc.get("vuln_description", "") + vuln_score = 0 + if vuln_desc: + vuln_score = min(len(vuln_desc) // 20, 15) # 1 point per 20 chars, max 15 + quality_score += vuln_score + factors["vuln_description_score"] = vuln_score + + # Repository name relevance factor (0-10 points) + repo_name = poc.get("name", "").lower() + cve_id = poc.get("cve_id", "").lower() + name_score = 0 + if cve_id and cve_id.replace("-", "") in repo_name.replace("-", ""): + name_score = 10 + elif any(keyword in repo_name for keyword in ["exploit", "poc", "cve", "vuln"]): + name_score = 5 + quality_score += name_score + factors["name_relevance_score"] = name_score + + return { + "quality_score": quality_score, + "factors": factors, + "quality_tier": self._get_quality_tier(quality_score) + } + + def _get_quality_tier(self, score: int) -> str: + """Get quality tier based on score""" + if score >= 80: + return "excellent" + elif score >= 60: + return "good" + elif score >= 40: + return "fair" + elif score >= 20: + return "poor" + else: + return "very_poor" + + def extract_exploit_indicators(self, poc: dict) -> dict: + """Extract exploit indicators from PoC metadata""" + indicators = { + "processes": [], + "files": [], + "network": [], + "registry": [], + "commands": [], + "urls": [], + "techniques": [] + } + + # Extract from description and vulnerability description + text_sources = [ + poc.get("description", ""), + poc.get("vuln_description", ""), + poc.get("name", "") + ] + + full_text = " ".join(text_sources).lower() + + # Process patterns + process_patterns = [ + r'\b(cmd\.exe|powershell\.exe|bash|sh|python\.exe|java\.exe)\b', + r'\b(createprocess|shellexecute|system)\b', + r'\b(reverse.?shell|bind.?shell)\b' + ] + + for pattern in process_patterns: + matches = re.findall(pattern, full_text, re.IGNORECASE) + indicators["processes"].extend(matches) + + # File patterns + file_patterns = [ + r'\b([a-zA-Z]:\\[^\\]+\\[^\\]+\.[a-zA-Z0-9]+)\b', # Windows paths + 
r'\b(/[^/\s]+/[^/\s]+\.[a-zA-Z0-9]+)\b', # Unix paths + r'\b(\w+\.(exe|dll|bat|ps1|py|sh|jar))\b' # Common executable files + ] + + for pattern in file_patterns: + matches = re.findall(pattern, full_text, re.IGNORECASE) + if isinstance(matches[0], tuple) if matches else False: + indicators["files"].extend([m[0] for m in matches]) + else: + indicators["files"].extend(matches) + + # Network patterns + network_patterns = [ + r'\b(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\b', # IP addresses + r'\b((?:\d{1,5})|(?:0x[a-fA-F0-9]{1,4}))\b', # Ports + r'\b(http[s]?://[^\s]+)\b' # URLs + ] + + for pattern in network_patterns: + matches = re.findall(pattern, full_text, re.IGNORECASE) + if pattern.startswith(r'\b(http'): + indicators["urls"].extend(matches) + else: + indicators["network"].extend(matches) + + # Command patterns + command_patterns = [ + r'\b(curl|wget|nc|netcat|ncat)\b', + r'\b(whoami|id|uname|systeminfo)\b', + r'\b(cat|type|more|less)\b' + ] + + for pattern in command_patterns: + matches = re.findall(pattern, full_text, re.IGNORECASE) + indicators["commands"].extend(matches) + + # Clean up and deduplicate + for key in indicators: + indicators[key] = list(set(indicators[key])) + + return indicators + + async def sync_cve_pocs(self, cve_id: str) -> dict: + """Synchronize PoC data for a specific CVE""" + from main import CVE, SigmaRule + + # Get existing CVE + cve = self.db_session.query(CVE).filter(CVE.cve_id == cve_id).first() + if not cve: + logger.warning(f"CVE {cve_id} not found in database") + return {"error": "CVE not found"} + + # Fetch PoCs from nomi-sec API + pocs = await self.get_pocs_for_cve(cve_id) + + if not pocs: + logger.info(f"No PoCs found for {cve_id}") + return {"cve_id": cve_id, "pocs_found": 0} + + # Analyze and store PoC data + poc_data = [] + github_repos = [] + total_quality_score = 0 + + for poc in pocs: + quality_analysis = self.analyze_poc_quality(poc) + exploit_indicators = self.extract_exploit_indicators(poc) + + poc_entry = { + "id": poc.get("id"), + "name": poc.get("name"), + "owner": poc.get("owner"), + "full_name": poc.get("full_name"), + "html_url": poc.get("html_url"), + "description": poc.get("description"), + "stargazers_count": int(poc.get("stargazers_count", "0")), + "created_at": poc.get("created_at"), + "updated_at": poc.get("updated_at"), + "quality_analysis": quality_analysis, + "exploit_indicators": exploit_indicators + } + + poc_data.append(poc_entry) + github_repos.append(poc.get("html_url", "")) + total_quality_score += quality_analysis["quality_score"] + + # Update CVE with PoC data + cve.poc_count = len(pocs) + cve.poc_data = poc_data + cve.updated_at = datetime.utcnow() + + # Update or create SIGMA rule with enhanced PoC data + sigma_rule = self.db_session.query(SigmaRule).filter( + SigmaRule.cve_id == cve_id + ).first() + + if sigma_rule: + sigma_rule.poc_source = 'nomi_sec' + sigma_rule.poc_quality_score = total_quality_score // len(pocs) if pocs else 0 + sigma_rule.nomi_sec_data = { + "total_pocs": len(pocs), + "average_quality": total_quality_score // len(pocs) if pocs else 0, + "best_poc": max(poc_data, key=lambda x: x["quality_analysis"]["quality_score"]) if poc_data else None, + "total_stars": sum(p["stargazers_count"] for p in poc_data) + } + sigma_rule.github_repos = github_repos + sigma_rule.updated_at = datetime.utcnow() + + # Extract best exploit indicators + best_indicators = {} + for poc in poc_data: + for key, values in poc["exploit_indicators"].items(): + if key not in best_indicators: + best_indicators[key] = [] + 
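# --- Illustrative run of the analysis helpers used just above ---
# A synthetic PoC record (all field values below are made up for the example).
# analyze_poc_quality() and extract_exploit_indicators() do not touch the DB
# session, so None is passed to the constructor in this standalone snippet.
from nomi_sec_client import NomiSecClient

client = NomiSecClient(db_session=None)
sample_poc = {
    "cve_id": "CVE-2024-0001",
    "name": "CVE-2024-0001-poc",
    "description": "PoC that spawns powershell.exe and fetches a payload via http://192.0.2.10/payload.ps1",
    "vuln_description": "Remote code execution via crafted request",
    "stargazers_count": "12",
    "updated_at": "2025-06-01T00:00:00Z",
}

quality = client.analyze_poc_quality(sample_poc)
indicators = client.extract_exploit_indicators(sample_poc)
print(quality["quality_tier"], quality["quality_score"])  # tier depends on recency at run time
print(indicators["processes"], indicators["urls"])        # e.g. ['powershell.exe'], [the payload URL]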
best_indicators[key].extend(values) + + # Deduplicate and store + for key in best_indicators: + best_indicators[key] = list(set(best_indicators[key])) + + sigma_rule.exploit_indicators = json.dumps(best_indicators) + + self.db_session.commit() + + logger.info(f"Synchronized {len(pocs)} PoCs for {cve_id}") + + return { + "cve_id": cve_id, + "pocs_found": len(pocs), + "total_quality_score": total_quality_score, + "average_quality": total_quality_score // len(pocs) if pocs else 0, + "github_repos": github_repos + } + + async def bulk_sync_all_cves(self, batch_size: int = 100, cancellation_flag: Optional[callable] = None) -> dict: + """Synchronize PoC data for all CVEs in database""" + from main import CVE, BulkProcessingJob + + # Create bulk processing job + job = BulkProcessingJob( + job_type='nomi_sec_sync', + status='running', + started_at=datetime.utcnow(), + job_metadata={'batch_size': batch_size} + ) + self.db_session.add(job) + self.db_session.commit() + + total_processed = 0 + total_found = 0 + results = [] + + try: + # Get all CVEs from database + cves = self.db_session.query(CVE).all() + job.total_items = len(cves) + self.db_session.commit() + + # Process in batches + for i in range(0, len(cves), batch_size): + # Check for cancellation before each batch + if cancellation_flag and cancellation_flag(): + logger.info("Bulk sync cancelled by user") + job.status = 'cancelled' + job.cancelled_at = datetime.utcnow() + job.error_message = "Job cancelled by user" + break + + batch = cves[i:i + batch_size] + + for cve in batch: + # Check for cancellation before each CVE + if cancellation_flag and cancellation_flag(): + logger.info("Bulk sync cancelled by user") + job.status = 'cancelled' + job.cancelled_at = datetime.utcnow() + job.error_message = "Job cancelled by user" + break + + try: + result = await self.sync_cve_pocs(cve.cve_id) + total_processed += 1 + + if result.get("pocs_found", 0) > 0: + total_found += result["pocs_found"] + results.append(result) + + job.processed_items += 1 + + # Small delay to avoid overwhelming the API + await asyncio.sleep(0.5) + + except Exception as e: + logger.error(f"Error syncing PoCs for {cve.cve_id}: {e}") + job.failed_items += 1 + + # Break out of outer loop if cancelled + if job.status == 'cancelled': + break + + # Commit after each batch + self.db_session.commit() + logger.info(f"Processed batch {i//batch_size + 1}/{(len(cves) + batch_size - 1)//batch_size}") + + # Update job status (only if not cancelled) + if job.status != 'cancelled': + job.status = 'completed' + job.completed_at = datetime.utcnow() + + job.job_metadata.update({ + 'total_processed': total_processed, + 'total_pocs_found': total_found, + 'cves_with_pocs': len(results) + }) + + except Exception as e: + job.status = 'failed' + job.error_message = str(e) + job.completed_at = datetime.utcnow() + logger.error(f"Bulk PoC sync job failed: {e}") + + finally: + self.db_session.commit() + + return { + 'job_id': str(job.id), + 'status': job.status, + 'total_processed': total_processed, + 'total_pocs_found': total_found, + 'cves_with_pocs': len(results) + } + + async def get_sync_status(self) -> dict: + """Get synchronization status""" + from main import CVE, SigmaRule + + # Count CVEs with PoC data + total_cves = self.db_session.query(CVE).count() + cves_with_pocs = self.db_session.query(CVE).filter(CVE.poc_count > 0).count() + + # Count SIGMA rules with nomi-sec data + total_rules = self.db_session.query(SigmaRule).count() + rules_with_nomi_sec = self.db_session.query(SigmaRule).filter( + 
SigmaRule.poc_source == 'nomi_sec' + ).count() + + return { + 'total_cves': total_cves, + 'cves_with_pocs': cves_with_pocs, + 'poc_coverage': (cves_with_pocs / total_cves * 100) if total_cves > 0 else 0, + 'total_rules': total_rules, + 'rules_with_nomi_sec': rules_with_nomi_sec, + 'nomi_sec_coverage': (rules_with_nomi_sec / total_rules * 100) if total_rules > 0 else 0 + } \ No newline at end of file diff --git a/backend/nvd_bulk_processor.py b/backend/nvd_bulk_processor.py new file mode 100644 index 0000000..6b5e938 --- /dev/null +++ b/backend/nvd_bulk_processor.py @@ -0,0 +1,483 @@ +""" +NVD JSON Dataset Bulk Processor +Downloads and processes NVD JSON data feeds for comprehensive CVE seeding +""" + +import requests +import json +import gzip +import zipfile +import os +import logging +from datetime import datetime, timedelta +from typing import Dict, List, Optional, Tuple +from sqlalchemy.orm import Session +from sqlalchemy import and_, or_ +import asyncio +import aiohttp +from pathlib import Path +import hashlib +import time + +# Configure logging +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +class NVDBulkProcessor: + """Handles bulk downloading and processing of NVD JSON data feeds""" + + def __init__(self, db_session: Session, data_dir: str = "./nvd_data"): + self.db_session = db_session + self.data_dir = Path(data_dir) + self.data_dir.mkdir(exist_ok=True) + self.api_key = os.getenv("NVD_API_KEY") + + # NVD JSON 2.0 feed URLs + self.base_url = "https://nvd.nist.gov/feeds/json/cve/1.1" + self.feed_urls = { + "modified": f"{self.base_url}/nvdcve-1.1-modified.json.gz", + "recent": f"{self.base_url}/nvdcve-1.1-recent.json.gz" + } + + # Rate limiting + self.rate_limit_delay = 0.6 # 600ms between requests + self.last_request_time = 0 + + def get_year_feed_url(self, year: int) -> str: + """Get the URL for a specific year's CVE feed""" + return f"{self.base_url}/nvdcve-1.1-{year}.json.gz" + + def get_meta_url(self, feed_url: str) -> str: + """Get the metadata URL for a feed""" + return feed_url.replace(".json.gz", ".meta") + + async def download_file(self, session: aiohttp.ClientSession, url: str, + destination: Path, check_meta: bool = True) -> bool: + """Download a file with metadata checking""" + try: + # Check if we should download based on metadata + if check_meta: + meta_url = self.get_meta_url(url) + should_download = await self._should_download_file(session, meta_url, destination) + if not should_download: + logger.info(f"Skipping {url} - file is up to date") + return True + + # Rate limiting + current_time = time.time() + time_since_last = current_time - self.last_request_time + if time_since_last < self.rate_limit_delay: + await asyncio.sleep(self.rate_limit_delay - time_since_last) + + # Download the file + headers = {} + if self.api_key: + headers["apiKey"] = self.api_key + + async with session.get(url, headers=headers, timeout=30) as response: + if response.status == 200: + content = await response.read() + destination.write_bytes(content) + logger.info(f"Downloaded {url} -> {destination}") + self.last_request_time = time.time() + return True + else: + logger.error(f"Failed to download {url}: HTTP {response.status}") + return False + + except Exception as e: + logger.error(f"Error downloading {url}: {e}") + return False + + async def _should_download_file(self, session: aiohttp.ClientSession, + meta_url: str, destination: Path) -> bool: + """Check if file should be downloaded based on metadata""" + try: + # Download metadata + async with 
session.get(meta_url, timeout=10) as response: + if response.status != 200: + return True # Download if we can't get metadata + + meta_content = await response.text() + + # Parse metadata + meta_data = {} + for line in meta_content.strip().split('\n'): + if ':' in line: + key, value = line.split(':', 1) + meta_data[key.strip()] = value.strip() + + # Check if local file exists and matches + if destination.exists(): + local_size = destination.stat().st_size + remote_size = int(meta_data.get('size', 0)) + remote_sha256 = meta_data.get('sha256', '') + + if local_size == remote_size and remote_sha256: + # Verify SHA256 if available + local_sha256 = self._calculate_sha256(destination) + if local_sha256 == remote_sha256: + return False # File is up to date + + return True # Download needed + + except Exception as e: + logger.warning(f"Error checking metadata for {meta_url}: {e}") + return True # Download if metadata check fails + + def _calculate_sha256(self, file_path: Path) -> str: + """Calculate SHA256 hash of a file""" + sha256_hash = hashlib.sha256() + with open(file_path, "rb") as f: + for chunk in iter(lambda: f.read(4096), b""): + sha256_hash.update(chunk) + return sha256_hash.hexdigest() + + async def download_all_feeds(self, start_year: int = 2002, + end_year: Optional[int] = None) -> List[Path]: + """Download all NVD JSON feeds""" + if end_year is None: + end_year = datetime.now().year + + downloaded_files = [] + + async with aiohttp.ClientSession() as session: + # Download year-based feeds + for year in range(start_year, end_year + 1): + url = self.get_year_feed_url(year) + filename = f"nvdcve-1.1-{year}.json.gz" + destination = self.data_dir / filename + + if await self.download_file(session, url, destination): + downloaded_files.append(destination) + + # Download modified and recent feeds + for feed_name, url in self.feed_urls.items(): + filename = f"nvdcve-1.1-{feed_name}.json.gz" + destination = self.data_dir / filename + + if await self.download_file(session, url, destination): + downloaded_files.append(destination) + + return downloaded_files + + def extract_json_file(self, compressed_file: Path) -> Path: + """Extract JSON from compressed file""" + json_file = compressed_file.with_suffix('.json') + + try: + if compressed_file.suffix == '.gz': + with gzip.open(compressed_file, 'rt', encoding='utf-8') as f_in: + with open(json_file, 'w', encoding='utf-8') as f_out: + f_out.write(f_in.read()) + elif compressed_file.suffix == '.zip': + with zipfile.ZipFile(compressed_file, 'r') as zip_ref: + zip_ref.extractall(self.data_dir) + else: + # File is already uncompressed + return compressed_file + + logger.info(f"Extracted {compressed_file} -> {json_file}") + return json_file + + except Exception as e: + logger.error(f"Error extracting {compressed_file}: {e}") + raise + + def process_json_file(self, json_file: Path) -> Tuple[int, int]: + """Process a single JSON file and return (processed, failed) counts""" + from main import CVE, BulkProcessingJob + + processed_count = 0 + failed_count = 0 + + try: + with open(json_file, 'r', encoding='utf-8') as f: + data = json.load(f) + + cve_items = data.get('CVE_Items', []) + logger.info(f"Processing {len(cve_items)} CVEs from {json_file}") + + for cve_item in cve_items: + try: + cve_data = self._extract_cve_data(cve_item) + if cve_data: + self._store_cve_data(cve_data) + processed_count += 1 + else: + failed_count += 1 + + except Exception as e: + logger.error(f"Error processing CVE item: {e}") + failed_count += 1 + + # Commit changes + 
self.db_session.commit() + logger.info(f"Processed {processed_count} CVEs, failed: {failed_count}") + + except Exception as e: + logger.error(f"Error processing {json_file}: {e}") + self.db_session.rollback() + raise + + return processed_count, failed_count + + def _extract_cve_data(self, cve_item: dict) -> Optional[dict]: + """Extract CVE data from JSON item""" + try: + cve = cve_item.get('cve', {}) + impact = cve_item.get('impact', {}) + + cve_id = cve.get('CVE_data_meta', {}).get('ID', '') + if not cve_id: + return None + + # Description + description_data = cve.get('description', {}).get('description_data', []) + description = '' + if description_data: + description = description_data[0].get('value', '') + + # CVSS Score + cvss_score = None + severity = None + if 'baseMetricV3' in impact: + cvss_v3 = impact['baseMetricV3'].get('cvssV3', {}) + cvss_score = cvss_v3.get('baseScore') + severity = cvss_v3.get('baseSeverity', '').lower() + elif 'baseMetricV2' in impact: + cvss_v2 = impact['baseMetricV2'].get('cvssV2', {}) + cvss_score = cvss_v2.get('baseScore') + severity = impact['baseMetricV2'].get('severity', '').lower() + + # Dates + published_date = None + modified_date = None + if 'publishedDate' in cve_item: + published_date = datetime.fromisoformat( + cve_item['publishedDate'].replace('Z', '+00:00') + ) + if 'lastModifiedDate' in cve_item: + modified_date = datetime.fromisoformat( + cve_item['lastModifiedDate'].replace('Z', '+00:00') + ) + + # Affected products (from CPE data) + affected_products = [] + configurations = cve_item.get('configurations', {}) + for node in configurations.get('nodes', []): + for cpe_match in node.get('cpe_match', []): + if cpe_match.get('vulnerable', False): + cpe_uri = cpe_match.get('cpe23Uri', '') + if cpe_uri: + affected_products.append(cpe_uri) + + # Reference URLs + reference_urls = [] + references = cve.get('references', {}).get('reference_data', []) + for ref in references: + url = ref.get('url', '') + if url: + reference_urls.append(url) + + return { + 'cve_id': cve_id, + 'description': description, + 'cvss_score': cvss_score, + 'severity': severity, + 'published_date': published_date, + 'modified_date': modified_date, + 'affected_products': affected_products, + 'reference_urls': reference_urls, + 'data_source': 'nvd_bulk', + 'nvd_json_version': '1.1', + 'bulk_processed': True + } + + except Exception as e: + logger.error(f"Error extracting CVE data: {e}") + return None + + def _store_cve_data(self, cve_data: dict): + """Store CVE data in database""" + from main import CVE + + # Check if CVE already exists + existing_cve = self.db_session.query(CVE).filter( + CVE.cve_id == cve_data['cve_id'] + ).first() + + if existing_cve: + # Update existing CVE + for key, value in cve_data.items(): + setattr(existing_cve, key, value) + existing_cve.updated_at = datetime.utcnow() + logger.debug(f"Updated CVE {cve_data['cve_id']}") + else: + # Create new CVE + new_cve = CVE(**cve_data) + self.db_session.add(new_cve) + logger.debug(f"Created new CVE {cve_data['cve_id']}") + + async def bulk_seed_database(self, start_year: int = 2002, + end_year: Optional[int] = None) -> dict: + """Perform complete bulk seeding of the database""" + from main import BulkProcessingJob + + if end_year is None: + end_year = datetime.now().year + + # Create bulk processing job + job = BulkProcessingJob( + job_type='nvd_bulk_seed', + status='running', + started_at=datetime.utcnow(), + job_metadata={ + 'start_year': start_year, + 'end_year': end_year, + 'total_years': end_year - start_year 
+ 1 + } + ) + self.db_session.add(job) + self.db_session.commit() + + total_processed = 0 + total_failed = 0 + results = [] + + try: + # Download all feeds + logger.info(f"Starting bulk seed from {start_year} to {end_year}") + downloaded_files = await self.download_all_feeds(start_year, end_year) + + job.total_items = len(downloaded_files) + self.db_session.commit() + + # Process each file + for file_path in downloaded_files: + try: + # Extract JSON file + json_file = self.extract_json_file(file_path) + + # Process the JSON file + processed, failed = self.process_json_file(json_file) + + total_processed += processed + total_failed += failed + job.processed_items += 1 + + results.append({ + 'file': file_path.name, + 'processed': processed, + 'failed': failed + }) + + # Clean up extracted file if it's different from original + if json_file != file_path: + json_file.unlink() + + self.db_session.commit() + + except Exception as e: + logger.error(f"Error processing {file_path}: {e}") + job.failed_items += 1 + total_failed += 1 + self.db_session.commit() + + # Update job status + job.status = 'completed' + job.completed_at = datetime.utcnow() + job.job_metadata.update({ + 'total_processed': total_processed, + 'total_failed': total_failed, + 'results': results + }) + + except Exception as e: + job.status = 'failed' + job.error_message = str(e) + job.completed_at = datetime.utcnow() + logger.error(f"Bulk seed job failed: {e}") + + finally: + self.db_session.commit() + + return { + 'job_id': str(job.id), + 'status': job.status, + 'total_processed': total_processed, + 'total_failed': total_failed, + 'results': results + } + + async def incremental_update(self) -> dict: + """Perform incremental update using modified and recent feeds""" + from main import BulkProcessingJob + + # Create incremental update job + job = BulkProcessingJob( + job_type='incremental_update', + status='running', + started_at=datetime.utcnow(), + job_metadata={'feeds': ['modified', 'recent']} + ) + self.db_session.add(job) + self.db_session.commit() + + total_processed = 0 + total_failed = 0 + results = [] + + try: + # Download modified and recent feeds + async with aiohttp.ClientSession() as session: + for feed_name, url in self.feed_urls.items(): + filename = f"nvdcve-1.1-{feed_name}.json.gz" + destination = self.data_dir / filename + + if await self.download_file(session, url, destination): + try: + json_file = self.extract_json_file(destination) + processed, failed = self.process_json_file(json_file) + + total_processed += processed + total_failed += failed + + results.append({ + 'feed': feed_name, + 'processed': processed, + 'failed': failed + }) + + # Clean up + if json_file != destination: + json_file.unlink() + + except Exception as e: + logger.error(f"Error processing {feed_name} feed: {e}") + total_failed += 1 + + job.status = 'completed' + job.completed_at = datetime.utcnow() + job.job_metadata.update({ + 'total_processed': total_processed, + 'total_failed': total_failed, + 'results': results + }) + + except Exception as e: + job.status = 'failed' + job.error_message = str(e) + job.completed_at = datetime.utcnow() + logger.error(f"Incremental update job failed: {e}") + + finally: + self.db_session.commit() + + return { + 'job_id': str(job.id), + 'status': job.status, + 'total_processed': total_processed, + 'total_failed': total_failed, + 'results': results + } \ No newline at end of file diff --git a/backend/requirements.txt b/backend/requirements.txt index 5d590e5..e6440fd 100644 --- a/backend/requirements.txt +++ 
b/backend/requirements.txt @@ -12,3 +12,5 @@ pygithub==2.1.1 gitpython==3.1.40 beautifulsoup4==4.12.2 lxml==4.9.3 +aiohttp==3.9.1 +aiofiles diff --git a/docker-compose.yml b/docker-compose.yml index 048a4d0..39d185a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,5 +1,3 @@ -version: '3.8' - services: db: image: postgres:15 @@ -25,6 +23,7 @@ services: environment: DATABASE_URL: postgresql://cve_user:cve_password@db:5432/cve_sigma_db NVD_API_KEY: ${NVD_API_KEY:-} + GITHUB_TOKEN: ${GITHUB_TOKEN} depends_on: db: condition: service_healthy diff --git a/frontend/src/App.js b/frontend/src/App.js index 0bb35ef..b7a540a 100644 --- a/frontend/src/App.js +++ b/frontend/src/App.js @@ -15,6 +15,10 @@ function App() { const [activeTab, setActiveTab] = useState('dashboard'); const [fetchingCves, setFetchingCves] = useState(false); const [testResult, setTestResult] = useState(null); + const [bulkJobs, setBulkJobs] = useState([]); + const [bulkStatus, setBulkStatus] = useState({}); + const [pocStats, setPocStats] = useState({}); + const [bulkProcessing, setBulkProcessing] = useState(false); useEffect(() => { fetchData(); @@ -23,15 +27,21 @@ function App() { const fetchData = async () => { try { setLoading(true); - const [cvesRes, rulesRes, statsRes] = await Promise.all([ + const [cvesRes, rulesRes, statsRes, bulkJobsRes, bulkStatusRes, pocStatsRes] = await Promise.all([ axios.get(`${API_BASE_URL}/api/cves`), axios.get(`${API_BASE_URL}/api/sigma-rules`), - axios.get(`${API_BASE_URL}/api/stats`) + axios.get(`${API_BASE_URL}/api/stats`), + axios.get(`${API_BASE_URL}/api/bulk-jobs`), + axios.get(`${API_BASE_URL}/api/bulk-status`), + axios.get(`${API_BASE_URL}/api/poc-stats`) ]); setCves(cvesRes.data); setSigmaRules(rulesRes.data); setStats(statsRes.data); + setBulkJobs(bulkJobsRes.data); + setBulkStatus(bulkStatusRes.data); + setPocStats(pocStatsRes.data); } catch (error) { console.error('Error fetching data:', error); } finally { @@ -39,6 +49,20 @@ function App() { } }; + const cancelJob = async (jobId) => { + try { + const response = await axios.post(`${API_BASE_URL}/api/cancel-job/${jobId}`); + console.log('Cancel job response:', response.data); + // Refresh data after cancelling + setTimeout(() => { + fetchData(); + }, 1000); + } catch (error) { + console.error('Error cancelling job:', error); + alert('Failed to cancel job. 
Please try again.'); + } + }; + const handleFetchCves = async () => { try { setFetchingCves(true); @@ -73,6 +97,73 @@ function App() { } }; + const startBulkSeed = async (startYear = 2020, endYear = null) => { + try { + setBulkProcessing(true); + const response = await axios.post(`${API_BASE_URL}/api/bulk-seed`, { + start_year: startYear, + end_year: endYear + }); + console.log('Bulk seed response:', response.data); + // Refresh data after starting + setTimeout(() => { + fetchData(); + }, 2000); + } catch (error) { + console.error('Error starting bulk seed:', error); + setBulkProcessing(false); + } + }; + + const startIncrementalUpdate = async () => { + try { + setBulkProcessing(true); + const response = await axios.post(`${API_BASE_URL}/api/incremental-update`); + console.log('Incremental update response:', response.data); + setTimeout(() => { + fetchData(); + setBulkProcessing(false); + }, 2000); + } catch (error) { + console.error('Error starting incremental update:', error); + setBulkProcessing(false); + } + }; + + const syncNomiSec = async (cveId = null) => { + try { + setBulkProcessing(true); + const response = await axios.post(`${API_BASE_URL}/api/sync-nomi-sec`, { + cve_id: cveId + }); + console.log('Nomi-sec sync response:', response.data); + setTimeout(() => { + fetchData(); + setBulkProcessing(false); + }, 2000); + } catch (error) { + console.error('Error syncing nomi-sec:', error); + setBulkProcessing(false); + } + }; + + const regenerateRules = async (force = false) => { + try { + setBulkProcessing(true); + const response = await axios.post(`${API_BASE_URL}/api/regenerate-rules`, { + force: force + }); + console.log('Rule regeneration response:', response.data); + setTimeout(() => { + fetchData(); + setBulkProcessing(false); + }, 2000); + } catch (error) { + console.error('Error regenerating rules:', error); + setBulkProcessing(false); + } + }; + const getSeverityColor = (severity) => { switch (severity?.toLowerCase()) { case 'critical': return 'bg-red-100 text-red-800'; @@ -93,18 +184,81 @@ function App() { const Dashboard = () => (
[The remainder of the frontend/src/App.js diff (hunks @@ -93,18 +184,81 @@ through @@ -584,6 +920,7 @@) lost its JSX markup during extraction; only text and expression fragments survive. The recoverable changes are:
- New dashboard stat cards: Total CVEs (with a "Bulk: {stats.bulk_processed_cves}" sub-count), SIGMA Rules (with "Nomi-sec: {stats.nomi_sec_rules}"), CVEs with PoCs (with "{stats.poc_coverage}% coverage"), Recent CVEs (7d), and High Quality PoCs (with "Avg: {pocStats.avg_poc_count}").
- A "Bulk Processing" control panel whose buttons trigger the startBulkSeed, startIncrementalUpdate, syncNomiSec, and regenerateRules handlers defined above.
- A new BulkJobsList component: a "System Status" overview driven by bulkStatus.database_stats (Total CVEs, Bulk Processed, With PoCs, Enhanced Rules), a "Running Jobs" section with per-job progress (processed/total items, failed count), start time, optional year, and a Cancel button wired to cancelJob, and a "Recent Jobs" list showing job type, status, start/completion times, progress, and error messages.
- A new "Bulk Jobs" navigation tab that renders BulkJobsList when activeTab === 'bulk-jobs'.]

diff --git a/init.sql b/init.sql
index 2184f47..8e9e7d5 100644
--- a/init.sql
+++ b/init.sql
@@ -13,6 +13,13 @@ CREATE TABLE cves (
     modified_date TIMESTAMP,
     affected_products TEXT[],
     reference_urls TEXT[],
+    -- Bulk processing fields
+    data_source VARCHAR(20) DEFAULT 'nvd_api',
+    nvd_json_version VARCHAR(10) DEFAULT '2.0',
+    bulk_processed BOOLEAN DEFAULT FALSE,
+    -- nomi-sec PoC fields
+    poc_count INTEGER DEFAULT 0,
+    poc_data JSON,
     created_at TIMESTAMP DEFAULT NOW(),
     updated_at TIMESTAMP DEFAULT NOW()
 );
@@ -30,6 +37,10 @@ CREATE TABLE sigma_rules (
     exploit_based BOOLEAN DEFAULT FALSE,
     github_repos TEXT[],
     exploit_indicators TEXT,
+    -- Enhanced fields for new data sources
+    poc_source VARCHAR(20) DEFAULT 'github_search',
+    poc_quality_score INTEGER DEFAULT 0,
+    nomi_sec_data JSON,
     created_at TIMESTAMP DEFAULT NOW(),
     updated_at TIMESTAMP DEFAULT NOW()
 );
@@ -44,6 +55,23 @@ CREATE TABLE rule_templates (
     created_at TIMESTAMP DEFAULT NOW()
 );
 
+-- Bulk processing jobs table
+CREATE TABLE bulk_processing_jobs (
+    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
+    job_type VARCHAR(50) NOT NULL,
+    status VARCHAR(20) DEFAULT 'pending',
+    year INTEGER,
+    total_items INTEGER DEFAULT 0,
+    processed_items INTEGER DEFAULT 0,
+    failed_items INTEGER DEFAULT 0,
+    error_message TEXT,
+    job_metadata JSON,
+    started_at TIMESTAMP,
+    completed_at TIMESTAMP,
+    cancelled_at TIMESTAMP,
+    created_at TIMESTAMP DEFAULT NOW()
+);
+
 -- Insert some basic rule templates
 INSERT INTO rule_templates (template_name, template_content, applicable_product_patterns, description) VALUES
 (
diff --git a/start.sh b/start.sh
old mode 100644
new mode 100755
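
The patch itself ends with the schema and file-mode changes; for reference, here is a minimal driver sketch showing how the new backend pieces (NVDBulkProcessor and NomiSecClient from the files above) could be exercised from a one-off script. The `SessionLocal` import is an assumption, not something the patch shows: it presumes `main.py` exposes a SQLAlchemy session factory under that name and that the script runs from the backend/ directory.

```python
"""Minimal bulk-seed driver sketch (not part of the patch)."""
import asyncio

from main import SessionLocal  # assumed session factory exported by the FastAPI app
from nvd_bulk_processor import NVDBulkProcessor
from nomi_sec_client import NomiSecClient


async def seed(start_year: int = 2020) -> None:
    db = SessionLocal()
    try:
        # Phase 1: download the NVD year feeds (plus modified/recent) and ingest them
        nvd = NVDBulkProcessor(db)
        nvd_results = await nvd.bulk_seed_database(start_year=start_year)
        print(f"NVD: {nvd_results['total_processed']} CVEs processed")

        # Phase 2: attach nomi-sec PoC metadata to the stored CVEs
        nomi = NomiSecClient(db)
        poc_results = await nomi.bulk_sync_all_cves(batch_size=50)
        print(f"nomi-sec: {poc_results['total_pocs_found']} PoCs found")

        # Report coverage computed from the new poc_count / poc_source columns
        status = await nomi.get_sync_status()
        print(f"PoC coverage: {status['poc_coverage']:.1f}%")
    finally:
        db.close()


if __name__ == "__main__":
    asyncio.run(seed())
```

In normal operation the same flow is driven through the API endpoints the frontend calls (/api/bulk-seed, /api/sync-nomi-sec); the script form is only useful for seeding a fresh database without the web stack running.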