From 455a46c88f08ce0104382143c61038f77b1e3386 Mon Sep 17 00:00:00 2001 From: bpmcdevitt Date: Wed, 9 Jul 2025 11:58:29 -0500 Subject: [PATCH] added git submodule for more exploits. added template dir for base yaml templates for sigma rules --- .gitmodules | 3 + backend/enhanced_sigma_generator.py | 5 +- backend/initialize_templates.py | 235 ++----- backend/main.py | 222 ++++++- backend/mcdevitt_poc_client.py | 593 ++++++++++++++++++ backend/requirements.txt | 1 + backend/templates/README.md | 128 ++++ backend/templates/command_injection.yaml | 44 ++ backend/templates/credential_access.yaml | 39 ++ backend/templates/file_system_activity.yaml | 30 + backend/templates/lateral_movement.yaml | 42 ++ backend/templates/network_connection.yaml | 36 ++ backend/templates/persistence.yaml | 40 ++ backend/templates/powershell_execution.yaml | 32 + backend/templates/privilege_escalation.yaml | 41 ++ backend/templates/process_execution.yaml | 34 + backend/templates/registry_modification.yaml | 29 + backend/templates/service_manipulation.yaml | 42 ++ backend/templates/web_application_attack.yaml | 33 + docker-compose.yml | 1 + frontend/src/App.js | 94 +-- github_poc_collector | 1 + 22 files changed, 1475 insertions(+), 250 deletions(-) create mode 100644 .gitmodules create mode 100644 backend/mcdevitt_poc_client.py create mode 100644 backend/templates/README.md create mode 100644 backend/templates/command_injection.yaml create mode 100644 backend/templates/credential_access.yaml create mode 100644 backend/templates/file_system_activity.yaml create mode 100644 backend/templates/lateral_movement.yaml create mode 100644 backend/templates/network_connection.yaml create mode 100644 backend/templates/persistence.yaml create mode 100644 backend/templates/powershell_execution.yaml create mode 100644 backend/templates/privilege_escalation.yaml create mode 100644 backend/templates/process_execution.yaml create mode 100644 backend/templates/registry_modification.yaml create mode 100644 backend/templates/service_manipulation.yaml create mode 100644 backend/templates/web_application_attack.yaml create mode 160000 github_poc_collector diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..218e4ae --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "github_poc_collector"] + path = github_poc_collector + url = https://git.mcdevitt.tech/bpmcdevitt/github_poc_collector diff --git a/backend/enhanced_sigma_generator.py b/backend/enhanced_sigma_generator.py index e94e222..ee77c83 100644 --- a/backend/enhanced_sigma_generator.py +++ b/backend/enhanced_sigma_generator.py @@ -60,13 +60,14 @@ class EnhancedSigmaGenerator: 'confidence_level': confidence_level, 'auto_generated': True, 'exploit_based': len(poc_data) > 0, - 'poc_source': 'nomi_sec', + 'poc_source': getattr(cve, 'poc_source', 'nomi_sec'), 'poc_quality_score': best_poc.get('quality_analysis', {}).get('quality_score', 0) if best_poc else 0, 'nomi_sec_data': { 'total_pocs': len(poc_data), 'best_poc_quality': best_poc.get('quality_analysis', {}).get('quality_score', 0) if best_poc else 0, 'total_stars': sum(p.get('stargazers_count', 0) for p in poc_data), - 'avg_stars': sum(p.get('stargazers_count', 0) for p in poc_data) / len(poc_data) if poc_data else 0 + 'avg_stars': sum(p.get('stargazers_count', 0) for p in poc_data) / len(poc_data) if poc_data else 0, + 'source': getattr(cve, 'poc_source', 'nomi_sec') }, 'github_repos': [p.get('html_url', '') for p in poc_data], 'exploit_indicators': json.dumps(self._combine_exploit_indicators(poc_data)), 
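The `getattr(cve, 'poc_source', 'nomi_sec')` change above is what lets rule metadata reflect whichever PoC source populated the CVE, while older rows keep the historical default. A minimal sketch of the fallback semantics (the `_CVE` stand-in class is hypothetical; the real ORM model lives in `backend/main.py`):

```python
class _CVE:
    """Hypothetical stand-in for the ORM CVE model in backend/main.py."""
    pass

cve = _CVE()
# Attribute absent -> falls back to the historical default:
assert getattr(cve, "poc_source", "nomi_sec") == "nomi_sec"

# Attribute present (e.g., set by the GitHub PoC sync) -> used as-is:
cve.poc_source = "github_poc"
assert getattr(cve, "poc_source", "nomi_sec") == "github_poc"
```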
diff --git a/backend/initialize_templates.py b/backend/initialize_templates.py index 340a962..bc356b8 100644 --- a/backend/initialize_templates.py +++ b/backend/initialize_templates.py @@ -4,200 +4,53 @@ Initialize SIGMA rule templates for enhanced rule generation """ import json +import yaml +import os +from pathlib import Path from datetime import datetime from main import SessionLocal, RuleTemplate, Base, engine # Create tables if they don't exist Base.metadata.create_all(bind=engine) -# Template definitions with actual SIGMA rule content -SIGMA_TEMPLATES = [ - { - "template_name": "Process Execution Detection", - "template_content": """title: {{TITLE}} -id: {{RULE_ID}} -status: experimental -description: {{DESCRIPTION}} -author: CVE-SIGMA Auto Generator -date: {{DATE}} -references: -{{REFERENCES}} -tags: -{{TAGS}} -logsource: - category: process_creation - product: windows -detection: - selection: - Image|endswith: -{{PROCESSES}} - selection_cmd: - CommandLine|contains: -{{COMMANDS}} - condition: selection or selection_cmd -falsepositives: - - Legitimate software installations - - System administration tasks -level: {{LEVEL}}""", - "applicable_product_patterns": ["windows", "microsoft", "office", "exchange", "sharepoint"], - "description": "Detects suspicious process execution based on PoC exploit indicators" - }, - { - "template_name": "Network Connection Detection", - "template_content": """title: {{TITLE}} -id: {{RULE_ID}} -status: experimental -description: {{DESCRIPTION}} -author: CVE-SIGMA Auto Generator -date: {{DATE}} -references: -{{REFERENCES}} -tags: -{{TAGS}} -logsource: - category: network_connection - product: windows -detection: - selection: - Initiated: true - DestinationIp: -{{NETWORK}} - selection_url: - DestinationHostname|contains: -{{URLS}} - condition: selection or selection_url -falsepositives: - - Legitimate network connections - - Software updates -level: {{LEVEL}}""", - "applicable_product_patterns": ["network", "web", "http", "https", "tcp", "udp"], - "description": "Detects suspicious network connections based on PoC exploit indicators" - }, - { - "template_name": "File System Activity Detection", - "template_content": """title: {{TITLE}} -id: {{RULE_ID}} -status: experimental -description: {{DESCRIPTION}} -author: CVE-SIGMA Auto Generator -date: {{DATE}} -references: -{{REFERENCES}} -tags: -{{TAGS}} -logsource: - category: file_event - product: windows -detection: - selection: - TargetFilename|contains: -{{FILES}} - condition: selection -falsepositives: - - Legitimate file operations - - Software installations -level: {{LEVEL}}""", - "applicable_product_patterns": ["file", "filesystem", "upload", "download"], - "description": "Detects suspicious file system activity based on PoC exploit indicators" - }, - { - "template_name": "PowerShell Execution Detection", - "template_content": """title: {{TITLE}} -id: {{RULE_ID}} -status: experimental -description: {{DESCRIPTION}} -author: CVE-SIGMA Auto Generator -date: {{DATE}} -references: -{{REFERENCES}} -tags: -{{TAGS}} -logsource: - category: process_creation - product: windows -detection: - selection: - Image|endswith: - - '\\powershell.exe' - - '\\pwsh.exe' - CommandLine|contains: -{{COMMANDS}} - condition: selection -falsepositives: - - Legitimate PowerShell scripts - - System administration -level: {{LEVEL}}""", - "applicable_product_patterns": ["powershell", "windows", "microsoft"], - "description": "Detects suspicious PowerShell execution based on PoC exploit indicators" - }, - { - "template_name": "Web 
Application Attack Detection", - "template_content": """title: {{TITLE}} -id: {{RULE_ID}} -status: experimental -description: {{DESCRIPTION}} -author: CVE-SIGMA Auto Generator -date: {{DATE}} -references: -{{REFERENCES}} -tags: -{{TAGS}} -logsource: - category: webserver -detection: - selection: - cs-uri-query|contains: -{{URLS}} - selection_user_agent: - cs-user-agent|contains: -{{COMMANDS}} - condition: selection or selection_user_agent -falsepositives: - - Legitimate web application usage - - Security scanners -level: {{LEVEL}}""", - "applicable_product_patterns": ["web", "http", "apache", "nginx", "iis"], - "description": "Detects web application attacks based on PoC exploit indicators" - }, - { - "template_name": "Registry Modification Detection", - "template_content": """title: {{TITLE}} -id: {{RULE_ID}} -status: experimental -description: {{DESCRIPTION}} -author: CVE-SIGMA Auto Generator -date: {{DATE}} -references: -{{REFERENCES}} -tags: -{{TAGS}} -logsource: - category: registry_event - product: windows -detection: - selection: - TargetObject|contains: -{{REGISTRY}} - condition: selection -falsepositives: - - Legitimate software configuration changes - - System updates -level: {{LEVEL}}""", - "applicable_product_patterns": ["registry", "windows", "microsoft"], - "description": "Detects suspicious registry modifications based on PoC exploit indicators" - } -] +def load_templates_from_files(): + """Load SIGMA rule templates from YAML files in the templates directory""" + templates = [] + templates_dir = Path(__file__).parent / "templates" + + if not templates_dir.exists(): + print(f"Templates directory not found: {templates_dir}") + return templates + + # Load all YAML files from templates directory + for template_file in templates_dir.glob("*.yaml"): + try: + with open(template_file, 'r', encoding='utf-8') as f: + template_data = yaml.safe_load(f) + templates.append(template_data) + print(f"Loaded template: {template_data['template_name']}") + except Exception as e: + print(f"Error loading template from {template_file}: {e}") + + return templates def initialize_templates(): """Initialize rule templates in the database""" db = SessionLocal() try: + # Load templates from YAML files + templates = load_templates_from_files() + + if not templates: + print("No templates found to initialize") + return + # Clear existing templates db.query(RuleTemplate).delete() # Add new templates - for template_data in SIGMA_TEMPLATES: + for template_data in templates: template = RuleTemplate( template_name=template_data["template_name"], template_content=template_data["template_content"], @@ -207,7 +60,7 @@ def initialize_templates(): db.add(template) db.commit() - print(f"Successfully initialized {len(SIGMA_TEMPLATES)} rule templates") + print(f"Successfully initialized {len(templates)} rule templates") except Exception as e: db.rollback() @@ -216,5 +69,27 @@ def initialize_templates(): finally: db.close() +def list_available_templates(): + """List all available template files and their details""" + templates = load_templates_from_files() + + if not templates: + print("No templates found") + return + + print(f"\nFound {len(templates)} available templates:") + print("=" * 60) + + for i, template in enumerate(templates, 1): + print(f"{i}. 
{template['template_name']}") + print(f" Description: {template['description']}") + print(f" Applicable patterns: {', '.join(template['applicable_product_patterns'])}") + print() + if __name__ == "__main__": - initialize_templates() \ No newline at end of file + import sys + + if len(sys.argv) > 1 and sys.argv[1] == "list": + list_available_templates() + else: + initialize_templates() \ No newline at end of file diff --git a/backend/main.py b/backend/main.py index caf5acd..f13e87f 100644 --- a/backend/main.py +++ b/backend/main.py @@ -21,6 +21,7 @@ from urllib.parse import urlparse import hashlib import logging import threading +from mcdevitt_poc_client import GitHubPoCClient # Setup logging logging.basicConfig(level=logging.INFO) @@ -138,6 +139,24 @@ class SigmaRuleResponse(BaseModel): class Config: from_attributes = True +# Request models +class BulkSeedRequest(BaseModel): + start_year: int = 2002 + end_year: Optional[int] = None + skip_nvd: bool = False + skip_nomi_sec: bool = True + +class NomiSecSyncRequest(BaseModel): + cve_id: Optional[str] = None + batch_size: int = 50 + +class GitHubPoCSyncRequest(BaseModel): + cve_id: Optional[str] = None + batch_size: int = 50 + +class RuleRegenRequest(BaseModel): + force: bool = False + # GitHub Exploit Analysis Service class GitHubExploitAnalyzer: def __init__(self): @@ -768,11 +787,11 @@ async def background_cve_fetch(): @asynccontextmanager async def lifespan(app: FastAPI): - # Start background task - task = asyncio.create_task(background_cve_fetch()) + # Background CVE fetching disabled - use manual fetch endpoint instead + # task = asyncio.create_task(background_cve_fetch()) yield # Clean up - task.cancel() + # task.cancel() # FastAPI app app = FastAPI(title="CVE-SIGMA Auto Generator", lifespan=lifespan) @@ -998,10 +1017,7 @@ async def get_stats(db: Session = Depends(get_db)): # New bulk processing endpoints @app.post("/api/bulk-seed") async def start_bulk_seed(background_tasks: BackgroundTasks, - start_year: int = 2002, - end_year: Optional[int] = None, - skip_nvd: bool = False, - skip_nomi_sec: bool = False, + request: BulkSeedRequest, db: Session = Depends(get_db)): """Start bulk seeding process""" @@ -1010,10 +1026,10 @@ async def start_bulk_seed(background_tasks: BackgroundTasks, from bulk_seeder import BulkSeeder seeder = BulkSeeder(db) result = await seeder.full_bulk_seed( - start_year=start_year, - end_year=end_year, - skip_nvd=skip_nvd, - skip_nomi_sec=skip_nomi_sec + start_year=request.start_year, + end_year=request.end_year, + skip_nvd=request.skip_nvd, + skip_nomi_sec=request.skip_nomi_sec ) logger.info(f"Bulk seed completed: {result}") except Exception as e: @@ -1026,10 +1042,10 @@ async def start_bulk_seed(background_tasks: BackgroundTasks, return { "message": "Bulk seeding process started", "status": "started", - "start_year": start_year, - "end_year": end_year or datetime.now().year, - "skip_nvd": skip_nvd, - "skip_nomi_sec": skip_nomi_sec + "start_year": request.start_year, + "end_year": request.end_year or datetime.now().year, + "skip_nvd": request.skip_nvd, + "skip_nomi_sec": request.skip_nomi_sec } @app.post("/api/incremental-update") @@ -1056,8 +1072,7 @@ async def start_incremental_update(background_tasks: BackgroundTasks, db: Sessio @app.post("/api/sync-nomi-sec") async def sync_nomi_sec(background_tasks: BackgroundTasks, - cve_id: Optional[str] = None, - batch_size: int = 50, + request: NomiSecSyncRequest, db: Session = Depends(get_db)): """Synchronize nomi-sec PoC data""" @@ -1066,8 +1081,8 @@ async def 
sync_nomi_sec(background_tasks: BackgroundTasks, job_type='nomi_sec_sync', status='pending', job_metadata={ - 'cve_id': cve_id, - 'batch_size': batch_size + 'cve_id': request.cve_id, + 'batch_size': request.batch_size } ) db.add(job) @@ -1087,18 +1102,18 @@ async def sync_nomi_sec(background_tasks: BackgroundTasks, from nomi_sec_client import NomiSecClient client = NomiSecClient(db) - if cve_id: + if request.cve_id: # Sync specific CVE if job_cancellation_flags.get(job_id, False): logger.info(f"Job {job_id} cancelled before starting") return - result = await client.sync_cve_pocs(cve_id) - logger.info(f"Nomi-sec sync for {cve_id}: {result}") + result = await client.sync_cve_pocs(request.cve_id) + logger.info(f"Nomi-sec sync for {request.cve_id}: {result}") else: # Sync all CVEs with cancellation support result = await client.bulk_sync_all_cves( - batch_size=batch_size, + batch_size=request.batch_size, cancellation_flag=lambda: job_cancellation_flags.get(job_id, False) ) logger.info(f"Nomi-sec bulk sync completed: {result}") @@ -1127,13 +1142,162 @@ async def sync_nomi_sec(background_tasks: BackgroundTasks, background_tasks.add_task(sync_task) return { - "message": f"Nomi-sec sync started" + (f" for {cve_id}" if cve_id else " for all CVEs"), + "message": f"Nomi-sec sync started" + (f" for {request.cve_id}" if request.cve_id else " for all CVEs"), "status": "started", "job_id": job_id, - "cve_id": cve_id, - "batch_size": batch_size + "cve_id": request.cve_id, + "batch_size": request.batch_size } +@app.post("/api/sync-github-pocs") +async def sync_github_pocs(background_tasks: BackgroundTasks, + request: GitHubPoCSyncRequest, + db: Session = Depends(get_db)): + """Synchronize GitHub PoC data""" + + # Create job record + job = BulkProcessingJob( + job_type='github_poc_sync', + status='pending', + job_metadata={ + 'cve_id': request.cve_id, + 'batch_size': request.batch_size + } + ) + db.add(job) + db.commit() + db.refresh(job) + + job_id = str(job.id) + running_jobs[job_id] = job + job_cancellation_flags[job_id] = False + + async def sync_task(): + try: + job.status = 'running' + job.started_at = datetime.utcnow() + db.commit() + + client = GitHubPoCClient(db) + + if request.cve_id: + # Sync specific CVE + if job_cancellation_flags.get(job_id, False): + logger.info(f"Job {job_id} cancelled before starting") + return + + result = await client.sync_cve_pocs(request.cve_id) + logger.info(f"GitHub PoC sync for {request.cve_id}: {result}") + else: + # Sync all CVEs with cancellation support + result = await client.bulk_sync_all_cves(batch_size=request.batch_size) + logger.info(f"GitHub PoC bulk sync completed: {result}") + + # Update job status if not cancelled + if not job_cancellation_flags.get(job_id, False): + job.status = 'completed' + job.completed_at = datetime.utcnow() + db.commit() + + except Exception as e: + if not job_cancellation_flags.get(job_id, False): + job.status = 'failed' + job.error_message = str(e) + job.completed_at = datetime.utcnow() + db.commit() + + logger.error(f"GitHub PoC sync failed: {e}") + import traceback + traceback.print_exc() + finally: + # Clean up tracking + running_jobs.pop(job_id, None) + job_cancellation_flags.pop(job_id, None) + + background_tasks.add_task(sync_task) + + return { + "message": f"GitHub PoC sync started" + (f" for {request.cve_id}" if request.cve_id else " for all CVEs"), + "status": "started", + "job_id": job_id, + "cve_id": request.cve_id, + "batch_size": request.batch_size + } + +@app.get("/api/github-poc-stats") +async def 
get_github_poc_stats(db: Session = Depends(get_db)): + """Get GitHub PoC-related statistics""" + + try: + # Get basic statistics + github_poc_rules = db.query(SigmaRule).filter(SigmaRule.poc_source == 'github_poc').count() + cves_with_github_pocs = db.query(CVE).filter( + CVE.poc_data.isnot(None), # Check if poc_data exists + func.json_extract_path_text(CVE.poc_data, '0', 'source') == 'github_poc' + ).count() + + # Get quality distribution + quality_distribution = {} + try: + quality_results = db.query( + func.json_extract_path_text(CVE.poc_data, '0', 'quality_analysis', 'quality_tier').label('tier'), + func.count().label('count') + ).filter( + CVE.poc_data.isnot(None), + func.json_extract_path_text(CVE.poc_data, '0', 'source') == 'github_poc' + ).group_by('tier').all() + + for tier, count in quality_results: + if tier: + quality_distribution[tier] = count + except Exception as e: + logger.warning(f"Error getting quality distribution: {e}") + quality_distribution = {} + + # Calculate average quality score + try: + avg_quality = db.query( + func.avg(func.json_extract_path_text(CVE.poc_data, '0', 'quality_analysis', 'quality_score').cast(Integer)) + ).filter( + CVE.poc_data.isnot(None), + func.json_extract_path_text(CVE.poc_data, '0', 'source') == 'github_poc' + ).scalar() or 0 + except Exception as e: + logger.warning(f"Error calculating average quality: {e}") + avg_quality = 0 + + return { + 'github_poc_rules': github_poc_rules, + 'cves_with_github_pocs': cves_with_github_pocs, + 'quality_distribution': quality_distribution, + 'average_quality_score': float(avg_quality) if avg_quality else 0, + 'source': 'github_poc' + } + except Exception as e: + logger.error(f"Error getting GitHub PoC stats: {e}") + return {"error": str(e)} + +@app.get("/api/github-poc-status") +async def get_github_poc_status(db: Session = Depends(get_db)): + """Get GitHub PoC data availability status""" + + try: + client = GitHubPoCClient(db) + + # Check if GitHub PoC data is available + github_poc_data = client.load_github_poc_data() + + return { + 'github_poc_data_available': len(github_poc_data) > 0, + 'total_cves_with_pocs': len(github_poc_data), + 'sample_cve_ids': list(github_poc_data.keys())[:10], # First 10 CVE IDs + 'data_path': str(client.github_poc_path), + 'path_exists': client.github_poc_path.exists() + } + except Exception as e: + logger.error(f"Error checking GitHub PoC status: {e}") + return {"error": str(e)} + @app.get("/api/bulk-jobs") async def get_bulk_jobs(limit: int = 10, db: Session = Depends(get_db)): """Get bulk processing job status""" @@ -1202,7 +1366,7 @@ async def get_poc_stats(db: Session = Depends(get_db)): @app.post("/api/regenerate-rules") async def regenerate_sigma_rules(background_tasks: BackgroundTasks, - force: bool = False, + request: RuleRegenRequest, db: Session = Depends(get_db)): """Regenerate SIGMA rules using enhanced nomi-sec data""" @@ -1223,7 +1387,7 @@ async def regenerate_sigma_rules(background_tasks: BackgroundTasks, SigmaRule.cve_id == cve.cve_id ).first() - if existing_rule and existing_rule.poc_source == 'nomi_sec' and not force: + if existing_rule and existing_rule.poc_source == 'nomi_sec' and not request.force: continue # Generate enhanced rule @@ -1247,7 +1411,7 @@ async def regenerate_sigma_rules(background_tasks: BackgroundTasks, return { "message": "SIGMA rule regeneration started", "status": "started", - "force": force + "force": request.force } @app.post("/api/cancel-job/{job_id}") diff --git a/backend/mcdevitt_poc_client.py b/backend/mcdevitt_poc_client.py 
new file mode 100644 index 0000000..b4f1c63 --- /dev/null +++ b/backend/mcdevitt_poc_client.py @@ -0,0 +1,593 @@ +""" +GitHub PoC Collector Integration Client +Reads JSON files from github_poc_collector and fetches GitHub repo contents for SIGMA rule generation +""" + +import aiohttp +import asyncio +import json +import logging +import os +from datetime import datetime +from typing import Dict, List, Optional, Tuple +from sqlalchemy.orm import Session +from pathlib import Path +import re +import base64 + +# Configure logging +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +class GitHubPoCClient: + """Client for processing GitHub PoC collector data and fetching GitHub contents""" + + def __init__(self, db_session: Session, github_token: Optional[str] = None): + self.db_session = db_session + self.github_token = github_token or os.getenv('GITHUB_TOKEN') + self.base_url = "https://api.github.com" + + # Rate limiting - GitHub API: 5000 requests/hour with token, 60 without + self.rate_limit_delay = 0.8 if self.github_token else 60.0 # seconds + self.last_request_time = 0 + + # GitHub PoC collector path (mounted in Docker container) + self.github_poc_path = Path("/github_poc_collector/exploits") + + # Cache for repository contents + self.repo_cache = {} + self.cache_ttl = 1800 # 30 minutes + + def load_github_poc_data(self) -> Dict[str, List[dict]]: + """Load all PoC data from GitHub PoC collector JSON files""" + poc_data = {} + + if not self.github_poc_path.exists(): + logger.error(f"GitHub PoC path not found: {self.github_poc_path}") + return poc_data + + # Walk through year directories + for year_dir in self.github_poc_path.iterdir(): + if year_dir.is_dir(): + for json_file in year_dir.glob("*.json"): + try: + cve_id = json_file.stem # CVE-YYYY-NNNN + with open(json_file, 'r') as f: + repos = json.load(f) + + # Filter out repositories with no stars or very low quality + filtered_repos = [] + for repo in repos: + if isinstance(repo, dict) and repo.get('html_url'): + # Basic quality filtering + stars = repo.get('stargazers_count', 0) + description = repo.get('description', '') or '' + + # Skip very low quality repos + if stars > 0 or len(description) > 20: + filtered_repos.append(repo) + + if filtered_repos: + poc_data[cve_id] = filtered_repos + + except Exception as e: + logger.error(f"Error loading {json_file}: {e}") + + logger.info(f"Loaded PoC data for {len(poc_data)} CVEs") + return poc_data + + async def _make_github_request(self, session: aiohttp.ClientSession, + url: str, params: dict = None) -> Optional[dict]: + """Make a rate-limited request to GitHub API""" + try: + # Rate limiting + current_time = asyncio.get_event_loop().time() + time_since_last = current_time - self.last_request_time + if time_since_last < self.rate_limit_delay: + await asyncio.sleep(self.rate_limit_delay - time_since_last) + + headers = { + 'Accept': 'application/vnd.github.v3+json', + 'User-Agent': 'Auto-SIGMA-Rule-Generator/1.0' + } + + if self.github_token: + headers['Authorization'] = f'token {self.github_token}' + + async with session.get(url, params=params, headers=headers, timeout=30) as response: + self.last_request_time = asyncio.get_event_loop().time() + + if response.status == 200: + return await response.json() + elif response.status == 403: + logger.warning(f"Rate limit exceeded for {url}") + # Wait and retry once + await asyncio.sleep(60) + return await self._make_github_request(session, url, params) + else: + logger.warning(f"GitHub API request failed: 
{response.status} for {url}") + return None + + except Exception as e: + logger.error(f"Error making GitHub request to {url}: {e}") + return None + + async def get_repository_contents(self, repo_url: str) -> Dict[str, any]: + """Fetch repository contents from GitHub API""" + # Extract owner/repo from URL + try: + # Parse GitHub URL: https://github.com/owner/repo + parts = repo_url.replace('https://github.com/', '').split('/') + if len(parts) < 2: + return {} + + owner, repo = parts[0], parts[1] + repo_key = f"{owner}/{repo}" + + # Check cache + if repo_key in self.repo_cache: + cached_data, timestamp = self.repo_cache[repo_key] + if (datetime.now().timestamp() - timestamp) < self.cache_ttl: + return cached_data + + async with aiohttp.ClientSession() as session: + # Get repository metadata + repo_url_api = f"{self.base_url}/repos/{owner}/{repo}" + repo_data = await self._make_github_request(session, repo_url_api) + + if not repo_data: + return {} + + # Get repository contents (files) + contents_url = f"{self.base_url}/repos/{owner}/{repo}/contents" + contents_data = await self._make_github_request(session, contents_url) + + # Get README content if available + readme_content = "" + if contents_data: + readme_files = [f for f in contents_data + if f.get('name', '').lower().startswith('readme')] + if readme_files: + readme_file = readme_files[0] + readme_url = readme_file.get('download_url') + if readme_url: + try: + async with session.get(readme_url) as readme_response: + if readme_response.status == 200: + readme_content = await readme_response.text() + except Exception as e: + logger.warning(f"Error fetching README: {e}") + + # Extract key files (potential exploit code) + key_files = [] + if contents_data: + for file_info in contents_data: + if file_info.get('type') == 'file': + file_name = file_info.get('name', '') + file_size = file_info.get('size', 0) + + # Focus on code files that might contain exploits + if (file_name.lower().endswith(('.py', '.sh', '.pl', '.rb', '.js', '.c', '.cpp', '.java', '.go', '.rs', '.php')) + and file_size < 50000): # Skip very large files + + try: + file_content = await self._get_file_content(session, file_info.get('download_url')) + if file_content: + key_files.append({ + 'name': file_name, + 'size': file_size, + 'content': file_content[:10000] # Truncate very long files + }) + except Exception as e: + logger.warning(f"Error fetching file {file_name}: {e}") + + result = { + 'repo_data': repo_data, + 'readme_content': readme_content, + 'key_files': key_files, + 'fetched_at': datetime.now().isoformat() + } + + # Cache the result + self.repo_cache[repo_key] = (result, datetime.now().timestamp()) + + return result + + except Exception as e: + logger.error(f"Error fetching repository contents for {repo_url}: {e}") + return {} + + async def _get_file_content(self, session: aiohttp.ClientSession, download_url: str) -> Optional[str]: + """Fetch individual file content""" + try: + async with session.get(download_url, timeout=15) as response: + if response.status == 200: + # Try to decode as text + try: + content = await response.text() + return content + except: + # If text decoding fails, try binary + content = await response.read() + return content.decode('utf-8', errors='ignore') + return None + except Exception as e: + logger.warning(f"Error fetching file content: {e}") + return None + + def analyze_repository_for_indicators(self, repo_data: Dict[str, any]) -> Dict[str, any]: + """Analyze repository contents for exploit indicators""" + indicators = { + 
"processes": [], + "files": [], + "network": [], + "registry": [], + "commands": [], + "urls": [], + "techniques": [], + "cve_references": [], + "exploit_techniques": [] + } + + # Combine all text content for analysis + text_sources = [] + + # Add README content + if repo_data.get('readme_content'): + text_sources.append(repo_data['readme_content']) + + # Add repository description + if repo_data.get('repo_data', {}).get('description'): + text_sources.append(repo_data['repo_data']['description']) + + # Add key file contents + for file_info in repo_data.get('key_files', []): + text_sources.append(file_info.get('content', '')) + + full_text = " ".join(text_sources).lower() + + # Extract CVE references + cve_pattern = r'cve-\d{4}-\d{4,7}' + cve_matches = re.findall(cve_pattern, full_text, re.IGNORECASE) + indicators["cve_references"] = list(set(cve_matches)) + + # Enhanced process patterns + process_patterns = [ + r'\b(cmd\.exe|powershell\.exe|bash|sh|python\.exe|java\.exe|node\.exe)\b', + r'\b(createprocess|shellexecute|system|winexec|execve|fork|spawn)\b', + r'\b(reverse.?shell|bind.?shell|web.?shell|backdoor)\b', + r'\b(mshta\.exe|rundll32\.exe|regsvr32\.exe|wscript\.exe|cscript\.exe)\b', + r'\b(certutil\.exe|bitsadmin\.exe|schtasks\.exe|wmic\.exe)\b' + ] + + for pattern in process_patterns: + matches = re.findall(pattern, full_text, re.IGNORECASE) + indicators["processes"].extend(matches) + + # Enhanced file patterns + file_patterns = [ + r'\b([a-zA-Z]:\\[^\\\s]+\\[^\\\s]+\.[a-zA-Z0-9]+)\b', # Windows paths + r'\b(/[^/\s]+/[^/\s]+\.[a-zA-Z0-9]+)\b', # Unix paths + r'\b(\w+\.(exe|dll|bat|ps1|py|sh|jar|php|jsp|asp|aspx|bin))\b', # Executable files + r'\b(payload|exploit|shell|backdoor|trojan|malware)\b' # Malicious indicators + ] + + for pattern in file_patterns: + matches = re.findall(pattern, full_text, re.IGNORECASE) + if matches and isinstance(matches[0], tuple): + indicators["files"].extend([m[0] for m in matches]) + else: + indicators["files"].extend(matches) + + # Enhanced network patterns + network_patterns = [ + r'\b(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\b', # IP addresses + r'\b(https?://[^\s<>"]+)\b', # URLs + r'\b([a-zA-Z0-9-]+\.[a-zA-Z]{2,})\b', # Domain names + r'\b(port|socket|connect|bind|listen)\s*[=:]\s*(\d+)\b' # Port references + ] + + for pattern in network_patterns: + matches = re.findall(pattern, full_text, re.IGNORECASE) + if 'http' in pattern: + indicators["urls"].extend(matches) + else: + indicators["network"].extend([m if isinstance(m, str) else m[0] for m in matches]) + + # Enhanced command patterns + command_patterns = [ + r'\b(curl|wget|nc|netcat|ncat|telnet|ssh|scp|rsync)\b', + r'\b(whoami|id|uname|systeminfo|ipconfig|ifconfig|ps|top|netstat)\b', + r'\b(cat|type|more|less|head|tail|find|grep|awk|sed)\b', + r'\b(echo|print|printf|base64|decode|encode)\b', + r'\b(invoke|iex|downloadstring|powershell|cmd)\b', + r'\b(net\s+user|net\s+localgroup|net\s+share)\b', + r'\b(sc\s+create|sc\s+start|sc\s+stop|service)\b' + ] + + for pattern in command_patterns: + matches = re.findall(pattern, full_text, re.IGNORECASE) + indicators["commands"].extend(matches) + + # Registry patterns (Windows) + registry_patterns = [ + r'\b(HKEY_[A-Z_]+)\b', + r'\b(HKLM|HKCU|HKCR|HKU|HKCC)\b', + r'\b(reg\s+add|reg\s+query|reg\s+delete|regedit)\b', + r'\b(SOFTWARE\\\\[^\\\s]+)\b', + r'\b(SYSTEM\\\\[^\\\s]+)\b' + ] + + for pattern in registry_patterns: + matches = re.findall(pattern, full_text, re.IGNORECASE) + indicators["registry"].extend(matches) + + # MITRE ATT&CK technique indicators + 
technique_patterns = [ + r'\b(privilege.?escalation|lateral.?movement|persistence|evasion)\b', + r'\b(injection|hijack|bypass|overflow|buffer.?overflow)\b', + r'\b(credential.?dump|password.?spray|brute.?force)\b', + r'\b(remote.?code.?execution|arbitrary.?code|code.?injection)\b', + r'\b(dll.?injection|process.?hollow|process.?injection)\b' + ] + + for pattern in technique_patterns: + matches = re.findall(pattern, full_text, re.IGNORECASE) + indicators["techniques"].extend(matches) + + # Clean up and deduplicate all indicators + for key in indicators: + # Remove empty strings and duplicates + indicators[key] = list(set([ + item.strip() for item in indicators[key] + if item and len(str(item).strip()) > 2 + ])) + # Limit to reasonable number of indicators + indicators[key] = indicators[key][:20] + + return indicators + + def calculate_quality_score(self, repo_info: dict, repo_contents: Dict[str, any]) -> Dict[str, any]: + """Calculate quality score for a repository""" + quality_score = 0 + factors = {} + + # Star count factor (0-30 points) + stars = repo_info.get('stargazers_count', 0) + star_score = min(stars * 3, 30) # 3 points per star, max 30 + quality_score += star_score + factors["star_score"] = star_score + + # Fork count factor (0-20 points) + forks = repo_info.get('forks_count', 0) + fork_score = min(forks * 2, 20) # 2 points per fork, max 20 + quality_score += fork_score + factors["fork_score"] = fork_score + + # Recency factor (0-20 points) + try: + created_at = datetime.fromisoformat(repo_info.get('created_at', '').replace('Z', '+00:00')) + days_old = (datetime.now(created_at.tzinfo) - created_at).days + recency_score = max(20 - (days_old // 30), 0) # Lose 1 point per month + quality_score += recency_score + factors["recency_score"] = recency_score + except: + factors["recency_score"] = 0 + + # Description quality factor (0-15 points) + description = repo_info.get('description', '') or '' + desc_score = min(len(description) // 10, 15) # 1 point per 10 chars, max 15 + quality_score += desc_score + factors["description_score"] = desc_score + + # README quality factor (0-15 points) + readme_content = repo_contents.get('readme_content', '') + readme_score = min(len(readme_content) // 50, 15) # 1 point per 50 chars, max 15 + quality_score += readme_score + factors["readme_score"] = readme_score + + return { + "quality_score": quality_score, + "factors": factors, + "quality_tier": self._get_quality_tier(quality_score) + } + + def _get_quality_tier(self, score: int) -> str: + """Get quality tier based on score""" + if score >= 80: + return "excellent" + elif score >= 60: + return "good" + elif score >= 40: + return "fair" + elif score >= 20: + return "poor" + else: + return "very_poor" + + async def sync_cve_pocs(self, cve_id: str) -> dict: + """Synchronize PoC data for a specific CVE using GitHub PoC data""" + from main import CVE, SigmaRule + + # Get existing CVE + cve = self.db_session.query(CVE).filter(CVE.cve_id == cve_id).first() + if not cve: + logger.warning(f"CVE {cve_id} not found in database") + return {"error": "CVE not found"} + + # Load GitHub PoC data + github_poc_data = self.load_github_poc_data() + + if cve_id not in github_poc_data: + logger.info(f"No PoCs found for {cve_id} in GitHub PoC data") + return {"cve_id": cve_id, "pocs_found": 0} + + repos = github_poc_data[cve_id] + + # Process each repository + poc_data = [] + github_repos = [] + total_quality_score = 0 + + for repo_info in repos: + try: + # Fetch repository contents + repo_contents = await 
self.get_repository_contents(repo_info['html_url']) + + # Analyze for indicators + indicators = self.analyze_repository_for_indicators(repo_contents) + + # Calculate quality score + quality_analysis = self.calculate_quality_score(repo_info, repo_contents) + + poc_entry = { + "id": repo_info.get('name', ''), + "name": repo_info.get('name', ''), + "full_name": repo_info.get('full_name', ''), + "html_url": repo_info.get('html_url', ''), + "description": repo_info.get('description', ''), + "stargazers_count": repo_info.get('stargazers_count', 0), + "forks_count": repo_info.get('forks_count', 0), + "created_at": repo_info.get('created_at', ''), + "quality_analysis": quality_analysis, + "exploit_indicators": indicators, + "source": "github_poc" + } + + poc_data.append(poc_entry) + github_repos.append(repo_info.get('html_url', '')) + total_quality_score += quality_analysis["quality_score"] + + except Exception as e: + logger.error(f"Error processing repo {repo_info.get('html_url', '')}: {e}") + continue + + # Update CVE with PoC data + cve.poc_count = len(poc_data) + cve.poc_data = poc_data + cve.updated_at = datetime.utcnow() + + # Update or create SIGMA rule with enhanced PoC data + sigma_rule = self.db_session.query(SigmaRule).filter( + SigmaRule.cve_id == cve_id + ).first() + + if sigma_rule: + sigma_rule.poc_source = 'github_poc' + sigma_rule.poc_quality_score = total_quality_score // len(poc_data) if poc_data else 0 + sigma_rule.nomi_sec_data = { + "total_pocs": len(poc_data), + "average_quality": total_quality_score // len(poc_data) if poc_data else 0, + "best_poc": max(poc_data, key=lambda x: x["quality_analysis"]["quality_score"]) if poc_data else None, + "total_stars": sum(p["stargazers_count"] for p in poc_data), + "source": "github_poc" + } + sigma_rule.github_repos = github_repos + sigma_rule.updated_at = datetime.utcnow() + + # Extract best exploit indicators + best_indicators = {} + for poc in poc_data: + for key, values in poc["exploit_indicators"].items(): + if key not in best_indicators: + best_indicators[key] = [] + best_indicators[key].extend(values) + + # Deduplicate and store + for key in best_indicators: + best_indicators[key] = list(set(best_indicators[key])) + + sigma_rule.exploit_indicators = json.dumps(best_indicators) + + self.db_session.commit() + + logger.info(f"Synchronized {len(poc_data)} PoCs for {cve_id}") + + return { + "cve_id": cve_id, + "pocs_found": len(poc_data), + "total_quality_score": total_quality_score, + "average_quality": total_quality_score // len(poc_data) if poc_data else 0, + "github_repos": github_repos, + "source": "github_poc" + } + + async def bulk_sync_all_cves(self, batch_size: int = 50) -> dict: + """Bulk synchronize all CVEs with GitHub PoC data""" + from main import CVE, BulkProcessingJob + + # Load all GitHub PoC data first + github_poc_data = self.load_github_poc_data() + + if not github_poc_data: + return {"error": "No GitHub PoC data found"} + + # Create bulk processing job + job = BulkProcessingJob( + job_type='github_poc_sync', + status='running', + started_at=datetime.utcnow(), + total_items=len(github_poc_data), + job_metadata={'batch_size': batch_size} + ) + self.db_session.add(job) + self.db_session.commit() + + total_processed = 0 + total_found = 0 + results = [] + + try: + # Process each CVE that has PoC data + cve_ids = list(github_poc_data.keys()) + + for i in range(0, len(cve_ids), batch_size): + batch = cve_ids[i:i + batch_size] + + for cve_id in batch: + try: + result = await self.sync_cve_pocs(cve_id) +
total_processed += 1 + + if result.get("pocs_found", 0) > 0: + total_found += result["pocs_found"] + results.append(result) + + job.processed_items += 1 + + # Small delay to avoid overwhelming GitHub API + await asyncio.sleep(1) + + except Exception as e: + logger.error(f"Error syncing PoCs for {cve_id}: {e}") + job.failed_items += 1 + + # Commit after each batch + self.db_session.commit() + logger.info(f"Processed batch {i//batch_size + 1}/{(len(cve_ids) + batch_size - 1)//batch_size}") + + # Update job status + job.status = 'completed' + job.completed_at = datetime.utcnow() + job.job_metadata.update({ + 'total_processed': total_processed, + 'total_pocs_found': total_found, + 'cves_with_pocs': len(results) + }) + + except Exception as e: + job.status = 'failed' + job.error_message = str(e) + job.completed_at = datetime.utcnow() + logger.error(f"Bulk McDevitt sync job failed: {e}") + + finally: + self.db_session.commit() + + return { + 'job_id': str(job.id), + 'status': job.status, + 'total_processed': total_processed, + 'total_pocs_found': total_found, + 'cves_with_pocs': len(results) + } \ No newline at end of file diff --git a/backend/requirements.txt b/backend/requirements.txt index e6440fd..ead22b2 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -14,3 +14,4 @@ beautifulsoup4==4.12.2 lxml==4.9.3 aiohttp==3.9.1 aiofiles +pyyaml==6.0.1 diff --git a/backend/templates/README.md b/backend/templates/README.md new file mode 100644 index 0000000..864e53b --- /dev/null +++ b/backend/templates/README.md @@ -0,0 +1,128 @@ +# SIGMA Rule Templates + +This directory contains YAML template files for generating SIGMA rules automatically based on CVE and PoC data. + +## Template Structure + +Each template file is a YAML file with the following structure: + +```yaml +template_name: "Template Name" +description: "Description of what this template detects" +applicable_product_patterns: + - "pattern1" + - "pattern2" +template_content: | + title: {{TITLE}} + id: {{RULE_ID}} + # ... SIGMA rule content with placeholders +``` + +## Template Fields + +- **template_name**: Human-readable name for the template +- **description**: What the template is designed to detect +- **applicable_product_patterns**: List of keywords that help match CVEs to this template +- **template_content**: The actual SIGMA rule template with placeholders + +## Placeholders + +Templates use the following placeholders that get replaced during rule generation: + +- `{{TITLE}}`: Rule title based on CVE +- `{{RULE_ID}}`: Unique rule identifier +- `{{DESCRIPTION}}`: CVE description and context +- `{{DATE}}`: Rule creation date +- `{{REFERENCES}}`: CVE references and PoC URLs +- `{{TAGS}}`: Relevant tags based on CVE data +- `{{LEVEL}}`: Severity level (low, medium, high, critical) +- `{{PROCESSES}}`: Process names from PoC analysis +- `{{COMMANDS}}`: Command patterns from PoC analysis +- `{{FILES}}`: File paths from PoC analysis +- `{{NETWORK}}`: Network indicators from PoC analysis +- `{{URLS}}`: URL patterns from PoC analysis +- `{{REGISTRY}}`: Registry keys from PoC analysis + +## Available Templates + +### Core Categories +1. **process_execution.yaml** - Process creation and execution +2. **network_connection.yaml** - Network connections and communications +3. **file_system_activity.yaml** - File system operations +4. **registry_modification.yaml** - Windows registry changes + +### Attack Techniques +5. **powershell_execution.yaml** - PowerShell-based attacks +6. 
**web_application_attack.yaml** - Web application vulnerabilities +7. **command_injection.yaml** - Command injection attacks +8. **privilege_escalation.yaml** - Privilege escalation attempts +9. **credential_access.yaml** - Credential theft and access +10. **persistence.yaml** - Persistence mechanisms +11. **lateral_movement.yaml** - Lateral movement techniques +12. **service_manipulation.yaml** - Windows service manipulation + +## Usage + +### Initialize Templates +```bash +# Load all templates into the database +docker-compose exec backend python initialize_templates.py + +# List available templates +docker-compose exec backend python initialize_templates.py list +``` + +### Adding New Templates + +1. Create a new YAML file in this directory +2. Follow the template structure above +3. Choose appropriate `applicable_product_patterns` keywords +4. Use relevant placeholders in your `template_content` +5. Run the initialization script to load into database + +### Template Matching + +Templates are automatically matched to CVEs based on: +- Product patterns matching CVE affected products +- PoC analysis indicators +- CVE description keywords + +## Best Practices + +1. **Specific Patterns**: Use specific product patterns for better matching +2. **False Positives**: Include realistic false positive scenarios +3. **Conditions**: Use appropriate SIGMA detection conditions +4. **Log Sources**: Match log sources to the type of activity being detected +5. **Severity**: Use appropriate severity levels based on the attack impact + +## Example Template + +```yaml +template_name: "Example Detection" +description: "Detects example malicious activity" +applicable_product_patterns: + - "example" + - "software" +template_content: | + title: {{TITLE}} + id: {{RULE_ID}} + status: experimental + description: {{DESCRIPTION}} + author: CVE-SIGMA Auto Generator + date: {{DATE}} + references: + {{REFERENCES}} + tags: + {{TAGS}} + logsource: + category: process_creation + product: windows + detection: + selection: + Image|endswith: + {{PROCESSES}} + condition: selection + falsepositives: + - Legitimate use cases + level: {{LEVEL}} +``` \ No newline at end of file diff --git a/backend/templates/command_injection.yaml b/backend/templates/command_injection.yaml new file mode 100644 index 0000000..988fc6d --- /dev/null +++ b/backend/templates/command_injection.yaml @@ -0,0 +1,44 @@ +template_name: "Command Injection Detection" +description: "Detects command injection attempts based on PoC exploit indicators" +applicable_product_patterns: + - "injection" + - "command" + - "shell" + - "exec" + - "system" +template_content: | + title: {{TITLE}} + id: {{RULE_ID}} + status: experimental + description: {{DESCRIPTION}} + author: CVE-SIGMA Auto Generator + date: {{DATE}} + references: + {{REFERENCES}} + tags: + {{TAGS}} + logsource: + category: process_creation + product: windows + detection: + selection_shell: + Image|endswith: + - '\\cmd.exe' + - '\\powershell.exe' + - '\\bash.exe' + - '\\sh.exe' + selection_injection: + CommandLine|contains: + {{COMMANDS}} + selection_patterns: + CommandLine|contains: + - '&' + - '|' + - ';' + - '`' + - '$(' + condition: (selection_shell and selection_injection) or selection_patterns + falsepositives: + - Legitimate command line usage + - System administration scripts + level: {{LEVEL}} \ No newline at end of file diff --git a/backend/templates/credential_access.yaml b/backend/templates/credential_access.yaml new file mode 100644 index 0000000..759b56c --- /dev/null +++ 
b/backend/templates/credential_access.yaml @@ -0,0 +1,39 @@ +template_name: "Credential Access Detection" +description: "Detects credential access attempts based on PoC exploit indicators" +applicable_product_patterns: + - "credential" + - "password" + - "hash" + - "dump" + - "lsass" + - "mimikatz" +template_content: | + title: {{TITLE}} + id: {{RULE_ID}} + status: experimental + description: {{DESCRIPTION}} + author: CVE-SIGMA Auto Generator + date: {{DATE}} + references: + {{REFERENCES}} + tags: + {{TAGS}} + logsource: + category: process_creation + product: windows + detection: + selection_lsass: + Image|contains: + - 'lsass' + - 'mimikatz' + selection_creds: + CommandLine|contains: + {{COMMANDS}} + selection_files: + TargetFilename|contains: + {{FILES}} + condition: selection_lsass or selection_creds or selection_files + falsepositives: + - Legitimate authentication processes + - Password management software + level: {{LEVEL}} \ No newline at end of file diff --git a/backend/templates/file_system_activity.yaml b/backend/templates/file_system_activity.yaml new file mode 100644 index 0000000..ef7c1b4 --- /dev/null +++ b/backend/templates/file_system_activity.yaml @@ -0,0 +1,30 @@ +template_name: "File System Activity Detection" +description: "Detects suspicious file system activity based on PoC exploit indicators" +applicable_product_patterns: + - "file" + - "filesystem" + - "upload" + - "download" +template_content: | + title: {{TITLE}} + id: {{RULE_ID}} + status: experimental + description: {{DESCRIPTION}} + author: CVE-SIGMA Auto Generator + date: {{DATE}} + references: + {{REFERENCES}} + tags: + {{TAGS}} + logsource: + category: file_event + product: windows + detection: + selection: + TargetFilename|contains: + {{FILES}} + condition: selection + falsepositives: + - Legitimate file operations + - Software installations + level: {{LEVEL}} \ No newline at end of file diff --git a/backend/templates/lateral_movement.yaml b/backend/templates/lateral_movement.yaml new file mode 100644 index 0000000..98daacf --- /dev/null +++ b/backend/templates/lateral_movement.yaml @@ -0,0 +1,42 @@ +template_name: "Lateral Movement Detection" +description: "Detects lateral movement attempts based on PoC exploit indicators" +applicable_product_patterns: + - "lateral" + - "movement" + - "remote" + - "psexec" + - "wmic" + - "rdp" +template_content: | + title: {{TITLE}} + id: {{RULE_ID}} + status: experimental + description: {{DESCRIPTION}} + author: CVE-SIGMA Auto Generator + date: {{DATE}} + references: + {{REFERENCES}} + tags: + {{TAGS}} + logsource: + category: process_creation + product: windows + detection: + selection_psexec: + Image|endswith: '\\psexec.exe' + CommandLine|contains: + {{COMMANDS}} + selection_wmic: + Image|endswith: '\\wmic.exe' + CommandLine|contains: + - 'process' + - 'call' + - 'create' + selection_remote: + CommandLine|contains: + {{NETWORK}} + condition: selection_psexec or selection_wmic or selection_remote + falsepositives: + - Legitimate remote administration + - System management tools + level: {{LEVEL}} \ No newline at end of file diff --git a/backend/templates/network_connection.yaml b/backend/templates/network_connection.yaml new file mode 100644 index 0000000..5151351 --- /dev/null +++ b/backend/templates/network_connection.yaml @@ -0,0 +1,36 @@ +template_name: "Network Connection Detection" +description: "Detects suspicious network connections based on PoC exploit indicators" +applicable_product_patterns: + - "network" + - "web" + - "http" + - "https" + - "tcp" + - "udp" 
+template_content: | + title: {{TITLE}} + id: {{RULE_ID}} + status: experimental + description: {{DESCRIPTION}} + author: CVE-SIGMA Auto Generator + date: {{DATE}} + references: + {{REFERENCES}} + tags: + {{TAGS}} + logsource: + category: network_connection + product: windows + detection: + selection: + Initiated: true + DestinationIp: + {{NETWORK}} + selection_url: + DestinationHostname|contains: + {{URLS}} + condition: selection or selection_url + falsepositives: + - Legitimate network connections + - Software updates + level: {{LEVEL}} \ No newline at end of file diff --git a/backend/templates/persistence.yaml b/backend/templates/persistence.yaml new file mode 100644 index 0000000..a9c53a7 --- /dev/null +++ b/backend/templates/persistence.yaml @@ -0,0 +1,40 @@ +template_name: "Persistence Detection" +description: "Detects persistence mechanisms based on PoC exploit indicators" +applicable_product_patterns: + - "persistence" + - "startup" + - "autorun" + - "scheduled" + - "task" + - "cron" +template_content: | + title: {{TITLE}} + id: {{RULE_ID}} + status: experimental + description: {{DESCRIPTION}} + author: CVE-SIGMA Auto Generator + date: {{DATE}} + references: + {{REFERENCES}} + tags: + {{TAGS}} + logsource: + category: process_creation + product: windows + detection: + selection_schtasks: + Image|endswith: '\\schtasks.exe' + CommandLine|contains: + {{COMMANDS}} + selection_startup: + TargetFilename|contains: + - '\\Startup\\' + - '\\Start Menu\\' + selection_registry: + TargetObject|contains: + {{REGISTRY}} + condition: selection_schtasks or selection_startup or selection_registry + falsepositives: + - Legitimate software installations + - System configuration changes + level: {{LEVEL}} \ No newline at end of file diff --git a/backend/templates/powershell_execution.yaml b/backend/templates/powershell_execution.yaml new file mode 100644 index 0000000..bdb2ef7 --- /dev/null +++ b/backend/templates/powershell_execution.yaml @@ -0,0 +1,32 @@ +template_name: "PowerShell Execution Detection" +description: "Detects suspicious PowerShell execution based on PoC exploit indicators" +applicable_product_patterns: + - "powershell" + - "windows" + - "microsoft" +template_content: | + title: {{TITLE}} + id: {{RULE_ID}} + status: experimental + description: {{DESCRIPTION}} + author: CVE-SIGMA Auto Generator + date: {{DATE}} + references: + {{REFERENCES}} + tags: + {{TAGS}} + logsource: + category: process_creation + product: windows + detection: + selection: + Image|endswith: + - '\\powershell.exe' + - '\\pwsh.exe' + CommandLine|contains: + {{COMMANDS}} + condition: selection + falsepositives: + - Legitimate PowerShell scripts + - System administration + level: {{LEVEL}} \ No newline at end of file diff --git a/backend/templates/privilege_escalation.yaml b/backend/templates/privilege_escalation.yaml new file mode 100644 index 0000000..ea61b5c --- /dev/null +++ b/backend/templates/privilege_escalation.yaml @@ -0,0 +1,41 @@ +template_name: "Privilege Escalation Detection" +description: "Detects privilege escalation attempts based on PoC exploit indicators" +applicable_product_patterns: + - "privilege" + - "escalation" + - "admin" + - "root" + - "sudo" + - "runas" +template_content: | + title: {{TITLE}} + id: {{RULE_ID}} + status: experimental + description: {{DESCRIPTION}} + author: CVE-SIGMA Auto Generator + date: {{DATE}} + references: + {{REFERENCES}} + tags: + {{TAGS}} + logsource: + category: process_creation + product: windows + detection: + selection_runas: + Image|endswith: '\\runas.exe' + 
CommandLine|contains: + {{COMMANDS}} + selection_whoami: + Image|endswith: '\\whoami.exe' + CommandLine|contains: + - '/priv' + - '/groups' + selection_exploit: + CommandLine|contains: + {{PROCESSES}} + condition: selection_runas or selection_whoami or selection_exploit + falsepositives: + - Legitimate administrative tasks + - System diagnostics + level: {{LEVEL}} \ No newline at end of file diff --git a/backend/templates/process_execution.yaml b/backend/templates/process_execution.yaml new file mode 100644 index 0000000..d74af34 --- /dev/null +++ b/backend/templates/process_execution.yaml @@ -0,0 +1,34 @@ +template_name: "Process Execution Detection" +description: "Detects suspicious process execution based on PoC exploit indicators" +applicable_product_patterns: + - "windows" + - "microsoft" + - "office" + - "exchange" + - "sharepoint" +template_content: | + title: {{TITLE}} + id: {{RULE_ID}} + status: experimental + description: {{DESCRIPTION}} + author: CVE-SIGMA Auto Generator + date: {{DATE}} + references: + {{REFERENCES}} + tags: + {{TAGS}} + logsource: + category: process_creation + product: windows + detection: + selection: + Image|endswith: + {{PROCESSES}} + selection_cmd: + CommandLine|contains: + {{COMMANDS}} + condition: selection or selection_cmd + falsepositives: + - Legitimate software installations + - System administration tasks + level: {{LEVEL}} \ No newline at end of file diff --git a/backend/templates/registry_modification.yaml b/backend/templates/registry_modification.yaml new file mode 100644 index 0000000..f173dd9 --- /dev/null +++ b/backend/templates/registry_modification.yaml @@ -0,0 +1,29 @@ +template_name: "Registry Modification Detection" +description: "Detects suspicious registry modifications based on PoC exploit indicators" +applicable_product_patterns: + - "registry" + - "windows" + - "microsoft" +template_content: | + title: {{TITLE}} + id: {{RULE_ID}} + status: experimental + description: {{DESCRIPTION}} + author: CVE-SIGMA Auto Generator + date: {{DATE}} + references: + {{REFERENCES}} + tags: + {{TAGS}} + logsource: + category: registry_event + product: windows + detection: + selection: + TargetObject|contains: + {{REGISTRY}} + condition: selection + falsepositives: + - Legitimate software configuration changes + - System updates + level: {{LEVEL}} \ No newline at end of file diff --git a/backend/templates/service_manipulation.yaml b/backend/templates/service_manipulation.yaml new file mode 100644 index 0000000..308b621 --- /dev/null +++ b/backend/templates/service_manipulation.yaml @@ -0,0 +1,42 @@ +template_name: "Service Manipulation Detection" +description: "Detects suspicious Windows service manipulation based on PoC exploit indicators" +applicable_product_patterns: + - "service" + - "windows" + - "microsoft" + - "sc.exe" + - "net.exe" +template_content: | + title: {{TITLE}} + id: {{RULE_ID}} + status: experimental + description: {{DESCRIPTION}} + author: CVE-SIGMA Auto Generator + date: {{DATE}} + references: + {{REFERENCES}} + tags: + {{TAGS}} + logsource: + category: process_creation + product: windows + detection: + selection_sc: + Image|endswith: '\\sc.exe' + CommandLine|contains: + {{COMMANDS}} + selection_net: + Image|endswith: '\\net.exe' + CommandLine|contains: + - 'start' + - 'stop' + - 'pause' + - 'continue' + selection_service: + CommandLine|contains: + {{PROCESSES}} + condition: selection_sc or selection_net or selection_service + falsepositives: + - Legitimate system administration + - Software installations + level: {{LEVEL}} \ No 
newline at end of file diff --git a/backend/templates/web_application_attack.yaml b/backend/templates/web_application_attack.yaml new file mode 100644 index 0000000..a0942a6 --- /dev/null +++ b/backend/templates/web_application_attack.yaml @@ -0,0 +1,33 @@ +template_name: "Web Application Attack Detection" +description: "Detects web application attacks based on PoC exploit indicators" +applicable_product_patterns: + - "web" + - "http" + - "apache" + - "nginx" + - "iis" +template_content: | + title: {{TITLE}} + id: {{RULE_ID}} + status: experimental + description: {{DESCRIPTION}} + author: CVE-SIGMA Auto Generator + date: {{DATE}} + references: + {{REFERENCES}} + tags: + {{TAGS}} + logsource: + category: webserver + detection: + selection: + cs-uri-query|contains: + {{URLS}} + selection_user_agent: + cs-user-agent|contains: + {{COMMANDS}} + condition: selection or selection_user_agent + falsepositives: + - Legitimate web application usage + - Security scanners + level: {{LEVEL}} \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index 39d185a..af060de 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -29,6 +29,7 @@ services: condition: service_healthy volumes: - ./backend:/app + - ./github_poc_collector:/github_poc_collector command: uvicorn main:app --host 0.0.0.0 --port 8000 --reload frontend: diff --git a/frontend/src/App.js b/frontend/src/App.js index b7a540a..1515779 100644 --- a/frontend/src/App.js +++ b/frontend/src/App.js @@ -18,7 +18,9 @@ function App() { const [bulkJobs, setBulkJobs] = useState([]); const [bulkStatus, setBulkStatus] = useState({}); const [pocStats, setPocStats] = useState({}); + const [gitHubPocStats, setGitHubPocStats] = useState({}); const [bulkProcessing, setBulkProcessing] = useState(false); + const [hasRunningJobs, setHasRunningJobs] = useState(false); useEffect(() => { fetchData(); @@ -27,13 +29,14 @@ function App() { const fetchData = async () => { try { setLoading(true); - const [cvesRes, rulesRes, statsRes, bulkJobsRes, bulkStatusRes, pocStatsRes] = await Promise.all([ + const [cvesRes, rulesRes, statsRes, bulkJobsRes, bulkStatusRes, pocStatsRes, githubPocStatsRes] = await Promise.all([ axios.get(`${API_BASE_URL}/api/cves`), axios.get(`${API_BASE_URL}/api/sigma-rules`), axios.get(`${API_BASE_URL}/api/stats`), axios.get(`${API_BASE_URL}/api/bulk-jobs`), axios.get(`${API_BASE_URL}/api/bulk-status`), - axios.get(`${API_BASE_URL}/api/poc-stats`) + axios.get(`${API_BASE_URL}/api/poc-stats`), + axios.get(`${API_BASE_URL}/api/github-poc-stats`).catch(err => ({ data: {} })) ]); setCves(cvesRes.data); @@ -42,6 +45,11 @@ function App() { setBulkJobs(bulkJobsRes.data); setBulkStatus(bulkStatusRes.data); setPocStats(pocStatsRes.data); + setGitHubPocStats(githubPocStatsRes.data); + + // Update running jobs state + const runningJobs = bulkJobsRes.data.filter(job => job.status === 'running' || job.status === 'pending'); + setHasRunningJobs(runningJobs.length > 0); } catch (error) { console.error('Error fetching data:', error); } finally { @@ -99,68 +107,62 @@ function App() { const startBulkSeed = async (startYear = 2020, endYear = null) => { try { - setBulkProcessing(true); const response = await axios.post(`${API_BASE_URL}/api/bulk-seed`, { start_year: startYear, - end_year: endYear + end_year: endYear, + skip_nomi_sec: true }); console.log('Bulk seed response:', response.data); - // Refresh data after starting - setTimeout(() => { - fetchData(); - }, 2000); + // Refresh data immediately to show job started + fetchData(); } 
catch (error) { console.error('Error starting bulk seed:', error); - setBulkProcessing(false); } }; const startIncrementalUpdate = async () => { try { - setBulkProcessing(true); const response = await axios.post(`${API_BASE_URL}/api/incremental-update`); console.log('Incremental update response:', response.data); - setTimeout(() => { - fetchData(); - setBulkProcessing(false); - }, 2000); + fetchData(); } catch (error) { console.error('Error starting incremental update:', error); - setBulkProcessing(false); } }; const syncNomiSec = async (cveId = null) => { try { - setBulkProcessing(true); const response = await axios.post(`${API_BASE_URL}/api/sync-nomi-sec`, { cve_id: cveId }); console.log('Nomi-sec sync response:', response.data); - setTimeout(() => { - fetchData(); - setBulkProcessing(false); - }, 2000); + fetchData(); } catch (error) { console.error('Error syncing nomi-sec:', error); - setBulkProcessing(false); + } + }; + + const syncGitHubPocs = async (cveId = null) => { + try { + const response = await axios.post(`${API_BASE_URL}/api/sync-github-pocs`, { + cve_id: cveId + }); + console.log('GitHub PoC sync response:', response.data); + fetchData(); + } catch (error) { + console.error('Error syncing GitHub PoCs:', error); } }; const regenerateRules = async (force = false) => { try { - setBulkProcessing(true); const response = await axios.post(`${API_BASE_URL}/api/regenerate-rules`, { force: force }); console.log('Rule regeneration response:', response.data); - setTimeout(() => { - fetchData(); - setBulkProcessing(false); - }, 2000); + fetchData(); } catch (error) { console.error('Error regenerating rules:', error); - setBulkProcessing(false); } }; @@ -194,11 +196,13 @@ function App() {

[JSX markup stripped during extraction; only the rendered text of the dashboard stat cards survives. Recovered content of the three App.js hunks:]

  SIGMA Rules
    {stats.total_sigma_rules || 0}
    Nomi-sec: {stats.nomi_sec_rules || 0}
+   GitHub PoCs: {gitHubPocStats.github_poc_rules || 0}

  CVEs with PoCs
    {stats.cves_with_pocs || 0}
    {(stats.poc_coverage || 0).toFixed(1)}% coverage
+   GitHub PoCs: {gitHubPocStats.cves_with_github_pocs || 0}

  Recent CVEs (7d)

@@ -208,6 +212,7 @@ function App() {
  High Quality PoCs
    {pocStats.high_quality_cves || 0}
    Avg: {(pocStats.avg_poc_count || 0).toFixed(1)}
+   GitHub PoCs Avg: {(gitHubPocStats.average_quality_score || 0).toFixed(1)}

@@ -217,47 +222,58 @@ function App() {
+   [remainder of hunk lost in extraction; it presumably wires the new syncGitHubPocs() handler into the bulk-actions toolbar]
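For reference, the sync endpoint added in `backend/main.py` can also be exercised outside the React UI. A hedged sketch using `requests`, assuming the backend is reachable on port 8000 (the uvicorn port from `docker-compose.yml`) and that the CVE shown is already seeded; any CVE ID present in the database works, and omitting `cve_id` triggers a bulk sync:

```python
import requests

# Trigger a GitHub PoC sync for a single CVE (fields mirror GitHubPoCSyncRequest).
resp = requests.post(
    "http://localhost:8000/api/sync-github-pocs",
    json={"cve_id": "CVE-2021-44228", "batch_size": 50},  # omit cve_id to sync all CVEs
    timeout=30,
)
job = resp.json()
print(job["message"], job["job_id"])  # progress is then visible via /api/bulk-jobs
```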
diff --git a/github_poc_collector b/github_poc_collector new file mode 160000 index 0000000..5c171fb --- /dev/null +++ b/github_poc_collector @@ -0,0 +1 @@ +Subproject commit 5c171fb9a97e214ddcdea82054c97e09438504c3
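Finally, a sketch of how the `{{PLACEHOLDER}}` tokens documented in `backend/templates/README.md` might be substituted, assuming plain string replacement; the production logic in `enhanced_sigma_generator.py` may differ, and all substitution values below are illustrative:

```python
import uuid
from datetime import datetime
from pathlib import Path

import yaml

# Load one of the new template files and pull out its SIGMA rule skeleton.
template = yaml.safe_load(Path("backend/templates/process_execution.yaml").read_text())
rule = template["template_content"]

# Illustrative values only; the generator derives these from CVE and PoC data.
values = {
    "{{TITLE}}": "Potential CVE-2024-0001 Exploitation",
    "{{RULE_ID}}": str(uuid.uuid4()),
    "{{DESCRIPTION}}": "Auto-generated from PoC exploit indicators",
    "{{DATE}}": datetime.utcnow().strftime("%Y/%m/%d"),
    "{{REFERENCES}}": "- https://nvd.nist.gov/vuln/detail/CVE-2024-0001",
    "{{TAGS}}": "- attack.execution",
    "{{PROCESSES}}": "- '\\\\malicious.exe'",
    "{{COMMANDS}}": "- 'whoami'",
    "{{LEVEL}}": "high",
}
for placeholder, value in values.items():
    rule = rule.replace(placeholder, value)

print(rule)
```

The printed rule can then be checked with standard SIGMA tooling before being stored alongside the database templates loaded by `initialize_templates.py`.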