Added git submodule for more exploits. Added a templates directory with base YAML templates for SIGMA rules.

This commit is contained in:
Brendan McDevitt 2025-07-09 11:58:29 -05:00
parent cfaad8b359
commit 455a46c88f
22 changed files with 1475 additions and 250 deletions

.gitmodules vendored Normal file
View file

@ -0,0 +1,3 @@
[submodule "github_poc_collector"]
path = github_poc_collector
url = https://git.mcdevitt.tech/bpmcdevitt/github_poc_collector
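
Note: after cloning, the submodule directory is empty until it is initialized; `git submodule update --init github_poc_collector` checks out the pinned exploit data recorded at the bottom of this commit.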

View file

@ -60,13 +60,14 @@ class EnhancedSigmaGenerator:
'confidence_level': confidence_level,
'auto_generated': True,
'exploit_based': len(poc_data) > 0,
'poc_source': 'nomi_sec',
'poc_source': getattr(cve, 'poc_source', 'nomi_sec'),
'poc_quality_score': best_poc.get('quality_analysis', {}).get('quality_score', 0) if best_poc else 0,
'nomi_sec_data': {
'total_pocs': len(poc_data),
'best_poc_quality': best_poc.get('quality_analysis', {}).get('quality_score', 0) if best_poc else 0,
'total_stars': sum(p.get('stargazers_count', 0) for p in poc_data),
'avg_stars': sum(p.get('stargazers_count', 0) for p in poc_data) / len(poc_data) if poc_data else 0
'avg_stars': sum(p.get('stargazers_count', 0) for p in poc_data) / len(poc_data) if poc_data else 0,
'source': getattr(cve, 'poc_source', 'nomi_sec')
},
'github_repos': [p.get('html_url', '') for p in poc_data],
'exploit_indicators': json.dumps(self._combine_exploit_indicators(poc_data)),
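
The `getattr(cve, 'poc_source', 'nomi_sec')` change lets rules generated from the new GitHub PoC feed carry the correct source label while older records keep the `nomi_sec` default. A minimal sketch of the fallback (the `_FakeCVE` stand-in is illustrative, not the project's ORM model):

```python
class _FakeCVE:
    """Stand-in object; only the attribute being read matters here."""
    pass

cve = _FakeCVE()
print(getattr(cve, 'poc_source', 'nomi_sec'))   # -> 'nomi_sec' (attribute absent)

cve.poc_source = 'github_poc'
print(getattr(cve, 'poc_source', 'nomi_sec'))   # -> 'github_poc' (attribute set)
```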

View file

@ -4,200 +4,53 @@ Initialize SIGMA rule templates for enhanced rule generation
"""
import json
import yaml
import os
from pathlib import Path
from datetime import datetime
from main import SessionLocal, RuleTemplate, Base, engine
# Create tables if they don't exist
Base.metadata.create_all(bind=engine)
# Template definitions with actual SIGMA rule content
SIGMA_TEMPLATES = [
{
"template_name": "Process Execution Detection",
"template_content": """title: {{TITLE}}
id: {{RULE_ID}}
status: experimental
description: {{DESCRIPTION}}
author: CVE-SIGMA Auto Generator
date: {{DATE}}
references:
{{REFERENCES}}
tags:
{{TAGS}}
logsource:
category: process_creation
product: windows
detection:
selection:
Image|endswith:
{{PROCESSES}}
selection_cmd:
CommandLine|contains:
{{COMMANDS}}
condition: selection or selection_cmd
falsepositives:
- Legitimate software installations
- System administration tasks
level: {{LEVEL}}""",
"applicable_product_patterns": ["windows", "microsoft", "office", "exchange", "sharepoint"],
"description": "Detects suspicious process execution based on PoC exploit indicators"
},
{
"template_name": "Network Connection Detection",
"template_content": """title: {{TITLE}}
id: {{RULE_ID}}
status: experimental
description: {{DESCRIPTION}}
author: CVE-SIGMA Auto Generator
date: {{DATE}}
references:
{{REFERENCES}}
tags:
{{TAGS}}
logsource:
category: network_connection
product: windows
detection:
selection:
Initiated: true
DestinationIp:
{{NETWORK}}
selection_url:
DestinationHostname|contains:
{{URLS}}
condition: selection or selection_url
falsepositives:
- Legitimate network connections
- Software updates
level: {{LEVEL}}""",
"applicable_product_patterns": ["network", "web", "http", "https", "tcp", "udp"],
"description": "Detects suspicious network connections based on PoC exploit indicators"
},
{
"template_name": "File System Activity Detection",
"template_content": """title: {{TITLE}}
id: {{RULE_ID}}
status: experimental
description: {{DESCRIPTION}}
author: CVE-SIGMA Auto Generator
date: {{DATE}}
references:
{{REFERENCES}}
tags:
{{TAGS}}
logsource:
category: file_event
product: windows
detection:
selection:
TargetFilename|contains:
{{FILES}}
condition: selection
falsepositives:
- Legitimate file operations
- Software installations
level: {{LEVEL}}""",
"applicable_product_patterns": ["file", "filesystem", "upload", "download"],
"description": "Detects suspicious file system activity based on PoC exploit indicators"
},
{
"template_name": "PowerShell Execution Detection",
"template_content": """title: {{TITLE}}
id: {{RULE_ID}}
status: experimental
description: {{DESCRIPTION}}
author: CVE-SIGMA Auto Generator
date: {{DATE}}
references:
{{REFERENCES}}
tags:
{{TAGS}}
logsource:
category: process_creation
product: windows
detection:
selection:
Image|endswith:
- '\\powershell.exe'
- '\\pwsh.exe'
CommandLine|contains:
{{COMMANDS}}
condition: selection
falsepositives:
- Legitimate PowerShell scripts
- System administration
level: {{LEVEL}}""",
"applicable_product_patterns": ["powershell", "windows", "microsoft"],
"description": "Detects suspicious PowerShell execution based on PoC exploit indicators"
},
{
"template_name": "Web Application Attack Detection",
"template_content": """title: {{TITLE}}
id: {{RULE_ID}}
status: experimental
description: {{DESCRIPTION}}
author: CVE-SIGMA Auto Generator
date: {{DATE}}
references:
{{REFERENCES}}
tags:
{{TAGS}}
logsource:
category: webserver
detection:
selection:
cs-uri-query|contains:
{{URLS}}
selection_user_agent:
cs-user-agent|contains:
{{COMMANDS}}
condition: selection or selection_user_agent
falsepositives:
- Legitimate web application usage
- Security scanners
level: {{LEVEL}}""",
"applicable_product_patterns": ["web", "http", "apache", "nginx", "iis"],
"description": "Detects web application attacks based on PoC exploit indicators"
},
{
"template_name": "Registry Modification Detection",
"template_content": """title: {{TITLE}}
id: {{RULE_ID}}
status: experimental
description: {{DESCRIPTION}}
author: CVE-SIGMA Auto Generator
date: {{DATE}}
references:
{{REFERENCES}}
tags:
{{TAGS}}
logsource:
category: registry_event
product: windows
detection:
selection:
TargetObject|contains:
{{REGISTRY}}
condition: selection
falsepositives:
- Legitimate software configuration changes
- System updates
level: {{LEVEL}}""",
"applicable_product_patterns": ["registry", "windows", "microsoft"],
"description": "Detects suspicious registry modifications based on PoC exploit indicators"
}
]
def load_templates_from_files():
"""Load SIGMA rule templates from YAML files in the templates directory"""
templates = []
templates_dir = Path(__file__).parent / "templates"
if not templates_dir.exists():
print(f"Templates directory not found: {templates_dir}")
return templates
# Load all YAML files from templates directory
for template_file in templates_dir.glob("*.yaml"):
try:
with open(template_file, 'r', encoding='utf-8') as f:
template_data = yaml.safe_load(f)
templates.append(template_data)
print(f"Loaded template: {template_data['template_name']}")
except Exception as e:
print(f"Error loading template from {template_file}: {e}")
return templates
def initialize_templates():
"""Initialize rule templates in the database"""
db = SessionLocal()
try:
# Load templates from YAML files
templates = load_templates_from_files()
if not templates:
print("No templates found to initialize")
return
# Clear existing templates
db.query(RuleTemplate).delete()
# Add new templates
for template_data in SIGMA_TEMPLATES:
for template_data in templates:
template = RuleTemplate(
template_name=template_data["template_name"],
template_content=template_data["template_content"],
@ -207,7 +60,7 @@ def initialize_templates():
db.add(template)
db.commit()
print(f"Successfully initialized {len(SIGMA_TEMPLATES)} rule templates")
print(f"Successfully initialized {len(templates)} rule templates")
except Exception as e:
db.rollback()
@ -216,5 +69,27 @@ def initialize_templates():
finally:
db.close()
def list_available_templates():
"""List all available template files and their details"""
templates = load_templates_from_files()
if not templates:
print("No templates found")
return
print(f"\nFound {len(templates)} available templates:")
print("=" * 60)
for i, template in enumerate(templates, 1):
print(f"{i}. {template['template_name']}")
print(f" Description: {template['description']}")
print(f" Applicable patterns: {', '.join(template['applicable_product_patterns'])}")
print()
if __name__ == "__main__":
initialize_templates()
import sys
if len(sys.argv) > 1 and sys.argv[1] == "list":
list_available_templates()
else:
initialize_templates()
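
To see the shape `load_templates_from_files()` expects, the sketch below writes a minimal template file and round-trips it through `yaml.safe_load`; the filename and field values are illustrative:

```python
import yaml
from pathlib import Path

sample = """\
template_name: "Example Detection"
description: "Minimal template for exercising the loader"
applicable_product_patterns:
  - "example"
template_content: |
  title: {{TITLE}}
  id: {{RULE_ID}}
  condition: selection
"""

templates_dir = Path("templates")            # the directory the loader globs
templates_dir.mkdir(exist_ok=True)
(templates_dir / "example.yaml").write_text(sample)

data = yaml.safe_load(sample)
assert set(data) == {"template_name", "description",
                     "applicable_product_patterns", "template_content"}
print(data["template_name"])                 # -> Example Detection
```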

View file

@ -21,6 +21,7 @@ from urllib.parse import urlparse
import hashlib
import logging
import threading
from mcdevitt_poc_client import GitHubPoCClient
# Setup logging
logging.basicConfig(level=logging.INFO)
@ -138,6 +139,24 @@ class SigmaRuleResponse(BaseModel):
class Config:
from_attributes = True
# Request models
class BulkSeedRequest(BaseModel):
start_year: int = 2002
end_year: Optional[int] = None
skip_nvd: bool = False
skip_nomi_sec: bool = True
class NomiSecSyncRequest(BaseModel):
cve_id: Optional[str] = None
batch_size: int = 50
class GitHubPoCSyncRequest(BaseModel):
cve_id: Optional[str] = None
batch_size: int = 50
class RuleRegenRequest(BaseModel):
force: bool = False
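
Moving these parameters into Pydantic models means FastAPI now reads them from the JSON request body instead of the query string, with defaults applied for omitted fields. A quick standalone check of the defaults (`.dict()` is the Pydantic v1 spelling; v2 uses `.model_dump()`):

```python
from typing import Optional
from pydantic import BaseModel

class BulkSeedRequest(BaseModel):
    start_year: int = 2002
    end_year: Optional[int] = None
    skip_nvd: bool = False
    skip_nomi_sec: bool = True

req = BulkSeedRequest()                         # every field falls back to its default
print(req.start_year, req.skip_nomi_sec)        # -> 2002 True
print(BulkSeedRequest(start_year=2020).dict())  # {'start_year': 2020, ...}
```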
# GitHub Exploit Analysis Service
class GitHubExploitAnalyzer:
def __init__(self):
@ -768,11 +787,11 @@ async def background_cve_fetch():
@asynccontextmanager
async def lifespan(app: FastAPI):
# Start background task
task = asyncio.create_task(background_cve_fetch())
# Background CVE fetching disabled - use manual fetch endpoint instead
# task = asyncio.create_task(background_cve_fetch())
yield
# Clean up
task.cancel()
# task.cancel()
# FastAPI app
app = FastAPI(title="CVE-SIGMA Auto Generator", lifespan=lifespan)
@ -998,10 +1017,7 @@ async def get_stats(db: Session = Depends(get_db)):
# New bulk processing endpoints
@app.post("/api/bulk-seed")
async def start_bulk_seed(background_tasks: BackgroundTasks,
start_year: int = 2002,
end_year: Optional[int] = None,
skip_nvd: bool = False,
skip_nomi_sec: bool = False,
request: BulkSeedRequest,
db: Session = Depends(get_db)):
"""Start bulk seeding process"""
@ -1010,10 +1026,10 @@ async def start_bulk_seed(background_tasks: BackgroundTasks,
from bulk_seeder import BulkSeeder
seeder = BulkSeeder(db)
result = await seeder.full_bulk_seed(
start_year=start_year,
end_year=end_year,
skip_nvd=skip_nvd,
skip_nomi_sec=skip_nomi_sec
start_year=request.start_year,
end_year=request.end_year,
skip_nvd=request.skip_nvd,
skip_nomi_sec=request.skip_nomi_sec
)
logger.info(f"Bulk seed completed: {result}")
except Exception as e:
@ -1026,10 +1042,10 @@ async def start_bulk_seed(background_tasks: BackgroundTasks,
return {
"message": "Bulk seeding process started",
"status": "started",
"start_year": start_year,
"end_year": end_year or datetime.now().year,
"skip_nvd": skip_nvd,
"skip_nomi_sec": skip_nomi_sec
"start_year": request.start_year,
"end_year": request.end_year or datetime.now().year,
"skip_nvd": request.skip_nvd,
"skip_nomi_sec": request.skip_nomi_sec
}
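
With the body model in place, clients post JSON rather than query parameters. A sketch using `requests` (host and port assumed from the compose file's `uvicorn` command):

```python
import requests

resp = requests.post(
    "http://localhost:8000/api/bulk-seed",
    json={"start_year": 2020, "skip_nomi_sec": True},  # request body, not query params
    timeout=10,
)
print(resp.json())  # {'message': 'Bulk seeding process started', ...}
```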
@app.post("/api/incremental-update")
@ -1056,8 +1072,7 @@ async def start_incremental_update(background_tasks: BackgroundTasks, db: Sessio
@app.post("/api/sync-nomi-sec")
async def sync_nomi_sec(background_tasks: BackgroundTasks,
cve_id: Optional[str] = None,
batch_size: int = 50,
request: NomiSecSyncRequest,
db: Session = Depends(get_db)):
"""Synchronize nomi-sec PoC data"""
@ -1066,8 +1081,8 @@ async def sync_nomi_sec(background_tasks: BackgroundTasks,
job_type='nomi_sec_sync',
status='pending',
job_metadata={
'cve_id': cve_id,
'batch_size': batch_size
'cve_id': request.cve_id,
'batch_size': request.batch_size
}
)
db.add(job)
@ -1087,18 +1102,18 @@ async def sync_nomi_sec(background_tasks: BackgroundTasks,
from nomi_sec_client import NomiSecClient
client = NomiSecClient(db)
if cve_id:
if request.cve_id:
# Sync specific CVE
if job_cancellation_flags.get(job_id, False):
logger.info(f"Job {job_id} cancelled before starting")
return
result = await client.sync_cve_pocs(cve_id)
logger.info(f"Nomi-sec sync for {cve_id}: {result}")
result = await client.sync_cve_pocs(request.cve_id)
logger.info(f"Nomi-sec sync for {request.cve_id}: {result}")
else:
# Sync all CVEs with cancellation support
result = await client.bulk_sync_all_cves(
batch_size=batch_size,
batch_size=request.batch_size,
cancellation_flag=lambda: job_cancellation_flags.get(job_id, False)
)
logger.info(f"Nomi-sec bulk sync completed: {result}")
@ -1127,13 +1142,162 @@ async def sync_nomi_sec(background_tasks: BackgroundTasks,
background_tasks.add_task(sync_task)
return {
"message": f"Nomi-sec sync started" + (f" for {cve_id}" if cve_id else " for all CVEs"),
"message": f"Nomi-sec sync started" + (f" for {request.cve_id}" if request.cve_id else " for all CVEs"),
"status": "started",
"job_id": job_id,
"cve_id": cve_id,
"batch_size": batch_size
"cve_id": request.cve_id,
"batch_size": request.batch_size
}
@app.post("/api/sync-github-pocs")
async def sync_github_pocs(background_tasks: BackgroundTasks,
request: GitHubPoCSyncRequest,
db: Session = Depends(get_db)):
"""Synchronize GitHub PoC data"""
# Create job record
job = BulkProcessingJob(
job_type='github_poc_sync',
status='pending',
job_metadata={
'cve_id': request.cve_id,
'batch_size': request.batch_size
}
)
db.add(job)
db.commit()
db.refresh(job)
job_id = str(job.id)
running_jobs[job_id] = job
job_cancellation_flags[job_id] = False
async def sync_task():
try:
job.status = 'running'
job.started_at = datetime.utcnow()
db.commit()
client = GitHubPoCClient(db)
if request.cve_id:
# Sync specific CVE
if job_cancellation_flags.get(job_id, False):
logger.info(f"Job {job_id} cancelled before starting")
return
result = await client.sync_cve_pocs(request.cve_id)
logger.info(f"GitHub PoC sync for {request.cve_id}: {result}")
else:
# Sync all CVEs with cancellation support
result = await client.bulk_sync_all_cves(batch_size=request.batch_size)
logger.info(f"GitHub PoC bulk sync completed: {result}")
# Update job status if not cancelled
if not job_cancellation_flags.get(job_id, False):
job.status = 'completed'
job.completed_at = datetime.utcnow()
db.commit()
except Exception as e:
if not job_cancellation_flags.get(job_id, False):
job.status = 'failed'
job.error_message = str(e)
job.completed_at = datetime.utcnow()
db.commit()
logger.error(f"GitHub PoC sync failed: {e}")
import traceback
traceback.print_exc()
finally:
# Clean up tracking
running_jobs.pop(job_id, None)
job_cancellation_flags.pop(job_id, None)
background_tasks.add_task(sync_task)
return {
"message": f"GitHub PoC sync started" + (f" for {request.cve_id}" if request.cve_id else " for all CVEs"),
"status": "started",
"job_id": job_id,
"cve_id": request.cve_id,
"batch_size": request.batch_size
}
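
The new endpoint follows the same convention; for a single CVE (the CVE ID below is illustrative):

```python
import requests

resp = requests.post(
    "http://localhost:8000/api/sync-github-pocs",
    json={"cve_id": "CVE-2024-0001", "batch_size": 50},
    timeout=10,
)
print(resp.json()["job_id"])  # poll /api/bulk-jobs to track progress
```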
@app.get("/api/github-poc-stats")
async def get_github_poc_stats(db: Session = Depends(get_db)):
"""Get GitHub PoC-related statistics"""
try:
# Get basic statistics
github_poc_rules = db.query(SigmaRule).filter(SigmaRule.poc_source == 'github_poc').count()
cves_with_github_pocs = db.query(CVE).filter(
CVE.poc_data.isnot(None), # Check if poc_data exists
func.json_extract_path_text(CVE.poc_data, '0', 'source') == 'github_poc'
).count()
# Get quality distribution
quality_distribution = {}
try:
quality_results = db.query(
func.json_extract_path_text(CVE.poc_data, '0', 'quality_analysis', 'quality_tier').label('tier'),
func.count().label('count')
).filter(
CVE.poc_data.isnot(None),
func.json_extract_path_text(CVE.poc_data, '0', 'source') == 'github_poc'
).group_by('tier').all()
for tier, count in quality_results:
if tier:
quality_distribution[tier] = count
except Exception as e:
logger.warning(f"Error getting quality distribution: {e}")
quality_distribution = {}
# Calculate average quality score
try:
avg_quality = db.query(
func.avg(func.json_extract_path_text(CVE.poc_data, '0', 'quality_analysis', 'quality_score').cast(Integer))
).filter(
CVE.poc_data.isnot(None),
func.json_extract_path_text(CVE.poc_data, '0', 'source') == 'github_poc'
).scalar() or 0
except Exception as e:
logger.warning(f"Error calculating average quality: {e}")
avg_quality = 0
return {
'github_poc_rules': github_poc_rules,
'cves_with_github_pocs': cves_with_github_pocs,
'quality_distribution': quality_distribution,
'average_quality_score': float(avg_quality) if avg_quality else 0,
'source': 'github_poc'
}
except Exception as e:
logger.error(f"Error getting GitHub PoC stats: {e}")
return {"error": str(e)}
@app.get("/api/github-poc-status")
async def get_github_poc_status(db: Session = Depends(get_db)):
"""Get GitHub PoC data availability status"""
try:
client = GitHubPoCClient(db)
# Check if GitHub PoC data is available
github_poc_data = client.load_github_poc_data()
return {
'github_poc_data_available': len(github_poc_data) > 0,
'total_cves_with_pocs': len(github_poc_data),
'sample_cve_ids': list(github_poc_data.keys())[:10], # First 10 CVE IDs
'data_path': str(client.github_poc_path),
'path_exists': client.github_poc_path.exists()
}
except Exception as e:
logger.error(f"Error checking GitHub PoC status: {e}")
return {"error": str(e)}
@app.get("/api/bulk-jobs")
async def get_bulk_jobs(limit: int = 10, db: Session = Depends(get_db)):
"""Get bulk processing job status"""
@ -1202,7 +1366,7 @@ async def get_poc_stats(db: Session = Depends(get_db)):
@app.post("/api/regenerate-rules")
async def regenerate_sigma_rules(background_tasks: BackgroundTasks,
force: bool = False,
request: RuleRegenRequest,
db: Session = Depends(get_db)):
"""Regenerate SIGMA rules using enhanced nomi-sec data"""
@ -1223,7 +1387,7 @@ async def regenerate_sigma_rules(background_tasks: BackgroundTasks,
SigmaRule.cve_id == cve.cve_id
).first()
if existing_rule and existing_rule.poc_source == 'nomi_sec' and not force:
if existing_rule and existing_rule.poc_source == 'nomi_sec' and not request.force:
continue
# Generate enhanced rule
@ -1247,7 +1411,7 @@ async def regenerate_sigma_rules(background_tasks: BackgroundTasks,
return {
"message": "SIGMA rule regeneration started",
"status": "started",
"force": force
"force": request.force
}
@app.post("/api/cancel-job/{job_id}")

View file

@ -0,0 +1,593 @@
"""
GitHub PoC Collector Integration Client
Reads JSON files from github_poc_collector and fetches GitHub repo contents for SIGMA rule generation
"""
import aiohttp
import asyncio
import json
import logging
import os
from datetime import datetime
from typing import Dict, List, Optional, Tuple
from sqlalchemy.orm import Session
from pathlib import Path
import re
import base64
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class GitHubPoCClient:
"""Client for processing GitHub PoC collector data and fetching GitHub contents"""
def __init__(self, db_session: Session, github_token: Optional[str] = None):
self.db_session = db_session
self.github_token = github_token or os.getenv('GITHUB_TOKEN')
self.base_url = "https://api.github.com"
# Rate limiting - GitHub API: 5000 requests/hour with token, 60 without
self.rate_limit_delay = 0.8 if self.github_token else 60.0 # seconds
self.last_request_time = 0
# GitHub PoC collector path (mounted in Docker container)
self.github_poc_path = Path("/github_poc_collector/exploits")
# Cache for repository contents
self.repo_cache = {}
self.cache_ttl = 1800 # 30 minutes
def load_github_poc_data(self) -> Dict[str, List[dict]]:
"""Load all PoC data from GitHub PoC collector JSON files"""
poc_data = {}
if not self.github_poc_path.exists():
logger.error(f"GitHub PoC path not found: {self.github_poc_path}")
return poc_data
# Walk through year directories
for year_dir in self.github_poc_path.iterdir():
if year_dir.is_dir():
for json_file in year_dir.glob("*.json"):
try:
cve_id = json_file.stem # CVE-YYYY-NNNN
with open(json_file, 'r') as f:
repos = json.load(f)
# Filter out repositories with no stars or very low quality
filtered_repos = []
for repo in repos:
if isinstance(repo, dict) and repo.get('html_url'):
# Basic quality filtering
stars = repo.get('stargazers_count', 0)
description = repo.get('description', '') or ''
# Skip very low quality repos
if stars > 0 or len(description) > 20:
filtered_repos.append(repo)
if filtered_repos:
poc_data[cve_id] = filtered_repos
except Exception as e:
logger.error(f"Error loading {json_file}: {e}")
logger.info(f"Loaded PoC data for {len(poc_data)} CVEs")
return poc_data
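
The loader assumes an on-disk layout of `<exploits>/<year>/CVE-YYYY-NNNN.json`, each file holding a list of GitHub repository dicts. A sketch that fabricates one such file to make the shape concrete (field names match what the filter reads; the values are made up):

```python
import json
from pathlib import Path

root = Path("exploits")                  # stands in for /github_poc_collector/exploits
entry = {
    "html_url": "https://github.com/example/poc-repo",
    "full_name": "example/poc-repo",
    "name": "poc-repo",
    "stargazers_count": 3,
    "description": "Proof of concept for a made-up CVE",
}
year_dir = root / "2024"
year_dir.mkdir(parents=True, exist_ok=True)
(year_dir / "CVE-2024-0001.json").write_text(json.dumps([entry]))
# load_github_poc_data() keeps this repo: stars > 0 and description > 20 chars
```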
async def _make_github_request(self, session: aiohttp.ClientSession,
url: str, params: dict = None) -> Optional[dict]:
"""Make a rate-limited request to GitHub API"""
try:
# Rate limiting
current_time = asyncio.get_event_loop().time()
time_since_last = current_time - self.last_request_time
if time_since_last < self.rate_limit_delay:
await asyncio.sleep(self.rate_limit_delay - time_since_last)
headers = {
'Accept': 'application/vnd.github.v3+json',
'User-Agent': 'Auto-SIGMA-Rule-Generator/1.0'
}
if self.github_token:
headers['Authorization'] = f'token {self.github_token}'
async with session.get(url, params=params, headers=headers, timeout=30) as response:
self.last_request_time = asyncio.get_event_loop().time()
if response.status == 200:
return await response.json()
elif response.status == 403:
logger.warning(f"Rate limit exceeded for {url}")
# Wait and retry once
await asyncio.sleep(60)
return await self._make_github_request(session, url, params)
else:
logger.warning(f"GitHub API request failed: {response.status} for {url}")
return None
except Exception as e:
logger.error(f"Error making GitHub request to {url}: {e}")
return None
async def get_repository_contents(self, repo_url: str) -> Dict[str, any]:
"""Fetch repository contents from GitHub API"""
# Extract owner/repo from URL
try:
# Parse GitHub URL: https://github.com/owner/repo
parts = repo_url.replace('https://github.com/', '').split('/')
if len(parts) < 2:
return {}
owner, repo = parts[0], parts[1]
repo_key = f"{owner}/{repo}"
# Check cache
if repo_key in self.repo_cache:
cached_data, timestamp = self.repo_cache[repo_key]
if (datetime.now().timestamp() - timestamp) < self.cache_ttl:
return cached_data
async with aiohttp.ClientSession() as session:
# Get repository metadata
repo_url_api = f"{self.base_url}/repos/{owner}/{repo}"
repo_data = await self._make_github_request(session, repo_url_api)
if not repo_data:
return {}
# Get repository contents (files)
contents_url = f"{self.base_url}/repos/{owner}/{repo}/contents"
contents_data = await self._make_github_request(session, contents_url)
# Get README content if available
readme_content = ""
if contents_data:
readme_files = [f for f in contents_data
if f.get('name', '').lower().startswith('readme')]
if readme_files:
readme_file = readme_files[0]
readme_url = readme_file.get('download_url')
if readme_url:
try:
async with session.get(readme_url) as readme_response:
if readme_response.status == 200:
readme_content = await readme_response.text()
except Exception as e:
logger.warning(f"Error fetching README: {e}")
# Extract key files (potential exploit code)
key_files = []
if contents_data:
for file_info in contents_data:
if file_info.get('type') == 'file':
file_name = file_info.get('name', '')
file_size = file_info.get('size', 0)
# Focus on code files that might contain exploits
if (file_name.lower().endswith(('.py', '.sh', '.pl', '.rb', '.js', '.c', '.cpp', '.java', '.go', '.rs', '.php'))
and file_size < 50000): # Skip very large files
try:
file_content = await self._get_file_content(session, file_info.get('download_url'))
if file_content:
key_files.append({
'name': file_name,
'size': file_size,
'content': file_content[:10000] # Truncate very long files
})
except Exception as e:
logger.warning(f"Error fetching file {file_name}: {e}")
result = {
'repo_data': repo_data,
'readme_content': readme_content,
'key_files': key_files,
'fetched_at': datetime.now().isoformat()
}
# Cache the result
self.repo_cache[repo_key] = (result, datetime.now().timestamp())
return result
except Exception as e:
logger.error(f"Error fetching repository contents for {repo_url}: {e}")
return {}
async def _get_file_content(self, session: aiohttp.ClientSession, download_url: str) -> Optional[str]:
"""Fetch individual file content"""
try:
async with session.get(download_url, timeout=15) as response:
if response.status == 200:
# Try to decode as text
try:
content = await response.text()
return content
except:
# If text decoding fails, try binary
content = await response.read()
return content.decode('utf-8', errors='ignore')
return None
except Exception as e:
logger.warning(f"Error fetching file content: {e}")
return None
def analyze_repository_for_indicators(self, repo_data: Dict[str, any]) -> Dict[str, any]:
"""Analyze repository contents for exploit indicators"""
indicators = {
"processes": [],
"files": [],
"network": [],
"registry": [],
"commands": [],
"urls": [],
"techniques": [],
"cve_references": [],
"exploit_techniques": []
}
# Combine all text content for analysis
text_sources = []
# Add README content
if repo_data.get('readme_content'):
text_sources.append(repo_data['readme_content'])
# Add repository description
if repo_data.get('repo_data', {}).get('description'):
text_sources.append(repo_data['repo_data']['description'])
# Add key file contents
for file_info in repo_data.get('key_files', []):
text_sources.append(file_info.get('content', ''))
full_text = " ".join(text_sources).lower()
# Extract CVE references
cve_pattern = r'cve-\d{4}-\d{4,7}'
cve_matches = re.findall(cve_pattern, full_text, re.IGNORECASE)
indicators["cve_references"] = list(set(cve_matches))
# Enhanced process patterns
process_patterns = [
r'\b(cmd\.exe|powershell\.exe|bash|sh|python\.exe|java\.exe|node\.exe)\b',
r'\b(createprocess|shellexecute|system|winexec|execve|fork|spawn)\b',
r'\b(reverse.?shell|bind.?shell|web.?shell|backdoor)\b',
r'\b(mshta\.exe|rundll32\.exe|regsvr32\.exe|wscript\.exe|cscript\.exe)\b',
r'\b(certutil\.exe|bitsadmin\.exe|schtasks\.exe|wmic\.exe)\b'
]
for pattern in process_patterns:
matches = re.findall(pattern, full_text, re.IGNORECASE)
indicators["processes"].extend(matches)
# Enhanced file patterns
file_patterns = [
r'\b([a-zA-Z]:\\[^\\\s]+\\[^\\\s]+\.[a-zA-Z0-9]+)\b', # Windows paths
r'\b(/[^/\s]+/[^/\s]+\.[a-zA-Z0-9]+)\b', # Unix paths
r'\b(\w+\.(exe|dll|bat|ps1|py|sh|jar|php|jsp|asp|aspx|bin))\b', # Executable files
r'\b(payload|exploit|shell|backdoor|trojan|malware)\b' # Malicious indicators
]
for pattern in file_patterns:
matches = re.findall(pattern, full_text, re.IGNORECASE)
if matches and isinstance(matches[0], tuple):
indicators["files"].extend([m[0] for m in matches])
else:
indicators["files"].extend(matches)
# Enhanced network patterns
network_patterns = [
r'\b(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\b', # IP addresses
r'\b(https?://[^\s<>"]+)\b', # URLs
r'\b([a-zA-Z0-9-]+\.[a-zA-Z]{2,})\b', # Domain names
r'\b(port|socket|connect|bind|listen)\s*[=:]\s*(\d+)\b' # Port references
]
for pattern in network_patterns:
matches = re.findall(pattern, full_text, re.IGNORECASE)
if 'http' in pattern:
indicators["urls"].extend(matches)
else:
indicators["network"].extend([m if isinstance(m, str) else m[0] for m in matches])
# Enhanced command patterns
command_patterns = [
r'\b(curl|wget|nc|netcat|ncat|telnet|ssh|scp|rsync)\b',
r'\b(whoami|id|uname|systeminfo|ipconfig|ifconfig|ps|top|netstat)\b',
r'\b(cat|type|more|less|head|tail|find|grep|awk|sed)\b',
r'\b(echo|print|printf|base64|decode|encode)\b',
r'\b(invoke|iex|downloadstring|powershell|cmd)\b',
r'\b(net\s+user|net\s+localgroup|net\s+share)\b',
r'\b(sc\s+create|sc\s+start|sc\s+stop|service)\b'
]
for pattern in command_patterns:
matches = re.findall(pattern, full_text, re.IGNORECASE)
indicators["commands"].extend(matches)
# Registry patterns (Windows)
registry_patterns = [
r'\b(HKEY_[A-Z_]+)\b',
r'\b(HKLM|HKCU|HKCR|HKU|HKCC)\b',
r'\b(reg\s+add|reg\s+query|reg\s+delete|regedit)\b',
r'\b(SOFTWARE\\\\[^\\\s]+)\b',
r'\b(SYSTEM\\\\[^\\\s]+)\b'
]
for pattern in registry_patterns:
matches = re.findall(pattern, full_text, re.IGNORECASE)
indicators["registry"].extend(matches)
# MITRE ATT&CK technique indicators
technique_patterns = [
r'\b(privilege.?escalation|lateral.?movement|persistence|evasion)\b',
r'\b(injection|hijack|bypass|overflow|buffer.?overflow)\b',
r'\b(credential.?dump|password.?spray|brute.?force)\b',
r'\b(remote.?code.?execution|arbitrary.?code|code.?injection)\b',
r'\b(dll.?injection|process.?hollow|process.?injection)\b'
]
for pattern in technique_patterns:
matches = re.findall(pattern, full_text, re.IGNORECASE)
indicators["techniques"].extend(matches)
# Clean up and deduplicate all indicators
for key in indicators:
# Remove empty strings and duplicates
indicators[key] = list(set([
item.strip() for item in indicators[key]
if item and len(str(item).strip()) > 2
]))
# Limit to reasonable number of indicators
indicators[key] = indicators[key][:20]
return indicators
def calculate_quality_score(self, repo_info: dict, repo_contents: Dict[str, any]) -> Dict[str, any]:
"""Calculate quality score for a repository"""
quality_score = 0
factors = {}
# Star count factor (0-30 points)
stars = repo_info.get('stargazers_count', 0)
star_score = min(stars * 3, 30) # 3 points per star, max 30
quality_score += star_score
factors["star_score"] = star_score
# Fork count factor (0-20 points)
forks = repo_info.get('forks_count', 0)
fork_score = min(forks * 2, 20) # 2 points per fork, max 20
quality_score += fork_score
factors["fork_score"] = fork_score
# Recency factor (0-20 points)
try:
created_at = datetime.fromisoformat(repo_info.get('created_at', '').replace('Z', '+00:00'))
days_old = (datetime.now(created_at.tzinfo) - created_at).days
recency_score = max(20 - (days_old // 30), 0) # Lose 1 point per month
quality_score += recency_score
factors["recency_score"] = recency_score
except:
factors["recency_score"] = 0
# Description quality factor (0-15 points)
description = repo_info.get('description', '') or ''
desc_score = min(len(description) // 10, 15) # 1 point per 10 chars, max 15
quality_score += desc_score
factors["description_score"] = desc_score
# README quality factor (0-15 points)
readme_content = repo_contents.get('readme_content', '')
readme_score = min(len(readme_content) // 50, 15) # 1 point per 50 chars, max 15
quality_score += readme_score
factors["readme_score"] = readme_score
return {
"quality_score": quality_score,
"factors": factors,
"quality_tier": self._get_quality_tier(quality_score)
}
def _get_quality_tier(self, score: int) -> str:
"""Get quality tier based on score"""
if score >= 80:
return "excellent"
elif score >= 60:
return "good"
elif score >= 40:
return "fair"
elif score >= 20:
return "poor"
else:
return "very_poor"
async def sync_cve_pocs(self, cve_id: str) -> dict:
"""Synchronize PoC data for a specific CVE using GitHub PoC data"""
from main import CVE, SigmaRule
# Get existing CVE
cve = self.db_session.query(CVE).filter(CVE.cve_id == cve_id).first()
if not cve:
logger.warning(f"CVE {cve_id} not found in database")
return {"error": "CVE not found"}
# Load GitHub PoC data
github_poc_data = self.load_github_poc_data()
if cve_id not in github_poc_data:
logger.info(f"No PoCs found for {cve_id} in GitHub PoC data")
return {"cve_id": cve_id, "pocs_found": 0}
repos = github_poc_data[cve_id]
# Process each repository
poc_data = []
github_repos = []
total_quality_score = 0
for repo_info in repos:
try:
# Fetch repository contents
repo_contents = await self.get_repository_contents(repo_info['html_url'])
# Analyze for indicators
indicators = self.analyze_repository_for_indicators(repo_contents)
# Calculate quality score
quality_analysis = self.calculate_quality_score(repo_info, repo_contents)
poc_entry = {
"id": repo_info.get('name', ''),
"name": repo_info.get('name', ''),
"full_name": repo_info.get('full_name', ''),
"html_url": repo_info.get('html_url', ''),
"description": repo_info.get('description', ''),
"stargazers_count": repo_info.get('stargazers_count', 0),
"forks_count": repo_info.get('forks_count', 0),
"created_at": repo_info.get('created_at', ''),
"quality_analysis": quality_analysis,
"exploit_indicators": indicators,
"source": "mcdevitt_github"
}
poc_data.append(poc_entry)
github_repos.append(repo_info.get('html_url', ''))
total_quality_score += quality_analysis["quality_score"]
except Exception as e:
logger.error(f"Error processing repo {repo_info.get('html_url', '')}: {e}")
continue
# Update CVE with PoC data
cve.poc_count = len(poc_data)
cve.poc_data = poc_data
cve.updated_at = datetime.utcnow()
# Update or create SIGMA rule with enhanced PoC data
sigma_rule = self.db_session.query(SigmaRule).filter(
SigmaRule.cve_id == cve_id
).first()
if sigma_rule:
sigma_rule.poc_source = 'github_poc'
sigma_rule.poc_quality_score = total_quality_score // len(poc_data) if poc_data else 0
sigma_rule.nomi_sec_data = {
"total_pocs": len(poc_data),
"average_quality": total_quality_score // len(poc_data) if poc_data else 0,
"best_poc": max(poc_data, key=lambda x: x["quality_analysis"]["quality_score"]) if poc_data else None,
"total_stars": sum(p["stargazers_count"] for p in poc_data),
"source": "github_poc"
}
sigma_rule.github_repos = github_repos
sigma_rule.updated_at = datetime.utcnow()
# Extract best exploit indicators
best_indicators = {}
for poc in poc_data:
for key, values in poc["exploit_indicators"].items():
if key not in best_indicators:
best_indicators[key] = []
best_indicators[key].extend(values)
# Deduplicate and store
for key in best_indicators:
best_indicators[key] = list(set(best_indicators[key]))
sigma_rule.exploit_indicators = json.dumps(best_indicators)
self.db_session.commit()
logger.info(f"Synchronized {len(poc_data)} PoCs for {cve_id}")
return {
"cve_id": cve_id,
"pocs_found": len(poc_data),
"total_quality_score": total_quality_score,
"average_quality": total_quality_score // len(poc_data) if poc_data else 0,
"github_repos": github_repos,
"source": "github_poc"
}
async def bulk_sync_all_cves(self, batch_size: int = 50) -> dict:
"""Bulk synchronize all CVEs with GitHub PoC data"""
from main import CVE, BulkProcessingJob
# Load all GitHub PoC data first
github_poc_data = self.load_github_poc_data()
if not github_poc_data:
return {"error": "No GitHub PoC data found"}
# Create bulk processing job
job = BulkProcessingJob(
job_type='github_poc_sync',
status='running',
started_at=datetime.utcnow(),
total_items=len(github_poc_data),
job_metadata={'batch_size': batch_size}
)
self.db_session.add(job)
self.db_session.commit()
total_processed = 0
total_found = 0
results = []
try:
# Process each CVE that has PoC data
cve_ids = list(github_poc_data.keys())
for i in range(0, len(cve_ids), batch_size):
batch = cve_ids[i:i + batch_size]
for cve_id in batch:
try:
result = await self.sync_cve_pocs(cve_id)
total_processed += 1
if result.get("pocs_found", 0) > 0:
total_found += result["pocs_found"]
results.append(result)
job.processed_items += 1
# Small delay to avoid overwhelming GitHub API
await asyncio.sleep(1)
except Exception as e:
logger.error(f"Error syncing PoCs for {cve_id}: {e}")
job.failed_items += 1
# Commit after each batch
self.db_session.commit()
logger.info(f"Processed batch {i//batch_size + 1}/{(len(cve_ids) + batch_size - 1)//batch_size}")
# Update job status
job.status = 'completed'
job.completed_at = datetime.utcnow()
job.job_metadata.update({
'total_processed': total_processed,
'total_pocs_found': total_found,
'cves_with_pocs': len(results)
})
except Exception as e:
job.status = 'failed'
job.error_message = str(e)
job.completed_at = datetime.utcnow()
logger.error(f"Bulk McDevitt sync job failed: {e}")
finally:
self.db_session.commit()
return {
'job_id': str(job.id),
'status': job.status,
'total_processed': total_processed,
'total_pocs_found': total_found,
'cves_with_pocs': len(results)
}
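
A sketch of driving the client directly, outside the API (it reuses `SessionLocal` from `main` the way the other scripts in this commit do, and assumes it runs inside the backend container so the mounted PoC path exists):

```python
import asyncio

from main import SessionLocal
from mcdevitt_poc_client import GitHubPoCClient

async def run():
    db = SessionLocal()
    try:
        client = GitHubPoCClient(db)          # picks up GITHUB_TOKEN if set
        result = await client.sync_cve_pocs("CVE-2024-0001")  # illustrative CVE ID
        print(result)
    finally:
        db.close()

asyncio.run(run())
```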

View file

@ -14,3 +14,4 @@ beautifulsoup4==4.12.2
lxml==4.9.3
aiohttp==3.9.1
aiofiles
pyyaml==6.0.1

backend/templates/README.md Normal file
View file

@ -0,0 +1,128 @@
# SIGMA Rule Templates
This directory contains YAML template files for generating SIGMA rules automatically based on CVE and PoC data.
## Template Structure
Each template file is a YAML file with the following structure:
```yaml
template_name: "Template Name"
description: "Description of what this template detects"
applicable_product_patterns:
- "pattern1"
- "pattern2"
template_content: |
title: {{TITLE}}
id: {{RULE_ID}}
# ... SIGMA rule content with placeholders
```
## Template Fields
- **template_name**: Human-readable name for the template
- **description**: What the template is designed to detect
- **applicable_product_patterns**: List of keywords that help match CVEs to this template
- **template_content**: The actual SIGMA rule template with placeholders
## Placeholders
Templates use the following placeholders that get replaced during rule generation:
- `{{TITLE}}`: Rule title based on CVE
- `{{RULE_ID}}`: Unique rule identifier
- `{{DESCRIPTION}}`: CVE description and context
- `{{DATE}}`: Rule creation date
- `{{REFERENCES}}`: CVE references and PoC URLs
- `{{TAGS}}`: Relevant tags based on CVE data
- `{{LEVEL}}`: Severity level (low, medium, high, critical)
- `{{PROCESSES}}`: Process names from PoC analysis
- `{{COMMANDS}}`: Command patterns from PoC analysis
- `{{FILES}}`: File paths from PoC analysis
- `{{NETWORK}}`: Network indicators from PoC analysis
- `{{URLS}}`: URL patterns from PoC analysis
- `{{REGISTRY}}`: Registry keys from PoC analysis
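
The placeholders are plain `{{...}}` tokens, so substitution can be as simple as string replacement; a minimal sketch of the idea (the project's actual replacement logic is not shown in this commit and may differ):

```python
import uuid
from datetime import datetime

def render(template: str, values: dict) -> str:
    # Naive placeholder substitution; assumes every {{KEY}} has a value.
    for key, value in values.items():
        template = template.replace("{{" + key + "}}", value)
    return template

print(render(
    "title: {{TITLE}}\nid: {{RULE_ID}}\ndate: {{DATE}}\nlevel: {{LEVEL}}",
    {
        "TITLE": "Example CVE Detection",
        "RULE_ID": str(uuid.uuid4()),
        "DATE": datetime.utcnow().strftime("%Y/%m/%d"),
        "LEVEL": "high",
    },
))
```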
## Available Templates
### Core Categories
1. **process_execution.yaml** - Process creation and execution
2. **network_connection.yaml** - Network connections and communications
3. **file_system_activity.yaml** - File system operations
4. **registry_modification.yaml** - Windows registry changes
### Attack Techniques
5. **powershell_execution.yaml** - PowerShell-based attacks
6. **web_application_attack.yaml** - Web application vulnerabilities
7. **command_injection.yaml** - Command injection attacks
8. **privilege_escalation.yaml** - Privilege escalation attempts
9. **credential_access.yaml** - Credential theft and access
10. **persistence.yaml** - Persistence mechanisms
11. **lateral_movement.yaml** - Lateral movement techniques
12. **service_manipulation.yaml** - Windows service manipulation
## Usage
### Initialize Templates
```bash
# Load all templates into the database
docker-compose exec backend python initialize_templates.py
# List available templates
docker-compose exec backend python initialize_templates.py list
```
### Adding New Templates
1. Create a new YAML file in this directory
2. Follow the template structure above
3. Choose appropriate `applicable_product_patterns` keywords
4. Use relevant placeholders in your `template_content`
5. Run the initialization script to load into database
### Template Matching
Templates are automatically matched to CVEs based on:
- Product patterns matching CVE affected products
- PoC analysis indicators
- CVE description keywords
## Best Practices
1. **Specific Patterns**: Use specific product patterns for better matching
2. **False Positives**: Include realistic false positive scenarios
3. **Conditions**: Use appropriate SIGMA detection conditions
4. **Log Sources**: Match log sources to the type of activity being detected
5. **Severity**: Use appropriate severity levels based on the attack impact
## Example Template
```yaml
template_name: "Example Detection"
description: "Detects example malicious activity"
applicable_product_patterns:
- "example"
- "software"
template_content: |
title: {{TITLE}}
id: {{RULE_ID}}
status: experimental
description: {{DESCRIPTION}}
author: CVE-SIGMA Auto Generator
date: {{DATE}}
references:
{{REFERENCES}}
tags:
{{TAGS}}
logsource:
category: process_creation
product: windows
detection:
selection:
Image|endswith:
{{PROCESSES}}
condition: selection
falsepositives:
- Legitimate use cases
level: {{LEVEL}}
```

View file

@ -0,0 +1,44 @@
template_name: "Command Injection Detection"
description: "Detects command injection attempts based on PoC exploit indicators"
applicable_product_patterns:
- "injection"
- "command"
- "shell"
- "exec"
- "system"
template_content: |
title: {{TITLE}}
id: {{RULE_ID}}
status: experimental
description: {{DESCRIPTION}}
author: CVE-SIGMA Auto Generator
date: {{DATE}}
references:
{{REFERENCES}}
tags:
{{TAGS}}
logsource:
category: process_creation
product: windows
detection:
selection_shell:
Image|endswith:
- '\\cmd.exe'
- '\\powershell.exe'
- '\\bash.exe'
- '\\sh.exe'
selection_injection:
CommandLine|contains:
{{COMMANDS}}
selection_patterns:
CommandLine|contains:
- '&'
- '|'
- ';'
- '`'
- '$('
condition: (selection_shell and selection_injection) or selection_patterns
falsepositives:
- Legitimate command line usage
- System administration scripts
level: {{LEVEL}}

View file

@ -0,0 +1,39 @@
template_name: "Credential Access Detection"
description: "Detects credential access attempts based on PoC exploit indicators"
applicable_product_patterns:
- "credential"
- "password"
- "hash"
- "dump"
- "lsass"
- "mimikatz"
template_content: |
title: {{TITLE}}
id: {{RULE_ID}}
status: experimental
description: {{DESCRIPTION}}
author: CVE-SIGMA Auto Generator
date: {{DATE}}
references:
{{REFERENCES}}
tags:
{{TAGS}}
logsource:
category: process_creation
product: windows
detection:
selection_lsass:
Image|contains:
- 'lsass'
- 'mimikatz'
selection_creds:
CommandLine|contains:
{{COMMANDS}}
selection_files:
TargetFilename|contains:
{{FILES}}
condition: selection_lsass or selection_creds or selection_files
falsepositives:
- Legitimate authentication processes
- Password management software
level: {{LEVEL}}

View file

@ -0,0 +1,30 @@
template_name: "File System Activity Detection"
description: "Detects suspicious file system activity based on PoC exploit indicators"
applicable_product_patterns:
- "file"
- "filesystem"
- "upload"
- "download"
template_content: |
title: {{TITLE}}
id: {{RULE_ID}}
status: experimental
description: {{DESCRIPTION}}
author: CVE-SIGMA Auto Generator
date: {{DATE}}
references:
{{REFERENCES}}
tags:
{{TAGS}}
logsource:
category: file_event
product: windows
detection:
selection:
TargetFilename|contains:
{{FILES}}
condition: selection
falsepositives:
- Legitimate file operations
- Software installations
level: {{LEVEL}}

View file

@ -0,0 +1,42 @@
template_name: "Lateral Movement Detection"
description: "Detects lateral movement attempts based on PoC exploit indicators"
applicable_product_patterns:
- "lateral"
- "movement"
- "remote"
- "psexec"
- "wmic"
- "rdp"
template_content: |
title: {{TITLE}}
id: {{RULE_ID}}
status: experimental
description: {{DESCRIPTION}}
author: CVE-SIGMA Auto Generator
date: {{DATE}}
references:
{{REFERENCES}}
tags:
{{TAGS}}
logsource:
category: process_creation
product: windows
detection:
selection_psexec:
Image|endswith: '\\psexec.exe'
CommandLine|contains:
{{COMMANDS}}
selection_wmic:
Image|endswith: '\\wmic.exe'
CommandLine|contains:
- 'process'
- 'call'
- 'create'
selection_remote:
CommandLine|contains:
{{NETWORK}}
condition: selection_psexec or selection_wmic or selection_remote
falsepositives:
- Legitimate remote administration
- System management tools
level: {{LEVEL}}

View file

@ -0,0 +1,36 @@
template_name: "Network Connection Detection"
description: "Detects suspicious network connections based on PoC exploit indicators"
applicable_product_patterns:
- "network"
- "web"
- "http"
- "https"
- "tcp"
- "udp"
template_content: |
title: {{TITLE}}
id: {{RULE_ID}}
status: experimental
description: {{DESCRIPTION}}
author: CVE-SIGMA Auto Generator
date: {{DATE}}
references:
{{REFERENCES}}
tags:
{{TAGS}}
logsource:
category: network_connection
product: windows
detection:
selection:
Initiated: true
DestinationIp:
{{NETWORK}}
selection_url:
DestinationHostname|contains:
{{URLS}}
condition: selection or selection_url
falsepositives:
- Legitimate network connections
- Software updates
level: {{LEVEL}}

View file

@ -0,0 +1,40 @@
template_name: "Persistence Detection"
description: "Detects persistence mechanisms based on PoC exploit indicators"
applicable_product_patterns:
- "persistence"
- "startup"
- "autorun"
- "scheduled"
- "task"
- "cron"
template_content: |
title: {{TITLE}}
id: {{RULE_ID}}
status: experimental
description: {{DESCRIPTION}}
author: CVE-SIGMA Auto Generator
date: {{DATE}}
references:
{{REFERENCES}}
tags:
{{TAGS}}
logsource:
category: process_creation
product: windows
detection:
selection_schtasks:
Image|endswith: '\\schtasks.exe'
CommandLine|contains:
{{COMMANDS}}
selection_startup:
TargetFilename|contains:
- '\\Startup\\'
- '\\Start Menu\\'
selection_registry:
TargetObject|contains:
{{REGISTRY}}
condition: selection_schtasks or selection_startup or selection_registry
falsepositives:
- Legitimate software installations
- System configuration changes
level: {{LEVEL}}

View file

@ -0,0 +1,32 @@
template_name: "PowerShell Execution Detection"
description: "Detects suspicious PowerShell execution based on PoC exploit indicators"
applicable_product_patterns:
- "powershell"
- "windows"
- "microsoft"
template_content: |
title: {{TITLE}}
id: {{RULE_ID}}
status: experimental
description: {{DESCRIPTION}}
author: CVE-SIGMA Auto Generator
date: {{DATE}}
references:
{{REFERENCES}}
tags:
{{TAGS}}
logsource:
category: process_creation
product: windows
detection:
selection:
Image|endswith:
- '\\powershell.exe'
- '\\pwsh.exe'
CommandLine|contains:
{{COMMANDS}}
condition: selection
falsepositives:
- Legitimate PowerShell scripts
- System administration
level: {{LEVEL}}

View file

@ -0,0 +1,41 @@
template_name: "Privilege Escalation Detection"
description: "Detects privilege escalation attempts based on PoC exploit indicators"
applicable_product_patterns:
- "privilege"
- "escalation"
- "admin"
- "root"
- "sudo"
- "runas"
template_content: |
title: {{TITLE}}
id: {{RULE_ID}}
status: experimental
description: {{DESCRIPTION}}
author: CVE-SIGMA Auto Generator
date: {{DATE}}
references:
{{REFERENCES}}
tags:
{{TAGS}}
logsource:
category: process_creation
product: windows
detection:
selection_runas:
Image|endswith: '\\runas.exe'
CommandLine|contains:
{{COMMANDS}}
selection_whoami:
Image|endswith: '\\whoami.exe'
CommandLine|contains:
- '/priv'
- '/groups'
selection_exploit:
CommandLine|contains:
{{PROCESSES}}
condition: selection_runas or selection_whoami or selection_exploit
falsepositives:
- Legitimate administrative tasks
- System diagnostics
level: {{LEVEL}}

View file

@ -0,0 +1,34 @@
template_name: "Process Execution Detection"
description: "Detects suspicious process execution based on PoC exploit indicators"
applicable_product_patterns:
- "windows"
- "microsoft"
- "office"
- "exchange"
- "sharepoint"
template_content: |
title: {{TITLE}}
id: {{RULE_ID}}
status: experimental
description: {{DESCRIPTION}}
author: CVE-SIGMA Auto Generator
date: {{DATE}}
references:
{{REFERENCES}}
tags:
{{TAGS}}
logsource:
category: process_creation
product: windows
detection:
selection:
Image|endswith:
{{PROCESSES}}
selection_cmd:
CommandLine|contains:
{{COMMANDS}}
condition: selection or selection_cmd
falsepositives:
- Legitimate software installations
- System administration tasks
level: {{LEVEL}}

View file

@ -0,0 +1,29 @@
template_name: "Registry Modification Detection"
description: "Detects suspicious registry modifications based on PoC exploit indicators"
applicable_product_patterns:
- "registry"
- "windows"
- "microsoft"
template_content: |
title: {{TITLE}}
id: {{RULE_ID}}
status: experimental
description: {{DESCRIPTION}}
author: CVE-SIGMA Auto Generator
date: {{DATE}}
references:
{{REFERENCES}}
tags:
{{TAGS}}
logsource:
category: registry_event
product: windows
detection:
selection:
TargetObject|contains:
{{REGISTRY}}
condition: selection
falsepositives:
- Legitimate software configuration changes
- System updates
level: {{LEVEL}}

View file

@ -0,0 +1,42 @@
template_name: "Service Manipulation Detection"
description: "Detects suspicious Windows service manipulation based on PoC exploit indicators"
applicable_product_patterns:
- "service"
- "windows"
- "microsoft"
- "sc.exe"
- "net.exe"
template_content: |
title: {{TITLE}}
id: {{RULE_ID}}
status: experimental
description: {{DESCRIPTION}}
author: CVE-SIGMA Auto Generator
date: {{DATE}}
references:
{{REFERENCES}}
tags:
{{TAGS}}
logsource:
category: process_creation
product: windows
detection:
selection_sc:
Image|endswith: '\\sc.exe'
CommandLine|contains:
{{COMMANDS}}
selection_net:
Image|endswith: '\\net.exe'
CommandLine|contains:
- 'start'
- 'stop'
- 'pause'
- 'continue'
selection_service:
CommandLine|contains:
{{PROCESSES}}
condition: selection_sc or selection_net or selection_service
falsepositives:
- Legitimate system administration
- Software installations
level: {{LEVEL}}

View file

@ -0,0 +1,33 @@
template_name: "Web Application Attack Detection"
description: "Detects web application attacks based on PoC exploit indicators"
applicable_product_patterns:
- "web"
- "http"
- "apache"
- "nginx"
- "iis"
template_content: |
title: {{TITLE}}
id: {{RULE_ID}}
status: experimental
description: {{DESCRIPTION}}
author: CVE-SIGMA Auto Generator
date: {{DATE}}
references:
{{REFERENCES}}
tags:
{{TAGS}}
logsource:
category: webserver
detection:
selection:
cs-uri-query|contains:
{{URLS}}
selection_user_agent:
cs-user-agent|contains:
{{COMMANDS}}
condition: selection or selection_user_agent
falsepositives:
- Legitimate web application usage
- Security scanners
level: {{LEVEL}}

View file

@ -29,6 +29,7 @@ services:
condition: service_healthy
volumes:
- ./backend:/app
- ./github_poc_collector:/github_poc_collector
command: uvicorn main:app --host 0.0.0.0 --port 8000 --reload
frontend:
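
The new `./github_poc_collector:/github_poc_collector` mount is what makes the hardcoded `Path("/github_poc_collector/exploits")` in `GitHubPoCClient` resolve inside the container.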

View file

@ -18,7 +18,9 @@ function App() {
const [bulkJobs, setBulkJobs] = useState([]);
const [bulkStatus, setBulkStatus] = useState({});
const [pocStats, setPocStats] = useState({});
const [gitHubPocStats, setGitHubPocStats] = useState({});
const [bulkProcessing, setBulkProcessing] = useState(false);
const [hasRunningJobs, setHasRunningJobs] = useState(false);
useEffect(() => {
fetchData();
@ -27,13 +29,14 @@ function App() {
const fetchData = async () => {
try {
setLoading(true);
const [cvesRes, rulesRes, statsRes, bulkJobsRes, bulkStatusRes, pocStatsRes] = await Promise.all([
const [cvesRes, rulesRes, statsRes, bulkJobsRes, bulkStatusRes, pocStatsRes, githubPocStatsRes] = await Promise.all([
axios.get(`${API_BASE_URL}/api/cves`),
axios.get(`${API_BASE_URL}/api/sigma-rules`),
axios.get(`${API_BASE_URL}/api/stats`),
axios.get(`${API_BASE_URL}/api/bulk-jobs`),
axios.get(`${API_BASE_URL}/api/bulk-status`),
axios.get(`${API_BASE_URL}/api/poc-stats`)
axios.get(`${API_BASE_URL}/api/poc-stats`),
axios.get(`${API_BASE_URL}/api/github-poc-stats`).catch(err => ({ data: {} }))
]);
setCves(cvesRes.data);
@ -42,6 +45,11 @@ function App() {
setBulkJobs(bulkJobsRes.data);
setBulkStatus(bulkStatusRes.data);
setPocStats(pocStatsRes.data);
setGitHubPocStats(githubPocStatsRes.data);
// Update running jobs state
const runningJobs = bulkJobsRes.data.filter(job => job.status === 'running' || job.status === 'pending');
setHasRunningJobs(runningJobs.length > 0);
} catch (error) {
console.error('Error fetching data:', error);
} finally {
@ -99,68 +107,62 @@ function App() {
const startBulkSeed = async (startYear = 2020, endYear = null) => {
try {
setBulkProcessing(true);
const response = await axios.post(`${API_BASE_URL}/api/bulk-seed`, {
start_year: startYear,
end_year: endYear
end_year: endYear,
skip_nomi_sec: true
});
console.log('Bulk seed response:', response.data);
// Refresh data after starting
setTimeout(() => {
fetchData();
}, 2000);
// Refresh data immediately to show job started
fetchData();
} catch (error) {
console.error('Error starting bulk seed:', error);
setBulkProcessing(false);
}
};
const startIncrementalUpdate = async () => {
try {
setBulkProcessing(true);
const response = await axios.post(`${API_BASE_URL}/api/incremental-update`);
console.log('Incremental update response:', response.data);
setTimeout(() => {
fetchData();
setBulkProcessing(false);
}, 2000);
fetchData();
} catch (error) {
console.error('Error starting incremental update:', error);
setBulkProcessing(false);
}
};
const syncNomiSec = async (cveId = null) => {
try {
setBulkProcessing(true);
const response = await axios.post(`${API_BASE_URL}/api/sync-nomi-sec`, {
cve_id: cveId
});
console.log('Nomi-sec sync response:', response.data);
setTimeout(() => {
fetchData();
setBulkProcessing(false);
}, 2000);
fetchData();
} catch (error) {
console.error('Error syncing nomi-sec:', error);
setBulkProcessing(false);
}
};
const syncGitHubPocs = async (cveId = null) => {
try {
const response = await axios.post(`${API_BASE_URL}/api/sync-github-pocs`, {
cve_id: cveId
});
console.log('GitHub PoC sync response:', response.data);
fetchData();
} catch (error) {
console.error('Error syncing GitHub PoCs:', error);
}
};
const regenerateRules = async (force = false) => {
try {
setBulkProcessing(true);
const response = await axios.post(`${API_BASE_URL}/api/regenerate-rules`, {
force: force
});
console.log('Rule regeneration response:', response.data);
setTimeout(() => {
fetchData();
setBulkProcessing(false);
}, 2000);
fetchData();
} catch (error) {
console.error('Error regenerating rules:', error);
setBulkProcessing(false);
}
};
@ -194,11 +196,13 @@ function App() {
<h3 className="text-lg font-medium text-gray-900">SIGMA Rules</h3>
<p className="text-3xl font-bold text-green-600">{stats.total_sigma_rules || 0}</p>
<p className="text-sm text-gray-500">Nomi-sec: {stats.nomi_sec_rules || 0}</p>
<p className="text-sm text-gray-500">GitHub PoCs: {gitHubPocStats.github_poc_rules || 0}</p>
</div>
<div className="bg-white p-6 rounded-lg shadow">
<h3 className="text-lg font-medium text-gray-900">CVEs with PoCs</h3>
<p className="text-3xl font-bold text-purple-600">{stats.cves_with_pocs || 0}</p>
<p className="text-sm text-gray-500">{(stats.poc_coverage || 0).toFixed(1)}% coverage</p>
<p className="text-sm text-gray-500">GitHub PoCs: {gitHubPocStats.cves_with_github_pocs || 0}</p>
</div>
<div className="bg-white p-6 rounded-lg shadow">
<h3 className="text-lg font-medium text-gray-900">Recent CVEs (7d)</h3>
@ -208,6 +212,7 @@ function App() {
<h3 className="text-lg font-medium text-gray-900">High Quality PoCs</h3>
<p className="text-3xl font-bold text-indigo-600">{pocStats.high_quality_cves || 0}</p>
<p className="text-sm text-gray-500">Avg: {(pocStats.avg_poc_count || 0).toFixed(1)}</p>
<p className="text-sm text-gray-500">GitHub PoCs Avg: {(gitHubPocStats.average_quality_score || 0).toFixed(1)}</p>
</div>
</div>
@ -217,47 +222,58 @@ function App() {
<div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-4 gap-4">
<button
onClick={() => startBulkSeed(2020)}
disabled={bulkProcessing}
disabled={hasRunningJobs}
className={`px-4 py-2 rounded-md text-white ${
bulkProcessing
hasRunningJobs
? 'bg-gray-400 cursor-not-allowed'
: 'bg-blue-600 hover:bg-blue-700'
}`}
>
{bulkProcessing ? 'Processing...' : 'Bulk Seed (2020+)'}
{hasRunningJobs ? 'Processing...' : 'Sync NVD CVEs'}
</button>
<button
onClick={startIncrementalUpdate}
disabled={bulkProcessing}
disabled={hasRunningJobs}
className={`px-4 py-2 rounded-md text-white ${
bulkProcessing
hasRunningJobs
? 'bg-gray-400 cursor-not-allowed'
: 'bg-green-600 hover:bg-green-700'
}`}
>
{bulkProcessing ? 'Processing...' : 'Incremental Update'}
{hasRunningJobs ? 'Processing...' : 'Incremental Update'}
</button>
<button
onClick={() => syncNomiSec()}
disabled={bulkProcessing}
disabled={hasRunningJobs}
className={`px-4 py-2 rounded-md text-white ${
bulkProcessing
hasRunningJobs
? 'bg-gray-400 cursor-not-allowed'
: 'bg-purple-600 hover:bg-purple-700'
}`}
>
{bulkProcessing ? 'Processing...' : 'Sync nomi-sec PoCs'}
{hasRunningJobs ? 'Processing...' : 'Sync nomi-sec PoCs'}
</button>
<button
onClick={() => syncGitHubPocs()}
disabled={hasRunningJobs}
className={`px-4 py-2 rounded-md text-white ${
hasRunningJobs
? 'bg-gray-400 cursor-not-allowed'
: 'bg-green-600 hover:bg-green-700'
}`}
>
{hasRunningJobs ? 'Processing...' : 'Sync GitHub PoCs'}
</button>
<button
onClick={() => regenerateRules()}
disabled={bulkProcessing}
disabled={hasRunningJobs}
className={`px-4 py-2 rounded-md text-white ${
bulkProcessing
hasRunningJobs
? 'bg-gray-400 cursor-not-allowed'
: 'bg-indigo-600 hover:bg-indigo-700'
}`}
>
{bulkProcessing ? 'Processing...' : 'Regenerate Rules'}
{hasRunningJobs ? 'Processing...' : 'Regenerate Rules'}
</button>
</div>
</div>

github_poc_collector Submodule

@ -0,0 +1 @@
Subproject commit 5c171fb9a97e214ddcdea82054c97e09438504c3