"""
Enhanced SIGMA Rule Generator

Generates improved SIGMA rules using a hybrid approach:
1. Generate YAML metadata with application code
2. Use LLM to create logsource and detection sections based on PoC analysis
"""

import json
import logging
import re
from datetime import datetime
from typing import Dict, List, Optional, Tuple

from sqlalchemy.orm import Session

from llm_client import LLMClient
from enhanced_llm_client import EnhancedLLMClient
from yaml_metadata_generator import YAMLMetadataGenerator
from cve2capec_client import CVE2CAPECClient
from poc_analyzer import PoCAnalyzer

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Indentation of generated indicator list items. The built-in templates put
# the owning key at the same depth, which YAML accepts for block sequences.
_INDICATOR_INDENT = " " * 8

# Fragment identifying the wildcard placeholder lines produced by
# ``_format_indicators``; ``_clean_empty_sections`` searches for it.
_NO_INDICATOR_MARKER = '- "*" # No'

# Indicator categories merged across PoCs.
# NOTE(review): 'ports' is requested by ``_generate_rule_content`` but is not
# collected here, so port placeholders always render the wildcard entry.
_INDICATOR_KEYS = ('processes', 'files', 'commands', 'network', 'urls', 'registry')

# File-name fragments (lower-case, substring match) that mark a likely exploit
# source file. README files are handled by a separate, later pass.
_POC_TARGET_FILES = (
    'exploit.py', 'poc.py', 'exploit.c', 'exploit.cpp', 'exploit.java',
    'exploit.rb', 'exploit.php', 'exploit.js', 'exploit.sh', 'exploit.ps1',
    'main.py', 'index.js',
)

# Words from a CVE description that are echoed into the rule description.
_DESCRIPTION_KEY_TERMS = frozenset({
    'remote', 'execution', 'injection', 'bypass', 'privilege', 'escalation',
    'overflow', 'disclosure', 'traversal', 'deserialization',
})

# Captures "<owner>/<repo>" directly after the GitHub host name.
_GITHUB_REPO_RE = re.compile(r'github\.com[/:](?P<owner>[^/\s]+)/(?P<repo>[^/\s?#]+)')

# A single-line detection condition whose expression starts with "selection".
_SELECTION_CONDITION_RE = re.compile(
    r'^(?P<indent>[ \t]*)condition:[ \t]*(?P<expr>selection[^\n#]*?)[ \t]*$',
    re.MULTILINE,
)

# Shared head of every built-in template. Placeholders on their own line are
# replaced by pre-indented YAML list items.
_DEFAULT_TEMPLATE_HEADER = """title: {{TITLE}}
id: {{RULE_ID}}
status: experimental
description: {{DESCRIPTION}}
author: CVE-SIGMA Auto Generator
date: {{DATE}}
references:
{{REFERENCES}}
tags:
{{TAGS}}
"""

_DEFAULT_TEMPLATE_BODIES = {
    "network": """logsource:
    category: network_connection
    product: windows
detection:
    selection:
        Initiated: true
        DestinationIp:
{{NETWORK}}
    selection_url:
        DestinationHostname|contains:
{{URLS}}
    condition: selection or selection_url
falsepositives:
    - Legitimate network connections
level: {{LEVEL}}""",
    "file": """logsource:
    category: file_event
    product: windows
detection:
    selection:
        TargetFilename|contains:
{{FILES}}
    condition: selection
falsepositives:
    - Legitimate file operations
level: {{LEVEL}}""",
    "powershell": """logsource:
    category: process_creation
    product: windows
detection:
    selection:
        Image|endswith:
            - '\\powershell.exe'
            - '\\pwsh.exe'
        CommandLine|contains:
{{COMMANDS}}
    condition: selection
falsepositives:
    - Legitimate PowerShell scripts
level: {{LEVEL}}""",
    "process": """logsource:
    category: process_creation
    product: windows
detection:
    selection:
        Image|endswith:
{{PROCESSES}}
    selection_cmd:
        CommandLine|contains:
{{COMMANDS}}
    condition: selection or selection_cmd
falsepositives:
    - Legitimate software usage
level: {{LEVEL}}""",
}


class _InMemoryTemplate:
    """Minimal, never-persisted stand-in exposing the template attributes this module reads."""

    def __init__(self, name: str, content: str = "", product_patterns: Optional[list] = None):
        self.template_name = name
        self.template_content = content
        self.applicable_product_patterns = list(product_patterns or [])


def _quality_analysis(poc) -> dict:
    """Return the PoC's ``quality_analysis`` mapping, or ``{}`` when missing or not a dict."""
    analysis = poc.get('quality_analysis') if isinstance(poc, dict) else None
    return analysis if isinstance(analysis, dict) else {}


def _quality_score(poc):
    """Return the PoC's ``quality_score``, treating a missing or ``None`` value as 0."""
    return _quality_analysis(poc).get('quality_score') or 0


def _star_count(poc: dict):
    """Return the PoC's ``stargazers_count``, treating a missing or ``None`` value as 0."""
    return poc.get('stargazers_count') or 0


def _format_confidence(value) -> str:
    """Render a confidence value with two decimals; fall back to ``str`` for non-numbers."""
    try:
        return f"{float(value):.2f}"
    except (TypeError, ValueError):
        return str(value)


def _reindent(block: str, indent: str) -> str:
    """Strip every line of *block* and prefix it with *indent*."""
    return '\n'.join(indent + line.strip() for line in block.splitlines())


class EnhancedSigmaGenerator:
    """Enhanced SIGMA rule generator using nomi-sec PoC data"""

    def __init__(self, db_session: Session, llm_provider: Optional[str] = None, llm_model: Optional[str] = None):
        """Wire up the database session and the helper clients.

        Args:
            db_session: SQLAlchemy session used to read templates and store rules.
            llm_provider: Provider name forwarded to both LLM clients.
            llm_model: Model name forwarded to both LLM clients.
        """
        self.db_session = db_session
        self.llm_client = LLMClient(provider=llm_provider, model=llm_model)  # Keep for backward compatibility
        self.enhanced_llm_client = EnhancedLLMClient(provider=llm_provider, model=llm_model)
        self.yaml_generator = YAMLMetadataGenerator(db_session)
        self.cve2capec_client = CVE2CAPECClient()
        self.poc_analyzer = PoCAnalyzer()

    async def generate_enhanced_rule(self, cve, use_llm: bool = True, use_hybrid: bool = True) -> dict:
        """Generate enhanced SIGMA rule for a CVE using PoC data.

        Generation strategies are tried in order until one yields content:
        hybrid (metadata + LLM detection), plain LLM, then template
        substitution. The result is inserted or updated in the ``SigmaRule``
        table keyed by ``cve.cve_id`` and committed.

        Args:
            cve: Object exposing ``cve_id``, ``poc_data``, ``description``,
                ``cvss_score`` and ``affected_products``.
            use_llm: Allow the plain LLM strategy.
            use_hybrid: Allow the hybrid strategy.

        Returns:
            On success a dict with ``success``, ``cve_id``, ``template``,
            ``confidence_level``, ``poc_count`` and ``quality_score``;
            otherwise ``{'success': False, 'error': <message>}``. Exceptions
            are logged and reported through the return value, never raised.
        """
        # Imported at call time, as the original code did (presumably to avoid
        # a circular import with ``main`` - TODO confirm).
        from main import SigmaRule

        try:
            # Get PoC data and find the best quality PoC
            poc_data = cve.poc_data or []
            best_poc = max(poc_data, key=_quality_score) if poc_data else None
            best_quality = _quality_score(best_poc) if best_poc else 0

            rule_content = None
            generation_method = "template"
            template = None

            # Try hybrid approach first if enabled and available
            if use_hybrid and self.enhanced_llm_client.is_available() and best_poc:
                provider = self.enhanced_llm_client.provider
                logger.info(f"Attempting hybrid rule generation for {cve.cve_id} using {provider}")
                rule_content = await self._generate_hybrid_rule(cve, best_poc, poc_data)
                if rule_content:
                    generation_method = f"hybrid_{provider}"
                    template = _InMemoryTemplate(f"Hybrid Generated ({provider})")

            # Fallback to original LLM-enhanced generation. This is a real
            # fallback: it also runs when the hybrid attempt produced nothing.
            if not rule_content and use_llm and self.llm_client.is_available() and best_poc:
                provider = self.llm_client.provider
                logger.info(f"Attempting LLM-enhanced rule generation for {cve.cve_id} using {provider}")
                rule_content = await self._generate_llm_enhanced_rule(cve, best_poc, poc_data)
                if rule_content:
                    generation_method = f"llm_{provider}"
                    template = _InMemoryTemplate(f"LLM Generated ({provider})")

            # Fallback to template-based generation
            if not rule_content:
                logger.info(f"Using template-based rule generation for {cve.cve_id}")
                template = await self._select_template(cve, best_poc)
                if not template:
                    logger.warning(f"No suitable template found for {cve.cve_id}")
                    return {'success': False, 'error': 'No suitable template'}
                rule_content = await self._generate_rule_content(cve, template, poc_data)

            confidence_level = self._calculate_confidence_level(cve, poc_data)
            total_stars = sum(_star_count(p) for p in poc_data)
            poc_source = getattr(cve, 'poc_source', 'nomi_sec')

            rule_data = {
                'cve_id': cve.cve_id,
                'rule_name': f"{cve.cve_id} Enhanced Detection",
                'rule_content': rule_content,
                'detection_type': f"{generation_method}_generated",
                'log_source': self._extract_log_source_from_content(rule_content),
                'confidence_level': confidence_level,
                'auto_generated': True,
                'exploit_based': len(poc_data) > 0,
                'poc_source': poc_source,
                'poc_quality_score': best_quality,
                'nomi_sec_data': {
                    'total_pocs': len(poc_data),
                    'best_poc_quality': best_quality,
                    'total_stars': total_stars,
                    'avg_stars': total_stars / len(poc_data) if poc_data else 0,
                    'source': poc_source,
                    'generation_method': generation_method,
                },
                'github_repos': [p.get('html_url', '') for p in poc_data],
                'exploit_indicators': json.dumps(self._combine_exploit_indicators(poc_data)),
                'updated_at': datetime.utcnow(),
            }

            self._persist_rule(SigmaRule, cve.cve_id, rule_data)

            return {
                'success': True,
                'cve_id': cve.cve_id,
                'template': template.template_name if template else 'Unknown',
                'confidence_level': confidence_level,
                'poc_count': len(poc_data),
                'quality_score': best_quality,
            }

        except Exception as e:
            logger.error(f"Error generating enhanced rule for {getattr(cve, 'cve_id', None)}: {e}")
            return {'success': False, 'error': str(e)}

    def _persist_rule(self, rule_model, cve_id: str, rule_data: dict) -> None:
        """Insert or update the rule row for *cve_id* and commit.

        The session is rolled back before the error is re-raised, so a failed
        flush or commit does not leave the shared session unusable.
        """
        try:
            existing_rule = self.db_session.query(rule_model).filter(
                rule_model.cve_id == cve_id
            ).first()

            if existing_rule:
                for key, value in rule_data.items():
                    setattr(existing_rule, key, value)
                logger.info(f"Updated SIGMA rule for {cve_id}")
            else:
                self.db_session.add(rule_model(**rule_data))
                logger.info(f"Created new SIGMA rule for {cve_id}")

            self.db_session.commit()
        except Exception:
            self.db_session.rollback()
            raise

    async def _generate_hybrid_rule(self, cve, best_poc: dict, poc_data: list) -> Optional[str]:
        """Generate SIGMA rule using hybrid approach: metadata + LLM detection.

        Returns the combined rule text, or ``None`` when any step yields
        nothing or raises (errors are logged).
        """
        try:
            # Step 1: Generate YAML metadata using application code
            logger.info(f"Generating YAML metadata for {cve.cve_id}")
            yaml_metadata = self.yaml_generator.generate_metadata(cve, poc_data)

            # Step 2: Analyze PoC content with PoCAnalyzer
            logger.info(f"Analyzing PoC content for {cve.cve_id}")
            poc_content = await self._extract_poc_content(best_poc)
            if not poc_content:
                logger.warning(f"No PoC content available for {cve.cve_id}")
                return None
            poc_analysis = self.poc_analyzer.analyze_poc(poc_content, cve.cve_id)

            # Step 3: Generate detection sections using LLM
            logger.info(f"Generating detection sections for {cve.cve_id}")
            detection_sections = await self.enhanced_llm_client.generate_detection_sections(
                yaml_metadata, poc_analysis, cve.cve_id
            )
            if not detection_sections:
                logger.warning(f"Failed to generate detection sections for {cve.cve_id}")
                return None

            # Step 4: Combine metadata with detection sections
            logger.info(f"Combining YAML sections for {cve.cve_id}")
            complete_rule = self.enhanced_llm_client.combine_yaml_sections(
                yaml_metadata, detection_sections
            )
            if not complete_rule:
                logger.warning(f"Failed to combine YAML sections for {cve.cve_id}")
                return None

            logger.info(f"Successfully generated hybrid rule for {cve.cve_id}")
            return complete_rule

        except Exception as e:
            logger.error(f"Error generating hybrid rule for {cve.cve_id}: {e}")
            return None

    async def _generate_llm_enhanced_rule(self, cve, best_poc: dict, poc_data: list) -> Optional[str]:
        """Generate SIGMA rule using LLM API with PoC analysis.

        Returns the rule text only when the LLM client also validates it;
        otherwise ``None``. Errors are logged and reported as ``None``.
        """
        try:
            # Get PoC content from the best quality PoC
            poc_content = await self._extract_poc_content(best_poc)
            if not poc_content:
                logger.warning(f"No PoC content available for {cve.cve_id}")
                return None

            # Analyze PoC content and fold the findings into the prompt text
            logger.info(f"Analyzing PoC content for {cve.cve_id} with PoCAnalyzer")
            poc_analysis = self.poc_analyzer.analyze_poc(poc_content, cve.cve_id)
            enhanced_poc_content = self._format_poc_analysis_for_llm(poc_content, poc_analysis)

            rule_content = await self.llm_client.generate_sigma_rule(
                cve_id=cve.cve_id,
                poc_content=enhanced_poc_content,
                cve_description=cve.description or "",
                existing_rule=None
            )
            if not rule_content:
                return None

            # Validate the generated rule with CVE ID check
            if self.llm_client.validate_sigma_rule(rule_content, cve.cve_id):
                logger.info(f"Successfully generated LLM-enhanced rule for {cve.cve_id}")
                return rule_content

            logger.warning(f"Generated rule for {cve.cve_id} failed validation")
            return None

        except Exception as e:
            logger.error(f"Error generating LLM-enhanced rule for {cve.cve_id}: {e}")
            return None

    async def _extract_poc_content(self, poc: dict) -> Optional[str]:
        """Extract actual code content from PoC repository.

        Tries to download an exploit-looking file (or a README) through the
        GitHub contents API. When that yields nothing, or fails, the PoC's
        own ``description`` is used instead.

        Returns:
            The text content, ``"Repository Description: ..."`` as a last
            resort, or ``None`` when the PoC has no ``html_url`` or nothing
            usable was found.
        """
        poc = poc or {}
        repo_url = poc.get('html_url', '')
        if not repo_url:
            return None

        try:
            content = await self._fetch_github_poc_content(repo_url)
        except Exception as e:
            # Best effort: a network/API problem must not hide the description.
            logger.error(f"Error extracting PoC content: {e}")
            content = None
        if content:
            return content

        # Fallback to description and metadata
        description = poc.get('description', '')
        if description:
            return f"Repository Description: {description}"
        return None

    async def _fetch_github_poc_content(self, repo_url: str) -> Optional[str]:
        """Download the most relevant top-level file of a GitHub repository.

        Returns ``None`` when the URL is not a GitHub repository URL, the
        listing request does not answer 200, or no candidate file downloads.
        """
        import aiohttp

        match = _GITHUB_REPO_RE.search(repo_url)
        if not match:
            return None
        owner = match.group('owner')
        repo = match.group('repo')
        if repo.endswith('.git'):
            repo = repo[:-len('.git')]

        api_url = f"https://api.github.com/repos/{owner}/{repo}/contents"
        # Add timeout to prevent hanging
        timeout = aiohttp.ClientTimeout(total=30)

        async def download(session, url: str) -> Optional[str]:
            async with session.get(url, timeout=timeout) as file_response:
                if file_response.status != 200:
                    return None
                return await file_response.text()

        async with aiohttp.ClientSession() as session:
            async with session.get(api_url, timeout=timeout) as response:
                if response.status != 200:
                    return None
                contents = await response.json()

            # Anything but a list of entries cannot be searched for files.
            if not isinstance(contents, list):
                return None

            files = [
                ((entry.get('name') or '').lower(), entry.get('download_url'))
                for entry in contents
                if isinstance(entry, dict) and entry.get('type') == 'file'
            ]

            # Look for common exploit files
            for filename, file_url in files:
                if not file_url or not any(target in filename for target in _POC_TARGET_FILES):
                    continue
                content = await download(session, file_url)
                if content is None:
                    continue
                # Limit content size
                if len(content) > 10000:
                    content = content[:10000] + "\n... [content truncated]"
                return content

            # If no specific exploit file found, return description/README
            for filename, file_url in files:
                if not file_url or 'readme' not in filename:
                    continue
                content = await download(session, file_url)
                if content is not None:
                    return content[:5000]  # Smaller limit for README

        return None

    def _format_poc_analysis_for_llm(self, original_poc_content: str, poc_analysis: dict) -> str:
        """Format PoC analysis results for LLM consumption.

        Combines the first 2000 characters of the PoC with the indicators,
        techniques, behaviors and quality figures read from *poc_analysis*.
        """
        quality = poc_analysis.get('quality_score', {})
        if not isinstance(quality, dict):
            # NOTE(review): the analyzer is expected to return a dict here;
            # any other shape is ignored rather than raising.
            quality = {}

        indicator_sections = (
            ("**Process Execution Indicators:**", 'processes'),
            ("**File System Indicators:**", 'files'),
            ("**Network Communication Indicators:**", 'network'),
            ("**Registry Modification Indicators:**", 'registry'),
            ("**Command Execution Indicators:**", 'commands'),
        )

        lines = [
            "**ORIGINAL POC CODE:**",
            (original_poc_content or '')[:2000],
            "",
            "**STRUCTURED POC ANALYSIS:**",
            "",
            f"**Language Detected:** {poc_analysis.get('language', 'unknown')}",
            "",
            "**Security Indicators Extracted:**",
            "",
        ]
        for heading, key in indicator_sections:
            lines.append(heading)
            lines.append(self._format_indicators_for_display(poc_analysis.get(key, [])))
            lines.append("")
        lines.extend([
            "**MITRE ATT&CK Techniques Detected:**",
            self._format_mitre_techniques_for_display(poc_analysis.get('mitre_techniques', [])),
            "",
            "**Attack Behaviors Identified:**",
            self._format_behaviors_for_display(poc_analysis.get('behaviors', [])),
            "",
            "**Analysis Quality:**",
            f"- Overall Score: {quality.get('overall_score', 0)}/1.0",
            f"- Total Indicators: {poc_analysis.get('total_indicators', 0)}",
            f"- High Confidence Indicators: {poc_analysis.get('high_confidence_indicators', 0)}",
            f"- Recommendation: {quality.get('recommendation', 'Unknown')}",
            "",
            "**DETECTION GUIDANCE:**",
            "Use the above structured indicators to create specific SIGMA detection patterns.",
            "Focus on the high-confidence indicators and behaviors for the most accurate detection rules.",
        ])
        return "\n".join(lines)

    def _format_indicators_for_display(self, indicators: list) -> str:
        """Format indicators for LLM display (at most five, plus a count of the rest)."""
        if not indicators:
            return "- None detected"

        formatted = []
        for indicator in indicators[:5]:  # Limit to top 5 indicators
            if isinstance(indicator, dict):
                value = indicator.get('value', str(indicator))
                confidence = _format_confidence(indicator.get('confidence', 0))
                attack_technique = indicator.get('attack_technique')
                technique_info = f" (MITRE: {attack_technique})" if attack_technique else ""
                formatted.append(f"- {value} (confidence: {confidence}){technique_info}")
            else:
                formatted.append(f"- {indicator}")

        if len(indicators) > 5:
            formatted.append(f"- ... and {len(indicators) - 5} more indicators")
        return "\n".join(formatted)

    def _format_mitre_techniques_for_display(self, techniques: list) -> str:
        """Format MITRE ATT&CK techniques for display, adding names when the mapping client has them."""
        if not techniques:
            return "- None detected"

        formatted = []
        for technique in techniques:
            # Get technique name if available
            technique_name = (
                self.cve2capec_client.get_technique_name(technique)
                if hasattr(self, 'cve2capec_client') else ""
            )
            if technique_name:
                formatted.append(f"- {technique}: {technique_name}")
            else:
                formatted.append(f"- {technique}")
        return "\n".join(formatted)

    def _format_behaviors_for_display(self, behaviors: list) -> str:
        """Format attack behaviors for display, with up to three indicators each."""
        if not behaviors:
            return "- None detected"

        formatted = []
        for behavior in behaviors:
            if not isinstance(behavior, dict):
                formatted.append(f"- {behavior}")
                continue
            behavior_type = str(behavior.get('behavior', 'unknown'))
            confidence = _format_confidence(behavior.get('confidence', 0))
            indicators = behavior.get('indicators') or []
            formatted.append(f"- {behavior_type.replace('_', ' ').title()} (confidence: {confidence})")
            if indicators:
                formatted.append(f"  Indicators: {', '.join(str(item) for item in indicators[:3])}")
        return "\n".join(formatted)

    def _extract_log_source_from_content(self, rule_content: str) -> str:
        """Extract log source from the generated rule content.

        Returns the ``logsource`` category, else its product, else
        ``'generic'`` (also when the content cannot be parsed).
        """
        try:
            import yaml
            parsed = yaml.safe_load(rule_content)
        except Exception:
            return 'generic'

        logsource = parsed.get('logsource') if isinstance(parsed, dict) else None
        if not isinstance(logsource, dict):
            return 'generic'
        return logsource.get('category') or logsource.get('product') or 'generic'

    async def _select_template(self, cve, best_poc: Optional[dict]) -> Optional[object]:
        """Select the most appropriate template based on CVE and PoC analysis.

        Every stored template is scored on PoC indicators, CVE description and
        affected products; the highest score wins (first one on a tie). With
        no stored templates a built-in default template is returned.
        """
        from main import RuleTemplate

        templates = self.db_session.query(RuleTemplate).all()
        if not templates:
            logger.warning("No rule templates found in database - creating default template")
            return self._create_default_template(cve, best_poc)

        def relevance(template) -> int:
            score = 0
            # Score based on PoC indicators (highest priority)
            if best_poc:
                score += self._score_template_poc_match(template, best_poc.get('exploit_indicators', {}))
            # Score based on CVE description
            score += self._score_template_cve_match(template, cve)
            # Score based on affected products
            if cve.affected_products:
                score += self._score_template_product_match(template, cve.affected_products)
            return score

        scored = [(relevance(template), template) for template in templates]
        best_score, best_template = max(scored, key=lambda pair: pair[0])
        logger.info(f"Selected template {best_template.template_name} with score {best_score}")
        return best_template

    def _score_template_poc_match(self, template: object, indicators: dict) -> int:
        """Score template based on PoC indicators (keyword match on the template name)."""
        indicators = indicators or {}
        template_name = template.template_name.lower()
        score = 0

        # Process-based templates
        if ('process' in template_name or 'execution' in template_name) and (
                indicators.get('processes') or indicators.get('commands')):
            score += 30

        # Network-based templates
        if ('network' in template_name or 'connection' in template_name) and (
                indicators.get('network') or indicators.get('urls')):
            score += 30

        # File-based templates
        if ('file' in template_name or 'modification' in template_name) and indicators.get('files'):
            score += 30

        # PowerShell templates
        if 'powershell' in template_name:
            processes = indicators.get('processes') or []
            if any('powershell' in str(p).lower() for p in processes):
                score += 35

        return score

    def _score_template_cve_match(self, template: object, cve) -> int:
        """Score template based on CVE description keywords."""
        template_name = template.template_name.lower()
        description = (cve.description or '').lower()
        score = 0

        if 'remote' in description and 'execution' in description:
            if 'process' in template_name or 'execution' in template_name:
                score += 20

        if 'powershell' in description and 'powershell' in template_name:
            score += 25

        if ('network' in description or 'http' in description) and 'network' in template_name:
            score += 20

        if ('file' in description or 'upload' in description) and 'file' in template_name:
            score += 20

        return score

    def _score_template_product_match(self, template: object, affected_products: list) -> int:
        """Score template based on affected products: 10 points per pattern found in any product."""
        patterns = template.applicable_product_patterns
        if not patterns:
            return 0

        products = [product.lower() for product in affected_products]
        score = 0
        for pattern in patterns:
            pattern_lower = pattern.lower()
            if any(pattern_lower in product for product in products):
                score += 10
        return score

    async def _generate_rule_content(self, cve, template: object, poc_data: list) -> str:
        """Generate the actual SIGMA rule content from a template.

        Fills both placeholder styles in one pass: the lower-case
        ``{title}`` / ``{suspicious_processes}`` names and the upper-case
        ``{{TITLE}}`` / ``{{PROCESSES}}`` names used by the built-in default
        templates. A single pass keeps substituted values (for example an
        indicator that itself contains ``{level}``) from being substituted
        again.
        """
        import uuid

        # Combine all exploit indicators
        combined_indicators = self._combine_exploit_indicators(poc_data)

        # Get base template content
        rule_content = template.template_content or ''

        title = f"{cve.cve_id} Enhanced Detection"
        description = self._generate_description(cve, poc_data)
        rule_id = str(uuid.uuid4())
        date = datetime.now().strftime('%Y/%m/%d')
        level = self._calculate_confidence_level(cve, poc_data).lower()
        tags = self._generate_tags(cve, poc_data)
        # ``_generate_tags`` returns a bare value when there is a single tag;
        # the "tags:\n{{TAGS}}" layout always needs a list item.
        tags_block = tags if tags.lstrip().startswith('- ') else f"    - {tags}"

        def indicator_block(key: str) -> str:
            return self._format_indicators(combined_indicators.get(key, []))

        replacements = {
            '{title}': title,
            '{description}': description,
            '{rule_id}': rule_id,
            '{date}': date,
            '{level}': level,
            '{cve_url}': f"https://nvd.nist.gov/vuln/detail/{cve.cve_id}",
            '{tags}': tags,
            '{{TITLE}}': title,
            '{{DESCRIPTION}}': description,
            '{{RULE_ID}}': rule_id,
            '{{DATE}}': date,
            '{{LEVEL}}': level,
            '{{REFERENCES}}': self._generate_references(cve, poc_data),
            '{{TAGS}}': tags_block,
        }
        for key in ('processes', 'files', 'commands', 'network', 'urls', 'registry', 'ports'):
            block = indicator_block(key)
            replacements[f'{{suspicious_{key}}}'] = block
            replacements[f'{{{{{key.upper()}}}}}'] = block

        # Longest placeholders first so "{{TITLE}}" is never split by a shorter match.
        pattern = re.compile(
            '|'.join(re.escape(name) for name in sorted(replacements, key=len, reverse=True))
        )
        rule_content = pattern.sub(lambda found: replacements[found.group(0)], rule_content)

        # Clean up empty sections
        rule_content = self._clean_empty_sections(rule_content)

        # Add enhanced detection based on PoC quality
        if poc_data:
            rule_content = self._enhance_detection_logic(rule_content, combined_indicators, poc_data)

        return rule_content

    def _combine_exploit_indicators(self, poc_data: list) -> dict:
        """Combine exploit indicators from all PoCs.

        Returns a dict with one list per category. Entries are de-duplicated
        in first-seen order (so output is stable between runs) and only
        strings longer than two characters are kept.
        """
        combined = {key: [] for key in _INDICATOR_KEYS}

        for poc in poc_data or []:
            indicators = poc.get('exploit_indicators') or {}
            for key in _INDICATOR_KEYS:
                values = indicators.get(key) or []
                if isinstance(values, str):
                    values = [values]
                combined[key].extend(values)

        for key in _INDICATOR_KEYS:
            # Filter before de-duplicating so unhashable entries cannot raise.
            usable = [item for item in combined[key] if isinstance(item, str) and len(item) > 2]
            combined[key] = list(dict.fromkeys(usable))

        return combined

    def _generate_description(self, cve, poc_data: list) -> str:
        """Generate enhanced rule description.

        Appends up to three distinct key terms found in the CVE description
        (in order of appearance, punctuation ignored) and a PoC summary.
        """
        base_desc = f"Detection for {cve.cve_id}"

        if cve.description:
            words = re.findall(r'[a-z]+', cve.description.lower())
            key_terms = list(dict.fromkeys(word for word in words if word in _DESCRIPTION_KEY_TERMS))[:3]
            if key_terms:
                base_desc += f" involving {', '.join(key_terms)}"

        if poc_data:
            total_pocs = len(poc_data)
            total_stars = sum(_star_count(p) for p in poc_data)
            base_desc += f" [Enhanced with {total_pocs} PoC(s), {total_stars} stars]"

        return base_desc

    def _generate_references(self, cve, poc_data: list) -> str:
        """Generate references section: the NVD page plus the three most-starred PoC URLs."""
        refs = [f"https://nvd.nist.gov/vuln/detail/{cve.cve_id}"]

        # Add top PoC references (max 3)
        if poc_data:
            sorted_pocs = sorted(poc_data, key=_star_count, reverse=True)
            refs.extend(poc['html_url'] for poc in sorted_pocs[:3] if poc.get('html_url'))

        return '\n'.join(f"    - {ref}" for ref in refs)

    def _generate_tags(self, cve, poc_data: list) -> str:
        """Generate MITRE ATT&CK tags and other tags using CVE2CAPEC mappings.

        Returns the bare tag when there is exactly one, otherwise one
        ``    - tag`` line per tag.
        """
        # CVE tag
        tags = [cve.cve_id.lower()]

        # Get MITRE ATT&CK techniques from CVE2CAPEC mapping
        mitre_techniques = self.cve2capec_client.get_mitre_techniques_for_cve(cve.cve_id)
        if mitre_techniques:
            logger.info(f"Found {len(mitre_techniques)} MITRE techniques for {cve.cve_id}: {mitre_techniques}")
            for technique in mitre_techniques:
                # Convert to attack.t format (lowercase)
                attack_tag = f"attack.{technique.lower()}"
                if attack_tag not in tags:
                    tags.append(attack_tag)
        else:
            # No CVE2CAPEC mapping found - do not add fallback techniques
            logger.warning(f"No CVE2CAPEC mapping found for {cve.cve_id}, no MITRE techniques will be added")

        # Get CWE codes for additional context; only the primary CWE becomes a tag
        cwe_codes = self.cve2capec_client.get_cwe_for_cve(cve.cve_id)
        if cwe_codes:
            tags.append(cwe_codes[0].lower().replace('-', '.'))

        # Add PoC quality tags
        if poc_data:
            tags.append('exploit.poc')
            best_poc = max(poc_data, key=_quality_score)
            quality_tier = _quality_analysis(best_poc).get('quality_tier', 'poor')
            tags.append(f'poc.quality.{quality_tier}')

        if not tags:
            return "unknown"
        if len(tags) == 1:
            return tags[0]
        return '\n'.join(f"    - {tag}" for tag in tags)

    def _format_indicators(self, indicators: list) -> str:
        """Format indicators for SIGMA rule as indented, double-quoted YAML list items.

        At most ten indicators are emitted. Each value is escaped twice:
        first for SIGMA (backslash and the ``*`` / ``?`` wildcards), then for
        YAML via ``json.dumps``, whose output is a valid double-quoted
        scalar. Without indicators a wildcard placeholder line is returned.
        """
        if not indicators:
            return f'{_INDICATOR_INDENT}- "*" # No specific indicators available'

        formatted = []
        # Limit indicators to avoid overly complex rules
        for indicator in indicators[:10]:
            cleaned = str(indicator).strip()
            if not cleaned:
                continue
            sigma_value = cleaned.replace('\\', '\\\\').replace('*', '\\*').replace('?', '\\?')
            formatted.append(f'{_INDICATOR_INDENT}- {json.dumps(sigma_value, ensure_ascii=False)}')

        if not formatted:
            return f'{_INDICATOR_INDENT}- "*" # No valid indicators'
        return '\n'.join(formatted)

    def _enhance_detection_logic(self, rule_content: str, indicators: dict, poc_data: list) -> str:
        """Enhance detection logic based on PoC quality and indicators.

        When the best PoC scores above 60 and both process and command
        indicators exist, a ``process_and_command`` selection holding those
        indicators is inserted before the condition line and OR-ed onto the
        existing condition. Rules without a single-line condition starting
        with ``selection`` are returned unchanged.
        """
        if not poc_data:
            return rule_content

        best_poc = max(poc_data, key=_quality_score)
        if _quality_score(best_poc) <= 60:  # Only high quality PoCs qualify
            return rule_content

        processes = indicators.get('processes')
        commands = indicators.get('commands')
        if not (processes and commands):
            return rule_content

        match = _SELECTION_CONDITION_RE.search(rule_content)
        if not match:
            return rule_content

        indent = match.group('indent')
        field_indent = indent + '    '
        item_indent = field_indent + '    '

        expression = match.group('expr').strip()
        if expression != 'selection':
            # Parenthesise so the added "or" cannot change operator precedence.
            expression = f"({expression})"

        enhanced = '\n'.join([
            f"{indent}process_and_command:",
            f"{field_indent}Image|contains:",
            _reindent(self._format_indicators(processes), item_indent),
            f"{field_indent}CommandLine|contains:",
            _reindent(self._format_indicators(commands), item_indent),
            f"{indent}condition: {expression} or process_and_command",
        ])
        return rule_content[:match.start()] + enhanced + rule_content[match.end():]

    def _calculate_confidence_level(self, cve, poc_data: list) -> str:
        """Calculate confidence level based on CVE and PoC data.

        Returns ``'HIGH'`` (score >= 80), ``'MEDIUM'`` (>= 60), ``'LOW'``
        (>= 40) or ``'INFORMATIONAL'``.
        """
        score = 0

        # CVSS score factor
        cvss = cve.cvss_score
        if cvss:
            if cvss >= 9.0:
                score += 40
            elif cvss >= 7.0:
                score += 30
            elif cvss >= 5.0:
                score += 20
            else:
                score += 10

        # PoC quality factor
        if poc_data:
            total_stars = sum(_star_count(p) for p in poc_data)
            score += min(total_stars, 30)  # Max 30 points for stars
            score += min(len(poc_data) * 5, 20)  # Max 20 points for PoC count

            # Quality tier bonus
            best_poc = max(poc_data, key=_quality_score)
            quality_tier = _quality_analysis(best_poc).get('quality_tier', 'poor')
            tier_bonus = {
                'excellent': 20,
                'good': 15,
                'fair': 10,
                'poor': 5,
                'very_poor': 0,
            }
            score += tier_bonus.get(quality_tier, 0)

        # Determine confidence level
        if score >= 80:
            return 'HIGH'
        if score >= 60:
            return 'MEDIUM'
        if score >= 40:
            return 'LOW'
        return 'INFORMATIONAL'

    def _create_default_template(self, cve, best_poc: Optional[dict]) -> object:
        """Create a default template based on CVE and PoC analysis.

        The template type follows the best PoC's indicators: network/URL
        indicators first, then files, then a PowerShell process, otherwise
        the generic process template. The returned object is in-memory only.
        """
        template_type = "process"

        if best_poc:
            indicators = best_poc.get('exploit_indicators') or {}
            if indicators.get('network') or indicators.get('urls'):
                template_type = "network"
            elif indicators.get('files'):
                template_type = "file"
            elif any('powershell' in str(p).lower() for p in indicators.get('processes') or []):
                template_type = "powershell"

        template_content = _DEFAULT_TEMPLATE_HEADER + _DEFAULT_TEMPLATE_BODIES[template_type]
        return _InMemoryTemplate(f"Default {template_type.title()} Template", template_content)

    def _clean_empty_sections(self, rule_content: str) -> str:
        """Clean up empty sections in the SIGMA rule.

        Wildcard placeholder lines are dropped, but only when the rule text
        contains ``or selection`` somewhere (i.e. alternative selections exist).
        """
        if 'or selection' not in rule_content:
            return rule_content

        kept_lines = [line for line in rule_content.split('\n') if _NO_INDICATOR_MARKER not in line]
        return '\n'.join(kept_lines)

    def _extract_log_source(self, template_name: str) -> str:
        """Extract log source from template name (first matching keyword wins)."""
        template_lower = template_name.lower()
        keyword_sources = (
            (('process', 'execution'), 'process_creation'),
            (('network',), 'network_connection'),
            (('file',), 'file_event'),
            (('powershell',), 'powershell'),
            (('registry',), 'registry_event'),
        )
        for keywords, source in keyword_sources:
            if any(keyword in template_lower for keyword in keywords):
                return source
        return 'generic'