auto_sigma_rule_generator/backend/enhanced_sigma_generator.py

"""
Enhanced SIGMA Rule Generator
Generates improved SIGMA rules using a hybrid approach:
1. Generate YAML metadata with application code
2. Use LLM to create logsource and detection sections based on PoC analysis
"""

import json
import logging
from datetime import datetime
from typing import Dict, List, Optional, Tuple
from sqlalchemy.orm import Session
import re
from llm_client import LLMClient
from enhanced_llm_client import EnhancedLLMClient
from yaml_metadata_generator import YAMLMetadataGenerator
from cve2capec_client import CVE2CAPECClient
from poc_analyzer import PoCAnalyzer

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class EnhancedSigmaGenerator:
    """Enhanced SIGMA rule generator using nomi-sec PoC data"""

    def __init__(self, db_session: Session, llm_provider: str = None, llm_model: str = None):
        self.db_session = db_session
        self.llm_client = LLMClient(provider=llm_provider, model=llm_model)  # Keep for backward compatibility
        self.enhanced_llm_client = EnhancedLLMClient(provider=llm_provider, model=llm_model)
        self.yaml_generator = YAMLMetadataGenerator(db_session)
        self.cve2capec_client = CVE2CAPECClient()
        self.poc_analyzer = PoCAnalyzer()

    async def generate_enhanced_rule(self, cve, use_llm: bool = True, use_hybrid: bool = True) -> dict:
        """Generate enhanced SIGMA rule for a CVE using PoC data"""
        from main import SigmaRule, RuleTemplate

        try:
            # Get PoC data
            poc_data = cve.poc_data or []

            # Find the best quality PoC
            best_poc = None
            if poc_data:
                best_poc = max(poc_data, key=lambda x: x.get('quality_analysis', {}).get('quality_score', 0))

            # Try hybrid approach first if enabled and available
            rule_content = None
            generation_method = "template"
            template = None

            if use_hybrid and self.enhanced_llm_client.is_available() and best_poc:
                logger.info(f"Attempting hybrid rule generation for {cve.cve_id} using {self.enhanced_llm_client.provider}")
                rule_content = await self._generate_hybrid_rule(cve, best_poc, poc_data)
                if rule_content:
                    generation_method = f"hybrid_{self.enhanced_llm_client.provider}"
                    # Create a dummy template object for hybrid-generated rules
                    class HybridTemplate:
                        def __init__(self, provider_name):
                            self.template_name = f"Hybrid Generated ({provider_name})"
                    template = HybridTemplate(self.enhanced_llm_client.provider)

            # Fallback to original LLM-enhanced generation
            elif use_llm and self.llm_client.is_available() and best_poc:
                logger.info(f"Attempting LLM-enhanced rule generation for {cve.cve_id} using {self.llm_client.provider}")
                rule_content = await self._generate_llm_enhanced_rule(cve, best_poc, poc_data)
                if rule_content:
                    generation_method = f"llm_{self.llm_client.provider}"
                    # Create a dummy template object for LLM-generated rules
                    class LLMTemplate:
                        def __init__(self, provider_name):
                            self.template_name = f"LLM Generated ({provider_name})"
                    template = LLMTemplate(self.llm_client.provider)

            # Fallback to template-based generation
            if not rule_content:
                logger.info(f"Using template-based rule generation for {cve.cve_id}")

                # Select appropriate template based on PoC analysis
                template = await self._select_template(cve, best_poc)

                if not template:
                    logger.warning(f"No suitable template found for {cve.cve_id}")
                    return {'success': False, 'error': 'No suitable template'}

                # Generate rule content
                rule_content = await self._generate_rule_content(cve, template, poc_data)

            # Calculate confidence level
            confidence_level = self._calculate_confidence_level(cve, poc_data)

            # Store or update SIGMA rule
            existing_rule = self.db_session.query(SigmaRule).filter(
                SigmaRule.cve_id == cve.cve_id
            ).first()

            rule_data = {
                'cve_id': cve.cve_id,
                'rule_name': f"{cve.cve_id} Enhanced Detection",
                'rule_content': rule_content,
                'detection_type': f"{generation_method}_generated",
                'log_source': self._extract_log_source_from_content(rule_content),
                'confidence_level': confidence_level,
                'auto_generated': True,
                'exploit_based': len(poc_data) > 0,
                'poc_source': getattr(cve, 'poc_source', 'nomi_sec'),
                'poc_quality_score': best_poc.get('quality_analysis', {}).get('quality_score', 0) if best_poc else 0,
                'nomi_sec_data': {
                    'total_pocs': len(poc_data),
                    'best_poc_quality': best_poc.get('quality_analysis', {}).get('quality_score', 0) if best_poc else 0,
                    'total_stars': sum(p.get('stargazers_count', 0) for p in poc_data),
                    'avg_stars': sum(p.get('stargazers_count', 0) for p in poc_data) / len(poc_data) if poc_data else 0,
                    'source': getattr(cve, 'poc_source', 'nomi_sec'),
                    'generation_method': generation_method
                },
                'github_repos': [p.get('html_url', '') for p in poc_data],
                'exploit_indicators': json.dumps(self._combine_exploit_indicators(poc_data)),
                'updated_at': datetime.utcnow()
            }

            if existing_rule:
                # Update existing rule
                for key, value in rule_data.items():
                    setattr(existing_rule, key, value)
                logger.info(f"Updated SIGMA rule for {cve.cve_id}")
            else:
                # Create new rule
                new_rule = SigmaRule(**rule_data)
                self.db_session.add(new_rule)
                logger.info(f"Created new SIGMA rule for {cve.cve_id}")

            self.db_session.commit()

            return {
                'success': True,
                'cve_id': cve.cve_id,
                'template': template.template_name if template else 'Unknown',
                'confidence_level': confidence_level,
                'poc_count': len(poc_data),
                'quality_score': best_poc.get('quality_analysis', {}).get('quality_score', 0) if best_poc else 0
            }

        except Exception as e:
            logger.error(f"Error generating enhanced rule for {cve.cve_id}: {e}")
            return {'success': False, 'error': str(e)}

    async def _generate_hybrid_rule(self, cve, best_poc: dict, poc_data: list) -> Optional[str]:
        """Generate SIGMA rule using hybrid approach: metadata + LLM detection."""
        try:
            # Step 1: Generate YAML metadata using application code
            logger.info(f"Generating YAML metadata for {cve.cve_id}")
            yaml_metadata = self.yaml_generator.generate_metadata(cve, poc_data)

            # Step 2: Analyze PoC content with PoCAnalyzer
            logger.info(f"Analyzing PoC content for {cve.cve_id}")
            poc_content = await self._extract_poc_content(best_poc)
            if not poc_content:
                logger.warning(f"No PoC content available for {cve.cve_id}")
                return None

            poc_analysis = self.poc_analyzer.analyze_poc(poc_content, cve.cve_id)

            # Step 3: Generate detection sections using LLM
            logger.info(f"Generating detection sections for {cve.cve_id}")
            detection_sections = await self.enhanced_llm_client.generate_detection_sections(
                yaml_metadata, poc_analysis, cve.cve_id
            )

            if not detection_sections:
                logger.warning(f"Failed to generate detection sections for {cve.cve_id}")
                return None

            # Step 4: Combine metadata with detection sections
            logger.info(f"Combining YAML sections for {cve.cve_id}")
            complete_rule = self.enhanced_llm_client.combine_yaml_sections(
                yaml_metadata, detection_sections
            )

            if complete_rule:
                logger.info(f"Successfully generated hybrid rule for {cve.cve_id}")
                return complete_rule
            else:
                logger.warning(f"Failed to combine YAML sections for {cve.cve_id}")
                return None

        except Exception as e:
            logger.error(f"Error generating hybrid rule for {cve.cve_id}: {e}")
            return None

    async def _generate_llm_enhanced_rule(self, cve, best_poc: dict, poc_data: list) -> Optional[str]:
        """Generate SIGMA rule using LLM API with PoC analysis"""
        try:
            # Get PoC content from the best quality PoC
            poc_content = await self._extract_poc_content(best_poc)
            if not poc_content:
                logger.warning(f"No PoC content available for {cve.cve_id}")
                return None

            # Analyze PoC content with the PoC analyzer
            logger.info(f"Analyzing PoC content for {cve.cve_id} with PoCAnalyzer")
            poc_analysis = self.poc_analyzer.analyze_poc(poc_content, cve.cve_id)

            # Enhance the PoC content with structured analysis
            enhanced_poc_content = self._format_poc_analysis_for_llm(poc_content, poc_analysis)

            # Generate rule using LLM with enhanced PoC content
            rule_content = await self.llm_client.generate_sigma_rule(
                cve_id=cve.cve_id,
                poc_content=enhanced_poc_content,
                cve_description=cve.description or "",
                existing_rule=None
            )

            if rule_content:
                # Validate the generated rule with CVE ID check
                if self.llm_client.validate_sigma_rule(rule_content, cve.cve_id):
                    logger.info(f"Successfully generated LLM-enhanced rule for {cve.cve_id}")
                    return rule_content
                else:
                    logger.warning(f"Generated rule for {cve.cve_id} failed validation")
                    return None

            return None

        except Exception as e:
            logger.error(f"Error generating LLM-enhanced rule for {cve.cve_id}: {e}")
            return None

    async def _extract_poc_content(self, poc: dict) -> Optional[str]:
        """Extract actual code content from PoC repository"""
        try:
            import aiohttp
            import asyncio

            # Get repository information
            repo_url = poc.get('html_url', '')
            if not repo_url:
                return None

            # Convert GitHub URL to API URL for repository content
            if 'github.com' in repo_url:
                # Extract owner and repo from URL
                parts = repo_url.rstrip('/').split('/')
                if len(parts) >= 2:
                    owner = parts[-2]
                    repo = parts[-1]

                    # Get repository files via GitHub API
                    api_url = f"https://api.github.com/repos/{owner}/{repo}/contents"

                    async with aiohttp.ClientSession() as session:
                        # Add timeout to prevent hanging
                        timeout = aiohttp.ClientTimeout(total=30)
                        async with session.get(api_url, timeout=timeout) as response:
                            if response.status == 200:
                                contents = await response.json()

                                # Look for common exploit files
                                target_files = [
                                    'exploit.py', 'poc.py', 'exploit.c', 'exploit.cpp',
                                    'exploit.java', 'exploit.rb', 'exploit.php',
                                    'exploit.js', 'exploit.sh', 'exploit.ps1',
                                    'README.md', 'main.py', 'index.js'
                                ]

                                for file_info in contents:
                                    if file_info.get('type') == 'file':
                                        filename = file_info.get('name', '').lower()

                                        # Check if this is a target file
                                        if any(target in filename for target in target_files):
                                            file_url = file_info.get('download_url')
                                            if file_url:
                                                async with session.get(file_url, timeout=timeout) as file_response:
                                                    if file_response.status == 200:
                                                        content = await file_response.text()
                                                        # Limit content size
                                                        if len(content) > 10000:
                                                            content = content[:10000] + "\n... [content truncated]"
                                                        return content

                                # If no specific exploit file found, return description/README
                                for file_info in contents:
                                    if file_info.get('type') == 'file':
                                        filename = file_info.get('name', '').lower()
                                        if 'readme' in filename:
                                            file_url = file_info.get('download_url')
                                            if file_url:
                                                async with session.get(file_url, timeout=timeout) as file_response:
                                                    if file_response.status == 200:
                                                        content = await file_response.text()
                                                        return content[:5000]  # Smaller limit for README

            # Fallback to description and metadata
            description = poc.get('description', '')
            if description:
                return f"Repository Description: {description}"

            return None

        except Exception as e:
            logger.error(f"Error extracting PoC content: {e}")
            return None


    def _format_poc_analysis_for_llm(self, original_poc_content: str, poc_analysis: dict) -> str:
        """Format PoC analysis results for LLM consumption"""

        # Extract key findings from analysis
        language = poc_analysis.get('language', 'unknown')
        quality_score = poc_analysis.get('quality_score', {})
        mitre_techniques = poc_analysis.get('mitre_techniques', [])
        behaviors = poc_analysis.get('behaviors', [])

        # Extract indicators
        processes = poc_analysis.get('processes', [])
        files = poc_analysis.get('files', [])
        network = poc_analysis.get('network', [])
        registry = poc_analysis.get('registry', [])
        commands = poc_analysis.get('commands', [])

        # Build enhanced content for LLM
        enhanced_content = f"""**ORIGINAL POC CODE:**
{original_poc_content[:2000]}

**STRUCTURED POC ANALYSIS:**

**Language Detected:** {language}

**Security Indicators Extracted:**

**Process Execution Indicators:**
{self._format_indicators_for_display(processes)}

**File System Indicators:**
{self._format_indicators_for_display(files)}

**Network Communication Indicators:**
{self._format_indicators_for_display(network)}

**Registry Modification Indicators:**
{self._format_indicators_for_display(registry)}

**Command Execution Indicators:**
{self._format_indicators_for_display(commands)}

**MITRE ATT&CK Techniques Detected:**
{self._format_mitre_techniques_for_display(mitre_techniques)}

**Attack Behaviors Identified:**
{self._format_behaviors_for_display(behaviors)}

**Analysis Quality:**
- Overall Score: {quality_score.get('overall_score', 0)}/1.0
- Total Indicators: {poc_analysis.get('total_indicators', 0)}
- High Confidence Indicators: {poc_analysis.get('high_confidence_indicators', 0)}
- Recommendation: {quality_score.get('recommendation', 'Unknown')}

**DETECTION GUIDANCE:**
Use the above structured indicators to create specific SIGMA detection patterns. Focus on the high-confidence indicators and behaviors for the most accurate detection rules."""

        return enhanced_content

    def _format_indicators_for_display(self, indicators: list) -> str:
        """Format indicators for LLM display"""
        if not indicators:
            return "- None detected"

        formatted = []
        for indicator in indicators[:5]:  # Limit to top 5 indicators
            if isinstance(indicator, dict):
                value = indicator.get('value', str(indicator))
                confidence = indicator.get('confidence', 0)
                attack_technique = indicator.get('attack_technique')
                technique_info = f" (MITRE: {attack_technique})" if attack_technique else ""
                formatted.append(f"- {value} (confidence: {confidence:.2f}){technique_info}")
            else:
                formatted.append(f"- {indicator}")

        if len(indicators) > 5:
            formatted.append(f"- ... and {len(indicators) - 5} more indicators")

        return "\n".join(formatted)

    def _format_mitre_techniques_for_display(self, techniques: list) -> str:
        """Format MITRE ATT&CK techniques for display"""
        if not techniques:
            return "- None detected"

        formatted = []
        for technique in techniques:
            # Get technique name if available
            technique_name = self.cve2capec_client.get_technique_name(technique) if hasattr(self, 'cve2capec_client') else ""
            if technique_name:
                formatted.append(f"- {technique}: {technique_name}")
            else:
                formatted.append(f"- {technique}")

        return "\n".join(formatted)

    def _format_behaviors_for_display(self, behaviors: list) -> str:
        """Format attack behaviors for display"""
        if not behaviors:
            return "- None detected"

        formatted = []
        for behavior in behaviors:
            if isinstance(behavior, dict):
                behavior_type = behavior.get('behavior', 'unknown')
                confidence = behavior.get('confidence', 0)
                indicators = behavior.get('indicators', [])
                formatted.append(f"- {behavior_type.replace('_', ' ').title()} (confidence: {confidence:.2f})")
                if indicators:
                    formatted.append(f"  Indicators: {', '.join(indicators[:3])}")
            else:
                formatted.append(f"- {behavior}")

        return "\n".join(formatted)

    def _extract_log_source_from_content(self, rule_content: str) -> str:
        """Extract log source from the generated rule content"""
        try:
            import yaml
            parsed = yaml.safe_load(rule_content)
            logsource = parsed.get('logsource', {})

            category = logsource.get('category', '')
            product = logsource.get('product', '')

            if category:
                return category
            elif product:
                return product
            else:
                return 'generic'

        except Exception:
            return 'generic'

    async def _select_template(self, cve, best_poc: Optional[dict]) -> Optional[object]:
        """Select the most appropriate template based on CVE and PoC analysis"""
        from main import RuleTemplate

        templates = self.db_session.query(RuleTemplate).all()

        if not templates:
            logger.warning("No rule templates found in database - creating default template")
            # Create a default template if none exist
            return self._create_default_template(cve, best_poc)

        # Score templates based on relevance
        template_scores = {}

        for template in templates:
            score = 0

            # Score based on PoC indicators (highest priority)
            if best_poc:
                indicators = best_poc.get('exploit_indicators', {})
                score += self._score_template_poc_match(template, indicators)

            # Score based on CVE description
            score += self._score_template_cve_match(template, cve)

            # Score based on affected products
            if cve.affected_products:
                score += self._score_template_product_match(template, cve.affected_products)

            template_scores[template] = score

        # Return template with highest score
        if template_scores:
            best_template = max(template_scores, key=template_scores.get)
            logger.info(f"Selected template {best_template.template_name} with score {template_scores[best_template]}")
            return best_template

        return self._create_default_template(cve, best_poc)

    def _score_template_poc_match(self, template: object, indicators: dict) -> int:
        """Score template based on PoC indicators"""
        score = 0
        template_name = template.template_name.lower()

        # Process-based templates
        if 'process' in template_name or 'execution' in template_name:
            if indicators.get('processes') or indicators.get('commands'):
                score += 30

        # Network-based templates
        if 'network' in template_name or 'connection' in template_name:
            if indicators.get('network') or indicators.get('urls'):
                score += 30

        # File-based templates
        if 'file' in template_name or 'modification' in template_name:
            if indicators.get('files'):
                score += 30

        # PowerShell templates
        if 'powershell' in template_name:
            processes = indicators.get('processes', [])
            if any('powershell' in p.lower() for p in processes):
                score += 35

        return score

    def _score_template_cve_match(self, template: object, cve) -> int:
        """Score template based on CVE description"""
        score = 0
        template_name = template.template_name.lower()
        description = (cve.description or '').lower()

        # Keyword matching
        if 'remote' in description and 'execution' in description:
            if 'process' in template_name or 'execution' in template_name:
                score += 20

        if 'powershell' in description:
            if 'powershell' in template_name:
                score += 25

        if 'network' in description or 'http' in description:
            if 'network' in template_name:
                score += 20

        if 'file' in description or 'upload' in description:
            if 'file' in template_name:
                score += 20

        return score

    def _score_template_product_match(self, template: object, affected_products: list) -> int:
        """Score template based on affected products"""
        score = 0

        if not template.applicable_product_patterns:
            return 0

        for pattern in template.applicable_product_patterns:
            pattern_lower = pattern.lower()
            for product in affected_products:
                product_lower = product.lower()
                if pattern_lower in product_lower:
                    score += 10
                    break

        return score

    async def _generate_rule_content(self, cve, template: object, poc_data: list) -> str:
        """Generate the actual SIGMA rule content"""
        # Combine all exploit indicators
        combined_indicators = self._combine_exploit_indicators(poc_data)

        # Get base template content
        rule_content = template.template_content

        # Generate a unique rule ID
        import uuid
        rule_id = str(uuid.uuid4())

        # Replace template placeholders
        replacements = {
            '{title}': f"{cve.cve_id} Enhanced Detection",
            '{description}': self._generate_description(cve, poc_data),
            '{rule_id}': rule_id,
            '{date}': datetime.now().strftime('%Y/%m/%d'),
            '{level}': self._calculate_confidence_level(cve, poc_data).lower(),
            '{cve_url}': f"https://nvd.nist.gov/vuln/detail/{cve.cve_id}",
            '{tags}': self._generate_tags(cve, poc_data),
            '{suspicious_processes}': self._format_indicators(combined_indicators.get('processes', [])),
            '{suspicious_files}': self._format_indicators(combined_indicators.get('files', [])),
            '{suspicious_commands}': self._format_indicators(combined_indicators.get('commands', [])),
            '{suspicious_network}': self._format_indicators(combined_indicators.get('network', [])),
            '{suspicious_urls}': self._format_indicators(combined_indicators.get('urls', [])),
            '{suspicious_registry}': self._format_indicators(combined_indicators.get('registry', [])),
            '{suspicious_ports}': self._format_indicators(combined_indicators.get('ports', []))
        }

        # Apply replacements
        for placeholder, value in replacements.items():
            rule_content = rule_content.replace(placeholder, value)

        # Clean up empty sections
        rule_content = self._clean_empty_sections(rule_content)

        # Add enhanced detection based on PoC quality
        if poc_data:
            rule_content = self._enhance_detection_logic(rule_content, combined_indicators, poc_data)

        return rule_content

    def _combine_exploit_indicators(self, poc_data: list) -> dict:
        """Combine exploit indicators from all PoCs"""
        combined = {
            'processes': [],
            'files': [],
            'commands': [],
            'network': [],
            'urls': [],
            'registry': []
        }

        for poc in poc_data:
            indicators = poc.get('exploit_indicators', {})
            for key in combined.keys():
                if key in indicators:
                    combined[key].extend(indicators[key])

        # Deduplicate and filter
        for key in combined.keys():
            combined[key] = list(set(combined[key]))
            # Remove empty and invalid entries
            combined[key] = [item for item in combined[key] if item and len(item) > 2]

        return combined

    def _generate_description(self, cve, poc_data: list) -> str:
        """Generate enhanced rule description"""
        base_desc = f"Detection for {cve.cve_id}"

        if cve.description:
            # Extract key terms from CVE description
            desc_words = cve.description.lower().split()
            key_terms = [word for word in desc_words if word in [
                'remote', 'execution', 'injection', 'bypass', 'privilege', 'escalation',
                'overflow', 'disclosure', 'traversal', 'deserialization'
            ]]

            if key_terms:
                base_desc += f" involving {', '.join(set(key_terms[:3]))}"

        if poc_data:
            total_pocs = len(poc_data)
            total_stars = sum(p.get('stargazers_count', 0) for p in poc_data)
            base_desc += f" [Enhanced with {total_pocs} PoC(s), {total_stars} stars]"

        return base_desc

    def _generate_references(self, cve, poc_data: list) -> str:
        """Generate references section"""
        refs = []

        # Add CVE reference
        refs.append(f"https://nvd.nist.gov/vuln/detail/{cve.cve_id}")

        # Add top PoC references (max 3)
        if poc_data:
            sorted_pocs = sorted(poc_data, key=lambda x: x.get('stargazers_count', 0), reverse=True)
            for poc in sorted_pocs[:3]:
                if poc.get('html_url'):
                    refs.append(poc['html_url'])

        return '\n'.join(f"    - {ref}" for ref in refs)

    def _generate_tags(self, cve, poc_data: list) -> str:
        """Generate MITRE ATT&CK tags and other tags using CVE2CAPEC mappings"""
        tags = []

        # CVE tag
        tags.append(cve.cve_id.lower())

        # Get MITRE ATT&CK techniques from CVE2CAPEC mapping
        mitre_techniques = self.cve2capec_client.get_mitre_techniques_for_cve(cve.cve_id)

        if mitre_techniques:
            logger.info(f"Found {len(mitre_techniques)} MITRE techniques for {cve.cve_id}: {mitre_techniques}")
            # Add all mapped MITRE techniques
            for technique in mitre_techniques:
                # Convert to attack.t format (lowercase)
                attack_tag = f"attack.{technique.lower()}"
                if attack_tag not in tags:
                    tags.append(attack_tag)
        else:
            # No CVE2CAPEC mapping found - do not add fallback techniques
            logger.warning(f"No CVE2CAPEC mapping found for {cve.cve_id}, no MITRE techniques will be added")
            # Note: LLM will rely on the PoC analysis to determine appropriate techniques

        # Get CWE codes for additional context
        cwe_codes = self.cve2capec_client.get_cwe_for_cve(cve.cve_id)
        if cwe_codes:
            # Add the primary CWE as a tag
            primary_cwe = cwe_codes[0].lower().replace('-', '.')
            tags.append(primary_cwe)

        # Add PoC quality tags
        if poc_data:
            tags.append('exploit.poc')
            best_poc = max(poc_data, key=lambda x: x.get('quality_analysis', {}).get('quality_score', 0))
            quality_tier = best_poc.get('quality_analysis', {}).get('quality_tier', 'poor')
            tags.append(f'poc.quality.{quality_tier}')

        # Return tags as YAML array format
        if not tags:
            return "unknown"

        if len(tags) == 1:
            return tags[0]
        else:
            # Format as proper YAML array
            formatted_tags = []
            for tag in tags:
                formatted_tags.append(f"    - {tag}")
            return '\n'.join(formatted_tags)

    def _format_indicators(self, indicators: list) -> str:
        """Format indicators for SIGMA rule"""
        if not indicators:
            return '            - "*"  # No specific indicators available'

        # Limit indicators to avoid overly complex rules
        limited_indicators = indicators[:10]

        formatted = []
        for indicator in limited_indicators:
            # Clean and escape special characters for SIGMA
            cleaned = str(indicator).strip()
            if cleaned:
                escaped = cleaned.replace('\\\\', '\\\\\\\\').replace('*', '\\\\*').replace('?', '\\\\?')
                formatted.append(f'            - "{escaped}"')

        return '\n'.join(formatted) if formatted else '            - "*"  # No valid indicators'

    def _enhance_detection_logic(self, rule_content: str, indicators: dict, poc_data: list) -> str:
        """Enhance detection logic based on PoC quality and indicators"""

        # If we have high-quality PoCs, add additional detection conditions
        best_poc = max(poc_data, key=lambda x: x.get('quality_analysis', {}).get('quality_score', 0))
        quality_score = best_poc.get('quality_analysis', {}).get('quality_score', 0)

        if quality_score > 60:  # High quality PoC
            # Add more specific detection conditions
            if indicators.get('processes') and indicators.get('commands'):
                additional_condition = """
        process_and_command:
            Image|contains: {{PROCESSES}}
            CommandLine|contains: {{COMMANDS}}"""

                # Insert before the condition line
                rule_content = rule_content.replace(
                    'condition: selection',
                    additional_condition + '\n    condition: selection or process_and_command'
                )

        return rule_content

    def _calculate_confidence_level(self, cve, poc_data: list) -> str:
        """Calculate confidence level based on CVE and PoC data"""
        score = 0

        # CVSS score factor
        if cve.cvss_score:
            if cve.cvss_score >= 9.0:
                score += 40
            elif cve.cvss_score >= 7.0:
                score += 30
            elif cve.cvss_score >= 5.0:
                score += 20
            else:
                score += 10

        # PoC quality factor
        if poc_data:
            total_stars = sum(p.get('stargazers_count', 0) for p in poc_data)
            poc_count = len(poc_data)

            score += min(total_stars, 30)  # Max 30 points for stars
            score += min(poc_count * 5, 20)  # Max 20 points for PoC count

            # Quality tier bonus
            best_poc = max(poc_data, key=lambda x: x.get('quality_analysis', {}).get('quality_score', 0))
            quality_tier = best_poc.get('quality_analysis', {}).get('quality_tier', 'poor')

            tier_bonus = {
                'excellent': 20,
                'good': 15,
                'fair': 10,
                'poor': 5,
                'very_poor': 0
            }
            score += tier_bonus.get(quality_tier, 0)

        # Determine confidence level
        if score >= 80:
            return 'HIGH'
        elif score >= 60:
            return 'MEDIUM'
        elif score >= 40:
            return 'LOW'
        else:
            return 'INFORMATIONAL'

    def _create_default_template(self, cve, best_poc: Optional[dict]) -> object:
        """Create a default template based on CVE and PoC analysis"""
        from main import RuleTemplate
        import uuid

        # Analyze the best PoC to determine the most appropriate template type
        template_type = "process"
        if best_poc:
            indicators = best_poc.get('exploit_indicators', {})
            if indicators.get('network') or indicators.get('urls'):
                template_type = "network"
            elif indicators.get('files'):
                template_type = "file"
            elif any('powershell' in p.lower() for p in indicators.get('processes', [])):
                template_type = "powershell"

        # Create template content based on type
        if template_type == "network":
            template_content = """title: {{TITLE}}
id: {{RULE_ID}}
status: experimental
description: {{DESCRIPTION}}
author: CVE-SIGMA Auto Generator
date: {{DATE}}
references:
{{REFERENCES}}
tags:
{{TAGS}}
logsource:
    category: network_connection
    product: windows
detection:
    selection:
        Initiated: true
        DestinationIp:
{{NETWORK}}
    selection_url:
        DestinationHostname|contains:
{{URLS}}
    condition: selection or selection_url
falsepositives:
    - Legitimate network connections
level: {{LEVEL}}"""
        elif template_type == "file":
            template_content = """title: {{TITLE}}
id: {{RULE_ID}}
status: experimental
description: {{DESCRIPTION}}
author: CVE-SIGMA Auto Generator
date: {{DATE}}
references:
{{REFERENCES}}
tags:
{{TAGS}}
logsource:
    category: file_event
    product: windows
detection:
    selection:
        TargetFilename|contains:
{{FILES}}
    condition: selection
falsepositives:
    - Legitimate file operations
level: {{LEVEL}}"""
        elif template_type == "powershell":
            template_content = """title: {{TITLE}}
id: {{RULE_ID}}
status: experimental
description: {{DESCRIPTION}}
author: CVE-SIGMA Auto Generator
date: {{DATE}}
references:
{{REFERENCES}}
tags:
{{TAGS}}
logsource:
    category: process_creation
    product: windows
detection:
    selection:
        Image|endswith:
            - '\\powershell.exe'
            - '\\pwsh.exe'
        CommandLine|contains:
{{COMMANDS}}
    condition: selection
falsepositives:
    - Legitimate PowerShell scripts
level: {{LEVEL}}"""
        else:  # default to process
            template_content = """title: {{TITLE}}
id: {{RULE_ID}}
status: experimental
description: {{DESCRIPTION}}
author: CVE-SIGMA Auto Generator
date: {{DATE}}
references:
{{REFERENCES}}
tags:
{{TAGS}}
logsource:
    category: process_creation
    product: windows
detection:
    selection:
        Image|endswith:
{{PROCESSES}}
    selection_cmd:
        CommandLine|contains:
{{COMMANDS}}
    condition: selection or selection_cmd
falsepositives:
    - Legitimate software usage
level: {{LEVEL}}"""

        # Create a temporary template object
        class DefaultTemplate:
            def __init__(self, name, content):
                self.template_name = name
                self.template_content = content
                self.applicable_product_patterns = []

        return DefaultTemplate(f"Default {template_type.title()} Template", template_content)

    def _clean_empty_sections(self, rule_content: str) -> str:
        """Clean up empty sections in the SIGMA rule"""
        # Remove lines that contain only placeholder indicators
        lines = rule_content.split('\n')
        cleaned_lines = []

        for line in lines:
            # Skip lines that are just placeholder indicators
            if '- "*"  # No' in line and 'or selection' in rule_content:
                continue
            cleaned_lines.append(line)

        return '\n'.join(cleaned_lines)

    def _extract_log_source(self, template_name: str) -> str:
        """Extract log source from template name"""
        template_lower = template_name.lower()

        if 'process' in template_lower or 'execution' in template_lower:
            return 'process_creation'
        elif 'network' in template_lower:
            return 'network_connection'
        elif 'file' in template_lower:
            return 'file_event'
        elif 'powershell' in template_lower:
            return 'powershell'
        elif 'registry' in template_lower:
            return 'registry_event'
        else:
            return 'generic'