remove infra.md.example, infra.md is the source of truth
This commit is contained in:
268
ayn-antivirus/ayn_antivirus/detectors/ai_analyzer.py
Normal file
268
ayn-antivirus/ayn_antivirus/detectors/ai_analyzer.py
Normal file
@@ -0,0 +1,268 @@
|
||||
"""AYN Antivirus — AI-Powered Threat Analyzer.
|
||||
|
||||
Uses Claude to analyze suspicious files and filter false positives.
|
||||
Each detection from heuristic/signature scanners is verified by AI
|
||||
before being reported as a real threat.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import platform
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
# Module-level logger, named after this module per project convention.
logger = logging.getLogger(__name__)


# System prompt for Claude.  "{environment}" is filled by AIAnalyzer with host
# context from _detect_environment().  The doubled braces on the JSON schema
# line are escapes so str.format() emits them as literal braces.
SYSTEM_PROMPT = """Linux VPS antivirus analyst. {environment}
Normal: pip/npm scripts in /usr/local/bin, Docker hex IDs, cron jobs (fstrim/certbot/logrotate), high-entropy archives, curl/wget in deploy scripts, recently-modified files after apt/pip.
Reply ONLY JSON: {{"verdict":"threat"|"safe"|"suspicious","confidence":0-100,"reason":"short","recommended_action":"quarantine"|"delete"|"ignore"|"monitor"}}"""


# Per-detection user prompt.  Placeholders are filled by AIAnalyzer.analyze()
# with detection metadata, stat() results, and a short content preview.
ANALYSIS_PROMPT = """FILE:{file_path} DETECT:{threat_name}({threat_type}) SEV:{severity} DET:{detector} CONF:{original_confidence}% SIZE:{file_size} PERM:{permissions} OWN:{owner} MOD:{mtime}
PREVIEW:
{content_preview}
JSON verdict:"""
|
||||
|
||||
|
||||
@dataclass
class AIVerdict:
    """Outcome of asking the AI model to judge a single detection.

    Attributes:
        verdict: One of "threat", "safe", or "suspicious".
        confidence: Model confidence in the verdict, 0-100.
        reason: Short human-readable justification from the model.
        recommended_action: One of "quarantine", "delete", "ignore", "monitor".
        raw_response: Unparsed model reply text, when available.
    """

    verdict: str
    confidence: int
    reason: str
    recommended_action: str
    raw_response: str = ""

    @property
    def is_threat(self) -> bool:
        """Whether the model confirmed the detection as a real threat."""
        return self.verdict == "threat"

    @property
    def is_safe(self) -> bool:
        """Whether the model dismissed the detection as a false positive."""
        return self.verdict == "safe"
|
||||
|
||||
|
||||
class AIAnalyzer:
    """AI-powered threat analysis using Claude.

    Each detection from the heuristic/signature scanners is sent to the
    Anthropic Messages API together with file metadata and a content preview;
    the model's JSON reply is parsed into an :class:`AIVerdict`.  Without an
    API key (or on any API/parse failure) the analyzer degrades gracefully:
    detections pass through as "suspicious" with a quarantine recommendation,
    so nothing is silently dropped.
    """

    def __init__(self, api_key: Optional[str] = None, model: str = "claude-sonnet-4-20250514"):
        """Resolve the API key (argument > env var > .env files) and cache host context."""
        self._api_key = api_key or os.environ.get("ANTHROPIC_API_KEY", "") or self._load_key_from_env_file()
        self._model = model
        self._client = None  # lazily constructed anthropic.Anthropic
        self._environment = self._detect_environment()

    @staticmethod
    def _load_key_from_env_file() -> str:
        """Return ANTHROPIC_API_KEY from well-known .env files, or "" if absent."""
        for p in ["/opt/ayn-antivirus/.env", Path.home() / ".ayn-antivirus" / ".env"]:
            try:
                for line in Path(p).read_text().splitlines():
                    line = line.strip()
                    # endswith("=") rejects "ANTHROPIC_API_KEY=" with an empty value.
                    if line.startswith("ANTHROPIC_API_KEY=") and not line.endswith("="):
                        return line.split("=", 1)[1].strip().strip("'\"")
            except Exception:
                pass  # missing/unreadable candidate — try the next one
        return ""

    @property
    def available(self) -> bool:
        """True when an API key is configured (AI analysis can be attempted)."""
        return bool(self._api_key)

    def _get_client(self):
        """Lazily build the Anthropic client; return None on init failure."""
        if self._client is None:
            try:
                import anthropic

                self._client = anthropic.Anthropic(api_key=self._api_key)
            except Exception as exc:
                logger.error("Failed to init Anthropic client: %s", exc)
                return None
        return self._client

    @staticmethod
    def _detect_environment() -> str:
        """Gather environment context (OS, container/deploy tooling) for the AI."""
        import shutil

        parts = [
            f"OS: {platform.system()} {platform.release()}",
            f"Hostname: {platform.node()}",
            f"Arch: {platform.machine()}",
        ]
        if shutil.which("incus"):
            parts.append("Container runtime: Incus/LXC (containers run Docker inside)")
        if shutil.which("docker"):
            parts.append("Docker: available")
        if Path("/etc/dokploy").exists() or shutil.which("dokploy"):
            parts.append("Platform: Dokploy (Docker deployment platform)")

        # Check if we're inside a container
        if Path("/run/host/container-manager").exists():
            parts.append("Running inside: managed container")
        return "\n".join(parts)

    def _get_file_context(self, file_path: str) -> Dict[str, Any]:
        """Gather file metadata and a short content preview for the prompt.

        Never raises: unreadable files keep the zeroed defaults and the
        "[file not readable]" preview.
        """
        p = Path(file_path)
        ctx: Dict[str, Any] = {
            "file_size": 0,
            "permissions": "",
            "owner": "",
            "mtime": "",
            "content_preview": "[file not readable]",
        }
        try:
            st = p.stat()
            ctx["file_size"] = st.st_size
            ctx["permissions"] = oct(st.st_mode)[-4:]
            ctx["mtime"] = str(st.st_mtime)
            try:
                import pwd

                ctx["owner"] = pwd.getpwuid(st.st_uid).pw_name
            except Exception:
                # No passwd entry (or non-POSIX platform) — fall back to raw uid.
                ctx["owner"] = str(st.st_uid)
        except OSError:
            pass  # stat failed — keep defaults; preview attempt below may still fail too

        try:
            with open(file_path, "rb") as f:
                raw = f.read(512)
            # BUGFIX: decode(errors="replace") never raises, so the previous
            # try/except hex fallback was unreachable dead code.  Use a
            # NUL-byte heuristic to show binary content as hex instead.
            if b"\x00" in raw:
                ctx["content_preview"] = raw.hex()[:512]
            else:
                ctx["content_preview"] = raw.decode("utf-8", errors="replace")
        except Exception:
            pass  # unreadable — keep "[file not readable]"

        return ctx

    def analyze(
        self,
        file_path: str,
        threat_name: str,
        threat_type: str,
        severity: str,
        detector: str,
        confidence: int = 50,
    ) -> AIVerdict:
        """Analyze a single detection with AI.

        Args:
            file_path: Path of the flagged file.
            threat_name/threat_type/severity/detector: Metadata from the
                originating scanner.
            confidence: The scanner's own confidence (0-100), echoed back in
                fallback verdicts.

        Returns:
            An AIVerdict.  On any failure (no key, client init error, API
            error, unparseable reply) the fallback is verdict="suspicious"
            with recommended_action="quarantine".
        """
        if not self.available:
            # No API key — pass through as-is
            return AIVerdict(
                verdict="suspicious",
                confidence=confidence,
                reason="AI analysis unavailable (no API key)",
                recommended_action="quarantine",
            )

        client = self._get_client()
        if not client:
            return AIVerdict(
                verdict="suspicious",
                confidence=confidence,
                reason="AI client init failed",
                recommended_action="quarantine",
            )

        ctx = self._get_file_context(file_path)

        # Truncate the preview so the prompt stays small.
        preview = ctx.get("content_preview", "")
        if len(preview) > 500:
            preview = preview[:500] + "..."
        # BUGFIX: the old code doubled braces in the preview "to avoid
        # format() issues", but str.format() only interprets braces in the
        # *template*, never in substituted values — the escaping merely
        # corrupted the content shown to the model.  Pass it through as-is.

        user_msg = ANALYSIS_PROMPT.format(
            file_path=file_path,
            threat_name=threat_name,
            threat_type=threat_type,
            severity=severity,
            detector=detector,
            original_confidence=confidence,
            file_size=ctx.get("file_size", 0),
            permissions=ctx.get("permissions", ""),
            owner=ctx.get("owner", ""),
            mtime=ctx.get("mtime", ""),
            content_preview=preview,
        )

        text = ""
        raw_text = ""
        try:
            response = client.messages.create(
                model=self._model,
                max_tokens=150,
                system=SYSTEM_PROMPT.format(environment=self._environment),
                messages=[{"role": "user", "content": user_msg}],
            )
            raw_text = text = response.content[0].text.strip()

            # Parse JSON from response (handle markdown code blocks)
            if "```" in text:
                for part in text.split("```")[1:]:
                    cleaned = part.strip()
                    if cleaned.startswith("json"):
                        cleaned = cleaned[4:].strip()
                    if cleaned.startswith("{"):
                        text = cleaned
                        break

            # Narrow to the outermost JSON object in the (possibly chatty) reply.
            start = text.find("{")
            end = text.rfind("}") + 1
            if start >= 0 and end > start:
                text = text[start:end]

            data = json.loads(text)
            return AIVerdict(
                verdict=data.get("verdict", "suspicious"),
                confidence=data.get("confidence", 50),
                reason=data.get("reason", ""),
                recommended_action=data.get("recommended_action", "quarantine"),
                # BUGFIX: store the untrimmed model reply, not the JSON slice,
                # so raw_response matches its name.
                raw_response=raw_text,
            )
        except json.JSONDecodeError as exc:
            logger.warning("AI returned non-JSON: %s — raw: %s", exc, text[:200])
            return AIVerdict(
                verdict="suspicious",
                confidence=confidence,
                reason=f"AI parse error: {text[:100]}",
                recommended_action="quarantine",
                raw_response=raw_text,
            )
        except Exception as exc:
            logger.error("AI analysis failed: %s", exc)
            return AIVerdict(
                verdict="suspicious",
                confidence=confidence,
                reason=f"AI error: {exc}",
                recommended_action="quarantine",
            )

    def analyze_batch(
        self,
        detections: List[Dict[str, Any]],
    ) -> List[Dict[str, Any]]:
        """Analyze a batch of detections. Returns enriched detections with AI verdicts.

        Each detection dict should have: file_path, threat_name, threat_type,
        severity, detector; an optional "confidence" (default 50) is forwarded.
        Input dicts are copied, never mutated.
        """
        results = []
        for d in detections:
            verdict = self.analyze(
                file_path=d.get("file_path", ""),
                threat_name=d.get("threat_name", ""),
                threat_type=d.get("threat_type", ""),
                severity=d.get("severity", "MEDIUM"),
                detector=d.get("detector", ""),
                confidence=d.get("confidence", 50),
            )
            enriched = dict(d)
            enriched["ai_verdict"] = verdict.verdict
            enriched["ai_confidence"] = verdict.confidence
            enriched["ai_reason"] = verdict.reason
            enriched["ai_action"] = verdict.recommended_action
            results.append(enriched)
        return results
|
||||
Reference in New Issue
Block a user