# calvana/ayn-antivirus/ayn_antivirus/detectors/ai_analyzer.py
"""AYN Antivirus — AI-Powered Threat Analyzer.
Uses Claude to analyze suspicious files and filter false positives.
Each detection from heuristic/signature scanners is verified by AI
before being reported as a real threat.
"""
from __future__ import annotations
import json
import logging
import os
import platform
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, List, Optional
# Module-level logger; configuration is left to the application.
logger = logging.getLogger(__name__)
# System prompt sent once per API call. Formatted with {environment}
# (host context built by AIAnalyzer._detect_environment). The literal
# braces in the JSON schema example are escaped as {{ }} so .format()
# leaves them intact.
SYSTEM_PROMPT = """Linux VPS antivirus analyst. {environment}
Normal: pip/npm scripts in /usr/local/bin, Docker hex IDs, cron jobs (fstrim/certbot/logrotate), high-entropy archives, curl/wget in deploy scripts, recently-modified files after apt/pip.
Reply ONLY JSON: {{"verdict":"threat"|"safe"|"suspicious","confidence":0-100,"reason":"short","recommended_action":"quarantine"|"delete"|"ignore"|"monitor"}}"""
# Per-detection user message. Formatted in AIAnalyzer.analyze with the
# detection metadata, file stat info, and a short content preview.
ANALYSIS_PROMPT = """FILE:{file_path} DETECT:{threat_name}({threat_type}) SEV:{severity} DET:{detector} CONF:{original_confidence}% SIZE:{file_size} PERM:{permissions} OWN:{owner} MOD:{mtime}
PREVIEW:
{content_preview}
JSON verdict:"""
@dataclass
class AIVerdict:
    """Outcome of a single AI analysis pass over one detection."""

    verdict: str              # one of: "threat", "safe", "suspicious"
    confidence: int           # model-reported certainty, 0-100
    reason: str               # short human-readable justification
    recommended_action: str   # one of: "quarantine", "delete", "ignore", "monitor"
    raw_response: str = ""    # raw model text, kept for auditing/debugging

    @property
    def is_threat(self) -> bool:
        """True when the AI confirmed the detection as a real threat."""
        return self.verdict == "threat"

    @property
    def is_safe(self) -> bool:
        """True when the AI judged the detection benign (false positive)."""
        return self.verdict == "safe"
class AIAnalyzer:
"""AI-powered threat analysis using Claude."""
def __init__(self, api_key: Optional[str] = None, model: str = "claude-sonnet-4-20250514"):
self._api_key = api_key or os.environ.get("ANTHROPIC_API_KEY", "") or self._load_key_from_env_file()
self._model = model
self._client = None
self._environment = self._detect_environment()
@staticmethod
def _load_key_from_env_file() -> str:
for p in ["/opt/ayn-antivirus/.env", Path.home() / ".ayn-antivirus" / ".env"]:
try:
for line in Path(p).read_text().splitlines():
line = line.strip()
if line.startswith("ANTHROPIC_API_KEY=") and not line.endswith("="):
return line.split("=", 1)[1].strip().strip("'\"")
except Exception:
pass
return ""
@property
def available(self) -> bool:
return bool(self._api_key)
def _get_client(self):
if not self._client:
try:
import anthropic
self._client = anthropic.Anthropic(api_key=self._api_key)
except Exception as exc:
logger.error("Failed to init Anthropic client: %s", exc)
return None
return self._client
@staticmethod
def _detect_environment() -> str:
"""Gather environment context for the AI."""
import shutil
parts = [
f"OS: {platform.system()} {platform.release()}",
f"Hostname: {platform.node()}",
f"Arch: {platform.machine()}",
]
if shutil.which("incus"):
parts.append("Container runtime: Incus/LXC (containers run Docker inside)")
if shutil.which("docker"):
parts.append("Docker: available")
if Path("/etc/dokploy").exists() or shutil.which("dokploy"):
parts.append("Platform: Dokploy (Docker deployment platform)")
# Check if we're inside a container
if Path("/run/host/container-manager").exists():
parts.append("Running inside: managed container")
return "\n".join(parts)
def _get_file_context(self, file_path: str) -> Dict[str, Any]:
"""Gather file metadata and content preview."""
p = Path(file_path)
ctx = {
"file_size": 0,
"permissions": "",
"owner": "",
"mtime": "",
"content_preview": "[file not readable]",
}
try:
st = p.stat()
ctx["file_size"] = st.st_size
ctx["permissions"] = oct(st.st_mode)[-4:]
ctx["mtime"] = str(st.st_mtime)
try:
import pwd
ctx["owner"] = pwd.getpwuid(st.st_uid).pw_name
except Exception:
ctx["owner"] = str(st.st_uid)
except OSError:
pass
try:
with open(file_path, "rb") as f:
raw = f.read(512)
# Try text decode, fall back to hex
try:
ctx["content_preview"] = raw.decode("utf-8", errors="replace")
except Exception:
ctx["content_preview"] = raw.hex()[:512]
except Exception:
pass
return ctx
def analyze(
self,
file_path: str,
threat_name: str,
threat_type: str,
severity: str,
detector: str,
confidence: int = 50,
) -> AIVerdict:
"""Analyze a single detection with AI."""
if not self.available:
# No API key — pass through as-is
return AIVerdict(
verdict="suspicious",
confidence=confidence,
reason="AI analysis unavailable (no API key)",
recommended_action="quarantine",
)
client = self._get_client()
if not client:
return AIVerdict(
verdict="suspicious",
confidence=confidence,
reason="AI client init failed",
recommended_action="quarantine",
)
ctx = self._get_file_context(file_path)
# Sanitize content preview to avoid format string issues
preview = ctx.get("content_preview", "")
if len(preview) > 500:
preview = preview[:500] + "..."
# Replace curly braces to avoid format() issues
preview = preview.replace("{", "{{").replace("}", "}}")
user_msg = ANALYSIS_PROMPT.format(
file_path=file_path,
threat_name=threat_name,
threat_type=threat_type,
severity=severity,
detector=detector,
original_confidence=confidence,
file_size=ctx.get("file_size", 0),
permissions=ctx.get("permissions", ""),
owner=ctx.get("owner", ""),
mtime=ctx.get("mtime", ""),
content_preview=preview,
)
text = ""
try:
response = client.messages.create(
model=self._model,
max_tokens=150,
system=SYSTEM_PROMPT.format(environment=self._environment),
messages=[{"role": "user", "content": user_msg}],
)
text = response.content[0].text.strip()
# Parse JSON from response (handle markdown code blocks)
if "```" in text:
parts = text.split("```")
for part in parts[1:]:
cleaned = part.strip()
if cleaned.startswith("json"):
cleaned = cleaned[4:].strip()
if cleaned.startswith("{"):
text = cleaned
break
# Find the JSON object in the response
start = text.find("{")
end = text.rfind("}") + 1
if start >= 0 and end > start:
text = text[start:end]
data = json.loads(text)
return AIVerdict(
verdict=data.get("verdict", "suspicious"),
confidence=data.get("confidence", 50),
reason=data.get("reason", ""),
recommended_action=data.get("recommended_action", "quarantine"),
raw_response=text,
)
except json.JSONDecodeError as exc:
logger.warning("AI returned non-JSON: %s — raw: %s", exc, text[:200])
return AIVerdict(
verdict="suspicious",
confidence=confidence,
reason=f"AI parse error: {text[:100]}",
recommended_action="quarantine",
raw_response=text,
)
except Exception as exc:
logger.error("AI analysis failed: %s", exc)
return AIVerdict(
verdict="suspicious",
confidence=confidence,
reason=f"AI error: {exc}",
recommended_action="quarantine",
)
def analyze_batch(
self,
detections: List[Dict[str, Any]],
) -> List[Dict[str, Any]]:
"""Analyze a batch of detections. Returns enriched detections with AI verdicts.
Each detection dict should have: file_path, threat_name, threat_type, severity, detector
"""
results = []
for d in detections:
verdict = self.analyze(
file_path=d.get("file_path", ""),
threat_name=d.get("threat_name", ""),
threat_type=d.get("threat_type", ""),
severity=d.get("severity", "MEDIUM"),
detector=d.get("detector", ""),
confidence=d.get("confidence", 50),
)
enriched = dict(d)
enriched["ai_verdict"] = verdict.verdict
enriched["ai_confidence"] = verdict.confidence
enriched["ai_reason"] = verdict.reason
enriched["ai_action"] = verdict.recommended_action
results.append(enriched)
return results