remove infra.md.example, infra.md is the source of truth
This commit is contained in:
268
ayn-antivirus/ayn_antivirus/detectors/ai_analyzer.py
Normal file
268
ayn-antivirus/ayn_antivirus/detectors/ai_analyzer.py
Normal file
@@ -0,0 +1,268 @@
|
||||
"""AYN Antivirus — AI-Powered Threat Analyzer.
|
||||
|
||||
Uses Claude to analyze suspicious files and filter false positives.
|
||||
Each detection from heuristic/signature scanners is verified by AI
|
||||
before being reported as a real threat.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import platform
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
# Module-level logger, named after this module per project convention.
logger = logging.getLogger(__name__)


# System prompt for Claude.  "{environment}" is filled by AIAnalyzer with host
# context from _detect_environment().  The doubled braces on the JSON schema
# line are escapes so str.format() emits them as literal braces.
SYSTEM_PROMPT = """Linux VPS antivirus analyst. {environment}
Normal: pip/npm scripts in /usr/local/bin, Docker hex IDs, cron jobs (fstrim/certbot/logrotate), high-entropy archives, curl/wget in deploy scripts, recently-modified files after apt/pip.
Reply ONLY JSON: {{"verdict":"threat"|"safe"|"suspicious","confidence":0-100,"reason":"short","recommended_action":"quarantine"|"delete"|"ignore"|"monitor"}}"""


# Per-detection user prompt.  Placeholders are filled by AIAnalyzer.analyze()
# with detection metadata, stat() results, and a short content preview.
ANALYSIS_PROMPT = """FILE:{file_path} DETECT:{threat_name}({threat_type}) SEV:{severity} DET:{detector} CONF:{original_confidence}% SIZE:{file_size} PERM:{permissions} OWN:{owner} MOD:{mtime}
PREVIEW:
{content_preview}
JSON verdict:"""
|
||||
|
||||
|
||||
@dataclass
class AIVerdict:
    """Outcome of asking the AI model to judge a single detection.

    Attributes:
        verdict: One of "threat", "safe", or "suspicious".
        confidence: Model confidence in the verdict, 0-100.
        reason: Short human-readable justification from the model.
        recommended_action: One of "quarantine", "delete", "ignore", "monitor".
        raw_response: Unparsed model reply text, when available.
    """

    verdict: str
    confidence: int
    reason: str
    recommended_action: str
    raw_response: str = ""

    @property
    def is_threat(self) -> bool:
        """Whether the model confirmed the detection as a real threat."""
        return self.verdict == "threat"

    @property
    def is_safe(self) -> bool:
        """Whether the model dismissed the detection as a false positive."""
        return self.verdict == "safe"
|
||||
|
||||
|
||||
class AIAnalyzer:
    """AI-powered threat analysis using Claude.

    Each detection from the heuristic/signature scanners is sent to the
    Anthropic Messages API together with file metadata and a content preview;
    the model's JSON reply is parsed into an :class:`AIVerdict`.  Without an
    API key (or on any API/parse failure) the analyzer degrades gracefully:
    detections pass through as "suspicious" with a quarantine recommendation,
    so nothing is silently dropped.
    """

    def __init__(self, api_key: Optional[str] = None, model: str = "claude-sonnet-4-20250514"):
        """Resolve the API key (argument > env var > .env files) and cache host context."""
        self._api_key = api_key or os.environ.get("ANTHROPIC_API_KEY", "") or self._load_key_from_env_file()
        self._model = model
        self._client = None  # lazily constructed anthropic.Anthropic
        self._environment = self._detect_environment()

    @staticmethod
    def _load_key_from_env_file() -> str:
        """Return ANTHROPIC_API_KEY from well-known .env files, or "" if absent."""
        for p in ["/opt/ayn-antivirus/.env", Path.home() / ".ayn-antivirus" / ".env"]:
            try:
                for line in Path(p).read_text().splitlines():
                    line = line.strip()
                    # endswith("=") rejects "ANTHROPIC_API_KEY=" with an empty value.
                    if line.startswith("ANTHROPIC_API_KEY=") and not line.endswith("="):
                        return line.split("=", 1)[1].strip().strip("'\"")
            except Exception:
                pass  # missing/unreadable candidate — try the next one
        return ""

    @property
    def available(self) -> bool:
        """True when an API key is configured (AI analysis can be attempted)."""
        return bool(self._api_key)

    def _get_client(self):
        """Lazily build the Anthropic client; return None on init failure."""
        if self._client is None:
            try:
                import anthropic

                self._client = anthropic.Anthropic(api_key=self._api_key)
            except Exception as exc:
                logger.error("Failed to init Anthropic client: %s", exc)
                return None
        return self._client

    @staticmethod
    def _detect_environment() -> str:
        """Gather environment context (OS, container/deploy tooling) for the AI."""
        import shutil

        parts = [
            f"OS: {platform.system()} {platform.release()}",
            f"Hostname: {platform.node()}",
            f"Arch: {platform.machine()}",
        ]
        if shutil.which("incus"):
            parts.append("Container runtime: Incus/LXC (containers run Docker inside)")
        if shutil.which("docker"):
            parts.append("Docker: available")
        if Path("/etc/dokploy").exists() or shutil.which("dokploy"):
            parts.append("Platform: Dokploy (Docker deployment platform)")

        # Check if we're inside a container
        if Path("/run/host/container-manager").exists():
            parts.append("Running inside: managed container")
        return "\n".join(parts)

    def _get_file_context(self, file_path: str) -> Dict[str, Any]:
        """Gather file metadata and a short content preview for the prompt.

        Never raises: unreadable files keep the zeroed defaults and the
        "[file not readable]" preview.
        """
        p = Path(file_path)
        ctx: Dict[str, Any] = {
            "file_size": 0,
            "permissions": "",
            "owner": "",
            "mtime": "",
            "content_preview": "[file not readable]",
        }
        try:
            st = p.stat()
            ctx["file_size"] = st.st_size
            ctx["permissions"] = oct(st.st_mode)[-4:]
            ctx["mtime"] = str(st.st_mtime)
            try:
                import pwd

                ctx["owner"] = pwd.getpwuid(st.st_uid).pw_name
            except Exception:
                # No passwd entry (or non-POSIX platform) — fall back to raw uid.
                ctx["owner"] = str(st.st_uid)
        except OSError:
            pass  # stat failed — keep defaults; preview attempt below may still fail too

        try:
            with open(file_path, "rb") as f:
                raw = f.read(512)
            # BUGFIX: decode(errors="replace") never raises, so the previous
            # try/except hex fallback was unreachable dead code.  Use a
            # NUL-byte heuristic to show binary content as hex instead.
            if b"\x00" in raw:
                ctx["content_preview"] = raw.hex()[:512]
            else:
                ctx["content_preview"] = raw.decode("utf-8", errors="replace")
        except Exception:
            pass  # unreadable — keep "[file not readable]"

        return ctx

    def analyze(
        self,
        file_path: str,
        threat_name: str,
        threat_type: str,
        severity: str,
        detector: str,
        confidence: int = 50,
    ) -> AIVerdict:
        """Analyze a single detection with AI.

        Args:
            file_path: Path of the flagged file.
            threat_name/threat_type/severity/detector: Metadata from the
                originating scanner.
            confidence: The scanner's own confidence (0-100), echoed back in
                fallback verdicts.

        Returns:
            An AIVerdict.  On any failure (no key, client init error, API
            error, unparseable reply) the fallback is verdict="suspicious"
            with recommended_action="quarantine".
        """
        if not self.available:
            # No API key — pass through as-is
            return AIVerdict(
                verdict="suspicious",
                confidence=confidence,
                reason="AI analysis unavailable (no API key)",
                recommended_action="quarantine",
            )

        client = self._get_client()
        if not client:
            return AIVerdict(
                verdict="suspicious",
                confidence=confidence,
                reason="AI client init failed",
                recommended_action="quarantine",
            )

        ctx = self._get_file_context(file_path)

        # Truncate the preview so the prompt stays small.
        preview = ctx.get("content_preview", "")
        if len(preview) > 500:
            preview = preview[:500] + "..."
        # BUGFIX: the old code doubled braces in the preview "to avoid
        # format() issues", but str.format() only interprets braces in the
        # *template*, never in substituted values — the escaping merely
        # corrupted the content shown to the model.  Pass it through as-is.

        user_msg = ANALYSIS_PROMPT.format(
            file_path=file_path,
            threat_name=threat_name,
            threat_type=threat_type,
            severity=severity,
            detector=detector,
            original_confidence=confidence,
            file_size=ctx.get("file_size", 0),
            permissions=ctx.get("permissions", ""),
            owner=ctx.get("owner", ""),
            mtime=ctx.get("mtime", ""),
            content_preview=preview,
        )

        text = ""
        raw_text = ""
        try:
            response = client.messages.create(
                model=self._model,
                max_tokens=150,
                system=SYSTEM_PROMPT.format(environment=self._environment),
                messages=[{"role": "user", "content": user_msg}],
            )
            raw_text = text = response.content[0].text.strip()

            # Parse JSON from response (handle markdown code blocks)
            if "```" in text:
                for part in text.split("```")[1:]:
                    cleaned = part.strip()
                    if cleaned.startswith("json"):
                        cleaned = cleaned[4:].strip()
                    if cleaned.startswith("{"):
                        text = cleaned
                        break

            # Narrow to the outermost JSON object in the (possibly chatty) reply.
            start = text.find("{")
            end = text.rfind("}") + 1
            if start >= 0 and end > start:
                text = text[start:end]

            data = json.loads(text)
            return AIVerdict(
                verdict=data.get("verdict", "suspicious"),
                confidence=data.get("confidence", 50),
                reason=data.get("reason", ""),
                recommended_action=data.get("recommended_action", "quarantine"),
                # BUGFIX: store the untrimmed model reply, not the JSON slice,
                # so raw_response matches its name.
                raw_response=raw_text,
            )
        except json.JSONDecodeError as exc:
            logger.warning("AI returned non-JSON: %s — raw: %s", exc, text[:200])
            return AIVerdict(
                verdict="suspicious",
                confidence=confidence,
                reason=f"AI parse error: {text[:100]}",
                recommended_action="quarantine",
                raw_response=raw_text,
            )
        except Exception as exc:
            logger.error("AI analysis failed: %s", exc)
            return AIVerdict(
                verdict="suspicious",
                confidence=confidence,
                reason=f"AI error: {exc}",
                recommended_action="quarantine",
            )

    def analyze_batch(
        self,
        detections: List[Dict[str, Any]],
    ) -> List[Dict[str, Any]]:
        """Analyze a batch of detections. Returns enriched detections with AI verdicts.

        Each detection dict should have: file_path, threat_name, threat_type,
        severity, detector; an optional "confidence" (default 50) is forwarded.
        Input dicts are copied, never mutated.
        """
        results = []
        for d in detections:
            verdict = self.analyze(
                file_path=d.get("file_path", ""),
                threat_name=d.get("threat_name", ""),
                threat_type=d.get("threat_type", ""),
                severity=d.get("severity", "MEDIUM"),
                detector=d.get("detector", ""),
                confidence=d.get("confidence", 50),
            )
            enriched = dict(d)
            enriched["ai_verdict"] = verdict.verdict
            enriched["ai_confidence"] = verdict.confidence
            enriched["ai_reason"] = verdict.reason
            enriched["ai_action"] = verdict.recommended_action
            results.append(enriched)
        return results
|
||||
Reference in New Issue
Block a user