remove infra.md.example, infra.md is the source of truth

This commit is contained in:
Azreen Jamal
2026-03-03 03:06:13 +08:00
parent 1ad3033cc1
commit a3c6d09350
86 changed files with 17093 additions and 39 deletions

View File

@@ -0,0 +1,20 @@
"""AYN Antivirus detector modules."""
from ayn_antivirus.detectors.base import BaseDetector, DetectionResult
from ayn_antivirus.detectors.cryptominer_detector import CryptominerDetector
from ayn_antivirus.detectors.heuristic_detector import HeuristicDetector
from ayn_antivirus.detectors.rootkit_detector import RootkitDetector
from ayn_antivirus.detectors.signature_detector import SignatureDetector
from ayn_antivirus.detectors.spyware_detector import SpywareDetector
from ayn_antivirus.detectors.yara_detector import YaraDetector
# Public API of the detectors package: exactly the names imported above,
# which are what ``from ayn_antivirus.detectors import *`` exposes.
__all__ = [
"BaseDetector",
"DetectionResult",
"CryptominerDetector",
"HeuristicDetector",
"RootkitDetector",
"SignatureDetector",
"SpywareDetector",
"YaraDetector",
]

View File

@@ -0,0 +1,268 @@
"""AYN Antivirus — AI-Powered Threat Analyzer.
Uses Claude to analyze suspicious files and filter false positives.
Each detection from heuristic/signature scanners is verified by AI
before being reported as a real threat.
"""
from __future__ import annotations
import json
import logging
import os
import platform
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__)
# System prompt for every request. It is rendered with ``str.format``:
# ``{environment}`` is replaced by the detected host description, and the
# doubled braces produce the literal JSON schema the model must reply with.
SYSTEM_PROMPT = """Linux VPS antivirus analyst. {environment}
Normal: pip/npm scripts in /usr/local/bin, Docker hex IDs, cron jobs (fstrim/certbot/logrotate), high-entropy archives, curl/wget in deploy scripts, recently-modified files after apt/pip.
Reply ONLY JSON: {{"verdict":"threat"|"safe"|"suspicious","confidence":0-100,"reason":"short","recommended_action":"quarantine"|"delete"|"ignore"|"monitor"}}"""
# Per-detection user message, also rendered with ``str.format``. Every
# ``{field}`` is supplied by ``AIAnalyzer.analyze``: the detection details,
# file metadata and a short content preview.
ANALYSIS_PROMPT = """FILE:{file_path} DETECT:{threat_name}({threat_type}) SEV:{severity} DET:{detector} CONF:{original_confidence}% SIZE:{file_size} PERM:{permissions} OWN:{owner} MOD:{mtime}
PREVIEW:
{content_preview}
JSON verdict:"""
@dataclass
class AIVerdict:
    """Outcome of one AI review of a scanner detection."""

    verdict: str  # "threat", "safe" or "suspicious"
    confidence: int  # 0-100
    reason: str
    recommended_action: str  # "quarantine", "delete", "ignore" or "monitor"
    raw_response: str = ""

    @property
    def is_threat(self) -> bool:
        """True only when the verdict is exactly ``"threat"``."""
        return "threat" == self.verdict

    @property
    def is_safe(self) -> bool:
        """True only when the verdict is exactly ``"safe"``."""
        return "safe" == self.verdict
class AIAnalyzer:
    """AI-powered threat analysis using Claude.

    Each detection is sent to the Anthropic Messages API together with file
    metadata and a short content preview, and the JSON reply is converted
    into an :class:`AIVerdict`.  Whenever the AI cannot be consulted, or its
    reply cannot be parsed, a conservative ``suspicious`` / ``quarantine``
    verdict is returned instead of raising.
    """

    # Values the reply schema in SYSTEM_PROMPT allows; anything else the model
    # returns is replaced by the conservative default.
    _VERDICTS = ("threat", "safe", "suspicious")
    _ACTIONS = ("quarantine", "delete", "ignore", "monitor")
    # Maximum number of preview characters embedded in the prompt.
    _PREVIEW_CHARS = 500

    def __init__(self, api_key: Optional[str] = None, model: str = "claude-sonnet-4-20250514"):
        """Resolve the API key and capture the host environment description.

        The key is taken from *api_key*, then the ``ANTHROPIC_API_KEY``
        environment variable, then the on-disk ``.env`` files.
        """
        self._api_key = (
            api_key
            or os.environ.get("ANTHROPIC_API_KEY", "")
            or self._load_key_from_env_file()
        )
        self._model = model
        self._client = None  # created lazily by _get_client()
        self._environment = self._detect_environment()

    @staticmethod
    def _load_key_from_env_file() -> str:
        """Return the first ``ANTHROPIC_API_KEY=`` value found in a known ``.env`` file.

        Missing or unreadable files are skipped; ``""`` is returned when no
        key is found.
        """
        for candidate in ["/opt/ayn-antivirus/.env", Path.home() / ".ayn-antivirus" / ".env"]:
            try:
                lines = Path(candidate).read_text().splitlines()
            except (OSError, ValueError):
                # Best effort: absent, unreadable or undecodable file.
                continue
            for line in lines:
                line = line.strip()
                if line.startswith("ANTHROPIC_API_KEY=") and not line.endswith("="):
                    return line.split("=", 1)[1].strip().strip("'\"")
        return ""

    @property
    def available(self) -> bool:
        """Whether an API key is configured."""
        return bool(self._api_key)

    def _get_client(self):
        """Return the cached Anthropic client, creating it on first use.

        Returns ``None`` (after logging) when the ``anthropic`` package is
        missing or the client cannot be constructed.
        """
        if not self._client:
            try:
                import anthropic

                self._client = anthropic.Anthropic(api_key=self._api_key)
            except Exception as exc:
                logger.error("Failed to init Anthropic client: %s", exc)
                return None
        return self._client

    @staticmethod
    def _detect_environment() -> str:
        """Gather environment context for the AI as newline-separated facts."""
        import shutil

        parts = [
            f"OS: {platform.system()} {platform.release()}",
            f"Hostname: {platform.node()}",
            f"Arch: {platform.machine()}",
        ]
        if shutil.which("incus"):
            parts.append("Container runtime: Incus/LXC (containers run Docker inside)")
        if shutil.which("docker"):
            parts.append("Docker: available")
        if Path("/etc/dokploy").exists() or shutil.which("dokploy"):
            parts.append("Platform: Dokploy (Docker deployment platform)")
        # Check if we're inside a container
        if Path("/run/host/container-manager").exists():
            parts.append("Running inside: managed container")
        return "\n".join(parts)

    @staticmethod
    def _decode_preview(raw: bytes) -> str:
        """Decode *raw* as UTF-8 text, falling back to a hex dump for binary data.

        An incremental decoder is used so that a multi-byte character cut off
        by the fixed-size read does not make a valid text file look binary.
        """
        import codecs

        try:
            return codecs.getincrementaldecoder("utf-8")("strict").decode(raw, final=False)
        except UnicodeDecodeError:
            return raw.hex()[:512]

    def _get_file_context(self, file_path: str) -> Dict[str, Any]:
        """Gather file metadata and a content preview for *file_path*.

        Never raises: fields that cannot be determined keep their defaults
        (``content_preview`` stays ``"[file not readable]"``).
        """
        ctx: Dict[str, Any] = {
            "file_size": 0,
            "permissions": "",
            "owner": "",
            "mtime": "",
            "content_preview": "[file not readable]",
        }
        try:
            st = Path(file_path).stat()
        except (OSError, ValueError):
            st = None
        if st is not None:
            ctx["file_size"] = st.st_size
            ctx["permissions"] = oct(st.st_mode)[-4:]
            ctx["mtime"] = str(st.st_mtime)
            try:
                import pwd

                ctx["owner"] = pwd.getpwuid(st.st_uid).pw_name
            except (ImportError, KeyError):
                # No pwd module on this platform, or uid without a passwd entry.
                ctx["owner"] = str(st.st_uid)
        try:
            with open(file_path, "rb") as f:
                raw = f.read(512)
        except (OSError, ValueError):
            return ctx
        ctx["content_preview"] = self._decode_preview(raw)
        return ctx

    @classmethod
    def _prepare_preview(cls, preview: str) -> str:
        """Truncate *preview* to the prompt budget, marking truncation with ``...``.

        Braces are deliberately left untouched: the preview is passed to
        ``str.format`` as a *value*, which is never re-parsed, so escaping
        them would only corrupt the content shown to the model.
        """
        if len(preview) > cls._PREVIEW_CHARS:
            return preview[: cls._PREVIEW_CHARS] + "..."
        return preview

    @staticmethod
    def _extract_json(text: str) -> str:
        """Return the JSON-object portion of a model reply.

        Handles replies wrapped in markdown code fences and replies with
        prose around the object.  When no ``{...}`` span is found the text
        is returned unchanged.
        """
        if "```" in text:
            for part in text.split("```")[1:]:
                cleaned = part.strip()
                if cleaned.startswith("json"):
                    cleaned = cleaned[4:].strip()
                if cleaned.startswith("{"):
                    text = cleaned
                    break
        start = text.find("{")
        end = text.rfind("}") + 1
        if start >= 0 and end > start:
            return text[start:end]
        return text

    @classmethod
    def _normalize_fields(cls, data: Dict[str, Any]) -> Dict[str, Any]:
        """Coerce a parsed model reply into valid :class:`AIVerdict` field values.

        Unknown verdicts/actions fall back to ``suspicious``/``quarantine``;
        confidence is converted to an int clamped to 0-100 (50 when missing
        or not numeric).
        """
        verdict = str(data.get("verdict", "suspicious")).strip().lower()
        if verdict not in cls._VERDICTS:
            verdict = "suspicious"
        action = str(data.get("recommended_action", "quarantine")).strip().lower()
        if action not in cls._ACTIONS:
            action = "quarantine"
        try:
            confidence = int(data.get("confidence", 50))
        except (TypeError, ValueError):
            confidence = 50
        return {
            "verdict": verdict,
            "confidence": max(0, min(100, confidence)),
            "reason": str(data.get("reason", "")),
            "recommended_action": action,
        }

    @staticmethod
    def _fallback(confidence: int, reason: str, raw_response: str = "") -> AIVerdict:
        """Build the conservative verdict used whenever the AI gives no usable answer."""
        return AIVerdict(
            verdict="suspicious",
            confidence=confidence,
            reason=reason,
            recommended_action="quarantine",
            raw_response=raw_response,
        )

    def analyze(
        self,
        file_path: str,
        threat_name: str,
        threat_type: str,
        severity: str,
        detector: str,
        confidence: int = 50,
    ) -> AIVerdict:
        """Analyze a single detection with AI.

        Parameters mirror the detection being reviewed; *confidence* is the
        original detector's confidence and is echoed back in fallback
        verdicts.

        Returns
        -------
        AIVerdict
            The model's verdict, or a ``suspicious``/``quarantine`` fallback
            when no API key is set, the client cannot be created, the request
            fails, or the reply is not a JSON object.  Never raises.
        """
        if not self.available:
            # No API key — pass through as-is
            return self._fallback(confidence, "AI analysis unavailable (no API key)")
        client = self._get_client()
        if not client:
            return self._fallback(confidence, "AI client init failed")

        ctx = self._get_file_context(file_path)
        # NOTE(security): the preview is attacker-controlled file content and is
        # embedded verbatim in the prompt; a crafted file may try to steer the
        # model towards a "safe" verdict.  Treat AI verdicts accordingly.
        user_msg = ANALYSIS_PROMPT.format(
            file_path=file_path,
            threat_name=threat_name,
            threat_type=threat_type,
            severity=severity,
            detector=detector,
            original_confidence=confidence,
            file_size=ctx.get("file_size", 0),
            permissions=ctx.get("permissions", ""),
            owner=ctx.get("owner", ""),
            mtime=ctx.get("mtime", ""),
            content_preview=self._prepare_preview(ctx.get("content_preview", "")),
        )

        text = ""
        try:
            response = client.messages.create(
                model=self._model,
                max_tokens=150,
                system=SYSTEM_PROMPT.format(environment=self._environment),
                messages=[{"role": "user", "content": user_msg}],
            )
            text = self._extract_json(response.content[0].text.strip())
            data = json.loads(text)
            if not isinstance(data, dict):
                raise ValueError("AI response is not a JSON object")
            return AIVerdict(raw_response=text, **self._normalize_fields(data))
        except json.JSONDecodeError as exc:
            logger.warning("AI returned non-JSON: %s — raw: %s", exc, text[:200])
            return self._fallback(confidence, f"AI parse error: {text[:100]}", text)
        except Exception as exc:
            # Boundary handler: network/API errors must not abort the scan.
            logger.error("AI analysis failed: %s", exc)
            return self._fallback(confidence, f"AI error: {exc}")

    def analyze_batch(
        self,
        detections: List[Dict[str, Any]],
    ) -> List[Dict[str, Any]]:
        """Analyze a batch of detections. Returns enriched detections with AI verdicts.

        Each detection dict should have: file_path, threat_name, threat_type,
        severity, detector (and optionally confidence).  The input dicts are
        not modified; each result is a copy with ``ai_verdict``,
        ``ai_confidence``, ``ai_reason`` and ``ai_action`` added.
        """
        results = []
        for d in detections:
            verdict = self.analyze(
                file_path=d.get("file_path", ""),
                threat_name=d.get("threat_name", ""),
                threat_type=d.get("threat_type", ""),
                severity=d.get("severity", "MEDIUM"),
                detector=d.get("detector", ""),
                confidence=d.get("confidence", 50),
            )
            enriched = dict(d)
            enriched["ai_verdict"] = verdict.verdict
            enriched["ai_confidence"] = verdict.confidence
            enriched["ai_reason"] = verdict.reason
            enriched["ai_action"] = verdict.recommended_action
            results.append(enriched)
        return results

View File

@@ -0,0 +1,129 @@
"""Abstract base class and shared data structures for AYN detectors."""
from __future__ import annotations
import logging
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from pathlib import Path
from typing import List, Optional
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Detection result
# ---------------------------------------------------------------------------
@dataclass
class DetectionResult:
    """One finding reported by a detector.

    Attributes
    ----------
    threat_name:
        Short identifier for the threat (e.g. ``"Trojan.Miner.XMRig"``).
    threat_type:
        Category string — ``VIRUS``, ``MALWARE``, ``SPYWARE``, ``MINER``,
        ``ROOTKIT``, ``HEURISTIC``, etc.
    severity:
        One of ``CRITICAL``, ``HIGH``, ``MEDIUM``, ``LOW``.
    confidence:
        How confident the detector is in the finding (0-100).
    details:
        Human-readable explanation.
    detector_name:
        Which detector produced this result.
    """

    threat_name: str
    threat_type: str
    severity: str
    confidence: int
    details: str
    detector_name: str
# ---------------------------------------------------------------------------
# Abstract base
# ---------------------------------------------------------------------------
class BaseDetector(ABC):
    """Contract shared by all AYN detectors.

    A detector is handed a file path (plus, optionally, the already-read
    bytes and hash of that file) and answers with a list of
    :class:`DetectionResult` objects — empty when nothing was found.
    """

    # ------------------------------------------------------------------
    # Identity
    # ------------------------------------------------------------------
    @property
    @abstractmethod
    def name(self) -> str:
        """Machine-friendly detector identifier."""
        ...

    @property
    @abstractmethod
    def description(self) -> str:
        """One-line human-readable summary."""
        ...

    # ------------------------------------------------------------------
    # Detection
    # ------------------------------------------------------------------
    @abstractmethod
    def detect(
        self,
        file_path: str | Path,
        file_content: Optional[bytes] = None,
        file_hash: Optional[str] = None,
    ) -> List[DetectionResult]:
        """Run detection logic against a single file.

        Parameters
        ----------
        file_path:
            Path to the file on disk.
        file_content:
            Optional pre-read bytes of the file (avoids double-read).
        file_hash:
            Optional pre-computed SHA-256 hex digest.

        Returns
        -------
        list[DetectionResult]
            Empty list when the file is clean.
        """
        ...

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------
    def _read_content(
        self,
        file_path: Path,
        file_content: Optional[bytes],
        max_bytes: int = 10 * 1024 * 1024,
    ) -> bytes:
        """Return the bytes to scan for *file_path*.

        Pre-read *file_content* is handed back untouched.  Otherwise the
        file is opened and at most *max_bytes* are read, which bounds memory
        usage on large files.  ``OSError`` from opening/reading propagates.
        """
        if file_content is None:
            with open(file_path, "rb") as handle:
                return handle.read(max_bytes)
        return file_content

    def _log(self, msg: str, *args) -> None:
        """Log *msg* at INFO level, prefixed with this detector's name."""
        template = "[%s] " + msg
        logger.info(template, self.name, *args)

    def _warn(self, msg: str, *args) -> None:
        """Log *msg* at WARNING level, prefixed with this detector's name."""
        template = "[%s] " + msg
        logger.warning(template, self.name, *args)

    def _error(self, msg: str, *args) -> None:
        """Log *msg* at ERROR level, prefixed with this detector's name."""
        template = "[%s] " + msg
        logger.error(template, self.name, *args)

View File

@@ -0,0 +1,317 @@
"""Crypto-miner detector for AYN Antivirus.
Combines file-content analysis, process inspection, and network connection
checks to detect cryptocurrency mining activity on the host.
"""
from __future__ import annotations
import logging
import re
from pathlib import Path
from typing import List, Optional
import psutil
from ayn_antivirus.constants import (
CRYPTO_MINER_PROCESS_NAMES,
CRYPTO_POOL_DOMAINS,
HIGH_CPU_THRESHOLD,
SUSPICIOUS_PORTS,
)
from ayn_antivirus.detectors.base import BaseDetector, DetectionResult
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# File-content patterns
# ---------------------------------------------------------------------------
# stratum+tcp / stratum+ssl / stratum+tls URLs, up to the next whitespace or quote.
_RE_STRATUM = re.compile(rb"stratum\+(?:tcp|ssl|tls)://[^\s\"']+", re.IGNORECASE)
# Alternation of every entry in CRYPTO_POOL_DOMAINS, each escaped so that
# dots match literally.
# NOTE(review): if CRYPTO_POOL_DOMAINS were empty this pattern would match
# the empty string everywhere — confirm the constant is never empty.
_RE_POOL_DOMAIN = re.compile(
rb"(?:" + b"|".join(re.escape(d.encode()) for d in CRYPTO_POOL_DOMAINS) + rb")",
re.IGNORECASE,
)
# Whole-word references to mining algorithm names.
_RE_ALGO_REF = re.compile(
rb"\b(?:cryptonight|randomx|ethash|kawpow|equihash|scrypt|sha256d|x11|x13|lyra2rev2|blake2s)\b",
re.IGNORECASE,
)
# Quoted configuration keys followed by a quoted value (``"pool": "...``);
# the detector requires at least two such keys before reporting a file.
_RE_MINING_CONFIG = re.compile(
rb"""["'](?:algo|pool|wallet|worker|pass|coin|url|user)["']\s*:\s*["']""",
re.IGNORECASE,
)
# Wallet address patterns (broad but useful).
# Used with the labels BTC, ETH and XMR respectively in
# ``CryptominerDetector._check_wallet_addresses``.
_RE_BTC_ADDR = re.compile(rb"\b(?:1|3|bc1)[A-HJ-NP-Za-km-z1-9]{25,62}\b")
_RE_ETH_ADDR = re.compile(rb"\b0x[0-9a-fA-F]{40}\b")
_RE_XMR_ADDR = re.compile(rb"\b4[0-9AB][1-9A-HJ-NP-Za-km-z]{93}\b")
class CryptominerDetector(BaseDetector):
    """Detect cryptocurrency mining activity via files, processes, and network.

    ``detect()`` inspects file content only.  The host-wide checks
    (``find_miner_processes``, ``find_high_cpu_processes`` and
    ``find_mining_connections``) are static methods that must be called
    separately by the scanner.
    """

    # ------------------------------------------------------------------
    # BaseDetector interface
    # ------------------------------------------------------------------
    @property
    def name(self) -> str:
        return "cryptominer_detector"

    @property
    def description(self) -> str:
        return "Detects crypto-mining binaries, configs, processes, and network traffic"

    def detect(
        self,
        file_path: str | Path,
        file_content: Optional[bytes] = None,
        file_hash: Optional[str] = None,
    ) -> List[DetectionResult]:
        """Analyse a single file for mining indicators.

        Only the file's content is examined (stratum URLs, pool domains,
        algorithm names, config keys, wallet addresses).  Running processes
        and network connections are *not* checked here; use the static
        ``find_*`` methods for that.  *file_hash* is accepted for interface
        compatibility and is not used.

        Returns an empty list when the file is clean or cannot be read.
        """
        file_path = Path(file_path)
        try:
            content = self._read_content(file_path, file_content)
        except OSError as exc:
            self._warn("Cannot read %s: %s", file_path, exc)
            return []

        results: List[DetectionResult] = []
        for check in (
            self._check_stratum_urls,
            self._check_pool_domains,
            self._check_algo_references,
            self._check_mining_config,
            self._check_wallet_addresses,
        ):
            results.extend(check(file_path, content))
        return results

    @staticmethod
    def _finding(
        threat_name: str,
        severity: str,
        confidence: int,
        details: str,
        detector_name: str = "cryptominer_detector",
    ) -> DetectionResult:
        """Build a ``MINER``-typed :class:`DetectionResult`."""
        return DetectionResult(
            threat_name=threat_name,
            threat_type="MINER",
            severity=severity,
            confidence=confidence,
            details=details,
            detector_name=detector_name,
        )

    # ------------------------------------------------------------------
    # File-content checks
    # ------------------------------------------------------------------
    def _check_stratum_urls(
        self, file_path: Path, content: bytes
    ) -> List[DetectionResult]:
        """Report stratum protocol URLs embedded in *content* (first five shown)."""
        matches = _RE_STRATUM.findall(content)
        if not matches:
            return []
        urls = [m.decode(errors="replace") for m in matches[:5]]
        return [self._finding(
            "Miner.Stratum.URL",
            "CRITICAL",
            95,
            f"Stratum mining URL(s) found: {', '.join(urls)}",
            self.name,
        )]

    def _check_pool_domains(
        self, file_path: Path, content: bytes
    ) -> List[DetectionResult]:
        """Report references to known mining-pool domains (up to five shown)."""
        matches = _RE_POOL_DOMAIN.findall(content)
        if not matches:
            return []
        domains = sorted({m.decode(errors="replace") for m in matches})
        return [self._finding(
            "Miner.PoolDomain",
            "HIGH",
            90,
            f"Mining pool domain(s) referenced: {', '.join(domains[:5])}",
            self.name,
        )]

    def _check_algo_references(
        self, file_path: Path, content: bytes
    ) -> List[DetectionResult]:
        """Report mentions of mining algorithm names (lower-cased, de-duplicated)."""
        matches = _RE_ALGO_REF.findall(content)
        if not matches:
            return []
        algos = sorted({m.decode(errors="replace").lower() for m in matches})
        return [self._finding(
            "Miner.AlgorithmReference",
            "MEDIUM",
            60,
            f"Mining algorithm reference(s): {', '.join(algos)}",
            self.name,
        )]

    def _check_mining_config(
        self, file_path: Path, content: bytes
    ) -> List[DetectionResult]:
        """Report files containing at least two miner-style configuration keys."""
        matches = _RE_MINING_CONFIG.findall(content)
        # A single key such as "url" or "user" is far too common on its own.
        if len(matches) < 2:
            return []
        return [self._finding(
            "Miner.ConfigFile",
            "HIGH",
            85,
            (
                f"File resembles a mining configuration "
                f"({len(matches)} config key(s) detected)"
            ),
            self.name,
        )]

    def _check_wallet_addresses(
        self, file_path: Path, content: bytes
    ) -> List[DetectionResult]:
        """Report BTC/ETH/XMR-looking wallet addresses.

        At most three matches per currency are collected and five shown;
        each address is truncated to its first 20 characters.
        """
        wallets: List[str] = []
        for label, regex in (
            ("BTC", _RE_BTC_ADDR),
            ("ETH", _RE_ETH_ADDR),
            ("XMR", _RE_XMR_ADDR),
        ):
            wallets.extend(
                f"{label}:{m.decode(errors='replace')[:20]}"
                for m in regex.findall(content)[:3]
            )
        if not wallets:
            return []
        return [self._finding(
            "Miner.WalletAddress",
            "HIGH",
            70,
            f"Cryptocurrency wallet address(es): {', '.join(wallets[:5])}",
            self.name,
        )]

    # ------------------------------------------------------------------
    # Process-based detection (host-wide, not file-specific)
    # ------------------------------------------------------------------
    @staticmethod
    def find_miner_processes() -> List[DetectionResult]:
        """Scan running processes for known miner names.

        This is a host-wide check and should be called independently from
        the per-file ``detect()`` method.  A process is reported once, for
        the first entry of ``CRYPTO_MINER_PROCESS_NAMES`` found as a
        substring of its lower-cased name or command line.
        """
        results: List[DetectionResult] = []
        for proc in psutil.process_iter(["pid", "name", "cmdline", "cpu_percent"]):
            try:
                info = proc.info
                pname = (info.get("name") or "").lower()
                cmdline = " ".join(info.get("cmdline") or []).lower()
                # psutil stores None for attributes it was not allowed to read;
                # formatting None with ":.1f" would raise TypeError.
                cpu = info.get("cpu_percent") or 0.0
                for miner in CRYPTO_MINER_PROCESS_NAMES:
                    if miner in pname or miner in cmdline:
                        results.append(CryptominerDetector._finding(
                            f"Miner.Process.{miner}",
                            "CRITICAL",
                            95,
                            (
                                f"Known miner process running: {info.get('name')} "
                                f"(PID {info['pid']}, CPU {cpu:.1f}%)"
                            ),
                        ))
                        break
            except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
                continue
        return results

    # ------------------------------------------------------------------
    # CPU analysis (host-wide)
    # ------------------------------------------------------------------
    @staticmethod
    def find_high_cpu_processes(
        threshold: float = HIGH_CPU_THRESHOLD,
    ) -> List[DetectionResult]:
        """Flag processes consuming CPU above *threshold* percent.

        NOTE(review): psutil documents that the first non-blocking
        ``cpu_percent`` reading of a process is a meaningless 0.0, so the
        first call in a fresh interpreter may report nothing — confirm the
        caller primes or repeats this check.
        """
        results: List[DetectionResult] = []
        for proc in psutil.process_iter(["pid", "name", "cpu_percent"]):
            try:
                info = proc.info
                cpu = info.get("cpu_percent") or 0.0
                if cpu > threshold:
                    results.append(CryptominerDetector._finding(
                        "Miner.HighCPU",
                        "HIGH",
                        55,
                        (
                            f"Process {info.get('name')} (PID {info['pid']}) "
                            f"using {cpu:.1f}% CPU"
                        ),
                    ))
            except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
                continue
        return results

    # ------------------------------------------------------------------
    # Network detection (host-wide)
    # ------------------------------------------------------------------
    @staticmethod
    def find_mining_connections() -> List[DetectionResult]:
        """Check active network connections for mining pool traffic.

        Returns an empty list (after logging a warning) when the process
        lacks permission to enumerate connections.
        """
        results: List[DetectionResult] = []
        try:
            connections = psutil.net_connections(kind="inet")
        except psutil.AccessDenied:
            logger.warning("Insufficient permissions to read network connections")
            return results

        for conn in connections:
            raddr = conn.raddr
            if not raddr:
                continue  # listening / unconnected socket
            remote_ip = raddr.ip
            remote_port = raddr.port

            proc_name = ""
            if conn.pid:
                try:
                    proc_name = psutil.Process(conn.pid).name()
                except (psutil.NoSuchProcess, psutil.AccessDenied):
                    proc_name = "?"

            if remote_port in SUSPICIOUS_PORTS:
                results.append(CryptominerDetector._finding(
                    "Miner.Network.SuspiciousPort",
                    "HIGH",
                    75,
                    (
                        f"Connection to port {remote_port} "
                        f"({remote_ip}, process={proc_name}, PID={conn.pid})"
                    ),
                ))

            # NOTE(review): remote_ip is the peer's address as reported by
            # psutil, so a domain name from CRYPTO_POOL_DOMAINS will normally
            # never be a substring of it.  This check presumably needs
            # resolved pool IPs or a reverse lookup to be effective — confirm.
            for domain in CRYPTO_POOL_DOMAINS:
                if domain in remote_ip:
                    results.append(CryptominerDetector._finding(
                        "Miner.Network.PoolConnection",
                        "CRITICAL",
                        95,
                        (
                            f"Active connection to mining pool {domain} "
                            f"({remote_ip}:{remote_port}, process={proc_name})"
                        ),
                    ))
                    break
        return results

View File

@@ -0,0 +1,436 @@
"""Heuristic detector for AYN Antivirus.
Uses statistical and pattern-based analysis to flag files that *look*
malicious even when no signature or YARA rule matches. Checks include
Shannon entropy (packed/encrypted binaries), suspicious string patterns,
obfuscation indicators, ELF anomalies, and permission/location red flags.
"""
from __future__ import annotations
import logging
import math
import re
import stat
from collections import Counter
from datetime import datetime, timedelta
from pathlib import Path
from typing import List, Optional
from ayn_antivirus.constants import SUSPICIOUS_EXTENSIONS
from ayn_antivirus.detectors.base import BaseDetector, DetectionResult
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Thresholds
# ---------------------------------------------------------------------------
_HIGH_ENTROPY_THRESHOLD = 7.5 # bits per byte — likely packed / encrypted
_CHR_CHAIN_MIN = 6 # minimum number of chained chr() calls (the \xNN pattern below uses its own fixed minimum of 8)
_B64_MIN_LENGTH = 40 # minimum base64 blob considered suspicious
# ---------------------------------------------------------------------------
# Compiled regexes (built once at import time)
# ---------------------------------------------------------------------------
# Runs of at least ten 4-character base64 groups (>= 40 characters), with
# optional "=" padding.
_RE_BASE64_BLOB = re.compile(
rb"(?:(?:[A-Za-z0-9+/]{4}){10,})(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?"
)
# Calls to eval(...), exec(...) or compile(...).
_RE_EVAL_EXEC = re.compile(rb"\b(?:eval|exec|compile)\s*\(", re.IGNORECASE)
# Calls that run system commands: os.system, subprocess.call/run/Popen,
# commands.getoutput.
_RE_SYSTEM_CALL = re.compile(
rb"\b(?:os\.system|subprocess\.(?:call|run|Popen)|commands\.getoutput)\s*\(",
re.IGNORECASE,
)
# Reverse-shell idioms: /dev/tcp/, "bash -i >&", nc/ncat/socat invocations
# and python one-liners that import socket.
_RE_REVERSE_SHELL = re.compile(
rb"(?:/dev/tcp/|bash\s+-i\s+>&|nc\s+-[elp]|ncat\s+-|socat\s+|python[23]?\s+-c\s+['\"]import\s+socket)",
re.IGNORECASE,
)
# wget/curl output piped into sh, bash, python or perl on the same line.
_RE_WGET_CURL_PIPE = re.compile(
rb"(?:wget|curl)\s+[^\n]*\|\s*(?:sh|bash|python|perl)", re.IGNORECASE
)
# -enc / -encodedcommand / -e / -ec followed by a base64-looking argument of
# at least 20 characters.
_RE_ENCODED_PS = re.compile(
rb"-(?:enc(?:odedcommand)?|e|ec)\s+[A-Za-z0-9+/=]{20,}", re.IGNORECASE
)
# At least _CHR_CHAIN_MIN consecutive ``chr(N)`` calls joined by "." or "+".
_RE_CHR_CHAIN = re.compile(
rb"(?:chr\s*\(\s*\d+\s*\)\s*[\.\+]\s*){" + str(_CHR_CHAIN_MIN).encode() + rb",}",
re.IGNORECASE,
)
# Eight or more consecutive \xNN escape sequences.
_RE_HEX_STRING = re.compile(
rb"(?:\\x[0-9a-fA-F]{2}){8,}"
)
# Six or more quoted fragments of 1-4 characters joined by "+" or ".".
_RE_STRING_CONCAT = re.compile(
rb"""(?:["'][^"']{1,4}["']\s*[\+\.]\s*){6,}""",
)
# UPX magic at the beginning of packed sections.
_UPX_MAGIC = b"UPX!"
# System directories where world-writable or SUID files are suspicious.
_SYSTEM_DIRS = {"/usr/bin", "/usr/sbin", "/bin", "/sbin", "/usr/local/bin", "/usr/local/sbin"}
# Locations where hidden files are suspicious.
_SUSPICIOUS_HIDDEN_DIRS = {"/tmp", "/var/tmp", "/dev/shm", "/var/www", "/srv"}
class HeuristicDetector(BaseDetector):
"""Flag files that exhibit suspicious characteristics without a known signature."""
# ------------------------------------------------------------------
# BaseDetector interface
# ------------------------------------------------------------------
@property
def name(self) -> str:
return "heuristic_detector"
@property
def description(self) -> str:
return "Statistical and pattern-based heuristic analysis"
def detect(
self,
file_path: str | Path,
file_content: Optional[bytes] = None,
file_hash: Optional[str] = None,
) -> List[DetectionResult]:
file_path = Path(file_path)
results: List[DetectionResult] = []
try:
content = self._read_content(file_path, file_content)
except OSError as exc:
self._warn("Cannot read %s: %s", file_path, exc)
return results
# --- Entropy analysis ---
results.extend(self._check_entropy(file_path, content))
# --- Suspicious string patterns ---
results.extend(self._check_suspicious_strings(file_path, content))
# --- Obfuscation indicators ---
results.extend(self._check_obfuscation(file_path, content))
# --- ELF anomalies ---
results.extend(self._check_elf_anomalies(file_path, content))
# --- Permission / location anomalies ---
results.extend(self._check_permission_anomalies(file_path))
# --- Hidden files in suspicious locations ---
results.extend(self._check_hidden_files(file_path))
# --- Recently modified system files ---
results.extend(self._check_recent_system_modification(file_path))
return results
# ------------------------------------------------------------------
# Entropy
# ------------------------------------------------------------------
@staticmethod
def calculate_entropy(data: bytes) -> float:
"""Calculate Shannon entropy (bits per byte) of *data*.
Returns a value between 0.0 (uniform) and 8.0 (maximum randomness).
"""
if not data:
return 0.0
length = len(data)
freq = Counter(data)
entropy = 0.0
for count in freq.values():
p = count / length
if p > 0:
entropy -= p * math.log2(p)
return entropy
def _check_entropy(
self, file_path: Path, content: bytes
) -> List[DetectionResult]:
results: List[DetectionResult] = []
if len(content) < 256:
return results # too short for meaningful entropy
entropy = self.calculate_entropy(content)
if entropy > _HIGH_ENTROPY_THRESHOLD:
results.append(DetectionResult(
threat_name="Heuristic.Packed.HighEntropy",
threat_type="MALWARE",
severity="MEDIUM",
confidence=65,
details=(
f"File entropy {entropy:.2f} bits/byte exceeds threshold "
f"({_HIGH_ENTROPY_THRESHOLD}) — likely packed or encrypted"
),
detector_name=self.name,
))
return results
# ------------------------------------------------------------------
# Suspicious strings
# ------------------------------------------------------------------
def _check_suspicious_strings(
self, file_path: Path, content: bytes
) -> List[DetectionResult]:
results: List[DetectionResult] = []
# Base64-encoded payloads.
b64_blobs = _RE_BASE64_BLOB.findall(content)
long_blobs = [b for b in b64_blobs if len(b) >= _B64_MIN_LENGTH]
if long_blobs:
results.append(DetectionResult(
threat_name="Heuristic.Obfuscation.Base64Payload",
threat_type="MALWARE",
severity="MEDIUM",
confidence=55,
details=f"Found {len(long_blobs)} large base64-encoded blob(s)",
detector_name=self.name,
))
# eval / exec / compile calls.
if _RE_EVAL_EXEC.search(content):
results.append(DetectionResult(
threat_name="Heuristic.Suspicious.DynamicExecution",
threat_type="MALWARE",
severity="MEDIUM",
confidence=50,
details="File uses eval()/exec()/compile() — possible code injection",
detector_name=self.name,
))
# os.system / subprocess calls.
if _RE_SYSTEM_CALL.search(content):
results.append(DetectionResult(
threat_name="Heuristic.Suspicious.SystemCall",
threat_type="MALWARE",
severity="MEDIUM",
confidence=45,
details="File invokes system commands via os.system/subprocess",
detector_name=self.name,
))
# Reverse shell patterns.
match = _RE_REVERSE_SHELL.search(content)
if match:
results.append(DetectionResult(
threat_name="Heuristic.ReverseShell",
threat_type="MALWARE",
severity="CRITICAL",
confidence=85,
details=f"Reverse shell pattern detected: {match.group()[:80]!r}",
detector_name=self.name,
))
# wget/curl piped to sh/bash.
if _RE_WGET_CURL_PIPE.search(content):
results.append(DetectionResult(
threat_name="Heuristic.Dropper.PipeToShell",
threat_type="MALWARE",
severity="HIGH",
confidence=80,
details="File downloads and pipes directly to a shell interpreter",
detector_name=self.name,
))
# Encoded PowerShell command.
if _RE_ENCODED_PS.search(content):
results.append(DetectionResult(
threat_name="Heuristic.PowerShell.EncodedCommand",
threat_type="MALWARE",
severity="HIGH",
confidence=75,
details="Encoded PowerShell command detected",
detector_name=self.name,
))
return results
# ------------------------------------------------------------------
# Obfuscation
# ------------------------------------------------------------------
def _check_obfuscation(
self, file_path: Path, content: bytes
) -> List[DetectionResult]:
results: List[DetectionResult] = []
# chr() chains.
if _RE_CHR_CHAIN.search(content):
results.append(DetectionResult(
threat_name="Heuristic.Obfuscation.ChrChain",
threat_type="MALWARE",
severity="MEDIUM",
confidence=60,
details="Obfuscation via long chr() concatenation chain",
detector_name=self.name,
))
# Hex-encoded byte strings.
hex_matches = _RE_HEX_STRING.findall(content)
if len(hex_matches) > 3:
results.append(DetectionResult(
threat_name="Heuristic.Obfuscation.HexStrings",
threat_type="MALWARE",
severity="MEDIUM",
confidence=55,
details=f"Multiple hex-encoded strings detected ({len(hex_matches)} occurrences)",
detector_name=self.name,
))
# Excessive string concatenation.
if _RE_STRING_CONCAT.search(content):
results.append(DetectionResult(
threat_name="Heuristic.Obfuscation.StringConcat",
threat_type="MALWARE",
severity="LOW",
confidence=40,
details="Excessive short-string concatenation — possible obfuscation",
detector_name=self.name,
))
return results
# ------------------------------------------------------------------
# ELF anomalies
# ------------------------------------------------------------------
def _check_elf_anomalies(
self, file_path: Path, content: bytes
) -> List[DetectionResult]:
results: List[DetectionResult] = []
if not content[:4] == b"\x7fELF":
return results
# UPX packed.
if _UPX_MAGIC in content[:4096]:
results.append(DetectionResult(
threat_name="Heuristic.Packed.UPX",
threat_type="MALWARE",
severity="MEDIUM",
confidence=60,
details="ELF binary is UPX-packed",
detector_name=self.name,
))
# Stripped binary in unusual location.
path_str = str(file_path)
is_in_system = any(path_str.startswith(d) for d in _SYSTEM_DIRS)
if not is_in_system:
# Non-system ELF — more suspicious if stripped (no .symtab).
if b".symtab" not in content and b".debug" not in content:
results.append(DetectionResult(
threat_name="Heuristic.ELF.StrippedNonSystem",
threat_type="MALWARE",
severity="LOW",
confidence=35,
details="Stripped ELF binary found outside standard system directories",
detector_name=self.name,
))
return results
# ------------------------------------------------------------------
# Permission anomalies
# ------------------------------------------------------------------
def _check_permission_anomalies(
self, file_path: Path
) -> List[DetectionResult]:
results: List[DetectionResult] = []
try:
st = file_path.stat()
except OSError:
return results
mode = st.st_mode
path_str = str(file_path)
# World-writable file in a system directory.
is_in_system = any(path_str.startswith(d) for d in _SYSTEM_DIRS)
if is_in_system and (mode & stat.S_IWOTH):
results.append(DetectionResult(
threat_name="Heuristic.Permissions.WorldWritableSystem",
threat_type="MALWARE",
severity="HIGH",
confidence=70,
details=f"World-writable file in system directory: {file_path}",
detector_name=self.name,
))
# SUID/SGID on unusual files.
is_suid = bool(mode & stat.S_ISUID)
is_sgid = bool(mode & stat.S_ISGID)
if (is_suid or is_sgid) and not is_in_system:
flag = "SUID" if is_suid else "SGID"
results.append(DetectionResult(
threat_name=f"Heuristic.Permissions.{flag}NonSystem",
threat_type="MALWARE",
severity="HIGH",
confidence=75,
details=f"{flag} bit set on file outside system directories: {file_path}",
detector_name=self.name,
))
return results
# ------------------------------------------------------------------
# Hidden files in suspicious locations
# ------------------------------------------------------------------
def _check_hidden_files(
self, file_path: Path
) -> List[DetectionResult]:
results: List[DetectionResult] = []
if not file_path.name.startswith("."):
return results
path_str = str(file_path)
for sus_dir in _SUSPICIOUS_HIDDEN_DIRS:
if path_str.startswith(sus_dir):
results.append(DetectionResult(
threat_name="Heuristic.HiddenFile.SuspiciousLocation",
threat_type="MALWARE",
severity="MEDIUM",
confidence=50,
details=f"Hidden file in suspicious directory: {file_path}",
detector_name=self.name,
))
break
return results
# ------------------------------------------------------------------
# Recently modified system files
# ------------------------------------------------------------------
def _check_recent_system_modification(
    self, file_path: Path
) -> List[DetectionResult]:
    """Flag a system file whose mtime is less than 24 hours old.

    Only paths whose string form starts with one of ``_SYSTEM_DIRS`` are
    considered.  Returns an empty list for other paths, when the file
    cannot be ``stat``-ed, or when its mtime cannot be represented as a
    datetime.
    """
    # Function-scope import: ``datetime.utcnow()`` and
    # ``datetime.utcfromtimestamp()`` are deprecated since Python 3.12,
    # so the comparison is done with timezone-aware values instead.
    from datetime import timezone

    results: List[DetectionResult] = []
    path_str = str(file_path)
    if not any(path_str.startswith(d) for d in _SYSTEM_DIRS):
        return results

    try:
        mtime = datetime.fromtimestamp(
            file_path.stat().st_mtime, tz=timezone.utc
        )
    except (OSError, OverflowError, ValueError):
        # Unreadable file, or a timestamp outside the platform's range.
        return results

    if datetime.now(tz=timezone.utc) - mtime < timedelta(hours=24):
        # Drop tzinfo before formatting so the reported text keeps the
        # naive-UTC ISO form (no "+00:00" suffix).
        mtime_text = mtime.replace(tzinfo=None).isoformat()
        results.append(DetectionResult(
            threat_name="Heuristic.SystemFile.RecentlyModified",
            threat_type="MALWARE",
            severity="MEDIUM",
            confidence=45,
            details=(
                f"System file modified within the last 24 hours: "
                f"{file_path} (mtime: {mtime_text})"
            ),
            detector_name=self.name,
        ))
    return results

View File

@@ -0,0 +1,387 @@
"""Rootkit detector for AYN Antivirus.
Performs system-wide checks for indicators of rootkit compromise: known
rootkit files, modified system binaries, hidden processes, hidden kernel
modules, LD_PRELOAD hijacking, hidden network ports, and tampered logs.
Many checks require **root** privileges. On non-Linux systems, kernel-
module and /proc-based checks are gracefully skipped.
"""
from __future__ import annotations
import logging
import os
import subprocess
from pathlib import Path
from typing import List, Optional, Set
import psutil
from ayn_antivirus.constants import (
KNOWN_ROOTKIT_FILES,
MALICIOUS_ENV_VARS,
)
from ayn_antivirus.detectors.base import BaseDetector, DetectionResult
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
class RootkitDetector(BaseDetector):
    """System-wide rootkit detection.

    Unlike other detectors, the *file_path* argument is optional.  When
    called without a path (or with ``file_path=None``) the detector runs
    every host-level check.  When given a file it limits itself to checks
    relevant to that file.
    """

    # ------------------------------------------------------------------
    # BaseDetector interface
    # ------------------------------------------------------------------

    @property
    def name(self) -> str:
        """Identifier stored in ``detector_name`` of every result."""
        return "rootkit_detector"

    @property
    def description(self) -> str:
        """One-line human-readable summary of this detector."""
        return "Detects rootkits via file, process, module, and environment analysis"

    def detect(
        self,
        file_path: str | Path | None = None,
        file_content: Optional[bytes] = None,
        file_hash: Optional[str] = None,
    ) -> List[DetectionResult]:
        """Run rootkit checks.

        If *file_path* is ``None``, all system-wide checks are executed.
        Otherwise only file-specific checks run.  *file_content* and
        *file_hash* are accepted for interface compatibility and are not
        used by this detector.
        """
        results: List[DetectionResult] = []

        if file_path is not None:
            fp = Path(file_path)
            # File-specific: is this a known rootkit artefact?
            results.extend(self._check_known_rootkit_file(fp))
            return results

        # --- Full system-wide scan ---
        results.extend(self._check_known_rootkit_files())
        results.extend(self._check_ld_preload())
        results.extend(self._check_ld_so_preload())
        results.extend(self._check_hidden_processes())
        results.extend(self._check_hidden_kernel_modules())
        results.extend(self._check_hidden_network_ports())
        results.extend(self._check_malicious_env_vars())
        results.extend(self._check_tampered_logs())
        return results

    # ------------------------------------------------------------------
    # Known rootkit files
    # ------------------------------------------------------------------

    def _check_known_rootkit_files(self) -> List[DetectionResult]:
        """Report every path in ``KNOWN_ROOTKIT_FILES`` that exists on disk."""
        results: List[DetectionResult] = []
        for path_str in KNOWN_ROOTKIT_FILES:
            if Path(path_str).exists():
                results.append(DetectionResult(
                    threat_name="Rootkit.KnownFile",
                    threat_type="ROOTKIT",
                    severity="CRITICAL",
                    confidence=90,
                    details=f"Known rootkit artefact present: {path_str}",
                    detector_name=self.name,
                ))
        return results

    def _check_known_rootkit_file(self, file_path: Path) -> List[DetectionResult]:
        """Check whether *file_path* is listed in ``KNOWN_ROOTKIT_FILES``.

        Both the path as given and its ``os.path.normpath`` form are
        compared, so spellings such as ``/usr//lib/x`` or ``/usr/./lib/x``
        cannot slip past the exact-string comparison.
        """
        results: List[DetectionResult] = []
        path_str = str(file_path)
        spellings = (path_str, os.path.normpath(path_str))
        if any(spelling in KNOWN_ROOTKIT_FILES for spelling in spellings):
            results.append(DetectionResult(
                threat_name="Rootkit.KnownFile",
                threat_type="ROOTKIT",
                severity="CRITICAL",
                confidence=90,
                details=f"Known rootkit artefact: {path_str}",
                detector_name=self.name,
            ))
        return results

    # ------------------------------------------------------------------
    # LD_PRELOAD / ld.so.preload
    # ------------------------------------------------------------------

    def _check_ld_preload(self) -> List[DetectionResult]:
        """Flag a non-empty ``LD_PRELOAD`` in this process's environment."""
        results: List[DetectionResult] = []
        val = os.environ.get("LD_PRELOAD", "")
        if val:
            results.append(DetectionResult(
                threat_name="Rootkit.LDPreload.EnvVar",
                threat_type="ROOTKIT",
                severity="CRITICAL",
                confidence=85,
                details=f"LD_PRELOAD is set: {val}",
                detector_name=self.name,
            ))
        return results

    def _check_ld_so_preload(self) -> List[DetectionResult]:
        """Report non-comment entries in ``/etc/ld.so.preload``.

        A missing file yields no result.  Any other read failure is
        logged through ``self._warn`` and also yields no result.  At most
        the first five entries are quoted in the result details.
        """
        results: List[DetectionResult] = []
        ld_preload_file = Path("/etc/ld.so.preload")

        try:
            # errors="replace": undecodable bytes must not abort the scan.
            content = ld_preload_file.read_text(errors="replace")
        except FileNotFoundError:
            return results
        except OSError:
            self._warn("Cannot read /etc/ld.so.preload")
            return results

        # Strip *before* testing for "#", so that indented comment lines
        # are not mistaken for preload entries.
        stripped_lines = (raw.strip() for raw in content.splitlines())
        entries = [
            line for line in stripped_lines
            if line and not line.startswith("#")
        ]
        if entries:
            results.append(DetectionResult(
                threat_name="Rootkit.LDPreload.File",
                threat_type="ROOTKIT",
                severity="CRITICAL",
                confidence=85,
                details=f"/etc/ld.so.preload contains entries: {', '.join(entries[:5])}",
                detector_name=self.name,
            ))
        return results

    # ------------------------------------------------------------------
    # Hidden processes
    # ------------------------------------------------------------------

    def _check_hidden_processes(self) -> List[DetectionResult]:
        """Compare numeric ``/proc`` entries with ``psutil.pids()``.

        A PID is reported only if it is listed in ``/proc``, absent from
        two consecutive psutil samples, and its ``/proc/<pid>`` entry
        still exists at report time.  The re-check keeps a process that
        merely exited between the two listings from being reported as
        hidden.

        NOTE(review): psutil may itself enumerate ``/proc`` on Linux, in
        which case both sources agree by construction; confirm that this
        comparison is the intended second source.
        """
        results: List[DetectionResult] = []
        proc_dir = Path("/proc")
        if not proc_dir.is_dir():
            return results  # non-Linux

        proc_pids: Set[int] = set()
        try:
            for entry in proc_dir.iterdir():
                if entry.name.isdigit():
                    proc_pids.add(int(entry.name))
        except PermissionError:
            return results

        candidates = proc_pids - set(psutil.pids())
        if not candidates:
            return results

        # Second sample, taken only when the first one disagreed.
        confirmed_visible = set(psutil.pids())

        for pid in sorted(candidates):
            pid_dir = proc_dir / str(pid)
            if pid in confirmed_visible or not pid_dir.exists():
                continue  # transient: appeared late or already exited

            name = ""
            try:
                comm = pid_dir / "comm"
                if comm.exists():
                    name = comm.read_text().strip()
            except OSError:
                # The name is informational only; report without it.
                pass

            results.append(DetectionResult(
                threat_name="Rootkit.HiddenProcess",
                threat_type="ROOTKIT",
                severity="CRITICAL",
                confidence=85,
                details=f"PID {pid} ({name or 'unknown'}) visible in /proc but hidden from psutil",
                detector_name=self.name,
            ))
        return results

    # ------------------------------------------------------------------
    # Hidden kernel modules
    # ------------------------------------------------------------------

    def _check_hidden_kernel_modules(self) -> List[DetectionResult]:
        """Compare ``lsmod`` output with ``/proc/modules`` to find discrepancies.

        Returns no results when ``/proc/modules`` is absent or unreadable,
        or when ``lsmod`` cannot be run.
        """
        results: List[DetectionResult] = []
        proc_modules_path = Path("/proc/modules")
        if not proc_modules_path.exists():
            return results  # non-Linux

        # Modules from /proc/modules (first whitespace-separated field).
        try:
            proc_content = proc_modules_path.read_text()
        except OSError:
            return results
        proc_mods: Set[str] = {
            line.split()[0]
            for line in proc_content.splitlines()
            if line.split()
        }

        # Modules from lsmod; the first output line is the column header.
        lsmod_mods: Set[str] = set()
        try:
            output = subprocess.check_output(
                ["lsmod"], stderr=subprocess.DEVNULL, timeout=10
            )
        except (FileNotFoundError, subprocess.SubprocessError, OSError):
            return results  # lsmod not available
        for line in output.decode(errors="replace").splitlines()[1:]:
            parts = line.split()
            if parts:
                lsmod_mods.add(parts[0])

        # Modules in /proc but NOT in lsmod → hidden from userspace.
        for mod in sorted(proc_mods - lsmod_mods):
            results.append(DetectionResult(
                threat_name="Rootkit.HiddenKernelModule",
                threat_type="ROOTKIT",
                severity="CRITICAL",
                confidence=80,
                details=f"Kernel module '{mod}' in /proc/modules but hidden from lsmod",
                detector_name=self.name,
            ))
        return results

    # ------------------------------------------------------------------
    # Hidden network ports
    # ------------------------------------------------------------------

    def _check_hidden_network_ports(self) -> List[DetectionResult]:
        """Compare TCP listening ports reported by ``ss`` with psutil.

        Returns no results when psutil denies access or ``ss`` cannot be
        run.
        """
        results: List[DetectionResult] = []

        # Ports from psutil.
        psutil_ports: Set[int] = set()
        try:
            for conn in psutil.net_connections(kind="inet"):
                if conn.status == "LISTEN" and conn.laddr:
                    psutil_ports.add(conn.laddr.port)
        except psutil.AccessDenied:
            return results

        # Ports from ss.
        ss_ports: Set[int] = set()
        try:
            output = subprocess.check_output(
                ["ss", "-tlnH"], stderr=subprocess.DEVNULL, timeout=10
            )
        except (FileNotFoundError, subprocess.SubprocessError, OSError):
            return results  # ss not available

        for line in output.decode(errors="replace").splitlines():
            # Typical ss output: LISTEN 0 128 0.0.0.0:22 ...
            for part in line.split():
                if ":" not in part:
                    continue
                try:
                    ss_ports.add(int(part.rsplit(":", 1)[1]))
                except (ValueError, IndexError):
                    # e.g. a peer column such as "0.0.0.0:*".
                    continue

        # Ports in ss but not in psutil → potentially hidden by a rootkit.
        for port in sorted(ss_ports - psutil_ports):
            results.append(DetectionResult(
                threat_name="Rootkit.HiddenPort",
                threat_type="ROOTKIT",
                severity="HIGH",
                confidence=70,
                details=f"Listening port {port} visible to ss but hidden from psutil",
                detector_name=self.name,
            ))
        return results

    # ------------------------------------------------------------------
    # Malicious environment variables
    # ------------------------------------------------------------------

    def _check_malicious_env_vars(self) -> List[DetectionResult]:
        """Check the current environment against ``MALICIOUS_ENV_VARS``.

        An entry containing ``=`` is an exact ``KEY=value`` match; any
        other entry matches when the variable is present at all.
        """
        results: List[DetectionResult] = []
        for entry in MALICIOUS_ENV_VARS:
            if "=" in entry:
                # Exact key=value match (e.g. "HISTFILE=/dev/null").
                key, val = entry.split("=", 1)
                if os.environ.get(key) == val:
                    results.append(DetectionResult(
                        threat_name="Rootkit.EnvVar.Suspicious",
                        threat_type="ROOTKIT",
                        severity="HIGH",
                        confidence=75,
                        details=f"Suspicious environment variable: {key}={val}",
                        detector_name=self.name,
                    ))
            elif entry in os.environ:
                # Key presence check (e.g. "LD_PRELOAD"); the value is
                # truncated to 100 characters in the report.
                results.append(DetectionResult(
                    threat_name="Rootkit.EnvVar.Suspicious",
                    threat_type="ROOTKIT",
                    severity="HIGH",
                    confidence=65,
                    details=f"Suspicious environment variable set: {entry}={os.environ[entry][:100]}",
                    detector_name=self.name,
                ))
        return results

    # ------------------------------------------------------------------
    # Tampered log files
    # ------------------------------------------------------------------

    # Log files inspected by _check_tampered_logs.
    _LOG_PATHS = [
        "/var/log/auth.log",
        "/var/log/syslog",
        "/var/log/messages",
        "/var/log/secure",
        "/var/log/wtmp",
        "/var/log/btmp",
        "/var/log/lastlog",
    ]

    # Logs whose absence is reported, mapped to other entries of
    # _LOG_PATHS that are accepted in their place.  The pairings assume
    # that hosts without auth.log/syslog keep the same data in
    # secure/messages — TODO confirm for the supported distributions.
    _CRITICAL_LOG_ALTERNATIVES = {
        "/var/log/auth.log": ("/var/log/secure",),
        "/var/log/syslog": ("/var/log/messages",),
        "/var/log/wtmp": (),
    }

    def _check_tampered_logs(self) -> List[DetectionResult]:
        """Look for signs of log tampering: missing or zero-byte logs.

        A critical log is reported as missing only when none of its
        accepted alternatives exists either.  Every log in ``_LOG_PATHS``
        that exists with size zero is reported as possibly truncated.
        """
        results: List[DetectionResult] = []
        for log_path_str in self._LOG_PATHS:
            try:
                st = Path(log_path_str).stat()
            except (FileNotFoundError, NotADirectoryError):
                alternatives = self._CRITICAL_LOG_ALTERNATIVES.get(log_path_str)
                if alternatives is None:
                    continue  # not a critical log
                if any(Path(alt).exists() for alt in alternatives):
                    continue  # an equivalent log is present
                results.append(DetectionResult(
                    threat_name="Rootkit.Log.Missing",
                    threat_type="ROOTKIT",
                    severity="HIGH",
                    confidence=60,
                    details=f"Critical log file missing: {log_path_str}",
                    detector_name=self.name,
                ))
                continue
            except OSError:
                # Present but not stat-able; nothing can be concluded.
                continue

            # Zero-byte log file (may have been truncated).
            if st.st_size == 0:
                results.append(DetectionResult(
                    threat_name="Rootkit.Log.Truncated",
                    threat_type="ROOTKIT",
                    severity="HIGH",
                    confidence=70,
                    details=f"Log file is empty (possibly truncated): {log_path_str}",
                    detector_name=self.name,
                ))
        return results

View File

@@ -0,0 +1,192 @@
"""AYN Antivirus — Signature-based Detector.
Looks up file hashes against the threat signature database populated by
the feed update pipeline (MalwareBazaar, ThreatFox, etc.). Uses
:class:`~ayn_antivirus.signatures.db.hash_db.HashDatabase` so that
definitions written by ``ayn-antivirus update`` are immediately available
for detection.
"""
from __future__ import annotations
import logging
from pathlib import Path
from typing import Dict, List, Optional
from ayn_antivirus.constants import DEFAULT_DB_PATH
from ayn_antivirus.detectors.base import BaseDetector, DetectionResult
from ayn_antivirus.utils.helpers import hash_file as _hash_file_util
# Module logger; uses an explicit dotted name instead of ``__name__``.
logger = logging.getLogger("ayn_antivirus.detectors.signature")
# Severity labels accepted from a signature-database record.  A record
# whose severity is not in this set is reported as "HIGH".
_VALID_SEVERITIES = {"CRITICAL", "HIGH", "MEDIUM", "LOW"}
class SignatureDetector(BaseDetector):
    """Detect known malware by matching file hashes against the signature DB.

    Parameters
    ----------
    db_path:
        Path to the shared SQLite database that holds the ``threats``,
        ``ioc_ips``, ``ioc_domains``, and ``ioc_urls`` tables.

    The database handles are opened lazily on first use; see
    :meth:`_ensure_loaded`.
    """

    def __init__(self, db_path: str | Path = DEFAULT_DB_PATH) -> None:
        # An unset path is stored as "" rather than the string "None", so
        # that the "no DB path configured" guard in _ensure_loaded fires.
        self.db_path = str(db_path) if db_path else ""
        self._hash_db = None  # hash database handle, or None
        self._ioc_db = None   # IOC database handle, or None
        self._loaded = False  # True once a load has been attempted

    # ------------------------------------------------------------------
    # BaseDetector interface
    # ------------------------------------------------------------------

    @property
    def name(self) -> str:
        """Identifier stored in ``detector_name`` of every result."""
        return "signature_detector"

    @property
    def description(self) -> str:
        """One-line human-readable summary of this detector."""
        return "Hash-based signature detection using threat intelligence feeds"

    def detect(
        self,
        file_path: str | Path,
        file_content: Optional[bytes] = None,
        file_hash: Optional[str] = None,
    ) -> List[DetectionResult]:
        """Check the file's hash against the ``threats`` table.

        If *file_hash* is not supplied, the SHA-256 is computed on the
        fly.  When that lookup misses, the file's MD5 is computed and
        looked up as a fallback.  *file_content* is not used.

        Returns a list with at most one result; an empty list when the
        database is unavailable, the file cannot be hashed, or no
        signature matches.
        """
        self._ensure_loaded()
        results: List[DetectionResult] = []
        if not self._hash_db:
            return results

        # Compute hash if not provided.
        if not file_hash:
            try:
                file_hash = _hash_file_util(str(file_path), algo="sha256")
            except Exception as exc:
                # The helper's failure modes are not visible from here,
                # so stay broad but leave a trace instead of failing mute.
                logger.debug("Cannot compute SHA-256 of %s: %s", file_path, exc)
                return results

        # Look up SHA256 first, then MD5.
        matched_hash = file_hash
        threat = self._hash_db.lookup(file_hash)
        if not threat:
            # MD5 is only needed on a SHA-256 miss (VirusShare lookups),
            # so the second read of the file is skipped on a hit.
            md5_hash = None
            try:
                md5_hash = _hash_file_util(str(file_path), algo="md5")
            except Exception as exc:
                logger.debug("Cannot compute MD5 of %s: %s", file_path, exc)
            if md5_hash:
                threat = self._hash_db.lookup(md5_hash)
                matched_hash = md5_hash
        if not threat:
            return results

        severity = (threat.get("severity") or "HIGH").upper()
        if severity not in _VALID_SEVERITIES:
            severity = "HIGH"

        # ``or`` rather than a .get() default: a key that is present but
        # empty/None must fall back to the default too.
        results.append(DetectionResult(
            threat_name=threat.get("threat_name") or "Malware.Known",
            threat_type=threat.get("threat_type") or "MALWARE",
            severity=severity,
            confidence=100,
            details=(
                f"Known threat signature match "
                f"(source: {threat.get('source') or 'unknown'}). "
                # Show the hash that actually matched (SHA-256 or MD5).
                f"Hash: {matched_hash[:16]}... "
                f"Details: {threat.get('details') or ''}"
            ),
            detector_name=self.name,
        ))
        return results

    # ------------------------------------------------------------------
    # IOC lookup helpers (used by engine for network enrichment)
    # ------------------------------------------------------------------

    def lookup_hash(self, file_hash: str) -> Optional[Dict]:
        """Look up a single hash.  Returns threat info dict or ``None``."""
        self._ensure_loaded()
        if not self._hash_db:
            return None
        return self._hash_db.lookup(file_hash)

    def lookup_ip(self, ip: str) -> Optional[Dict]:
        """Look up an IP against the IOC database."""
        self._ensure_loaded()
        if not self._ioc_db:
            return None
        return self._ioc_db.lookup_ip(ip)

    def lookup_domain(self, domain: str) -> Optional[Dict]:
        """Look up a domain against the IOC database."""
        self._ensure_loaded()
        if not self._ioc_db:
            return None
        return self._ioc_db.lookup_domain(domain)

    # ------------------------------------------------------------------
    # Statistics
    # ------------------------------------------------------------------

    def get_stats(self) -> Dict:
        """Return signature / IOC database statistics.

        Always contains ``hash_count`` and ``loaded``.  When the hash
        database is open its own ``get_stats()`` entries are merged in;
        when the IOC database is open ``ioc_ips`` and ``ioc_domains`` are
        added.
        """
        self._ensure_loaded()
        stats: Dict = {"hash_count": 0, "loaded": self._loaded}
        if self._hash_db:
            stats["hash_count"] = self._hash_db.count()
            stats.update(self._hash_db.get_stats())
        if self._ioc_db:
            stats["ioc_ips"] = len(self._ioc_db.get_all_malicious_ips())
            stats["ioc_domains"] = len(self._ioc_db.get_all_malicious_domains())
        return stats

    @property
    def signature_count(self) -> int:
        """Number of hash signatures currently loaded."""
        self._ensure_loaded()
        return self._hash_db.count() if self._hash_db else 0

    # ------------------------------------------------------------------
    # Lifecycle
    # ------------------------------------------------------------------

    def close(self) -> None:
        """Close database connections.

        Resets the loaded flag, so the next use reopens the databases.
        """
        if self._hash_db:
            self._hash_db.close()
            self._hash_db = None
        if self._ioc_db:
            self._ioc_db.close()
            self._ioc_db = None
        self._loaded = False

    # ------------------------------------------------------------------
    # Internal
    # ------------------------------------------------------------------

    def _ensure_loaded(self) -> None:
        """Lazy-load the database connections on first use.

        A load is attempted once; ``_loaded`` is set even on failure so
        the attempt is not repeated for every scanned file.  A handle is
        published on ``self`` only after its ``initialize()`` succeeded,
        so a failed initialisation never leaves a half-open handle for
        later lookups to use.
        """
        if self._loaded:
            return
        if not self.db_path:
            logger.warning("No signature DB path configured")
            self._loaded = True
            return
        try:
            # Imported here rather than at module level, as in the
            # original implementation.
            from ayn_antivirus.signatures.db.hash_db import HashDatabase
            from ayn_antivirus.signatures.db.ioc_db import IOCDatabase

            hash_db = HashDatabase(self.db_path)
            hash_db.initialize()
            self._hash_db = hash_db

            ioc_db = IOCDatabase(self.db_path)
            ioc_db.initialize()
            self._ioc_db = ioc_db

            count = self._hash_db.count()
            logger.info("Signature DB loaded: %d hash signatures", count)
        except Exception as exc:
            logger.error("Failed to load signature DB: %s", exc)
        self._loaded = True

View File

@@ -0,0 +1,366 @@
"""Spyware detector for AYN Antivirus.
Scans files and system state for indicators of spyware: keyloggers, screen
capture utilities, data exfiltration patterns, reverse shells, unauthorized
SSH keys, and suspicious shell-profile modifications.
"""
from __future__ import annotations
import re
from pathlib import Path
from typing import List, Optional
from ayn_antivirus.constants import SUSPICIOUS_CRON_PATTERNS
from ayn_antivirus.detectors.base import BaseDetector, DetectionResult
# ---------------------------------------------------------------------------
# File-content patterns
# ---------------------------------------------------------------------------
# All patterns below are compiled from bytes literals with re.IGNORECASE,
# so they are applied to raw file content (bytes) without decoding.

# Keylogger indicators: input-device paths, X11 input tools, and
# keyboard-hook API names.  Used by SpywareDetector._check_keylogger.
_RE_KEYLOGGER = re.compile(
rb"(?:"
rb"/dev/input/event\d+"
rb"|xinput\s+(?:test|list)"
rb"|xdotool\b"
rb"|showkey\b"
rb"|logkeys\b"
rb"|pynput\.keyboard"
rb"|keyboard\.on_press"
rb"|evdev\.InputDevice"
rb"|GetAsyncKeyState"
rb"|SetWindowsHookEx"
rb")",
re.IGNORECASE,
)
# Screen / audio capture.
# Screen-capture tools and API calls.  Note that the bare alternative
# "screenshot" matches that word anywhere in the content.
_RE_SCREEN_CAPTURE = re.compile(
rb"(?:"
rb"scrot\b"
rb"|import\s+-window\s+root"
rb"|xwd\b"
rb"|ffmpeg\s+.*-f\s+x11grab"
rb"|xdpyinfo"
rb"|ImageGrab\.grab"
rb"|screenshot"
rb"|pyautogui\.screenshot"
rb"|screencapture\b"
rb")",
re.IGNORECASE,
)
# Audio-recording tools and libraries.
_RE_AUDIO_CAPTURE = re.compile(
rb"(?:"
rb"arecord\b"
rb"|parecord\b"
rb"|ffmpeg\s+.*-f\s+(?:alsa|pulse|avfoundation)"
rb"|pyaudio"
rb"|sounddevice"
rb")",
re.IGNORECASE,
)
# Data exfiltration: upload-style curl/wget invocations, scp/rsync to a
# user@host target, nc with input redirection, and an ad-hoc HTTP server.
# "." does not match a newline here (no DOTALL), so each ".*" stays on
# one line.
_RE_EXFIL = re.compile(
rb"(?:"
rb"curl\s+.*-[FdT]\s"
rb"|curl\s+.*--upload-file"
rb"|wget\s+.*--post-file"
rb"|scp\s+.*@"
rb"|rsync\s+.*@"
rb"|nc\s+-[^\s]*\s+\d+\s*<"
rb"|python[23]?\s+-m\s+http\.server"
rb")",
re.IGNORECASE,
)
# Reverse shell one-liners for bash, nc/ncat, socat, python, perl, ruby
# and php, plus any /dev/tcp/<dotted-quad> reference.
_RE_REVERSE_SHELL = re.compile(
rb"(?:"
rb"bash\s+-i\s+>&\s*/dev/tcp/"
rb"|nc\s+-e\s+/bin/"
rb"|ncat\s+.*-e\s+/bin/"
rb"|socat\s+exec:"
rb"|python[23]?\s+-c\s+['\"]import\s+socket"
rb"|perl\s+-e\s+['\"]use\s+Socket"
rb"|ruby\s+-rsocket\s+-e"
rb"|php\s+-r\s+['\"].*fsockopen"
rb"|mkfifo\s+/tmp/.*;\s*nc"
rb"|/dev/tcp/\d+\.\d+\.\d+\.\d+"
rb")",
re.IGNORECASE,
)
# Suspicious cron patterns (compiled from constants).
# Each string in SUSPICIOUS_CRON_PATTERNS is encoded to bytes and compiled
# once at import time, so the list can be searched against bytes content.
_RE_CRON_PATTERNS = [
re.compile(pat.encode(), re.IGNORECASE) for pat in SUSPICIOUS_CRON_PATTERNS
]
class SpywareDetector(BaseDetector):
    """Detect spyware indicators in files and on the host.

    Every check receives the file path and its raw bytes and returns a
    (possibly empty) list of results; ``detect`` concatenates them.
    """

    # File names treated as shell start-up / logout profiles.
    _PROFILE_FILES = {
        ".bashrc", ".bash_profile", ".profile", ".zshrc",
        ".bash_login", ".bash_logout",
    }

    # Commands that are suspicious when found inside a shell profile.
    _RE_PROFILE_SUSPICIOUS = re.compile(
        rb"(?:"
        rb"curl\s+[^\n]*\|\s*(?:sh|bash)"
        rb"|wget\s+[^\n]*\|\s*(?:sh|bash)"
        rb"|/dev/tcp/"
        rb"|base64\s+--decode"
        rb"|nohup\s+.*&"
        rb"|eval\s+\$\("
        rb"|python[23]?\s+-c\s+['\"]import\s+(?:socket|os|pty)"
        rb")",
        re.IGNORECASE,
    )

    # ------------------------------------------------------------------
    # BaseDetector interface
    # ------------------------------------------------------------------

    @property
    def name(self) -> str:
        """Identifier stored in ``detector_name`` of every result."""
        return "spyware_detector"

    @property
    def description(self) -> str:
        """One-line human-readable summary of this detector."""
        return "Detects keyloggers, screen capture, data exfiltration, and reverse shells"

    def detect(
        self,
        file_path: str | Path,
        file_content: Optional[bytes] = None,
        file_hash: Optional[str] = None,
    ) -> List[DetectionResult]:
        """Run every spyware check against one file.

        The content is obtained through ``self._read_content``; if that
        raises ``OSError`` a warning is logged and no results are
        returned.  *file_hash* is not used.
        """
        target = Path(file_path)
        try:
            blob = self._read_content(target, file_content)
        except OSError as exc:
            self._warn("Cannot read %s: %s", target, exc)
            return []

        # Content checks first, then the two path-sensitive checks; the
        # order fixes the order of the returned results.
        checks = (
            self._check_keylogger,
            self._check_screen_capture,
            self._check_audio_capture,
            self._check_exfiltration,
            self._check_reverse_shell,
            self._check_hidden_cron,
            self._check_authorized_keys,
            self._check_shell_profile,
        )
        findings: List[DetectionResult] = []
        for check in checks:
            findings.extend(check(target, blob))
        return findings

    def _spyware_finding(
        self, threat_name: str, severity: str, confidence: int, details: str
    ) -> DetectionResult:
        """Build a ``SPYWARE`` result attributed to this detector."""
        return DetectionResult(
            threat_name=threat_name,
            threat_type="SPYWARE",
            severity=severity,
            confidence=confidence,
            details=details,
            detector_name=self.name,
        )

    # ------------------------------------------------------------------
    # Keylogger patterns
    # ------------------------------------------------------------------

    def _check_keylogger(
        self, file_path: Path, content: bytes
    ) -> List[DetectionResult]:
        """Report keylogger indicators, quoting up to five distinct matches."""
        hits = _RE_KEYLOGGER.findall(content)
        if not hits:
            return []
        # Only the first five matches are sampled, then de-duplicated.
        distinct = sorted({hit.decode(errors="replace") for hit in hits[:5]})
        return [self._spyware_finding(
            "Spyware.Keylogger",
            "CRITICAL",
            80,
            f"Keylogger indicators: {', '.join(distinct)}",
        )]

    # ------------------------------------------------------------------
    # Screen capture
    # ------------------------------------------------------------------

    def _check_screen_capture(
        self, file_path: Path, content: bytes
    ) -> List[DetectionResult]:
        """Report the presence of any screen-capture pattern."""
        if _RE_SCREEN_CAPTURE.search(content) is None:
            return []
        return [self._spyware_finding(
            "Spyware.ScreenCapture",
            "HIGH",
            70,
            "Screen-capture tools or API calls detected",
        )]

    # ------------------------------------------------------------------
    # Audio capture
    # ------------------------------------------------------------------

    def _check_audio_capture(
        self, file_path: Path, content: bytes
    ) -> List[DetectionResult]:
        """Report the presence of any audio-recording pattern."""
        if _RE_AUDIO_CAPTURE.search(content) is None:
            return []
        return [self._spyware_finding(
            "Spyware.AudioCapture",
            "HIGH",
            65,
            "Audio recording tools or API calls detected",
        )]

    # ------------------------------------------------------------------
    # Data exfiltration
    # ------------------------------------------------------------------

    def _check_exfiltration(
        self, file_path: Path, content: bytes
    ) -> List[DetectionResult]:
        """Report exfiltration patterns, quoting up to three matches."""
        hits = _RE_EXFIL.findall(content)
        if not hits:
            return []
        # Each quoted sample is capped at 80 characters.
        quoted = "; ".join(
            hit.decode(errors="replace")[:80] for hit in hits[:3]
        )
        return [self._spyware_finding(
            "Spyware.DataExfiltration",
            "HIGH",
            70,
            f"Data exfiltration pattern(s): {quoted}",
        )]

    # ------------------------------------------------------------------
    # Reverse shell
    # ------------------------------------------------------------------

    def _check_reverse_shell(
        self, file_path: Path, content: bytes
    ) -> List[DetectionResult]:
        """Report the first reverse-shell pattern found, if any."""
        hit = _RE_REVERSE_SHELL.search(content)
        if hit is None:
            return []
        return [self._spyware_finding(
            "Spyware.ReverseShell",
            "CRITICAL",
            90,
            f"Reverse shell pattern: {hit.group()[:100]!r}",
        )]

    # ------------------------------------------------------------------
    # Hidden cron jobs
    # ------------------------------------------------------------------

    def _check_hidden_cron(
        self, file_path: Path, content: bytes
    ) -> List[DetectionResult]:
        """Report suspicious entries in cron-related files.

        A file counts as cron-related when its path contains one of the
        marker substrings.  One result is produced per matching pattern.
        """
        location = str(file_path)
        markers = ("cron", "crontab", "/var/spool/")
        if not any(marker in location for marker in markers):
            return []

        hits = (pattern.search(content) for pattern in _RE_CRON_PATTERNS)
        return [
            self._spyware_finding(
                "Spyware.Cron.SuspiciousEntry",
                "HIGH",
                80,
                f"Suspicious cron pattern in {file_path}: {hit.group()[:80]!r}",
            )
            for hit in hits
            if hit
        ]

    # ------------------------------------------------------------------
    # Unauthorized SSH keys
    # ------------------------------------------------------------------

    def _check_authorized_keys(
        self, file_path: Path, content: bytes
    ) -> List[DetectionResult]:
        """Inspect a file named ``authorized_keys``.

        Up to three results are produced: unexpected location, more than
        ten key-type markers, and the presence of a forced ``command=``.
        """
        if file_path.name != "authorized_keys":
            return []

        findings: List[DetectionResult] = []
        location = str(file_path)

        # Expected locations: anything under /root/ or inside a .ssh dir.
        in_expected_place = location.startswith("/root/") or "/.ssh/" in location
        if not in_expected_place:
            findings.append(self._spyware_finding(
                "Spyware.SSH.UnauthorizedKeysFile",
                "HIGH",
                75,
                f"authorized_keys found in unexpected location: {file_path}",
            ))

        # Key count is approximated by counting key-type substrings.
        key_markers = (b"ssh-rsa", b"ssh-ed25519", b"ecdsa-sha2")
        key_count = sum(content.count(marker) for marker in key_markers)
        if key_count > 10:
            findings.append(self._spyware_finding(
                "Spyware.SSH.ExcessiveKeys",
                "MEDIUM",
                55,
                f"{key_count} SSH keys in {file_path} — possible unauthorized access",
            ))

        # command= prefix can force a shell command on login — often abused.
        has_forced_command = any(
            token in content for token in (b'command="', b"command='")
        )
        if has_forced_command:
            findings.append(self._spyware_finding(
                "Spyware.SSH.ForcedCommand",
                "MEDIUM",
                60,
                f"Forced command found in authorized_keys: {file_path}",
            ))
        return findings

    # ------------------------------------------------------------------
    # Shell profile modifications
    # ------------------------------------------------------------------

    def _check_shell_profile(
        self, file_path: Path, content: bytes
    ) -> List[DetectionResult]:
        """Report the first suspicious command found in a shell profile."""
        if file_path.name not in self._PROFILE_FILES:
            return []
        hit = self._RE_PROFILE_SUSPICIOUS.search(content)
        if hit is None:
            return []
        return [self._spyware_finding(
            "Spyware.ShellProfile.SuspiciousEntry",
            "CRITICAL",
            85,
            (
                f"Suspicious command in shell profile {file_path}: "
                f"{hit.group()[:100]!r}"
            ),
        )]

View File

@@ -0,0 +1,200 @@
"""YARA-rule detector for AYN Antivirus.
Compiles and caches YARA rule files from the configured rules directory,
then matches them against scanned files. ``yara-python`` is treated as an
optional dependency — if it is missing the detector logs a warning and
returns no results.
"""
from __future__ import annotations
import logging
from pathlib import Path
from typing import Any, List, Optional
from ayn_antivirus.constants import DEFAULT_YARA_RULES_DIR
from ayn_antivirus.detectors.base import BaseDetector, DetectionResult
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)

# ---------------------------------------------------------------------------
# Optional dependency: yara-python.  When the import fails, ``yara`` is
# bound to None and ``_YARA_AVAILABLE`` records that it is missing.
# ---------------------------------------------------------------------------
try:
    import yara  # type: ignore[import-untyped]
except ImportError:
    yara = None  # type: ignore[assignment]
    _YARA_AVAILABLE = False
else:
    _YARA_AVAILABLE = True

# Lower-case severity value from a rule's ``meta`` section -> the
# upper-case severity label used in detection results.
_META_SEVERITY_MAP = {
    label: label.upper() for label in ("critical", "high", "medium", "low")
}
class YaraDetector(BaseDetector):
    """Detect threats by matching YARA rules against file contents.

    Parameters
    ----------
    rules_dir:
        Directory containing ``.yar`` / ``.yara`` rule files.  Defaults to
        the bundled ``signatures/yara_rules/`` directory.

    Rules are compiled lazily on the first :meth:`detect` call, or
    explicitly through :meth:`load_rules`.
    """

    def __init__(self, rules_dir: str | Path = DEFAULT_YARA_RULES_DIR) -> None:
        self.rules_dir = Path(rules_dir)
        self._rules: Any = None  # compiled yara.Rules object
        self._rule_count: int = 0  # number of rule files compiled
        self._loaded = False  # True once a load has been attempted

    # ------------------------------------------------------------------
    # BaseDetector interface
    # ------------------------------------------------------------------

    @property
    def name(self) -> str:
        """Identifier stored in ``detector_name`` of every result."""
        return "yara_detector"

    @property
    def description(self) -> str:
        """One-line human-readable summary of this detector."""
        return "Pattern matching using compiled YARA rules"

    def detect(
        self,
        file_path: str | Path,
        file_content: Optional[bytes] = None,
        file_hash: Optional[str] = None,
    ) -> List[DetectionResult]:
        """Match all loaded YARA rules against one file.

        When *file_content* is provided it is matched in memory;
        otherwise the file at *file_path* is matched.  *file_hash* is not
        used.

        Returns one result per matching rule; an empty list when
        ``yara-python`` is missing, no rules are loaded, or the scan
        itself fails.
        """
        if not _YARA_AVAILABLE:
            self._warn("yara-python is not installed — skipping YARA detection")
            return []

        if not self._loaded:
            self.load_rules()
        if self._rules is None:
            return []

        file_path = Path(file_path)
        results: List[DetectionResult] = []

        try:
            if file_content is not None:
                matches = self._rules.match(data=file_content)
            else:
                matches = self._rules.match(filepath=str(file_path))
        except yara.Error as exc:
            self._warn("YARA scan failed for %s: %s", file_path, exc)
            return results

        for match in matches:
            meta = match.meta or {}
            severity = _META_SEVERITY_MAP.get(
                str(meta.get("severity", "")).lower(), "HIGH"
            )
            # str(): a rule author may have written a non-string value.
            threat_type = str(meta.get("threat_type") or "MALWARE").upper()
            threat_name = meta.get("threat_name") or match.rule

            matched_strings = self._describe_matched_strings(match)

            detail_parts = [f"YARA rule '{match.rule}' matched"]
            if match.namespace and match.namespace != "default":
                detail_parts.append(f"namespace={match.namespace}")
            if matched_strings:
                detail_parts.append(
                    f"strings=[{', '.join(matched_strings[:5])}]"
                )
            if meta.get("description"):
                detail_parts.append(str(meta["description"]))

            results.append(DetectionResult(
                threat_name=threat_name,
                threat_type=threat_type,
                severity=severity,
                confidence=self._coerce_confidence(meta.get("confidence")),
                details=" | ".join(detail_parts),
                detector_name=self.name,
            ))
        return results

    @staticmethod
    def _describe_matched_strings(match: Any) -> List[str]:
        """Return ``"<identifier> @ 0x<offset>"`` for each matched string.

        Handles both shapes of ``match.strings``: objects exposing
        ``identifier`` and ``instances`` (each instance with an
        ``offset``), and plain ``(offset, identifier, data)`` tuples.
        Entries of any other shape are skipped.
        """
        described: List[str] = []
        for item in getattr(match, "strings", None) or ():
            instances = getattr(item, "instances", None)
            if instances is not None:
                # Object form: one entry per matched instance.
                for instance in instances:
                    described.append(
                        f"{item.identifier} @ 0x{instance.offset:x}"
                    )
                continue
            try:
                # Tuple form.
                offset, identifier, _data = item
                described.append(f"{identifier} @ 0x{offset:x}")
            except (TypeError, ValueError):
                continue
        return described

    @staticmethod
    def _coerce_confidence(raw: Any, default: int = 90) -> int:
        """Convert a rule's ``confidence`` meta value to an int in 0..100.

        Missing or non-numeric values yield *default* instead of raising,
        so one malformed rule cannot abort the scan of a file.
        """
        try:
            value = int(raw)
        except (TypeError, ValueError):
            return default
        return max(0, min(100, value))

    # ------------------------------------------------------------------
    # Rule management
    # ------------------------------------------------------------------

    def load_rules(self, rules_dir: Optional[str | Path] = None) -> None:
        """Compile all ``.yar`` / ``.yara`` files in *rules_dir*.

        Compiled rules are cached in ``self._rules``.  Call this again
        after updating rule files to pick up changes.

        Once ``yara-python`` is available, every outcome marks the
        detector as loaded, so :meth:`detect` does not retry (and re-log)
        a failing load for every scanned file.  If the directory does not
        exist, previously compiled rules are left untouched; a
        compilation failure clears them.
        """
        if not _YARA_AVAILABLE:
            self._warn("yara-python is not installed — cannot load rules")
            return

        directory = Path(rules_dir) if rules_dir else self.rules_dir
        if not directory.is_dir():
            self._warn("YARA rules directory does not exist: %s", directory)
            self._loaded = True
            return

        rule_files = sorted(
            p for p in directory.iterdir()
            if p.suffix.lower() in (".yar", ".yara") and p.is_file()
        )

        if not rule_files:
            self._log("No YARA rule files found in %s", directory)
            self._rules = None
            self._rule_count = 0
            self._loaded = True
            return

        # Build a filepaths dict for yara.compile(filepaths={...}).  The
        # namespace is the file stem; if two files share a stem
        # ("x.yar" and "x.yara") the later one uses its full file name so
        # that neither silently replaces the other.
        filepaths = {}
        for rule_file in rule_files:
            namespace = rule_file.stem
            if namespace in filepaths:
                namespace = rule_file.name
            filepaths[namespace] = str(rule_file)

        try:
            self._rules = yara.compile(filepaths=filepaths)
        except yara.SyntaxError as exc:
            self._error("YARA compilation error: %s", exc)
            self._rules = None
            self._rule_count = 0
        except yara.Error as exc:
            self._error("YARA error: %s", exc)
            self._rules = None
            self._rule_count = 0
        else:
            self._rule_count = len(filepaths)
            self._log(
                "Compiled %d YARA rule file(s) from %s",
                self._rule_count,
                directory,
            )
        self._loaded = True

    @property
    def rule_count(self) -> int:
        """Number of rule files currently compiled."""
        return self._rule_count

    @property
    def available(self) -> bool:
        """Return ``True`` if ``yara-python`` is installed."""
        return _YARA_AVAILABLE