remove infra.md.example, infra.md is the source of truth

2026-03-03 03:06:13 +08:00
parent 1ad3033cc1
commit a3c6d09350
86 changed files with 17093 additions and 39 deletions
--- a/ayn-antivirus/ayn_antivirus/scanners/memory_scanner.py
+++ b/ayn-antivirus/ayn_antivirus/scanners/memory_scanner.py
@@ -0,0 +1,332 @@
+"""Process memory scanner for AYN Antivirus.
+
+Reads ``/proc/<pid>/maps`` and ``/proc/<pid>/mem`` on Linux to search for
+injected code, suspicious byte patterns (mining pool URLs, known malware
+strings), and anomalous RWX memory regions.
+
+Most operations require **root** privileges.  On non-Linux systems the
+scanner gracefully returns empty results.
+"""
+
+from __future__ import annotations
+
+import logging
+import os
+import re
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Sequence
+
+from ayn_antivirus.constants import CRYPTO_POOL_DOMAINS
+from ayn_antivirus.scanners.base import BaseScanner
+
+logger = logging.getLogger(__name__)
+
+# Default byte-level patterns to search for in process memory.
+# Each entry is matched as a literal, case-sensitive byte substring of a
+# region's raw contents (via ``in`` / ``bytes.find`` in the scanner methods).
+_DEFAULT_PATTERNS: List[bytes] = [
+    # Mining pool URLs (every entry of CRYPTO_POOL_DOMAINS, encoded to bytes)
+    *(domain.encode() for domain in CRYPTO_POOL_DOMAINS),
+    # Common miner stratum strings
+    b"stratum+tcp://",
+    b"stratum+ssl://",
+    b"stratum2+tcp://",
+    # Suspicious shell commands sometimes found in injected memory
+    b"/bin/sh -c",
+    b"/bin/bash -i",
+    b"/dev/tcp/",
+    # Known malware markers
+    b"PAYLOAD_START",
+    # NOTE(review): this looks like the Debian/Ubuntu multiarch library
+    # directory name, which would presumably be present in most dynamically
+    # linked processes -- confirm it is not a source of false positives.
+    b"x86_64-linux-gnu",
+    # NOTE(review): these bytes appear to be the tail of the ELF magic plus
+    # the 64-bit / little-endian / version-1 ident bytes, i.e. part of every
+    # mapped 64-bit ELF header -- confirm matching file-backed regions is
+    # intended.
+    b"ELF\x02\x01\x01",
+]
+
+# Size of chunks when reading /proc/<pid>/mem: at most this many bytes
+# (64 KiB) are requested per read call.
+_MEM_READ_CHUNK = 65536
+
+# Regex to parse a single line from /proc/<pid>/maps.
+#   address           perms offset  dev   inode  pathname
+#   7f1c2a000000-7f1c2a021000 rw-p 00000000 00:00 0  [heap]
+#
+# Groups: 1 = start address (hex), 2 = end address (hex), 3 = permissions,
+# 4 = pathname (empty for unnamed mappings).  Only mappings whose first
+# permission flag is ``r`` match; non-readable mappings are skipped.
+#
+# Field separators are restricted to spaces/tabs on purpose.  ``\s`` also
+# matches "\n", so on a line that ends right after the inode the optional
+# separator before the pathname would run on to the next line, capture that
+# whole line as the "pathname", and hide its mapping from ``finditer``.
+_MAPS_RE = re.compile(
+    r"^([0-9a-f]+)-([0-9a-f]+)[ \t]+(r[w-][x-][ps-])[ \t]+\S+[ \t]+\S+[ \t]+\d+[ \t]*(.*)$",
+    re.MULTILINE,
+)
+
+
+class MemoryScanner(BaseScanner):
+    """Scan process memory for injected code and suspicious patterns.
+
+    .. note::
+       This scanner only works on Linux where ``/proc`` is available.
+       Operations on ``/proc/<pid>/mem`` typically require root or
+       ``CAP_SYS_PTRACE``.
+    """
+
+    # ------------------------------------------------------------------
+    # BaseScanner interface
+    # ------------------------------------------------------------------
+
+    @property
+    def name(self) -> str:
+        """Return this scanner's identifier string, ``"memory_scanner"``."""
+        return "memory_scanner"
+
+    @property
+    def description(self) -> str:
+        """Return a one-line, human-readable summary of what this scanner does."""
+        return "Scans process memory for injected code and malicious patterns"
+
+    def scan(self, target: Any) -> Dict[str, Any]:
+        """Scan a single process by PID.
+
+        Runs the RWX-region check, the default pattern search and the
+        string extraction in that order.  Failures are reported through the
+        ``error`` key instead of being raised; results from steps that
+        completed before the failure are kept.
+
+        Parameters
+        ----------
+        target:
+            The PID of the process to inspect; anything accepted by
+            ``int()``.
+
+        Returns
+        -------
+        dict
+            ``pid``, ``rwx_regions``, ``pattern_matches``, ``strings_sample``
+            (at most 200 strings of length >= 8), ``error`` (``None`` on
+            success, otherwise a message).
+
+        Raises
+        ------
+        ValueError, TypeError
+            If *target* cannot be converted to ``int``.
+        """
+        pid = int(target)
+        result: Dict[str, Any] = {
+            "pid": pid,
+            "rwx_regions": [],
+            "pattern_matches": [],
+            "strings_sample": [],
+            "error": None,
+        }
+
+        if not Path("/proc").is_dir():
+            result["error"] = "Not a Linux system — /proc not available"
+            return result
+
+        try:
+            result["rwx_regions"] = self.find_injected_code(pid)
+            result["pattern_matches"] = self.scan_for_patterns(pid, _DEFAULT_PATTERNS)
+            result["strings_sample"] = self.get_memory_strings(pid, min_length=8)[:200]
+        except PermissionError:
+            result["error"] = f"Permission denied reading /proc/{pid}/mem (need root)"
+        except (FileNotFoundError, ProcessLookupError):
+            # ENOENT: the PID directory is already gone.  ESRCH: the process
+            # exited while one of its /proc files was being read.  Both mean
+            # the same thing to the caller and are not worth a traceback.
+            result["error"] = f"Process {pid} no longer exists"
+        except Exception as exc:
+            # Boundary handler: never let one process abort a wider sweep.
+            result["error"] = str(exc)
+            logger.exception("Error scanning memory for PID %d", pid)
+
+        return result
+
+    # ------------------------------------------------------------------
+    # /proc/<pid>/maps parsing
+    # ------------------------------------------------------------------
+
+    @staticmethod
+    def _read_maps(pid: int) -> List[Dict[str, Any]]:
+        """Parse ``/proc/<pid>/maps`` and return the readable memory regions.
+
+        Only lines accepted by ``_MAPS_RE`` are returned, i.e. mappings whose
+        permission string starts with ``r``.  Each dict contains ``start``
+        (int), ``end`` (int), ``perms`` (str), ``pathname`` (str, empty for
+        unnamed mappings).
+
+        Raises
+        ------
+        FileNotFoundError
+            If the process does not exist.
+        PermissionError
+            If the caller cannot read the maps file.
+        """
+        # Mapped file names are arbitrary bytes; substitute undecodable ones
+        # so a single odd path cannot abort the scan with UnicodeDecodeError.
+        content = Path(f"/proc/{pid}/maps").read_text(errors="replace")
+
+        regions: List[Dict[str, Any]] = []
+        # Match one line at a time.  Running the pattern over the whole text
+        # lets its whitespace classes cross a newline when a line has no
+        # pathname, which swallows the following mapping.
+        for line in content.split("\n"):
+            match = _MAPS_RE.match(line)
+            if match is None:
+                continue
+            start, end, perms, pathname = match.groups()
+            regions.append({
+                "start": int(start, 16),
+                "end": int(end, 16),
+                "perms": perms,
+                "pathname": pathname.strip(),
+            })
+        return regions
+
+    # ------------------------------------------------------------------
+    # Memory reading helper
+    # ------------------------------------------------------------------
+
+    @staticmethod
+    def _read_region(pid: int, start: int, end: int) -> bytes:
+        """Read bytes from ``/proc/<pid>/mem`` between *start* and *end*.
+
+        Best effort: returns as many bytes as could be read before the first
+        failure.  The result is empty when the file cannot be opened, when
+        the offset cannot be seeked to, or when ``end <= start``.  No
+        exception is raised for unreadable memory.
+        """
+        data = bytearray()
+        try:
+            # Unbuffered, so every read() is issued at exactly the current
+            # offset and never reads ahead past the requested range.
+            with open(f"/proc/{pid}/mem", "rb", buffering=0) as mem:
+                mem.seek(start)
+                remaining = end - start
+                while remaining > 0:
+                    chunk = mem.read(min(_MEM_READ_CHUNK, remaining))
+                    if not chunk:
+                        break
+                    data.extend(chunk)
+                    remaining -= len(chunk)
+        except (OSError, OverflowError):
+            # OSError: the region was unmapped or is not readable by us.
+            # OverflowError: the address does not fit in a signed file
+            # offset (mappings at the very top of the address space); treat
+            # it as unreadable instead of aborting the caller's whole scan.
+            pass
+        return bytes(data)
+
+    # ------------------------------------------------------------------
+    # Public scanning methods
+    # ------------------------------------------------------------------
+
+    def scan_process_memory(self, pid: int) -> List[Dict[str, Any]]:
+        """Inspect every readable mapping of *pid* for the default patterns.
+
+        Mappings larger than 50 MiB are left out of the report entirely.
+        Each remaining mapping yields one dict with ``start`` and ``end``
+        (hex strings), ``perms``, ``pathname``, ``size`` (bytes) and
+        ``has_suspicious``, which is true when at least one default pattern
+        occurs in the bytes that could be read.
+
+        Raises
+        ------
+        PermissionError, FileNotFoundError
+        """
+        size_limit = 50 * 1024 * 1024
+        report: List[Dict[str, Any]] = []
+
+        for entry in self._read_maps(pid):
+            low, high = entry["start"], entry["end"]
+            length = high - low
+            # Skip what we cannot read, and very large regions to avoid OOM.
+            if entry["perms"][:1] != "r" or length > size_limit:
+                continue
+
+            blob = self._read_region(pid, low, high)
+            flagged = False
+            for needle in _DEFAULT_PATTERNS:
+                if needle in blob:
+                    flagged = True
+                    break
+
+            report.append(
+                dict(
+                    start=hex(low),
+                    end=hex(high),
+                    perms=entry["perms"],
+                    pathname=entry["pathname"],
+                    size=length,
+                    has_suspicious=flagged,
+                )
+            )
+
+        return report
+
+    def find_injected_code(self, pid: int) -> List[Dict[str, Any]]:
+        """Report mappings that are readable, writable and executable at once.
+
+        RWX memory is unusual for ordinary programs; it can point at injected
+        code, JIT-generated shellcode, or a packed payload unpacked at
+        runtime.  Only the maps listing is consulted -- no memory is read.
+
+        Returns one dict per RWX mapping with ``start``, ``end`` (hex
+        strings), ``perms``, ``pathname``, ``size``, ``severity`` and
+        ``reason``.
+        """
+
+        def describe(entry: Dict[str, Any]) -> Dict[str, Any]:
+            span = entry["end"] - entry["start"]
+            return dict(
+                start=hex(entry["start"]),
+                end=hex(entry["end"]),
+                perms=entry["perms"],
+                pathname=entry["pathname"],
+                size=span,
+                severity="HIGH",
+                reason=f"RWX region ({span} bytes) — possible code injection",
+            )
+
+        # The first three permission flags are r, w, x in that order.
+        return [
+            describe(entry)
+            for entry in self._read_maps(pid)
+            if entry["perms"][:3] == "rwx"
+        ]
+
+    def get_memory_strings(
+        self,
+        pid: int,
+        min_length: int = 6,
+        *,
+        max_strings: int = 10_000,
+    ) -> List[str]:
+        """Extract printable ASCII strings from readable memory regions.
+
+        Regions larger than 10 MiB are skipped.
+
+        Parameters
+        ----------
+        min_length:
+            Minimum string length to keep.  Values below 1 are treated as 1,
+            because a zero or negative bound does not form a meaningful
+            repetition count in the search pattern.
+        max_strings:
+            Stop and return as soon as this many strings have been
+            collected, to bound memory usage.  A value <= 0 returns an empty
+            list without touching the process.
+
+        Returns a list of decoded strings (capped at 500 chars each).
+
+        Raises
+        ------
+        PermissionError, FileNotFoundError
+            Propagated from reading the maps file.
+        """
+        if max_strings <= 0:
+            return []
+
+        min_length = max(1, int(min_length))
+        printable_re = re.compile(rb"[\x20-\x7e]{%d,}" % min_length)
+        strings: List[str] = []
+
+        for region in self._read_maps(pid):
+            if not region["perms"].startswith("r"):
+                continue
+            if region["end"] - region["start"] > 10 * 1024 * 1024:
+                continue  # skip huge regions
+
+            data = self._read_region(pid, region["start"], region["end"])
+            for match in printable_re.finditer(data):
+                # The pattern only admits bytes 0x20-0x7e, so ASCII decoding
+                # cannot fail; truncate before decoding to skip wasted work.
+                strings.append(match.group()[:500].decode("ascii"))
+
+                # Cap total to avoid unbounded memory usage.
+                if len(strings) >= max_strings:
+                    return strings
+
+        return strings
+
+    def scan_for_patterns(
+        self,
+        pid: int,
+        patterns: Optional[Sequence[bytes]] = None,
+    ) -> List[Dict[str, Any]]:
+        """Search process memory for specific byte patterns.
+
+        Every readable region of at most 50 MiB is read and searched.  For
+        each (region, pattern) pair only the first occurrence is reported.
+
+        Parameters
+        ----------
+        patterns:
+            Byte strings to search for.  Defaults to
+            :pydata:`_DEFAULT_PATTERNS` (mining pool URLs, stratum prefixes,
+            shell commands).  Empty byte strings are ignored.
+
+        Returns a list of dicts with ``pattern``, ``region_start``,
+        ``region_perms``, ``region_pathname``, ``offset`` (relative to the
+        start of the region), ``severity`` and ``reason``.
+
+        Raises
+        ------
+        PermissionError, FileNotFoundError
+            Propagated from reading the maps file.
+        """
+        if patterns is None:
+            patterns = _DEFAULT_PATTERNS
+
+        # An empty needle is "found" at offset 0 of every buffer, which would
+        # flag every region; drop such entries.  Materialising the list also
+        # lets a one-shot iterable be reused for every region.
+        needles = [pat for pat in patterns if pat]
+
+        regions = self._read_maps(pid)
+        matches: List[Dict[str, Any]] = []
+        if not needles:
+            return matches
+
+        for region in regions:
+            if not region["perms"].startswith("r"):
+                continue
+            size = region["end"] - region["start"]
+            if size > 50 * 1024 * 1024:
+                continue  # skip very large regions to avoid OOM
+
+            data = self._read_region(pid, region["start"], region["end"])
+            for pat in needles:
+                idx = data.find(pat)
+                if idx != -1:
+                    matches.append({
+                        "pattern": pat.decode("utf-8", errors="replace"),
+                        "region_start": hex(region["start"]),
+                        "region_perms": region["perms"],
+                        "region_pathname": region["pathname"],
+                        "offset": idx,
+                        "severity": "HIGH",
+                        "reason": f"Suspicious pattern found in memory: {pat[:60]!r}",
+                    })
+
+        return matches