remove infra.md.example, infra.md is the source of truth
This commit is contained in:
0
ayn-antivirus/ayn_antivirus/signatures/__init__.py
Normal file
0
ayn-antivirus/ayn_antivirus/signatures/__init__.py
Normal file
251
ayn-antivirus/ayn_antivirus/signatures/db/hash_db.py
Normal file
251
ayn-antivirus/ayn_antivirus/signatures/db/hash_db.py
Normal file
@@ -0,0 +1,251 @@
|
||||
"""SQLite-backed malware hash database for AYN Antivirus.
|
||||
|
||||
Stores SHA-256 / MD5 hashes of known threats with associated metadata
|
||||
(threat name, type, severity, source feed) and provides efficient lookup,
|
||||
bulk-insert, search, and export operations.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
import logging
|
||||
import sqlite3
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Sequence, Tuple
|
||||
|
||||
from ayn_antivirus.constants import DEFAULT_DB_PATH
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Schema
|
||||
# ---------------------------------------------------------------------------
|
||||
_SCHEMA = """
|
||||
CREATE TABLE IF NOT EXISTS threats (
|
||||
hash TEXT PRIMARY KEY,
|
||||
threat_name TEXT NOT NULL,
|
||||
threat_type TEXT NOT NULL DEFAULT 'MALWARE',
|
||||
severity TEXT NOT NULL DEFAULT 'HIGH',
|
||||
source TEXT NOT NULL DEFAULT '',
|
||||
added_date TEXT NOT NULL DEFAULT (datetime('now')),
|
||||
details TEXT NOT NULL DEFAULT ''
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_threats_type ON threats(threat_type);
|
||||
CREATE INDEX IF NOT EXISTS idx_threats_source ON threats(source);
|
||||
CREATE INDEX IF NOT EXISTS idx_threats_name ON threats(threat_name);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS meta (
|
||||
key TEXT PRIMARY KEY,
|
||||
value TEXT
|
||||
);
|
||||
"""
|
||||
|
||||
|
||||
class HashDatabase:
    """Manage a local SQLite database of known-malicious file hashes.

    The connection is opened lazily: every method reaches the database
    through :attr:`conn`, which calls :meth:`initialize` on first use.
    Hashes are lower-cased before they are stored, looked up or removed.

    Instances can be used as context managers; the connection is closed
    when the ``with`` block exits.

    Parameters
    ----------
    db_path:
        Path to the SQLite file.  Created automatically (with parent dirs)
        if it doesn't exist.
    """

    def __init__(self, db_path: str | Path = DEFAULT_DB_PATH) -> None:
        self.db_path = Path(db_path)
        self._conn: Optional[sqlite3.Connection] = None

    def __enter__(self) -> "HashDatabase":
        return self

    def __exit__(self, exc_type: Any, exc: Any, tb: Any) -> None:
        self.close()

    # ------------------------------------------------------------------
    # Lifecycle
    # ------------------------------------------------------------------

    def initialize(self) -> None:
        """Open the database and create tables if necessary.

        If a connection is already open it is closed first, so calling this
        method repeatedly does not leak connections.  If schema creation
        fails, the new connection is closed and the error is re-raised.
        """
        self.close()
        self.db_path.parent.mkdir(parents=True, exist_ok=True)
        conn = sqlite3.connect(str(self.db_path), check_same_thread=False)
        try:
            conn.row_factory = sqlite3.Row
            conn.execute("PRAGMA journal_mode=WAL")
            conn.executescript(_SCHEMA)
            conn.commit()
        except Exception:
            # Do not keep a half-initialised connection around.
            conn.close()
            raise
        self._conn = conn
        logger.info("HashDatabase opened: %s (%d hashes)", self.db_path, self.count())

    def close(self) -> None:
        """Close the database connection if one is open."""
        if self._conn:
            self._conn.close()
            self._conn = None

    @property
    def conn(self) -> sqlite3.Connection:
        """The open connection; opens the database on first access."""
        if self._conn is None:
            self.initialize()
        assert self._conn is not None  # narrowing for type checkers
        return self._conn

    # ------------------------------------------------------------------
    # Single-record operations
    # ------------------------------------------------------------------

    def add_hash(
        self,
        hash_str: str,
        threat_name: str,
        threat_type: str = "MALWARE",
        severity: str = "HIGH",
        source: str = "",
        details: str = "",
    ) -> None:
        """Insert or replace a single hash record.

        An existing row with the same hash is overwritten, including its
        ``added_date``, which is set to the current UTC time.
        """
        self.conn.execute(
            "INSERT OR REPLACE INTO threats "
            "(hash, threat_name, threat_type, severity, source, added_date, details) "
            "VALUES (?, ?, ?, ?, ?, ?, ?)",
            (
                hash_str.lower(),
                threat_name,
                threat_type,
                severity,
                source,
                datetime.utcnow().isoformat(),
                details,
            ),
        )
        self.conn.commit()

    def lookup(self, hash_str: str) -> Optional[Dict[str, Any]]:
        """Look up a hash and return its metadata, or ``None``."""
        row = self.conn.execute(
            "SELECT * FROM threats WHERE hash = ?", (hash_str.lower(),)
        ).fetchone()
        return None if row is None else dict(row)

    def remove(self, hash_str: str) -> bool:
        """Delete a hash record.  Returns ``True`` if a row was deleted."""
        cur = self.conn.execute(
            "DELETE FROM threats WHERE hash = ?", (hash_str.lower(),)
        )
        self.conn.commit()
        return cur.rowcount > 0

    # ------------------------------------------------------------------
    # Bulk operations
    # ------------------------------------------------------------------

    def bulk_add(
        self,
        records: Sequence[Tuple[str, str, str, str, str, str]],
    ) -> int:
        """Efficiently insert new hashes in a single transaction.

        Uses ``INSERT OR IGNORE`` so existing entries are preserved and
        only genuinely new hashes are counted.  If the insert fails the
        transaction is rolled back and the error is re-raised.

        Parameters
        ----------
        records:
            Sequence of ``(hash, threat_name, threat_type, severity, source, details)``
            tuples.

        Returns
        -------
        int
            Number of **new** rows actually inserted.
        """
        if not records:
            return 0
        now = datetime.utcnow().isoformat()
        rows = [
            (h.lower(), name, ttype, sev, src, now, det)
            for h, name, ttype, sev, src, det in records
        ]
        conn = self.conn
        # total_changes only advances for rows that were really written, so
        # the difference is the number of non-ignored inserts -- no need for
        # two COUNT(*) scans over the whole table.
        changes_before = conn.total_changes
        try:
            conn.executemany(
                "INSERT OR IGNORE INTO threats "
                "(hash, threat_name, threat_type, severity, source, added_date, details) "
                "VALUES (?, ?, ?, ?, ?, ?, ?)",
                rows,
            )
        except sqlite3.Error:
            conn.rollback()
            raise
        conn.commit()
        return conn.total_changes - changes_before

    # ------------------------------------------------------------------
    # Query helpers
    # ------------------------------------------------------------------

    def count(self) -> int:
        """Total number of hashes in the database."""
        return self.conn.execute("SELECT COUNT(*) FROM threats").fetchone()[0]

    def get_stats(self) -> Dict[str, Any]:
        """Return aggregate statistics about the database.

        The result has the keys ``total``, ``by_type`` (threat type ->
        row count), ``by_source`` (source -> row count) and
        ``latest_update`` (largest ``added_date`` value, ``None`` when the
        table is empty).
        """
        c = self.conn
        by_type = {
            row[0]: row[1]
            for row in c.execute(
                "SELECT threat_type, COUNT(*) FROM threats GROUP BY threat_type"
            )
        }
        by_source = {
            row[0]: row[1]
            for row in c.execute(
                "SELECT source, COUNT(*) FROM threats GROUP BY source"
            )
        }
        latest = c.execute("SELECT MAX(added_date) FROM threats").fetchone()[0]
        return {
            "total": self.count(),
            "by_type": by_type,
            "by_source": by_source,
            "latest_update": latest,
        }

    def search(self, query: str) -> List[Dict[str, Any]]:
        """Search threat names with a SQL LIKE pattern.

        The pattern is passed to ``LIKE`` unchanged, so the caller supplies
        any ``%`` / ``_`` wildcards.  At most 500 rows are returned, newest
        ``added_date`` first.

        Example: ``search("%Trojan%")``
        """
        rows = self.conn.execute(
            "SELECT * FROM threats WHERE threat_name LIKE ? ORDER BY added_date DESC LIMIT 500",
            (query,),
        ).fetchall()
        return [dict(r) for r in rows]

    # ------------------------------------------------------------------
    # Export
    # ------------------------------------------------------------------

    def export_hashes(self, filepath: str | Path) -> int:
        """Export all hashes to a CSV file.  Returns the row count.

        The file is written as UTF-8 with a header row; parent directories
        are created if needed.  The returned count excludes the header.
        """
        filepath = Path(filepath)
        filepath.parent.mkdir(parents=True, exist_ok=True)
        cursor = self.conn.execute(
            "SELECT hash, threat_name, threat_type, severity, source, added_date, details "
            "FROM threats ORDER BY added_date DESC"
        )
        exported = 0
        # Explicit encoding: the platform default may not be able to encode
        # every threat name / details string.
        with open(filepath, "w", newline="", encoding="utf-8") as fh:
            writer = csv.writer(fh)
            writer.writerow(["hash", "threat_name", "threat_type", "severity", "source", "added_date", "details"])
            # Stream from the cursor instead of materialising every row.
            for row in cursor:
                writer.writerow(list(row))
                exported += 1
        return exported

    # ------------------------------------------------------------------
    # Meta helpers (used by manager to track feed state)
    # ------------------------------------------------------------------

    def set_meta(self, key: str, value: str) -> None:
        """Store *value* under *key* in the ``meta`` table, replacing any old value."""
        self.conn.execute(
            "INSERT OR REPLACE INTO meta (key, value) VALUES (?, ?)", (key, value)
        )
        self.conn.commit()

    def get_meta(self, key: str) -> Optional[str]:
        """Return the value stored under *key*, or ``None`` if the key is absent."""
        row = self.conn.execute(
            "SELECT value FROM meta WHERE key = ?", (key,)
        ).fetchone()
        return row[0] if row else None
|
||||
259
ayn-antivirus/ayn_antivirus/signatures/db/ioc_db.py
Normal file
259
ayn-antivirus/ayn_antivirus/signatures/db/ioc_db.py
Normal file
@@ -0,0 +1,259 @@
|
||||
"""SQLite-backed Indicator of Compromise (IOC) database for AYN Antivirus.
|
||||
|
||||
Stores malicious IPs, domains, and URLs sourced from threat-intelligence
|
||||
feeds so that the network scanner and detectors can perform real-time
|
||||
lookups.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import sqlite3
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Sequence, Set, Tuple
|
||||
|
||||
from ayn_antivirus.constants import DEFAULT_DB_PATH
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Schema
|
||||
# ---------------------------------------------------------------------------
|
||||
_SCHEMA = """
|
||||
CREATE TABLE IF NOT EXISTS ioc_ips (
|
||||
ip TEXT PRIMARY KEY,
|
||||
threat_name TEXT NOT NULL DEFAULT '',
|
||||
type TEXT NOT NULL DEFAULT 'C2',
|
||||
source TEXT NOT NULL DEFAULT '',
|
||||
added_date TEXT NOT NULL DEFAULT (datetime('now'))
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_ioc_ips_source ON ioc_ips(source);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS ioc_domains (
|
||||
domain TEXT PRIMARY KEY,
|
||||
threat_name TEXT NOT NULL DEFAULT '',
|
||||
type TEXT NOT NULL DEFAULT 'C2',
|
||||
source TEXT NOT NULL DEFAULT '',
|
||||
added_date TEXT NOT NULL DEFAULT (datetime('now'))
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_ioc_domains_source ON ioc_domains(source);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS ioc_urls (
|
||||
url TEXT PRIMARY KEY,
|
||||
threat_name TEXT NOT NULL DEFAULT '',
|
||||
type TEXT NOT NULL DEFAULT 'malware_distribution',
|
||||
source TEXT NOT NULL DEFAULT '',
|
||||
added_date TEXT NOT NULL DEFAULT (datetime('now'))
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_ioc_urls_source ON ioc_urls(source);
|
||||
"""
|
||||
|
||||
|
||||
class IOCDatabase:
    """Manage a local SQLite store of Indicators of Compromise.

    The connection is opened lazily through :attr:`conn`.  Domains are
    lower-cased on insert and lookup; IPs and URLs are stored and matched
    exactly as given.

    Parameters
    ----------
    db_path:
        Path to the SQLite file.  Shares the same file as
        :class:`HashDatabase` by default; each uses its own tables.
    """

    _VALID_TABLES: frozenset = frozenset({"ioc_ips", "ioc_domains", "ioc_urls"})

    # Primary-key column of each IOC table.  Table and column names are
    # interpolated into SQL, so they must only ever come from this mapping.
    _KEY_COLUMNS: Dict[str, str] = {
        "ioc_ips": "ip",
        "ioc_domains": "domain",
        "ioc_urls": "url",
    }

    def __init__(self, db_path: str | Path = DEFAULT_DB_PATH) -> None:
        self.db_path = Path(db_path)
        self._conn: Optional[sqlite3.Connection] = None

    # ------------------------------------------------------------------
    # Lifecycle
    # ------------------------------------------------------------------

    def initialize(self) -> None:
        """Open the database and create the IOC tables if necessary.

        An already-open connection is closed first.  If schema creation
        fails, the new connection is closed and the error is re-raised.
        """
        self.close()
        self.db_path.parent.mkdir(parents=True, exist_ok=True)
        conn = sqlite3.connect(str(self.db_path), check_same_thread=False)
        try:
            conn.row_factory = sqlite3.Row
            conn.execute("PRAGMA journal_mode=WAL")
            conn.executescript(_SCHEMA)
            conn.commit()
        except Exception:
            conn.close()
            raise
        self._conn = conn
        logger.info(
            "IOCDatabase opened: %s (IPs=%d, domains=%d, URLs=%d)",
            self.db_path,
            self._count("ioc_ips"),
            self._count("ioc_domains"),
            self._count("ioc_urls"),
        )

    def close(self) -> None:
        """Close the database connection if one is open."""
        if self._conn:
            self._conn.close()
            self._conn = None

    @property
    def conn(self) -> sqlite3.Connection:
        """The open connection; opens the database on first access."""
        if self._conn is None:
            self.initialize()
        assert self._conn is not None  # narrowing for type checkers
        return self._conn

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _count(self, table: str) -> int:
        """Return the row count of *table*; raises ``ValueError`` for unknown tables."""
        if table not in self._VALID_TABLES:
            raise ValueError(f"Invalid table name: {table}")
        return self.conn.execute(f"SELECT COUNT(*) FROM {table}").fetchone()[0]

    @classmethod
    def _key_column(cls, table: str) -> str:
        """Return the primary-key column of *table*; ``ValueError`` if unknown."""
        try:
            return cls._KEY_COLUMNS[table]
        except KeyError:
            raise ValueError(f"Invalid table name: {table}") from None

    def _upsert(
        self, table: str, value: str, threat_name: str, ioc_type: str, source: str
    ) -> None:
        """Insert or replace one indicator row in *table* and commit."""
        key_column = self._key_column(table)
        self.conn.execute(
            f"INSERT OR REPLACE INTO {table} ({key_column}, threat_name, type, source, added_date) "
            "VALUES (?, ?, ?, ?, ?)",
            (value, threat_name, ioc_type, source, datetime.utcnow().isoformat()),
        )
        self.conn.commit()

    def _bulk_insert(
        self, table: str, rows: List[Tuple[str, str, str, str, str]]
    ) -> int:
        """Insert *rows* with ``INSERT OR IGNORE`` and return how many were new.

        Rolls the transaction back and re-raises if the insert fails.
        """
        key_column = self._key_column(table)
        conn = self.conn
        # total_changes only advances for rows that were really written, so
        # the difference is the number of non-ignored inserts.
        changes_before = conn.total_changes
        try:
            conn.executemany(
                f"INSERT OR IGNORE INTO {table} ({key_column}, threat_name, type, source, added_date) "
                "VALUES (?, ?, ?, ?, ?)",
                rows,
            )
        except sqlite3.Error:
            conn.rollback()
            raise
        conn.commit()
        return conn.total_changes - changes_before

    # ------------------------------------------------------------------
    # IPs
    # ------------------------------------------------------------------

    def add_ip(
        self,
        ip: str,
        threat_name: str = "",
        type: str = "C2",
        source: str = "",
    ) -> None:
        """Insert or replace a single IP indicator.

        ``type`` shadows the builtin but is kept because it is part of the
        keyword interface.
        """
        self._upsert("ioc_ips", ip, threat_name, type, source)

    def bulk_add_ips(
        self,
        records: Sequence[Tuple[str, str, str, str]],
    ) -> int:
        """Bulk-insert IPs.  Each tuple: ``(ip, threat_name, type, source)``.

        Returns the number of **new** rows actually inserted.
        """
        if not records:
            return 0
        now = datetime.utcnow().isoformat()
        rows = [(ip, tn, t, src, now) for ip, tn, t, src in records]
        return self._bulk_insert("ioc_ips", rows)

    def lookup_ip(self, ip: str) -> Optional[Dict[str, Any]]:
        """Return the stored row for *ip* as a dict, or ``None``."""
        row = self.conn.execute(
            "SELECT * FROM ioc_ips WHERE ip = ?", (ip,)
        ).fetchone()
        return dict(row) if row else None

    def get_all_malicious_ips(self) -> Set[str]:
        """Return every stored malicious IP as a set for fast membership tests."""
        return {row[0] for row in self.conn.execute("SELECT ip FROM ioc_ips")}

    # ------------------------------------------------------------------
    # Domains
    # ------------------------------------------------------------------

    def add_domain(
        self,
        domain: str,
        threat_name: str = "",
        type: str = "C2",
        source: str = "",
    ) -> None:
        """Insert or replace a single domain indicator (stored lower-cased)."""
        self._upsert("ioc_domains", domain.lower(), threat_name, type, source)

    def bulk_add_domains(
        self,
        records: Sequence[Tuple[str, str, str, str]],
    ) -> int:
        """Bulk-insert domains.  Each tuple: ``(domain, threat_name, type, source)``.

        Returns the number of **new** rows actually inserted.
        """
        if not records:
            return 0
        now = datetime.utcnow().isoformat()
        rows = [(d.lower(), tn, t, src, now) for d, tn, t, src in records]
        return self._bulk_insert("ioc_domains", rows)

    def lookup_domain(self, domain: str) -> Optional[Dict[str, Any]]:
        """Return the stored row for *domain* (case-insensitive), or ``None``."""
        row = self.conn.execute(
            "SELECT * FROM ioc_domains WHERE domain = ?", (domain.lower(),)
        ).fetchone()
        return dict(row) if row else None

    def get_all_malicious_domains(self) -> Set[str]:
        """Return every stored malicious domain as a set."""
        return {row[0] for row in self.conn.execute("SELECT domain FROM ioc_domains")}

    # ------------------------------------------------------------------
    # URLs
    # ------------------------------------------------------------------

    def add_url(
        self,
        url: str,
        threat_name: str = "",
        type: str = "malware_distribution",
        source: str = "",
    ) -> None:
        """Insert or replace a single URL indicator."""
        self._upsert("ioc_urls", url, threat_name, type, source)

    def bulk_add_urls(
        self,
        records: Sequence[Tuple[str, str, str, str]],
    ) -> int:
        """Bulk-insert URLs.  Each tuple: ``(url, threat_name, type, source)``.

        Returns the number of **new** rows actually inserted.
        """
        if not records:
            return 0
        now = datetime.utcnow().isoformat()
        rows = [(u, tn, t, src, now) for u, tn, t, src in records]
        return self._bulk_insert("ioc_urls", rows)

    def lookup_url(self, url: str) -> Optional[Dict[str, Any]]:
        """Return the stored row for *url* as a dict, or ``None``."""
        row = self.conn.execute(
            "SELECT * FROM ioc_urls WHERE url = ?", (url,)
        ).fetchone()
        return dict(row) if row else None

    # ------------------------------------------------------------------
    # Aggregate stats
    # ------------------------------------------------------------------

    def get_stats(self) -> Dict[str, Any]:
        """Return the row count of each IOC table under ``ips``/``domains``/``urls``."""
        return {
            "ips": self._count("ioc_ips"),
            "domains": self._count("ioc_domains"),
            "urls": self._count("ioc_urls"),
        }
|
||||
92
ayn-antivirus/ayn_antivirus/signatures/feeds/base_feed.py
Normal file
92
ayn-antivirus/ayn_antivirus/signatures/feeds/base_feed.py
Normal file
@@ -0,0 +1,92 @@
|
||||
"""Abstract base class for AYN threat-intelligence feeds."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import time
|
||||
from abc import ABC, abstractmethod
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)


class BaseFeed(ABC):
    """Common interface for all external threat-intelligence feeds.

    Provides rate-limiting, last-updated tracking, and a uniform
    ``fetch()`` contract so the :class:`SignatureManager` can orchestrate
    updates without knowing feed internals.

    Parameters
    ----------
    rate_limit_seconds:
        Minimum interval between successive HTTP requests to the same feed.
    """

    def __init__(self, rate_limit_seconds: float = 2.0) -> None:
        self._rate_limit = rate_limit_seconds
        # "No request made yet".  time.monotonic() has an undefined reference
        # point, so 0.0 is not a safe sentinel: on a host whose monotonic
        # clock reads less than the rate limit, the very first request would
        # sleep.  -inf makes the first elapsed interval infinite instead.
        self._last_request_time: float = float("-inf")
        self._last_updated: Optional[datetime] = None

    # ------------------------------------------------------------------
    # Identity
    # ------------------------------------------------------------------

    @abstractmethod
    def get_name(self) -> str:
        """Return a short, human-readable feed name."""
        ...

    # ------------------------------------------------------------------
    # Fetching
    # ------------------------------------------------------------------

    @abstractmethod
    def fetch(self) -> List[Dict[str, Any]]:
        """Download the latest entries from the feed.

        Returns a list of dicts.  The exact keys depend on the feed type
        (hashes, IOCs, rules, etc.).  The :class:`SignatureManager` is
        responsible for routing each entry to the correct database.
        """
        ...

    # ------------------------------------------------------------------
    # State
    # ------------------------------------------------------------------

    @property
    def last_updated(self) -> Optional[datetime]:
        """Timestamp of the most recent successful fetch."""
        return self._last_updated

    def _mark_updated(self) -> None:
        """Record the current time as the last-successful-fetch timestamp.

        The timestamp is a naive ``datetime`` holding the current UTC time.
        """
        self._last_updated = datetime.utcnow()

    # ------------------------------------------------------------------
    # Rate limiting
    # ------------------------------------------------------------------

    def _rate_limit_wait(self) -> None:
        """Block until the rate-limit window has elapsed.

        Sleeps for whatever part of ``rate_limit_seconds`` has not yet
        passed since the previous call, then records the current time as
        the start of the next window.  The first call never sleeps.
        """
        elapsed = time.monotonic() - self._last_request_time
        remaining = self._rate_limit - elapsed
        if remaining > 0:
            logger.debug("[%s] Rate-limiting: sleeping %.1fs", self.get_name(), remaining)
            time.sleep(remaining)
        self._last_request_time = time.monotonic()

    # ------------------------------------------------------------------
    # Logging helpers
    # ------------------------------------------------------------------
    # Each helper prefixes the message with the feed name and forwards the
    # lazy %-style arguments to the module logger.

    def _log(self, msg: str, *args: Any) -> None:
        logger.info("[%s] " + msg, self.get_name(), *args)

    def _warn(self, msg: str, *args: Any) -> None:
        logger.warning("[%s] " + msg, self.get_name(), *args)

    def _error(self, msg: str, *args: Any) -> None:
        logger.error("[%s] " + msg, self.get_name(), *args)
|
||||
124
ayn-antivirus/ayn_antivirus/signatures/feeds/emergingthreats.py
Normal file
124
ayn-antivirus/ayn_antivirus/signatures/feeds/emergingthreats.py
Normal file
@@ -0,0 +1,124 @@
|
||||
"""Emerging Threats (ET Open) feed for AYN Antivirus.
|
||||
|
||||
Parses community Suricata / Snort rules from Proofpoint's ET Open project
|
||||
to extract IOCs (IP addresses and domains) referenced in active detection
|
||||
rules.
|
||||
|
||||
Source: https://rules.emergingthreats.net/open/suricata/rules/
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
from typing import Any, Dict, List, Set
|
||||
|
||||
import requests
|
||||
|
||||
from ayn_antivirus.signatures.feeds.base_feed import BaseFeed
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# We focus on the compromised-IP and C2 rule files.
|
||||
_RULE_URLS = [
|
||||
"https://rules.emergingthreats.net/open/suricata/rules/compromised-ips.txt",
|
||||
"https://rules.emergingthreats.net/open/suricata/rules/botcc.rules",
|
||||
"https://rules.emergingthreats.net/open/suricata/rules/ciarmy.rules",
|
||||
"https://rules.emergingthreats.net/open/suricata/rules/emerging-malware.rules",
|
||||
]
|
||||
_TIMEOUT = 30
|
||||
|
||||
# Regex patterns to extract IPs and domains from rule bodies.
|
||||
_RE_IPV4 = re.compile(r"\b(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\b")
|
||||
_RE_DOMAIN = re.compile(
|
||||
r'content:"([a-zA-Z0-9](?:[a-zA-Z0-9\-]{0,61}[a-zA-Z0-9])?'
|
||||
r'(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9\-]{0,61}[a-zA-Z0-9])?)*'
|
||||
r'\.[a-zA-Z]{2,})"'
|
||||
)
|
||||
|
||||
# Private / non-routable ranges to exclude from IP results.
|
||||
_PRIVATE_PREFIXES = (
|
||||
"10.", "127.", "172.16.", "172.17.", "172.18.", "172.19.",
|
||||
"172.20.", "172.21.", "172.22.", "172.23.", "172.24.", "172.25.",
|
||||
"172.26.", "172.27.", "172.28.", "172.29.", "172.30.", "172.31.",
|
||||
"192.168.", "0.", "255.", "224.",
|
||||
)
|
||||
|
||||
|
||||
class EmergingThreatsFeed(BaseFeed):
    """Parse ET Open rule files to extract malicious IPs and domains."""

    def get_name(self) -> str:
        return "emergingthreats"

    def fetch(self) -> List[Dict[str, Any]]:
        """Download and parse ET Open rules, returning IOC dicts.

        Each dict has: ``ioc_type`` (``"ip"`` or ``"domain"``), ``value``,
        ``threat_name``, ``type``, ``source``, ``details``.  IPs come first,
        then domains, each group sorted so the output order is stable.

        A file that fails to download is logged and skipped.  If *every*
        file fails, an empty list is returned and the last-updated
        timestamp is left untouched.
        """
        self._log("Downloading ET Open rule files")

        all_ips: Set[str] = set()
        all_domains: Set[str] = set()
        fetched_any = False

        for url in _RULE_URLS:
            self._rate_limit_wait()
            try:
                resp = requests.get(url, timeout=_TIMEOUT)
                resp.raise_for_status()
                text = resp.text
            except requests.RequestException as exc:
                self._warn("Failed to fetch %s: %s", url, exc)
                continue

            fetched_any = True
            all_ips |= self._extract_ips(text, plain_list=url.endswith(".txt"))
            all_domains |= self._extract_domains(text)

        if not fetched_any:
            # Nothing was downloaded, so this is not a successful fetch.
            self._error("All ET Open downloads failed")
            return []

        # Build result list.
        results: List[Dict[str, Any]] = [
            {
                "ioc_type": "ip",
                "value": ip,
                "threat_name": "ET.Compromised",
                "type": "C2",
                "source": "emergingthreats",
                "details": "IP from Emerging Threats ET Open rules",
            }
            for ip in sorted(all_ips)
        ]
        results.extend(
            {
                "ioc_type": "domain",
                "value": domain,
                "threat_name": "ET.MaliciousDomain",
                "type": "C2",
                "source": "emergingthreats",
                "details": "Domain extracted from ET Open Suricata rules",
            }
            for domain in sorted(all_domains)
        )

        self._log("Extracted %d IP(s) and %d domain(s)", len(all_ips), len(all_domains))
        self._mark_updated()
        return results

    @staticmethod
    def _is_public_ipv4(ip: str) -> bool:
        """Return ``True`` if *ip* has valid octets and no excluded prefix."""
        if ip.startswith(_PRIVATE_PREFIXES):
            return False
        # The extraction regex may accept three-digit octets above 255.
        return all(int(octet) <= 255 for octet in ip.split("."))

    @classmethod
    def _extract_ips(cls, text: str, plain_list: bool) -> Set[str]:
        """Collect acceptable IPv4 addresses from one downloaded file.

        With ``plain_list`` the text is read as one address per line
        (blank lines and ``#`` comments skipped, address anchored at the
        start of the line); otherwise every address anywhere in the text
        is considered.
        """
        if plain_list:
            candidates = []
            for raw_line in text.splitlines():
                line = raw_line.strip()
                if not line or line.startswith("#"):
                    continue
                match = _RE_IPV4.match(line)
                if match:
                    candidates.append(match.group(1))
        else:
            candidates = [m.group(1) for m in _RE_IPV4.finditer(text)]
        return {ip for ip in candidates if cls._is_public_ipv4(ip)}

    @staticmethod
    def _extract_domains(text: str) -> Set[str]:
        """Collect lower-cased hostnames from ``content:"..."`` matches."""
        domains = (m.group(1).lower() for m in _RE_DOMAIN.finditer(text))
        # Filter out very short or generic patterns.
        return {d for d in domains if "." in d and len(d) > 4}
|
||||
73
ayn-antivirus/ayn_antivirus/signatures/feeds/feodotracker.py
Normal file
73
ayn-antivirus/ayn_antivirus/signatures/feeds/feodotracker.py
Normal file
@@ -0,0 +1,73 @@
|
||||
"""Feodo Tracker feed for AYN Antivirus.
|
||||
|
||||
Downloads the recommended IP blocklist from the abuse.ch Feodo Tracker
|
||||
project. The list contains IP addresses of verified botnet C2 servers
|
||||
(Dridex, Emotet, TrickBot, QakBot, etc.).
|
||||
|
||||
Source: https://feodotracker.abuse.ch/blocklist/
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import Any, Dict, List
|
||||
|
||||
import requests
|
||||
|
||||
from ayn_antivirus.signatures.feeds.base_feed import BaseFeed
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_BLOCKLIST_URL = "https://feodotracker.abuse.ch/downloads/ipblocklist_aggressive.txt"
|
||||
_TIMEOUT = 30
|
||||
|
||||
|
||||
class FeodoTrackerFeed(BaseFeed):
    """Fetch C2 server IPs from the Feodo Tracker blocklist."""

    def get_name(self) -> str:
        return "feodotracker"

    def fetch(self) -> List[Dict[str, Any]]:
        """Download the IP blocklist at ``_BLOCKLIST_URL``.

        Returns a list of dicts, each with:
        ``ioc_type="ip"``, ``value``, ``threat_name``, ``type``, ``source``,
        ``details``.  Blank lines, ``#`` comments and anything that is not a
        plain dotted-quad IPv4 address are skipped.  A failed download is
        logged and yields an empty list.
        """
        self._rate_limit_wait()
        self._log("Downloading Feodo Tracker IP blocklist")

        try:
            resp = requests.get(_BLOCKLIST_URL, timeout=_TIMEOUT)
            resp.raise_for_status()
        except requests.RequestException as exc:
            self._error("Download failed: %s", exc)
            return []

        results: List[Dict[str, Any]] = [
            {
                "ioc_type": "ip",
                "value": address,
                "threat_name": "Botnet.C2.Feodo",
                "type": "C2",
                "source": "feodotracker",
                "details": "Verified botnet C2 IP from Feodo Tracker",
            }
            for address in (line.strip() for line in resp.text.splitlines())
            if self._is_ipv4(address)
        ]

        self._log("Fetched %d C2 IP(s)", len(results))
        self._mark_updated()
        return results

    @staticmethod
    def _is_ipv4(candidate: str) -> bool:
        """Return ``True`` if *candidate* is a strict dotted-quad IPv4 address.

        Each of the four parts must be one to three ASCII digits with a
        value of at most 255.  ``int()`` alone is too lenient here: it also
        accepts signs, surrounding whitespace, underscores and non-ASCII
        digits, which would let malformed lines through as "addresses".
        Empty strings and ``#`` comment lines fail this check as well.
        """
        parts = candidate.split(".")
        if len(parts) != 4:
            return False
        return all(
            1 <= len(part) <= 3
            and part.isascii()
            and part.isdigit()
            and int(part) <= 255
            for part in parts
        )
|
||||
174
ayn-antivirus/ayn_antivirus/signatures/feeds/malwarebazaar.py
Normal file
174
ayn-antivirus/ayn_antivirus/signatures/feeds/malwarebazaar.py
Normal file
@@ -0,0 +1,174 @@
|
||||
"""MalwareBazaar feed for AYN Antivirus.
|
||||
|
||||
Fetches recent malware sample hashes from the abuse.ch MalwareBazaar
|
||||
CSV export (free, no API key required).
|
||||
|
||||
CSV export: https://bazaar.abuse.ch/export/
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
import io
|
||||
import logging
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import requests
|
||||
|
||||
from ayn_antivirus.signatures.feeds.base_feed import BaseFeed
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_CSV_RECENT_URL = "https://bazaar.abuse.ch/export/csv/recent/"
|
||||
_CSV_FULL_URL = "https://bazaar.abuse.ch/export/csv/full/"
|
||||
_API_URL = "https://mb-api.abuse.ch/api/v1/"
|
||||
_TIMEOUT = 60
|
||||
|
||||
|
||||
class MalwareBazaarFeed(BaseFeed):
|
||||
"""Fetch malware SHA-256 hashes from MalwareBazaar.
|
||||
|
||||
Uses the free CSV export by default. Falls back to JSON API
|
||||
if an api_key is provided.
|
||||
"""
|
||||
|
||||
def __init__(self, api_key: Optional[str] = None, **kwargs: Any) -> None:
|
||||
super().__init__(**kwargs)
|
||||
self.api_key = api_key
|
||||
|
||||
def get_name(self) -> str:
|
||||
return "malwarebazaar"
|
||||
|
||||
def fetch(self) -> List[Dict[str, Any]]:
|
||||
"""Fetch recent malware hashes from CSV export."""
|
||||
return self._fetch_csv(_CSV_RECENT_URL)
|
||||
|
||||
def fetch_recent(self, hours: int = 24) -> List[Dict[str, Any]]:
|
||||
"""Fetch recent samples. CSV export returns last ~1000 samples."""
|
||||
return self._fetch_csv(_CSV_RECENT_URL)
|
||||
|
||||
def _fetch_csv(self, url: str) -> List[Dict[str, Any]]:
|
||||
"""Download and parse the MalwareBazaar CSV export."""
|
||||
self._rate_limit_wait()
|
||||
self._log("Fetching hashes from %s", url)
|
||||
|
||||
try:
|
||||
resp = requests.get(url, timeout=_TIMEOUT)
|
||||
resp.raise_for_status()
|
||||
except requests.RequestException as exc:
|
||||
self._error("CSV download failed: %s", exc)
|
||||
return []
|
||||
|
||||
results: List[Dict[str, Any]] = []
|
||||
lines = [
|
||||
line for line in resp.text.splitlines()
|
||||
if line.strip() and not line.startswith("#")
|
||||
]
|
||||
|
||||
reader = csv.reader(io.StringIO("\n".join(lines)))
|
||||
for row in reader:
|
||||
if len(row) < 8:
|
||||
continue
|
||||
# CSV columns:
|
||||
# 0: first_seen, 1: sha256, 2: md5, 3: sha1,
|
||||
# 4: reporter, 5: filename, 6: file_type, 7: mime_type,
|
||||
# 8+: signature, ...
|
||||
sha256 = row[1].strip().strip('"')
|
||||
if not sha256 or len(sha256) != 64:
|
||||
continue
|
||||
|
||||
filename = row[5].strip().strip('"') if len(row) > 5 else ""
|
||||
file_type = row[6].strip().strip('"') if len(row) > 6 else ""
|
||||
signature = row[8].strip().strip('"') if len(row) > 8 else ""
|
||||
reporter = row[4].strip().strip('"') if len(row) > 4 else ""
|
||||
|
||||
threat_name = (
|
||||
signature
|
||||
if signature and signature not in ("null", "n/a", "None", "")
|
||||
else f"Malware.{_map_type_name(file_type)}"
|
||||
)
|
||||
|
||||
results.append({
|
||||
"hash": sha256.lower(),
|
||||
"threat_name": threat_name,
|
||||
"threat_type": _map_type(file_type),
|
||||
"severity": "HIGH",
|
||||
"source": "malwarebazaar",
|
||||
"details": (
|
||||
f"file={filename}, type={file_type}, reporter={reporter}"
|
||||
),
|
||||
})
|
||||
|
||||
self._log("Parsed %d hash signature(s) from CSV", len(results))
|
||||
self._mark_updated()
|
||||
return results
|
||||
|
||||
def fetch_by_tag(self, tag: str) -> List[Dict[str, Any]]:
    """Fetch samples carrying *tag* from the MalwareBazaar API.

    Parameters
    ----------
    tag:
        Tag to query for.

    Returns
    -------
    list of dict
        Hash records (``hash``, ``threat_name``, ``threat_type``,
        ``severity``, ``source``, ``details``).  An empty list is returned
        when no API key is configured, when the request or JSON decoding
        fails, or when the API does not answer with ``query_status == "ok"``.
    """
    if not self.api_key:
        self._warn("fetch_by_tag requires API key")
        return []

    self._rate_limit_wait()
    payload = {
        "query": "get_taginfo",
        "tag": tag,
        "limit": 100,
        "api_key": self.api_key,
    }

    try:
        resp = requests.post(_API_URL, data=payload, timeout=_TIMEOUT)
        resp.raise_for_status()
        data = resp.json()
    except (requests.RequestException, ValueError) as exc:
        # ValueError covers a body that is not valid JSON.
        self._error("API request failed: %s", exc)
        return []

    if not isinstance(data, dict) or data.get("query_status") != "ok":
        return []

    results: List[Dict[str, Any]] = []
    # "data" may be present but null; treat that like an empty result set.
    for entry in data.get("data") or []:
        sha256 = entry.get("sha256_hash") or ""
        if not sha256:
            continue
        # A JSON null file type must not reach the str-based mappers.
        file_type = entry.get("file_type") or ""
        results.append({
            "hash": sha256.lower(),
            "threat_name": entry.get("signature") or f"Malware.{tag}",
            "threat_type": _map_type(file_type),
            "severity": "HIGH",
            "source": "malwarebazaar",
            "details": f"tag={tag}, file_type={file_type}",
        })

    self._mark_updated()
    return results
|
||||
|
||||
|
||||
def _map_type(file_type: str) -> str:
|
||||
ft = file_type.lower()
|
||||
if any(x in ft for x in ("exe", "dll", "elf", "pe32")):
|
||||
return "MALWARE"
|
||||
if any(x in ft for x in ("doc", "xls", "pdf", "rtf")):
|
||||
return "MALWARE"
|
||||
if any(x in ft for x in ("script", "js", "vbs", "ps1", "bat", "sh")):
|
||||
return "MALWARE"
|
||||
return "MALWARE"
|
||||
|
||||
|
||||
def _map_type_name(file_type: str) -> str:
|
||||
"""Map file type to a readable threat name suffix."""
|
||||
ft = file_type.lower().strip()
|
||||
m = {
|
||||
"exe": "Win32.Executable", "dll": "Win32.DLL", "msi": "Win32.Installer",
|
||||
"elf": "Linux.ELF", "so": "Linux.SharedLib",
|
||||
"doc": "Office.Document", "docx": "Office.Document",
|
||||
"xls": "Office.Spreadsheet", "xlsx": "Office.Spreadsheet",
|
||||
"pdf": "PDF.Document", "rtf": "Office.RTF",
|
||||
"js": "Script.JavaScript", "vbs": "Script.VBScript",
|
||||
"ps1": "Script.PowerShell", "bat": "Script.Batch",
|
||||
"sh": "Script.Shell", "py": "Script.Python",
|
||||
"apk": "Android.APK", "ipa": "iOS.IPA",
|
||||
"app": "macOS.App", "pkg": "macOS.Pkg", "dmg": "macOS.DMG",
|
||||
"rar": "Archive.RAR", "zip": "Archive.ZIP",
|
||||
"7z": "Archive.7Z", "tar": "Archive.TAR", "gz": "Archive.GZ",
|
||||
"iso": "DiskImage.ISO", "img": "DiskImage.IMG",
|
||||
}
|
||||
return m.get(ft, "Generic")
|
||||
117
ayn-antivirus/ayn_antivirus/signatures/feeds/threatfox.py
Normal file
117
ayn-antivirus/ayn_antivirus/signatures/feeds/threatfox.py
Normal file
@@ -0,0 +1,117 @@
|
||||
"""ThreatFox feed for AYN Antivirus.
|
||||
|
||||
Fetches IOCs (IPs, domains, URLs, hashes) from the abuse.ch ThreatFox
|
||||
CSV export (free, no API key required).
|
||||
|
||||
CSV export: https://threatfox.abuse.ch/export/
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
import io
|
||||
import logging
|
||||
from typing import Any, Dict, List
|
||||
|
||||
import requests
|
||||
|
||||
from ayn_antivirus.signatures.feeds.base_feed import BaseFeed
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_CSV_RECENT_URL = "https://threatfox.abuse.ch/export/csv/recent/"
|
||||
_CSV_FULL_URL = "https://threatfox.abuse.ch/export/csv/full/"
|
||||
_TIMEOUT = 60
|
||||
|
||||
|
||||
class ThreatFoxFeed(BaseFeed):
    """Fetch IOCs from ThreatFox CSV export."""

    def get_name(self) -> str:
        """Return the feed identifier."""
        return "threatfox"

    def fetch(self) -> List[Dict[str, Any]]:
        """Fetch the feed's default data set (the recent CSV export)."""
        return self.fetch_recent()

    def fetch_recent(self, days: int = 7) -> List[Dict[str, Any]]:
        """Fetch recent IOCs from the CSV export.

        Parameters
        ----------
        days:
            Accepted for interface compatibility; the export is downloaded
            as published and this value is not used.

        Returns
        -------
        list of dict
            Hash IOCs are returned as hash records (``hash``, ``threat_name``,
            ``threat_type``, ``severity``, ``source``, ``details``); all other
            IOCs as ``ioc_type`` / ``value`` / ``threat_name`` / ``type`` /
            ``source`` / ``confidence`` records.  An empty list is returned
            when the download fails (the error is logged).
        """
        self._rate_limit_wait()
        self._log("Fetching IOCs from CSV export")

        try:
            resp = requests.get(_CSV_RECENT_URL, timeout=_TIMEOUT)
            resp.raise_for_status()
        except requests.RequestException as exc:
            self._error("CSV download failed: %s", exc)
            return []

        # Blank lines and "#" comment/header lines carry no IOC data.
        lines = [
            line for line in resp.text.splitlines()
            if line.strip() and not line.lstrip().startswith("#")
        ]

        # Cells appear to arrive as ` "value"` (hence the quote stripping
        # below).  skipinitialspace=True makes the csv module treat them as
        # quoted, so a comma inside an earlier cell no longer shifts the
        # columns read after it.
        reader = csv.reader(lines, skipinitialspace=True)

        results: List[Dict[str, Any]] = []
        for row in reader:
            # CSV: 0:first_seen, 1:ioc_id, 2:ioc_value, 3:ioc_type,
            #      4:threat_type, 5:malware, 6:malware_alias,
            #      7:malware_printable, 8:last_seen, 9:confidence,
            #      10:reference, 11:tags, 12:reporter
            if len(row) < 6:
                continue

            cells = [cell.strip().strip('"') for cell in row]

            ioc_value = cells[2]
            if not ioc_value:
                continue

            ioc_type_raw = cells[3].lower()
            threat_type = cells[4]
            malware = cells[5]
            malware_printable = cells[7] if len(cells) > 7 else ""
            confidence = cells[9] if len(cells) > 9 else "0"

            ioc_type = _classify_ioc(ioc_type_raw, ioc_value)
            threat_name = malware_printable or malware or "Unknown"

            if ioc_type == "hash":
                # Hash IOCs use the hash-record shape.
                results.append({
                    "hash": ioc_value.lower(),
                    "threat_name": threat_name,
                    "threat_type": "MALWARE",
                    "severity": "HIGH",
                    "source": "threatfox",
                    "details": f"threat={threat_type}, confidence={confidence}",
                })
                continue

            clean_value = ioc_value
            if ioc_type == "ip" and ":" in ioc_value:
                # Drop the ":port" suffix so only the address is stored.
                clean_value = ioc_value.rsplit(":", 1)[0]

            results.append({
                "ioc_type": ioc_type,
                "value": clean_value,
                "threat_name": threat_name,
                "type": threat_type or "C2",
                "source": "threatfox",
                "confidence": int(confidence) if confidence.isdigit() else 0,
            })

        self._log("Fetched %d IOC(s)", len(results))
        self._mark_updated()
        return results
|
||||
|
||||
|
||||
def _classify_ioc(raw_type: str, value: str) -> str:
|
||||
if "ip" in raw_type:
|
||||
return "ip"
|
||||
if "domain" in raw_type:
|
||||
return "domain"
|
||||
if "url" in raw_type:
|
||||
return "url"
|
||||
if "hash" in raw_type or "sha256" in raw_type or "md5" in raw_type:
|
||||
return "hash"
|
||||
if value.startswith("http://") or value.startswith("https://"):
|
||||
return "url"
|
||||
if len(value) == 64 and all(c in "0123456789abcdef" for c in value.lower()):
|
||||
return "hash"
|
||||
if ":" in value and value.replace(".", "").replace(":", "").isdigit():
|
||||
return "ip"
|
||||
return "domain"
|
||||
131
ayn-antivirus/ayn_antivirus/signatures/feeds/urlhaus.py
Normal file
131
ayn-antivirus/ayn_antivirus/signatures/feeds/urlhaus.py
Normal file
@@ -0,0 +1,131 @@
|
||||
"""URLhaus feed for AYN Antivirus.
|
||||
|
||||
Fetches malicious URLs and payload hashes from the abuse.ch URLhaus
|
||||
CSV/text exports (free, no API key required).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
import io
|
||||
import logging
|
||||
from typing import Any, Dict, List
|
||||
|
||||
import requests
|
||||
|
||||
from ayn_antivirus.signatures.feeds.base_feed import BaseFeed
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_CSV_RECENT_URL = "https://urlhaus.abuse.ch/downloads/csv_recent/"
|
||||
_TEXT_ONLINE_URL = "https://urlhaus.abuse.ch/downloads/text_online/"
|
||||
_PAYLOAD_RECENT_URL = "https://urlhaus.abuse.ch/downloads/payloads_recent/"
|
||||
_TIMEOUT = 60
|
||||
|
||||
|
||||
class URLHausFeed(BaseFeed):
    """Fetch malware URLs and payload hashes from URLhaus."""

    def get_name(self) -> str:
        """Return the feed identifier."""
        return "urlhaus"

    def fetch(self) -> List[Dict[str, Any]]:
        """Fetch recent malicious URLs followed by recent payload hashes."""
        results = self.fetch_recent()
        results.extend(self.fetch_payloads())
        return results

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------

    def _download(self, url: str, failure_msg: str) -> "str | None":
        """GET *url* and return the body text, or ``None`` on failure.

        Failures are reported through ``self._error`` using *failure_msg*,
        which must contain one ``%s`` placeholder for the exception.
        """
        try:
            resp = requests.get(url, timeout=_TIMEOUT)
            resp.raise_for_status()
        except requests.RequestException as exc:
            self._error(failure_msg, exc)
            return None
        return resp.text

    @staticmethod
    def _csv_rows(text: str):
        """Return a CSV reader over *text*, skipping blank and ``#`` lines."""
        lines = [
            line for line in text.splitlines()
            if line.strip() and not line.startswith("#")
        ]
        return csv.reader(io.StringIO("\n".join(lines)))

    # ------------------------------------------------------------------
    # Public fetchers
    # ------------------------------------------------------------------

    def fetch_recent(self) -> List[Dict[str, Any]]:
        """Fetch recent malicious URLs from the CSV export.

        Returns URL IOC records (``ioc_type``, ``value``, ``threat_name``,
        ``type``, ``source``), or an empty list when the download fails.
        """
        self._rate_limit_wait()
        self._log("Fetching recent URLs from CSV export")

        text = self._download(_CSV_RECENT_URL, "CSV download failed: %s")
        if text is None:
            return []

        results: List[Dict[str, Any]] = []
        for row in self._csv_rows(text):
            if len(row) < 4:
                continue
            # 0:id, 1:dateadded, 2:url, 3:url_status, 4:threat, 5:tags,
            # 6:urlhaus_link, 7:reporter
            # NOTE(review): indices follow the layout listed above; confirm
            # it against the header line of the current export.
            url = row[2].strip().strip('"')
            if not url or not url.startswith("http"):
                continue
            threat = row[4].strip().strip('"') if len(row) > 4 else ""
            results.append({
                "ioc_type": "url",
                "value": url,
                "threat_name": threat if threat and threat != "None" else "Malware.Distribution",
                "type": "malware_distribution",
                "source": "urlhaus",
            })

        self._log("Fetched %d URL(s)", len(results))
        self._mark_updated()
        return results

    def fetch_payloads(self) -> List[Dict[str, Any]]:
        """Fetch recent payload hashes (SHA256) from URLhaus.

        Returns hash records (``hash``, ``threat_name``, ``threat_type``,
        ``severity``, ``source``, ``details``), or an empty list when the
        download fails.
        """
        self._rate_limit_wait()
        self._log("Fetching payload hashes")

        text = self._download(_PAYLOAD_RECENT_URL, "Payload download failed: %s")
        if text is None:
            return []

        results: List[Dict[str, Any]] = []
        for row in self._csv_rows(text):
            # 0:first_seen, 1:url, 2:file_type, 3:md5, 4:sha256, 5:signature
            # Only the SHA256 column (index 4) is mandatory.  Requiring seven
            # columns would discard every row of this six-column layout.
            if len(row) < 5:
                continue
            sha256 = row[4].strip().strip('"')
            if not sha256 or len(sha256) != 64:
                continue
            sig = row[5].strip().strip('"') if len(row) > 5 else ""
            results.append({
                "hash": sha256.lower(),
                "threat_name": sig if sig and sig != "None" else "Malware.URLhaus.Payload",
                "threat_type": "MALWARE",
                "severity": "HIGH",
                "source": "urlhaus",
                "details": f"file_type={row[2].strip()}",
            })

        self._log("Fetched %d payload hash(es)", len(results))
        return results

    def fetch_active(self) -> List[Dict[str, Any]]:
        """Fetch currently-active malware URLs.

        The export is plain text with one URL per line; blank lines and
        ``#`` comment lines are skipped.  Returns URL IOC records, or an
        empty list when the download fails.
        """
        self._rate_limit_wait()

        text = self._download(_TEXT_ONLINE_URL, "Download failed: %s")
        if text is None:
            return []

        results: List[Dict[str, Any]] = []
        for line in text.splitlines():
            line = line.strip()
            if not line or line.startswith("#"):
                continue
            results.append({
                "ioc_type": "url",
                "value": line,
                "threat_name": "Malware.Distribution.Active",
                "type": "malware_distribution",
                "source": "urlhaus",
            })

        self._log("Fetched %d active URL(s)", len(results))
        self._mark_updated()
        return results
|
||||
114
ayn-antivirus/ayn_antivirus/signatures/feeds/virusshare.py
Normal file
114
ayn-antivirus/ayn_antivirus/signatures/feeds/virusshare.py
Normal file
@@ -0,0 +1,114 @@
|
||||
"""VirusShare feed for AYN Antivirus.
|
||||
|
||||
Downloads MD5 hash lists from VirusShare.com — one of the largest
|
||||
free malware hash databases. Each list contains 65,536 MD5 hashes
|
||||
of known malware samples (.exe, .dll, .rar, .doc, .pdf, .app, etc).
|
||||
|
||||
https://virusshare.com/hashes
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import requests
|
||||
|
||||
from ayn_antivirus.signatures.feeds.base_feed import BaseFeed
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_BASE_URL = "https://virusshare.com/hashfiles/VirusShare_{:05d}.md5"
|
||||
_TIMEOUT = 30
|
||||
_STATE_FILE = "/var/lib/ayn-antivirus/.virusshare_last"
|
||||
|
||||
|
||||
class VirusShareFeed(BaseFeed):
    """Fetch malware MD5 hashes from VirusShare.

    Tracks the last downloaded list number so incremental updates
    only fetch new lists.
    """

    def __init__(self, **kwargs: Any) -> None:
        super().__init__(**kwargs)
        # Number of the last list that was downloaded successfully.
        self._last_list = self._load_state()

    def get_name(self) -> str:
        """Return the feed identifier."""
        return "virusshare"

    def fetch(self) -> List[Dict[str, Any]]:
        """Fetch new hash lists since last update."""
        return self.fetch_new_lists(max_lists=3)

    def fetch_new_lists(self, max_lists: int = 3) -> List[Dict[str, Any]]:
        """Download up to *max_lists* new VirusShare hash files.

        Lists are fetched in ascending order starting after the last list
        recorded in the state.  Fetching stops at the first list that does
        not exist yet (HTTP 404) or fails to download.  After each successful
        list the state is advanced and persisted.

        Returns hash records (``hash``, ``threat_name``, ``threat_type``,
        ``severity``, ``source``, ``details``) for every MD5 found.
        """
        results: List[Dict[str, Any]] = []
        start = self._last_list + 1
        fetched = 0

        for i in range(start, start + max_lists):
            self._rate_limit_wait()
            url = _BASE_URL.format(i)
            self._log("Fetching VirusShare_%05d", i)

            try:
                resp = requests.get(url, timeout=_TIMEOUT)
                if resp.status_code == 404:
                    self._log("VirusShare_%05d not found — at latest", i)
                    break
                resp.raise_for_status()
            except requests.RequestException as exc:
                self._error("Failed to fetch list %d: %s", i, exc)
                break

            # Keep only lines that look like an MD5 digest (32 hex chars);
            # this also drops blank lines and "#" header lines.
            hashes = [
                digest
                for digest in (line.strip().lower() for line in resp.text.splitlines())
                if len(digest) == 32
                and all(ch in "0123456789abcdef" for ch in digest)
            ]

            results.extend(
                {
                    "hash": digest,
                    "threat_name": "Malware.VirusShare",
                    "threat_type": "MALWARE",
                    "severity": "HIGH",
                    "source": "virusshare",
                    "details": f"md5,list={i:05d}",
                }
                for digest in hashes
            )

            self._last_list = i
            self._save_state(i)
            fetched += 1
            self._log("VirusShare_%05d: %d hashes", i, len(hashes))

        self._log("Fetched %d list(s), %d total hashes", fetched, len(results))
        if results:
            self._mark_updated()
        return results

    def fetch_initial(self, start_list: int = 470, count: int = 11) -> List[Dict[str, Any]]:
        """Bulk download for initial setup.

        Temporarily rewinds the state so that *count* lists starting at
        *start_list* are fetched.  If nothing could be downloaded the
        previous in-memory position is restored.
        """
        previous = self._last_list
        self._last_list = start_list - 1
        results = self.fetch_new_lists(max_lists=count)
        if not results:
            self._last_list = previous
        return results

    @staticmethod
    def _load_state() -> int:
        """Return the last downloaded list number from the state file.

        Falls back to 480 when the file is missing, unreadable or does not
        contain an integer.
        """
        try:
            return int(Path(_STATE_FILE).read_text().strip())
        except (OSError, ValueError):
            return 480  # Default: start after list 480

    @staticmethod
    def _save_state(n: int) -> None:
        """Persist list number *n* to the state file (best effort).

        The parent directory is created when missing.  A failure to write is
        logged as a warning and otherwise ignored, so an update never fails
        just because the state could not be saved.
        """
        state_path = Path(_STATE_FILE)
        try:
            state_path.parent.mkdir(parents=True, exist_ok=True)
            state_path.write_text(str(n))
        except OSError as exc:
            logger.warning(
                "Could not persist VirusShare state to %s: %s", state_path, exc
            )
|
||||
320
ayn-antivirus/ayn_antivirus/signatures/manager.py
Normal file
320
ayn-antivirus/ayn_antivirus/signatures/manager.py
Normal file
@@ -0,0 +1,320 @@
|
||||
"""Signature manager for AYN Antivirus.
|
||||
|
||||
Orchestrates all threat-intelligence feeds, routes fetched entries into the
|
||||
correct database (hash DB or IOC DB), and exposes high-level update /
|
||||
status / integrity operations for the CLI and scheduler.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import sqlite3
|
||||
import threading
|
||||
import time
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from ayn_antivirus.config import Config
|
||||
from ayn_antivirus.constants import DEFAULT_DB_PATH
|
||||
from ayn_antivirus.core.event_bus import EventType, event_bus
|
||||
from ayn_antivirus.signatures.db.hash_db import HashDatabase
|
||||
from ayn_antivirus.signatures.db.ioc_db import IOCDatabase
|
||||
from ayn_antivirus.signatures.feeds.base_feed import BaseFeed
|
||||
from ayn_antivirus.signatures.feeds.emergingthreats import EmergingThreatsFeed
|
||||
from ayn_antivirus.signatures.feeds.feodotracker import FeodoTrackerFeed
|
||||
from ayn_antivirus.signatures.feeds.malwarebazaar import MalwareBazaarFeed
|
||||
from ayn_antivirus.signatures.feeds.threatfox import ThreatFoxFeed
|
||||
from ayn_antivirus.signatures.feeds.urlhaus import URLHausFeed
|
||||
from ayn_antivirus.signatures.feeds.virusshare import VirusShareFeed
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class SignatureManager:
    """Central coordinator for signature / IOC updates.

    Parameters
    ----------
    config:
        Application configuration.
    db_path:
        Override the database path from config.
    """

    def __init__(
        self,
        config: Config,
        db_path: Optional[str | Path] = None,
    ) -> None:
        self.config = config
        self._db_path = Path(db_path or config.db_path)

        # Databases (both are opened on the same path).
        self.hash_db = HashDatabase(self._db_path)
        self.ioc_db = IOCDatabase(self._db_path)

        # Feed registry, populated right away by _init_feeds().
        self._feeds: Dict[str, BaseFeed] = {}
        self._init_feeds()

        # Auto-update thread handle and its stop signal.
        self._auto_update_stop = threading.Event()
        self._auto_update_thread: Optional[threading.Thread] = None

    # ------------------------------------------------------------------
    # Feed registry
    # ------------------------------------------------------------------

    def _init_feeds(self) -> None:
        """Register the built-in feeds."""
        api_keys = self.config.api_keys

        self._feeds["malwarebazaar"] = MalwareBazaarFeed(
            api_key=api_keys.get("malwarebazaar"),
        )
        self._feeds["threatfox"] = ThreatFoxFeed()
        self._feeds["urlhaus"] = URLHausFeed()
        self._feeds["feodotracker"] = FeodoTrackerFeed()
        self._feeds["emergingthreats"] = EmergingThreatsFeed()
        self._feeds["virusshare"] = VirusShareFeed()

    @property
    def feed_names(self) -> List[str]:
        """Names of all registered feeds, in registration order."""
        return list(self._feeds.keys())

    # ------------------------------------------------------------------
    # Update operations
    # ------------------------------------------------------------------

    def update_all(self) -> Dict[str, Any]:
        """Fetch from every registered feed and store results.

        A failing feed is logged and recorded in the summary; it does not
        stop the remaining feeds.

        Returns a summary dict with per-feed statistics (``feeds``), the
        number of newly inserted entries (``total_new``) and the names of
        the feeds that failed (``errors``).
        """
        self.hash_db.initialize()
        self.ioc_db.initialize()

        summary: Dict[str, Any] = {"feeds": {}, "total_new": 0, "errors": []}

        for name, feed in self._feeds.items():
            try:
                stats = self._update_single(name, feed)
            except Exception as exc:
                logger.exception("Feed '%s' failed", name)
                summary["feeds"][name] = {"error": str(exc)}
                summary["errors"].append(name)
            else:
                summary["feeds"][name] = stats
                summary["total_new"] += stats.get("inserted", 0)

        event_bus.publish(EventType.SIGNATURE_UPDATED, {
            "source": "manager",
            "feeds_updated": len(summary["feeds"]) - len(summary["errors"]),
            "total_new": summary["total_new"],
        })

        logger.info(
            "Signature update complete: %d feed(s), %d new entries, %d error(s)",
            len(self._feeds),
            summary["total_new"],
            len(summary["errors"]),
        )
        return summary

    def update_feed(self, feed_name: str) -> Dict[str, Any]:
        """Update a single feed by name.

        Raises ``KeyError`` if *feed_name* is not registered.
        """
        if feed_name not in self._feeds:
            raise KeyError(f"Unknown feed: {feed_name!r} (available: {self.feed_names})")

        self.hash_db.initialize()
        self.ioc_db.initialize()

        stats = self._update_single(feed_name, self._feeds[feed_name])

        event_bus.publish(EventType.SIGNATURE_UPDATED, {
            "source": "manager",
            "feed": feed_name,
            "inserted": stats.get("inserted", 0),
        })

        return stats

    def _update_single(self, name: str, feed: BaseFeed) -> Dict[str, Any]:
        """Fetch from one feed and route entries to the right DB.

        Entries without an ``ioc_type`` key are treated as file hashes;
        entries typed ``ip``, ``domain`` or ``url`` go to the IOC database.
        Entries with an empty hash/value or an unknown ``ioc_type`` are
        skipped (and counted in a debug log message).

        Returns per-feed statistics: ``feed``, ``fetched``, ``inserted``,
        ``hashes``, ``ips``, ``domains`` and ``urls``.
        """
        logger.info("Updating feed: %s", name)
        entries = feed.fetch()

        # Batches per destination, filled below and inserted in bulk.
        hash_rows: List[tuple] = []
        ip_rows: List[tuple] = []
        domain_rows: List[tuple] = []
        url_rows: List[tuple] = []

        # ioc_type -> (target batch, default value for the "type" column)
        ioc_batches = {
            "ip": (ip_rows, "C2"),
            "domain": (domain_rows, "C2"),
            "url": (url_rows, "malware_distribution"),
        }
        skipped = 0

        for entry in entries:
            ioc_type = entry.get("ioc_type")

            if ioc_type is None:
                # Hash-based entry.
                digest = entry.get("hash", "")
                if not digest:
                    skipped += 1
                    continue
                hash_rows.append((
                    digest,
                    entry.get("threat_name", ""),
                    entry.get("threat_type", "MALWARE"),
                    entry.get("severity", "HIGH"),
                    entry.get("source", name),
                    entry.get("details", ""),
                ))
                continue

            batch = ioc_batches.get(ioc_type)
            value = entry.get("value", "")
            if batch is None or not value:
                skipped += 1
                continue

            rows, default_type = batch
            rows.append((
                value,
                entry.get("threat_name", ""),
                entry.get("type", default_type),
                entry.get("source", name),
            ))

        if skipped:
            logger.debug("Feed '%s': skipped %d unusable entries", name, skipped)

        # Only touch a table when there is something to insert.
        hashes_added = self.hash_db.bulk_add(hash_rows) if hash_rows else 0
        ips_added = self.ioc_db.bulk_add_ips(ip_rows) if ip_rows else 0
        domains_added = self.ioc_db.bulk_add_domains(domain_rows) if domain_rows else 0
        urls_added = self.ioc_db.bulk_add_urls(url_rows) if url_rows else 0

        total = hashes_added + ips_added + domains_added + urls_added

        # Persist last-update timestamp.
        self.hash_db.set_meta(f"feed_{name}_updated", datetime.utcnow().isoformat())

        logger.info(
            "Feed '%s': %d hashes, %d IPs, %d domains, %d URLs",
            name, hashes_added, ips_added, domains_added, urls_added,
        )

        return {
            "feed": name,
            "fetched": len(entries),
            "inserted": total,
            "hashes": hashes_added,
            "ips": ips_added,
            "domains": domains_added,
            "urls": urls_added,
        }

    # ------------------------------------------------------------------
    # Status
    # ------------------------------------------------------------------

    def get_status(self) -> Dict[str, Any]:
        """Return per-feed last-update times and aggregate stats."""
        self.hash_db.initialize()
        self.ioc_db.initialize()

        feed_status: Dict[str, Any] = {
            name: {"last_updated": self.hash_db.get_meta(f"feed_{name}_updated")}
            for name in self._feeds
        }

        return {
            "db_path": str(self._db_path),
            "hash_count": self.hash_db.count(),
            "hash_stats": self.hash_db.get_stats(),
            "ioc_stats": self.ioc_db.get_stats(),
            "feeds": feed_status,
        }

    # ------------------------------------------------------------------
    # Auto-update
    # ------------------------------------------------------------------

    def auto_update(self, interval_hours: int = 6) -> None:
        """Start a background thread that periodically calls :meth:`update_all`.

        The first update runs immediately; subsequent ones follow every
        *interval_hours*.  Calling this while the thread is already running
        only logs a warning.

        Call :meth:`stop_auto_update` to stop the thread.

        Raises ``ValueError`` if *interval_hours* is not positive, because
        the loop would otherwise run updates back to back without pause.
        """
        if interval_hours <= 0:
            raise ValueError(
                f"interval_hours must be positive, got {interval_hours!r}"
            )

        if self._auto_update_thread and self._auto_update_thread.is_alive():
            logger.warning("Auto-update thread is already running")
            return

        self._auto_update_stop.clear()

        def _loop() -> None:
            logger.info("Auto-update started (every %d hours)", interval_hours)
            while not self._auto_update_stop.is_set():
                try:
                    self.update_all()
                except Exception:
                    logger.exception("Auto-update cycle failed")
                # Sleeps for the interval but wakes up at once when the stop
                # event is set.
                self._auto_update_stop.wait(timeout=interval_hours * 3600)
            logger.info("Auto-update stopped")

        self._auto_update_thread = threading.Thread(
            target=_loop, name="ayn-auto-update", daemon=True
        )
        self._auto_update_thread.start()

    def stop_auto_update(self) -> None:
        """Signal the auto-update thread to stop.

        Waits up to five seconds for the thread to finish.
        """
        self._auto_update_stop.set()
        if self._auto_update_thread:
            self._auto_update_thread.join(timeout=5)

    # ------------------------------------------------------------------
    # Integrity
    # ------------------------------------------------------------------

    def verify_db_integrity(self) -> Dict[str, Any]:
        """Run ``PRAGMA integrity_check`` on the database.

        Returns a dict with ``ok`` (bool) and ``details`` (str).  ``details``
        is ``"ok"`` for a healthy database; otherwise it holds every problem
        reported by SQLite joined with ``"; "``, or the error message when
        the check itself could not run.
        """
        self.hash_db.initialize()

        try:
            rows = self.hash_db.conn.execute("PRAGMA integrity_check").fetchall()
        except sqlite3.DatabaseError as exc:
            ok = False
            detail = str(exc)
        else:
            # A healthy database yields the single row "ok"; a damaged one
            # yields one row per problem found.
            messages = [str(row[0]) for row in rows]
            ok = messages == ["ok"]
            detail = "; ".join(messages) if messages else "no result"

        status = {"ok": ok, "details": detail}
        if not ok:
            logger.error("Database integrity check FAILED: %s", detail)
        else:
            logger.info("Database integrity check passed")
        return status

    # ------------------------------------------------------------------
    # Cleanup
    # ------------------------------------------------------------------

    def close(self) -> None:
        """Stop background threads and close databases."""
        self.stop_auto_update()
        self.hash_db.close()
        self.ioc_db.close()
|
||||
Reference in New Issue
Block a user