remove infra.md.example, infra.md is the source of truth
This commit is contained in:
251
ayn-antivirus/ayn_antivirus/signatures/db/hash_db.py
Normal file
251
ayn-antivirus/ayn_antivirus/signatures/db/hash_db.py
Normal file
@@ -0,0 +1,251 @@
"""SQLite-backed malware hash database for AYN Antivirus.

Stores SHA-256 / MD5 hashes of known threats with associated metadata
(threat name, type, severity, source feed) and provides efficient lookup,
bulk-insert, search, and export operations.
"""
|
||||
|
||||
from __future__ import annotations

import csv
import logging
import sqlite3
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional, Sequence, Tuple

from ayn_antivirus.constants import DEFAULT_DB_PATH
|
||||
|
||||
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# ---------------------------------------------------------------------------
# Schema
# ---------------------------------------------------------------------------
# Executed with ``executescript`` every time the database is opened; every
# statement uses ``IF NOT EXISTS`` so re-running it against an existing file
# is harmless.
#
# * ``threats`` -- one row per known-bad hash (the hash itself is the primary
#   key), with secondary indexes on the columns used for grouping/searching.
# * ``meta``    -- free-form key/value store.
_SCHEMA = """
CREATE TABLE IF NOT EXISTS threats (
    hash        TEXT PRIMARY KEY,
    threat_name TEXT NOT NULL,
    threat_type TEXT NOT NULL DEFAULT 'MALWARE',
    severity    TEXT NOT NULL DEFAULT 'HIGH',
    source      TEXT NOT NULL DEFAULT '',
    added_date  TEXT NOT NULL DEFAULT (datetime('now')),
    details     TEXT NOT NULL DEFAULT ''
);
CREATE INDEX IF NOT EXISTS idx_threats_type   ON threats(threat_type);
CREATE INDEX IF NOT EXISTS idx_threats_source ON threats(source);
CREATE INDEX IF NOT EXISTS idx_threats_name   ON threats(threat_name);

CREATE TABLE IF NOT EXISTS meta (
    key   TEXT PRIMARY KEY,
    value TEXT
);
"""
|
||||
|
||||
|
||||
class HashDatabase:
    """Manage a local SQLite database of known-malicious file hashes.

    Hashes are lower-cased before they are stored or compared, so lookups
    are case-insensitive.  The connection is opened lazily on first access
    to :attr:`conn`; the instance can also be used as a context manager so
    the connection is always closed::

        with HashDatabase(path) as db:
            db.add_hash("ABC123", "Trojan.Generic")

    Parameters
    ----------
    db_path:
        Path to the SQLite file.  Created automatically (with parent dirs)
        if it doesn't exist.
    """

    def __init__(self, db_path: str | Path = DEFAULT_DB_PATH) -> None:
        self.db_path = Path(db_path)
        # Stays ``None`` until ``initialize()`` (or the ``conn`` property)
        # opens the database.
        self._conn: Optional[sqlite3.Connection] = None

    def __enter__(self) -> HashDatabase:
        """Open the database (if needed) and return ``self``."""
        self.initialize()
        return self

    def __exit__(self, *exc_info: object) -> None:
        """Close the database when leaving the ``with`` block."""
        self.close()

    # ------------------------------------------------------------------
    # Lifecycle
    # ------------------------------------------------------------------

    def initialize(self) -> None:
        """Open the database and create tables if necessary.

        Calling this while a connection is already open is a no-op, so a
        repeated call never leaks the previous connection.

        Raises
        ------
        sqlite3.Error
            If the file cannot be opened or the schema cannot be applied.
            The partially opened connection is closed before re-raising.
        """
        if self._conn is not None:
            return

        self.db_path.parent.mkdir(parents=True, exist_ok=True)
        # ``check_same_thread=False`` disables sqlite3's same-thread check so
        # the connection may be used from other threads than the one that
        # opened it.
        conn = sqlite3.connect(str(self.db_path), check_same_thread=False)
        try:
            conn.row_factory = sqlite3.Row
            conn.execute("PRAGMA journal_mode=WAL")
            conn.executescript(_SCHEMA)
            conn.commit()
        except sqlite3.Error:
            # Don't leave a half-initialised connection behind.
            conn.close()
            raise

        # Only publish the connection once it is fully set up.
        self._conn = conn
        logger.info("HashDatabase opened: %s (%d hashes)", self.db_path, self.count())

    def close(self) -> None:
        """Close the database connection, if one is open."""
        # Clear the attribute first so the instance is reusable even if
        # ``close()`` on the underlying connection raises.
        conn, self._conn = self._conn, None
        if conn is not None:
            conn.close()

    @property
    def conn(self) -> sqlite3.Connection:
        """The open connection, initialising the database on first access."""
        if self._conn is None:
            self.initialize()
        conn = self._conn
        if conn is None:
            # Only reachable if ``initialize()`` is overridden incorrectly.
            raise RuntimeError("HashDatabase.initialize() did not open a connection")
        return conn

    @staticmethod
    def _utc_now_iso() -> str:
        """Return the current UTC time as a naive ISO-8601 string.

        Uses the timezone-aware clock (``datetime.utcnow()`` is deprecated)
        but strips the offset so stored values keep the format of rows
        written earlier.
        """
        return datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

    # ------------------------------------------------------------------
    # Single-record operations
    # ------------------------------------------------------------------

    def add_hash(
        self,
        hash_str: str,
        threat_name: str,
        threat_type: str = "MALWARE",
        severity: str = "HIGH",
        source: str = "",
        details: str = "",
    ) -> None:
        """Insert or replace a single hash record.

        The hash is stored lower-cased and ``added_date`` is set to the
        current UTC time.  An existing row with the same hash is replaced.
        """
        record = (
            hash_str.lower(),
            threat_name,
            threat_type,
            severity,
            source,
            self._utc_now_iso(),
            details,
        )
        # ``with conn`` commits on success and rolls back on error.
        with self.conn as conn:
            conn.execute(
                "INSERT OR REPLACE INTO threats "
                "(hash, threat_name, threat_type, severity, source, added_date, details) "
                "VALUES (?, ?, ?, ?, ?, ?, ?)",
                record,
            )

    def lookup(self, hash_str: str) -> Optional[Dict[str, Any]]:
        """Look up a hash and return its metadata, or ``None``.

        The returned dict is keyed by the ``threats`` column names.
        """
        row = self.conn.execute(
            "SELECT * FROM threats WHERE hash = ?", (hash_str.lower(),)
        ).fetchone()
        return None if row is None else dict(row)

    def remove(self, hash_str: str) -> bool:
        """Delete a hash record.  Returns ``True`` if a row was deleted."""
        with self.conn as conn:
            cursor = conn.execute(
                "DELETE FROM threats WHERE hash = ?", (hash_str.lower(),)
            )
        return cursor.rowcount > 0

    # ------------------------------------------------------------------
    # Bulk operations
    # ------------------------------------------------------------------

    def bulk_add(
        self,
        records: Sequence[Tuple[str, str, str, str, str, str]],
    ) -> int:
        """Efficiently insert new hashes in a single transaction.

        Uses ``INSERT OR IGNORE`` so existing entries are preserved and
        only genuinely new hashes are counted.  If any row fails, the whole
        batch is rolled back.

        Parameters
        ----------
        records:
            Sequence of ``(hash, threat_name, threat_type, severity, source, details)``
            tuples.

        Returns
        -------
        int
            Number of **new** rows actually inserted.
        """
        if not records:
            return 0

        # One timestamp for the whole batch.
        now = self._utc_now_iso()
        rows = [
            (h.lower(), name, ttype, sev, src, now, det)
            for h, name, ttype, sev, src, det in records
        ]
        with self.conn as conn:
            cursor = conn.executemany(
                "INSERT OR IGNORE INTO threats "
                "(hash, threat_name, threat_type, severity, source, added_date, details) "
                "VALUES (?, ?, ?, ?, ?, ?, ?)",
                rows,
            )
        # ``rowcount`` sums the rows changed by each statement of the batch;
        # ignored duplicates contribute 0.  This avoids two full
        # ``COUNT(*)`` scans of the table per call.
        return cursor.rowcount

    # ------------------------------------------------------------------
    # Query helpers
    # ------------------------------------------------------------------

    def count(self) -> int:
        """Total number of hashes in the database."""
        return self.conn.execute("SELECT COUNT(*) FROM threats").fetchone()[0]

    def get_stats(self) -> Dict[str, Any]:
        """Return aggregate statistics about the database.

        Returns
        -------
        dict
            ``total`` (row count), ``by_type`` and ``by_source`` (value ->
            row count), and ``latest_update`` (largest ``added_date``, or
            ``None`` when the table is empty).
        """
        conn = self.conn
        by_type = {
            row[0]: row[1]
            for row in conn.execute(
                "SELECT threat_type, COUNT(*) FROM threats GROUP BY threat_type"
            )
        }
        by_source = {
            row[0]: row[1]
            for row in conn.execute(
                "SELECT source, COUNT(*) FROM threats GROUP BY source"
            )
        }
        latest = conn.execute("SELECT MAX(added_date) FROM threats").fetchone()[0]
        return {
            # Every row belongs to exactly one threat_type group, so the
            # group sizes add up to the table size without another scan.
            "total": sum(by_type.values()),
            "by_type": by_type,
            "by_source": by_source,
            "latest_update": latest,
        }

    def search(self, query: str) -> List[Dict[str, Any]]:
        """Search threat names with a SQL LIKE pattern.

        *query* is passed to ``LIKE`` unchanged, so the caller supplies the
        wildcards.  At most 500 rows are returned, newest first.

        Example: ``search("%Trojan%")``
        """
        rows = self.conn.execute(
            "SELECT * FROM threats WHERE threat_name LIKE ? ORDER BY added_date DESC LIMIT 500",
            (query,),
        ).fetchall()
        return [dict(row) for row in rows]

    # ------------------------------------------------------------------
    # Export
    # ------------------------------------------------------------------

    def export_hashes(self, filepath: str | Path) -> int:
        """Export all hashes to a CSV file.  Returns the row count.

        The file is written as UTF-8 with a header row, newest records
        first.  Parent directories of *filepath* are created if missing.
        """
        filepath = Path(filepath)
        filepath.parent.mkdir(parents=True, exist_ok=True)
        cursor = self.conn.execute(
            "SELECT hash, threat_name, threat_type, severity, source, added_date, details "
            "FROM threats ORDER BY added_date DESC"
        )
        exported = 0
        # Explicit UTF-8 so non-ASCII threat names do not depend on the
        # platform's default encoding.
        with open(filepath, "w", newline="", encoding="utf-8") as fh:
            writer = csv.writer(fh)
            writer.writerow(
                ["hash", "threat_name", "threat_type", "severity", "source", "added_date", "details"]
            )
            # Stream from the cursor instead of loading the whole table.
            for row in cursor:
                writer.writerow(tuple(row))
                exported += 1
        return exported

    # ------------------------------------------------------------------
    # Meta helpers (used by manager to track feed state)
    # ------------------------------------------------------------------

    def set_meta(self, key: str, value: str) -> None:
        """Store *value* under *key* in the ``meta`` table, replacing any old value."""
        with self.conn as conn:
            conn.execute(
                "INSERT OR REPLACE INTO meta (key, value) VALUES (?, ?)", (key, value)
            )

    def get_meta(self, key: str) -> Optional[str]:
        """Return the value stored under *key*, or ``None`` if the key is absent."""
        row = self.conn.execute(
            "SELECT value FROM meta WHERE key = ?", (key,)
        ).fetchone()
        return row[0] if row is not None else None
|
||||
Reference in New Issue
Block a user