252 lines
8.4 KiB
Python
252 lines
8.4 KiB
Python
"""SQLite-backed malware hash database for AYN Antivirus.

Stores SHA-256 / MD5 hashes of known threats with associated metadata
(threat name, type, severity, source feed) and provides efficient lookup,
bulk-insert, search, and export operations.
"""
|
|
|
|
from __future__ import annotations

import csv
import logging
import sqlite3
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional, Sequence, Tuple

from ayn_antivirus.constants import DEFAULT_DB_PATH
|
|
|
|
logger = logging.getLogger(__name__)

# ---------------------------------------------------------------------------
# Schema
# ---------------------------------------------------------------------------
# Run through ``executescript`` when the database is opened.  Every statement
# uses ``IF NOT EXISTS`` so applying the script to an existing file is a no-op.
#
# ``threats`` holds one row per known-bad hash (the hash itself is the primary
# key); ``meta`` is a plain key/value table.
_SCHEMA = """
CREATE TABLE IF NOT EXISTS threats (
    hash        TEXT PRIMARY KEY,
    threat_name TEXT NOT NULL,
    threat_type TEXT NOT NULL DEFAULT 'MALWARE',
    severity    TEXT NOT NULL DEFAULT 'HIGH',
    source      TEXT NOT NULL DEFAULT '',
    added_date  TEXT NOT NULL DEFAULT (datetime('now')),
    details     TEXT NOT NULL DEFAULT ''
);
CREATE INDEX IF NOT EXISTS idx_threats_type   ON threats(threat_type);
CREATE INDEX IF NOT EXISTS idx_threats_source ON threats(source);
CREATE INDEX IF NOT EXISTS idx_threats_name   ON threats(threat_name);

CREATE TABLE IF NOT EXISTS meta (
    key   TEXT PRIMARY KEY,
    value TEXT
);
"""
|
|
|
|
|
|
class HashDatabase:
    """Manage a local SQLite database of known-malicious file hashes.

    The connection is opened lazily: the first access to :attr:`conn` calls
    :meth:`initialize`.  Instances may also be used as context managers, in
    which case the connection is closed when the ``with`` block exits.

    Parameters
    ----------
    db_path:
        Path to the SQLite file.  Created automatically (with parent dirs)
        if it doesn't exist.
    """

    def __init__(self, db_path: str | Path = DEFAULT_DB_PATH) -> None:
        self.db_path = Path(db_path)
        self._conn: Optional[sqlite3.Connection] = None

    # ------------------------------------------------------------------
    # Lifecycle
    # ------------------------------------------------------------------

    def initialize(self) -> None:
        """Open the database and create tables if necessary.

        Calling this while a connection is already open closes the old
        connection first, so repeated calls do not leak handles.
        """
        self.close()
        self.db_path.parent.mkdir(parents=True, exist_ok=True)
        # NOTE(review): check_same_thread=False lets other threads use this
        # connection, but this class does no locking of its own; presumably
        # callers serialise access -- confirm.
        conn = sqlite3.connect(str(self.db_path), check_same_thread=False)
        try:
            conn.row_factory = sqlite3.Row
            conn.execute("PRAGMA journal_mode=WAL")
            conn.executescript(_SCHEMA)
            conn.commit()
        except Exception:
            # Do not leave a half-configured connection open.
            conn.close()
            raise
        self._conn = conn
        logger.info("HashDatabase opened: %s (%d hashes)", self.db_path, self.count())

    def close(self) -> None:
        """Close the database connection if one is open."""
        if self._conn is not None:
            # Clear the attribute first so a failing close() cannot leave a
            # dead connection cached on the instance.
            conn, self._conn = self._conn, None
            conn.close()

    def __enter__(self) -> HashDatabase:
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        self.close()

    @property
    def conn(self) -> sqlite3.Connection:
        """The live connection, opened on first use."""
        if self._conn is None:
            self.initialize()
        assert self._conn is not None  # narrowing for type checkers only
        return self._conn

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _normalize_hash(hash_str: str) -> str:
        """Return *hash_str* in the canonical stored form.

        Hashes are stored lower-case; surrounding whitespace (e.g. a trailing
        newline from a feed line) is dropped so it cannot cause a missed match.
        """
        return hash_str.strip().lower()

    @staticmethod
    def _utc_timestamp() -> str:
        """Return the current UTC time as a naive ISO-8601 string.

        Produces the same text format as the deprecated
        ``datetime.utcnow().isoformat()`` (no UTC offset suffix), so new rows
        sort consistently with rows that are already stored.
        """
        return datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

    # ------------------------------------------------------------------
    # Single-record operations
    # ------------------------------------------------------------------

    def add_hash(
        self,
        hash_str: str,
        threat_name: str,
        threat_type: str = "MALWARE",
        severity: str = "HIGH",
        source: str = "",
        details: str = "",
    ) -> None:
        """Insert or replace a single hash record.

        An existing row with the same hash is overwritten, including its
        ``added_date``.
        """
        conn = self.conn
        # ``with conn`` commits on success and rolls back if the statement
        # raises, so a failure never leaves an open transaction behind.
        with conn:
            conn.execute(
                "INSERT OR REPLACE INTO threats "
                "(hash, threat_name, threat_type, severity, source, added_date, details) "
                "VALUES (?, ?, ?, ?, ?, ?, ?)",
                (
                    self._normalize_hash(hash_str),
                    threat_name,
                    threat_type,
                    severity,
                    source,
                    self._utc_timestamp(),
                    details,
                ),
            )

    def lookup(self, hash_str: str) -> Optional[Dict[str, Any]]:
        """Look up a hash and return its metadata, or ``None``."""
        row = self.conn.execute(
            "SELECT * FROM threats WHERE hash = ?",
            (self._normalize_hash(hash_str),),
        ).fetchone()
        return None if row is None else dict(row)

    def remove(self, hash_str: str) -> bool:
        """Delete a hash record.  Returns ``True`` if a row was deleted."""
        conn = self.conn
        with conn:
            cur = conn.execute(
                "DELETE FROM threats WHERE hash = ?",
                (self._normalize_hash(hash_str),),
            )
        return cur.rowcount > 0

    # ------------------------------------------------------------------
    # Bulk operations
    # ------------------------------------------------------------------

    def bulk_add(
        self,
        records: Sequence[Tuple[str, str, str, str, str, str]],
    ) -> int:
        """Efficiently insert new hashes in a single transaction.

        Uses ``INSERT OR IGNORE`` so existing entries are preserved and
        only genuinely new hashes are counted.  If the insert raises, the
        whole batch is rolled back.

        Parameters
        ----------
        records:
            Sequence of ``(hash, threat_name, threat_type, severity, source, details)``
            tuples.

        Returns
        -------
        int
            Number of **new** rows actually inserted.
        """
        if not records:
            return 0
        now = self._utc_timestamp()
        rows = [
            (self._normalize_hash(h), name, ttype, sev, src, now, det)
            for h, name, ttype, sev, src, det in records
        ]
        conn = self.conn
        # total_changes is a per-connection counter of modified rows; its
        # delta gives the number of inserts without two COUNT(*) table scans
        # and is unaffected by other connections writing to the same file.
        before = conn.total_changes
        with conn:
            conn.executemany(
                "INSERT OR IGNORE INTO threats "
                "(hash, threat_name, threat_type, severity, source, added_date, details) "
                "VALUES (?, ?, ?, ?, ?, ?, ?)",
                rows,
            )
        return conn.total_changes - before

    # ------------------------------------------------------------------
    # Query helpers
    # ------------------------------------------------------------------

    def count(self) -> int:
        """Total number of hashes in the database."""
        return self.conn.execute("SELECT COUNT(*) FROM threats").fetchone()[0]

    def get_stats(self) -> Dict[str, Any]:
        """Return aggregate statistics about the database.

        Returns
        -------
        dict
            ``total`` (row count), ``by_type`` and ``by_source`` (value ->
            row count), and ``latest_update`` (largest ``added_date``, or
            ``None`` when the table is empty).
        """
        c = self.conn
        by_type = {
            row[0]: row[1]
            for row in c.execute(
                "SELECT threat_type, COUNT(*) FROM threats GROUP BY threat_type"
            )
        }
        by_source = {
            row[0]: row[1]
            for row in c.execute(
                "SELECT source, COUNT(*) FROM threats GROUP BY source"
            )
        }
        latest = c.execute("SELECT MAX(added_date) FROM threats").fetchone()[0]
        return {
            # threat_type is NOT NULL, so the per-type counts cover every row;
            # summing them avoids one more full COUNT(*) scan.
            "total": sum(by_type.values()),
            "by_type": by_type,
            "by_source": by_source,
            "latest_update": latest,
        }

    def search(self, query: str, limit: int = 500) -> List[Dict[str, Any]]:
        """Search threat names with a SQL LIKE pattern.

        Example: ``search("%Trojan%")``

        Parameters
        ----------
        query:
            ``LIKE`` pattern matched against ``threat_name``.
        limit:
            Maximum number of rows returned (newest ``added_date`` first).
        """
        rows = self.conn.execute(
            "SELECT * FROM threats WHERE threat_name LIKE ? "
            "ORDER BY added_date DESC LIMIT ?",
            (query, limit),
        ).fetchall()
        return [dict(r) for r in rows]

    # ------------------------------------------------------------------
    # Export
    # ------------------------------------------------------------------

    def export_hashes(self, filepath: str | Path) -> int:
        """Export all hashes to a CSV file.  Returns the row count.

        The file is written as UTF-8 with a header row; parent directories
        are created if needed.  Rows are streamed from the cursor rather than
        loaded into memory all at once.
        """
        filepath = Path(filepath)
        filepath.parent.mkdir(parents=True, exist_ok=True)
        cursor = self.conn.execute(
            "SELECT hash, threat_name, threat_type, severity, source, added_date, details "
            "FROM threats ORDER BY added_date DESC"
        )
        exported = 0
        # Explicit encoding: the locale default may be unable to encode
        # non-ASCII threat names or details.
        with filepath.open("w", newline="", encoding="utf-8") as fh:
            writer = csv.writer(fh)
            writer.writerow(
                ["hash", "threat_name", "threat_type", "severity", "source", "added_date", "details"]
            )
            for row in cursor:
                writer.writerow(tuple(row))
                exported += 1
        return exported

    # ------------------------------------------------------------------
    # Meta helpers (used by manager to track feed state)
    # ------------------------------------------------------------------

    def set_meta(self, key: str, value: str) -> None:
        """Store *value* under *key* in the ``meta`` table, replacing any old value."""
        conn = self.conn
        with conn:
            conn.execute(
                "INSERT OR REPLACE INTO meta (key, value) VALUES (?, ?)",
                (key, value),
            )

    def get_meta(self, key: str) -> Optional[str]:
        """Return the value stored under *key*, or ``None`` if the key is absent."""
        row = self.conn.execute(
            "SELECT value FROM meta WHERE key = ?", (key,)
        ).fetchone()
        return row[0] if row else None
|