Files
calvana/ayn-antivirus/ayn_antivirus/signatures/feeds/urlhaus.py

132 lines
4.6 KiB
Python

"""URLhaus feed for AYN Antivirus.
Fetches malicious URLs and payload hashes from the abuse.ch URLhaus
CSV/text exports (free, no API key required).
"""
from __future__ import annotations
import csv
import io
import logging
from typing import Any, Dict, List
import requests
from ayn_antivirus.signatures.feeds.base_feed import BaseFeed
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# abuse.ch URLhaus download endpoints used by URLHausFeed.
# CSV export of recent URLs (read by fetch_recent).
_CSV_RECENT_URL = "https://urlhaus.abuse.ch/downloads/csv_recent/"
# Plain-text export of URLs, consumed one URL per line (read by fetch_active).
_TEXT_ONLINE_URL = "https://urlhaus.abuse.ch/downloads/text_online/"
# CSV export of recent payload records (read by fetch_payloads).
_PAYLOAD_RECENT_URL = "https://urlhaus.abuse.ch/downloads/payloads_recent/"
# Value passed as ``timeout=`` to every requests.get() call (seconds).
_TIMEOUT = 60
class URLHausFeed(BaseFeed):
    """Fetch malware URLs and payload hashes from URLhaus.

    Every ``fetch_*`` method downloads one abuse.ch export, ignores blank
    and ``#``-prefixed lines, and returns a list of plain dicts.  A failed
    download is reported through ``self._error`` and produces an empty list
    rather than an exception.
    """

    # Characters accepted in a SHA-256 hex digest.
    _HEX_DIGITS = frozenset("0123456789abcdefABCDEF")

    def get_name(self) -> str:
        """Return the feed identifier, ``"urlhaus"``."""
        return "urlhaus"

    def fetch(self) -> List[Dict[str, Any]]:
        """Return recent URL records followed by recent payload-hash records."""
        results = self.fetch_recent()
        results.extend(self.fetch_payloads())
        return results

    def fetch_recent(self) -> List[Dict[str, Any]]:
        """Fetch recent malicious URLs from CSV export.

        Returns:
            One dict per accepted row with the keys ``ioc_type``, ``value``,
            ``threat_name``, ``type`` and ``source``.  Rows whose URL field
            does not start with ``http`` are skipped.  An empty list is
            returned when the download fails.
        """
        self._rate_limit_wait()
        self._log("Fetching recent URLs from CSV export")
        text = self._download_text(_CSV_RECENT_URL, "CSV download failed: %s")
        if text is None:
            return []

        # Fallback layout when the export carries no commented header line:
        # 0:id, 1:dateadded, 2:url, 3:url_status, 4:threat, 5:tags, 6:urlhaus_link, 7:reporter
        # NOTE(review): the live export is believed to have gained a
        # ``last_online`` column ahead of ``threat``; resolving columns from
        # the header keeps ``threat_name`` correct either way -- confirm
        # against the current feed.
        columns = self._export_columns(text)
        url_idx = columns.get("url", 2)
        threat_idx = columns.get("threat", 4)
        # Keep the original minimum of four fields per row.
        min_fields = max(4, url_idx + 1)

        results: List[Dict[str, Any]] = []
        for row in self._export_rows(text):
            if len(row) < min_fields:
                continue
            url = self._unquote(row[url_idx])
            if not url.startswith("http"):
                continue
            threat = self._unquote(row[threat_idx]) if len(row) > threat_idx else ""
            results.append({
                "ioc_type": "url",
                "value": url,
                "threat_name": threat if threat and threat != "None" else "Malware.Distribution",
                "type": "malware_distribution",
                "source": "urlhaus",
            })
        self._log("Fetched %d URL(s)", len(results))
        self._mark_updated()
        return results

    def fetch_payloads(self) -> List[Dict[str, Any]]:
        """Fetch recent payload hashes (SHA256) from URLhaus.

        Returns:
            One dict per accepted row with the keys ``hash`` (lower-cased
            SHA-256), ``threat_name``, ``threat_type``, ``severity``,
            ``source`` and ``details``.  Rows whose hash field is not a
            64-character hex string are skipped.  An empty list is returned
            when the download fails.
        """
        # NOTE(review): unlike fetch_recent/fetch_active this method does not
        # call self._mark_updated(); confirm that is intentional.
        self._rate_limit_wait()
        self._log("Fetching payload hashes")
        text = self._download_text(_PAYLOAD_RECENT_URL, "Payload download failed: %s")
        if text is None:
            return []

        # Fallback layout when the export carries no commented header line:
        # 0:first_seen, 1:url, 2:file_type, 3:md5, 4:sha256, 5:signature
        columns = self._export_columns(text)
        type_idx = columns.get("filetype", 2)
        sha_idx = columns.get("sha256", 4)
        sig_idx = columns.get("signature", 5)
        # Only the columns that are always read are mandatory.  The previous
        # guard demanded seven fields and therefore rejected every row of the
        # six-column layout documented above.
        min_fields = max(type_idx, sha_idx) + 1

        results: List[Dict[str, Any]] = []
        for row in self._export_rows(text):
            if len(row) < min_fields:
                continue
            sha256 = self._unquote(row[sha_idx])
            if len(sha256) != 64 or not self._HEX_DIGITS.issuperset(sha256):
                continue
            sig = self._unquote(row[sig_idx]) if len(row) > sig_idx else ""
            results.append({
                "hash": sha256.lower(),
                "threat_name": sig if sig and sig != "None" else "Malware.URLhaus.Payload",
                "threat_type": "MALWARE",
                "severity": "HIGH",
                "source": "urlhaus",
                "details": f"file_type={row[type_idx].strip()}",
            })
        self._log("Fetched %d payload hash(es)", len(results))
        return results

    def fetch_active(self) -> List[Dict[str, Any]]:
        """Fetch currently-active malware URLs.

        Returns:
            One dict per non-blank, non-comment line of the text export,
            with the line stored under ``value``.  An empty list is returned
            when the download fails.
        """
        self._rate_limit_wait()
        text = self._download_text(_TEXT_ONLINE_URL, "Download failed: %s")
        if text is None:
            return []

        results: List[Dict[str, Any]] = []
        for raw_line in text.splitlines():
            line = raw_line.strip()
            if not line or line.startswith("#"):
                continue
            results.append({
                "ioc_type": "url",
                "value": line,
                "threat_name": "Malware.Distribution.Active",
                "type": "malware_distribution",
                "source": "urlhaus",
            })
        self._log("Fetched %d active URL(s)", len(results))
        self._mark_updated()
        return results

    def _download_text(self, url: str, failure_message: str) -> str | None:
        """Return the body of *url*, or ``None`` after reporting a failure.

        *failure_message* is a ``%s`` format string; it is handed to
        ``self._error`` together with the caught exception.
        """
        try:
            resp = requests.get(url, timeout=_TIMEOUT)
            resp.raise_for_status()
        except requests.RequestException as exc:
            self._error(failure_message, exc)
            return None
        return resp.text

    @staticmethod
    def _export_columns(text: str) -> Dict[str, int]:
        """Map column names to positions using the export's commented header.

        Only the comment lines that precede the first data line are examined.
        A comment qualifies as the header when it is comma-separated and one
        of its fields is exactly ``url``.  An empty dict is returned when no
        such line exists, so callers fall back to fixed positions.
        """
        for line in text.splitlines():
            if not line.strip():
                continue
            if not line.startswith("#"):
                break  # first data row reached; no header will follow
            names = [
                name.strip().strip('"').lower()
                for name in line.strip().strip("#").split(",")
            ]
            if len(names) > 1 and "url" in names:
                return {name: idx for idx, name in enumerate(names)}
        return {}

    @staticmethod
    def _export_rows(text: str) -> List[List[str]]:
        """Parse the non-blank, non-comment lines of *text* as CSV rows."""
        data_lines = [
            line for line in text.splitlines()
            if line.strip() and not line.startswith("#")
        ]
        return list(csv.reader(io.StringIO("\n".join(data_lines))))

    @staticmethod
    def _unquote(field: str) -> str:
        """Strip surrounding whitespace and any leftover double quotes."""
        return field.strip().strip('"')