132 lines
4.6 KiB
Python
132 lines
4.6 KiB
Python
"""URLhaus feed for AYN Antivirus.

Fetches malicious URLs and payload hashes from the abuse.ch URLhaus
CSV/text exports (free, no API key required).
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import csv
|
|
import io
|
|
import logging
|
|
from typing import Any, Dict, List
|
|
|
|
import requests
|
|
|
|
from ayn_antivirus.signatures.feeds.base_feed import BaseFeed
|
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Timeout, in seconds, handed to every ``requests.get`` call in this module.
_TIMEOUT = 60

# abuse.ch URLhaus export endpoints.
_PAYLOAD_RECENT_URL = "https://urlhaus.abuse.ch/downloads/payloads_recent/"
_TEXT_ONLINE_URL = "https://urlhaus.abuse.ch/downloads/text_online/"
_CSV_RECENT_URL = "https://urlhaus.abuse.ch/downloads/csv_recent/"
|
|
|
|
|
|
class URLHausFeed(BaseFeed):
    """Fetch malware URLs and payload hashes from URLhaus.

    Each ``fetch_*`` method downloads one abuse.ch export with ``requests``
    and converts every usable data row into a plain ``dict``.  A failed
    download is reported through ``self._error`` and yields an empty list
    instead of raising.
    """

    def get_name(self) -> str:
        """Return the short identifier of this feed."""
        return "urlhaus"

    def fetch(self) -> List[Dict[str, Any]]:
        """Return recent URL records followed by recent payload-hash records.

        Either part may be empty when its download fails, so the result can
        contain only one kind of record, or none at all.
        """
        results = self.fetch_recent()
        results.extend(self.fetch_payloads())
        return results

    @staticmethod
    def _header_columns(text: str) -> Dict[str, int]:
        """Map column names to positions using a commented header line.

        Looks for the first ``#`` line whose comma-separated fields include
        an exact ``url`` entry and returns ``{name: index}`` for it.  Returns
        an empty mapping when no such line exists, so callers can fall back
        to fixed positions.

        NOTE(review): assumes the export announces its layout as
        ``# col_a,col_b,...`` -- confirm against a live download.
        """
        for line in text.splitlines():
            if not line.startswith("#"):
                continue
            names = [
                part.strip().strip('"').lower()
                for part in line.lstrip("#").split(",")
            ]
            # Banner comments never contain a bare "url" field; only the
            # column header does.
            if "url" in names:
                return {name: idx for idx, name in enumerate(names)}
        return {}

    @staticmethod
    def _data_rows(text: str) -> List[List[str]]:
        """Parse *text* as CSV, ignoring blank lines and ``#`` comment lines."""
        lines = [
            line
            for line in text.splitlines()
            if line.strip() and not line.startswith("#")
        ]
        return list(csv.reader(io.StringIO("\n".join(lines))))

    @staticmethod
    def _cell(row: List[str], index: int) -> str:
        """Return ``row[index]`` without padding or stray quotes, or ``""``."""
        if len(row) <= index:
            return ""
        return row[index].strip().strip('"')

    def fetch_recent(self) -> List[Dict[str, Any]]:
        """Fetch recent malicious URLs from the CSV export.

        Returns:
            One ``ioc_type="url"`` dict per row whose URL starts with
            ``http``; an empty list when the download fails.
        """
        self._rate_limit_wait()
        self._log("Fetching recent URLs from CSV export")

        try:
            resp = requests.get(_CSV_RECENT_URL, timeout=_TIMEOUT)
            resp.raise_for_status()
        except requests.RequestException as exc:
            self._error("CSV download failed: %s", exc)
            return []

        # Prefer the layout announced by the export itself.  The fallback
        # positions are the historical ones:
        # 0:id, 1:dateadded, 2:url, 3:url_status, 4:threat, 5:tags,
        # 6:urlhaus_link, 7:reporter
        # NOTE(review): newer exports appear to insert ``last_online`` before
        # ``threat``; header detection keeps ``threat_name`` correct then.
        columns = self._header_columns(resp.text)
        url_idx = columns.get("url", 2)
        threat_idx = columns.get("threat", 4)
        min_cols = max(4, url_idx + 1)

        results: List[Dict[str, Any]] = []
        for row in self._data_rows(resp.text):
            if len(row) < min_cols:
                continue
            url = self._cell(row, url_idx)
            # An empty URL also fails this check.
            if not url.startswith("http"):
                continue
            threat = self._cell(row, threat_idx)
            results.append({
                "ioc_type": "url",
                "value": url,
                "threat_name": threat if threat and threat != "None" else "Malware.Distribution",
                "type": "malware_distribution",
                "source": "urlhaus",
            })

        self._log("Fetched %d URL(s)", len(results))
        self._mark_updated()
        return results

    def fetch_payloads(self) -> List[Dict[str, Any]]:
        """Fetch recent payload hashes (SHA256) from URLhaus.

        Returns:
            One dict per row carrying a 64-character SHA256 (lower-cased);
            an empty list when the download fails.
        """
        self._rate_limit_wait()
        self._log("Fetching payload hashes")

        try:
            resp = requests.get(_PAYLOAD_RECENT_URL, timeout=_TIMEOUT)
            resp.raise_for_status()
        except requests.RequestException as exc:
            self._error("Payload download failed: %s", exc)
            return []

        # Fallback layout:
        # 0:first_seen, 1:url, 2:file_type, 3:md5, 4:sha256, 5:signature
        columns = self._header_columns(resp.text)
        sha_idx = columns.get("sha256", 4)
        sig_idx = columns.get("signature", 5)
        type_idx = columns.get("filetype", columns.get("file_type", 2))

        results: List[Dict[str, Any]] = []
        for row in self._data_rows(resp.text):
            # Only the sha256 column is mandatory.  The previous ``< 7``
            # guard rejected every row of the six-column layout above, so
            # this method always returned an empty list.
            if len(row) <= sha_idx:
                continue
            sha256 = self._cell(row, sha_idx)
            if len(sha256) != 64:
                continue
            sig = self._cell(row, sig_idx)
            results.append({
                "hash": sha256.lower(),
                "threat_name": sig if sig and sig != "None" else "Malware.URLhaus.Payload",
                "threat_type": "MALWARE",
                "severity": "HIGH",
                "source": "urlhaus",
                "details": f"file_type={row[type_idx].strip()}" if len(row) > type_idx else "",
            })

        self._log("Fetched %d payload hash(es)", len(results))
        return results

    def fetch_active(self) -> List[Dict[str, Any]]:
        """Fetch currently-active malware URLs from the plain-text export.

        Returns:
            One ``ioc_type="url"`` dict per non-blank, non-comment line;
            an empty list when the download fails.
        """
        self._rate_limit_wait()
        self._log("Fetching active URLs from text export")

        try:
            resp = requests.get(_TEXT_ONLINE_URL, timeout=_TIMEOUT)
            resp.raise_for_status()
        except requests.RequestException as exc:
            self._error("Download failed: %s", exc)
            return []

        results: List[Dict[str, Any]] = []
        for raw in resp.text.splitlines():
            entry = raw.strip()
            if not entry or entry.startswith("#"):
                continue
            results.append({
                "ioc_type": "url",
                "value": entry,
                "threat_name": "Malware.Distribution.Active",
                "type": "malware_distribution",
                "source": "urlhaus",
            })

        self._log("Fetched %d active URL(s)", len(results))
        self._mark_updated()
        return results
|