"""URLhaus feed for AYN Antivirus.

Fetches malicious URLs and payload hashes from the abuse.ch URLhaus
CSV/text exports (free, no API key required).
"""

from __future__ import annotations

import csv
import io
import logging
from typing import Any, Dict, Iterator, List

import requests

from ayn_antivirus.signatures.feeds.base_feed import BaseFeed

logger = logging.getLogger(__name__)

_CSV_RECENT_URL = "https://urlhaus.abuse.ch/downloads/csv_recent/"
_TEXT_ONLINE_URL = "https://urlhaus.abuse.ch/downloads/text_online/"
_PAYLOAD_RECENT_URL = "https://urlhaus.abuse.ch/downloads/payloads_recent/"
_TIMEOUT = 60  # passed as ``timeout=`` to every requests.get() call below


def _csv_rows(text: str) -> Iterator[List[str]]:
    """Return a CSV reader over *text*, skipping blank and ``#`` lines.

    A line counts as a comment only when ``#`` is its very first
    character (no leading whitespace is tolerated).
    """
    data_lines = [
        line
        for line in text.splitlines()
        if line.strip() and not line.startswith("#")
    ]
    return csv.reader(io.StringIO("\n".join(data_lines)))


def _clean(field: str) -> str:
    """Strip surrounding whitespace and stray double quotes from *field*."""
    return field.strip().strip('"')


class URLHausFeed(BaseFeed):
    """Fetch malware URLs and payload hashes from URLhaus.

    The helpers ``_rate_limit_wait``, ``_log``, ``_error`` and
    ``_mark_updated`` are not defined in this module; presumably they are
    inherited from ``BaseFeed`` (verify against the base class).
    """

    def get_name(self) -> str:
        """Return the short identifier of this feed."""
        return "urlhaus"

    def fetch(self) -> List[Dict[str, Any]]:
        """Return recent URL records followed by recent payload-hash records.

        The two kinds of record use different keys (``ioc_type``/``value``
        for URLs, ``hash`` for payloads); see :meth:`fetch_recent` and
        :meth:`fetch_payloads`.
        """
        results = self.fetch_recent()
        results.extend(self.fetch_payloads())
        return results

    def fetch_recent(self) -> List[Dict[str, Any]]:
        """Fetch recent malicious URLs from the CSV export.

        Returns:
            One dict per accepted row with the keys ``ioc_type``, ``value``,
            ``threat_name``, ``type`` and ``source``.  An empty list is
            returned when the download fails.
        """
        self._rate_limit_wait()
        self._log("Fetching recent URLs from CSV export")
        try:
            resp = requests.get(_CSV_RECENT_URL, timeout=_TIMEOUT)
            resp.raise_for_status()
        except requests.RequestException as exc:
            self._error("CSV download failed: %s", exc)
            return []

        results: List[Dict[str, Any]] = []
        for row in _csv_rows(resp.text):
            if len(row) < 4:
                continue
            # 0:id, 1:dateadded, 2:url, 3:url_status, 4:threat, 5:tags,
            # 6:urlhaus_link, 7:reporter
            # NOTE(review): the upstream export may now carry an extra
            # ``last_online`` column before ``threat`` -- confirm the index
            # against the header line of a current download.
            url = _clean(row[2])
            if not url or not url.startswith("http"):
                continue
            threat = _clean(row[4]) if len(row) > 4 else ""
            if not threat or threat == "None":
                threat = "Malware.Distribution"
            results.append({
                "ioc_type": "url",
                "value": url,
                "threat_name": threat,
                "type": "malware_distribution",
                "source": "urlhaus",
            })

        self._log("Fetched %d URL(s)", len(results))
        self._mark_updated()
        return results

    def fetch_payloads(self) -> List[Dict[str, Any]]:
        """Fetch recent payload hashes (SHA256) from URLhaus.

        Returns:
            One dict per accepted row with the keys ``hash`` (lower-cased
            SHA256), ``threat_name``, ``threat_type``, ``severity``,
            ``source`` and ``details``.  An empty list is returned when the
            download fails.
        """
        self._rate_limit_wait()
        self._log("Fetching payload hashes")
        try:
            resp = requests.get(_PAYLOAD_RECENT_URL, timeout=_TIMEOUT)
            resp.raise_for_status()
        except requests.RequestException as exc:
            self._error("Payload download failed: %s", exc)
            return []

        results: List[Dict[str, Any]] = []
        for row in _csv_rows(resp.text):
            # 0:first_seen, 1:url, 2:file_type, 3:md5, 4:sha256, 5:signature
            # Only the sha256 column is mandatory.  The rows have six
            # columns, so requiring more than that would discard every
            # record and make this method always return an empty list.
            if len(row) < 5:
                continue
            sha256 = _clean(row[4])
            if len(sha256) != 64:  # also rejects an empty field
                continue
            signature = _clean(row[5]) if len(row) > 5 else ""
            if not signature or signature == "None":
                signature = "Malware.URLhaus.Payload"
            results.append({
                "hash": sha256.lower(),
                "threat_name": signature,
                "threat_type": "MALWARE",
                "severity": "HIGH",
                "source": "urlhaus",
                "details": f"file_type={row[2].strip()}",
            })

        self._log("Fetched %d payload hash(es)", len(results))
        return results

    def fetch_active(self) -> List[Dict[str, Any]]:
        """Fetch currently-active malware URLs from the plain-text export.

        Every non-blank, non-comment line of the response is taken verbatim
        (after stripping whitespace) as a URL.

        Returns:
            One dict per line with the keys ``ioc_type``, ``value``,
            ``threat_name``, ``type`` and ``source``.  An empty list is
            returned when the download fails.
        """
        self._rate_limit_wait()
        try:
            resp = requests.get(_TEXT_ONLINE_URL, timeout=_TIMEOUT)
            resp.raise_for_status()
        except requests.RequestException as exc:
            self._error("Download failed: %s", exc)
            return []

        results: List[Dict[str, Any]] = []
        for raw_line in resp.text.splitlines():
            line = raw_line.strip()
            if not line or line.startswith("#"):
                continue
            results.append({
                "ioc_type": "url",
                "value": line,
                "threat_name": "Malware.Distribution.Active",
                "type": "malware_distribution",
                "source": "urlhaus",
            })

        self._log("Fetched %d active URL(s)", len(results))
        self._mark_updated()
        return results