"""ThreatFox feed for AYN Antivirus.

Fetches IOCs (IPs, domains, URLs, hashes) from the abuse.ch ThreatFox
CSV export (free, no API key required).

CSV export: https://threatfox.abuse.ch/export/
"""

from __future__ import annotations

import csv
import io
import logging
from typing import Any, Dict, List

import requests

from ayn_antivirus.signatures.feeds.base_feed import BaseFeed

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Export endpoint downloaded by ThreatFoxFeed.fetch_recent().
_CSV_RECENT_URL = "https://threatfox.abuse.ch/export/csv/recent/"
# Full-export endpoint. NOTE(review): not referenced anywhere in the code
# visible in this module -- confirm whether it is still needed.
_CSV_FULL_URL = "https://threatfox.abuse.ch/export/csv/full/"
# Passed as ``timeout=`` to requests.get() (seconds).
_TIMEOUT = 60


class ThreatFoxFeed(BaseFeed):
    """Fetch IOCs from the ThreatFox CSV export.

    Every usable CSV row is turned into one dict:

    * hash IOCs -> ``hash``, ``threat_name``, ``threat_type``, ``severity``,
      ``source``, ``details``
    * everything else (ip / domain / url) -> ``ioc_type``, ``value``,
      ``threat_name``, ``type``, ``source``, ``confidence``

    The ``_rate_limit_wait``, ``_log``, ``_error`` and ``_mark_updated``
    helpers used below are not defined in this class; presumably they are
    inherited from ``BaseFeed``.
    """

    def get_name(self) -> str:
        """Return the feed identifier, ``"threatfox"``."""
        return "threatfox"

    def fetch(self) -> List[Dict[str, Any]]:
        """Fetch IOCs by delegating to :meth:`fetch_recent` with defaults."""
        return self.fetch_recent()

    def fetch_recent(self, days: int = 7) -> List[Dict[str, Any]]:
        """Download the recent-IOC CSV export and parse it into records.

        Args:
            days: Not used by the current implementation; the request always
                targets the fixed recent-export URL. Kept so existing callers
                that pass it keep working.

        Returns:
            A list of IOC dicts (see the class docstring for the two
            shapes). An empty list is returned when the download fails;
            the failure is reported through ``self._error``.
        """
        self._rate_limit_wait()
        self._log("Fetching IOCs from CSV export")

        try:
            resp = requests.get(_CSV_RECENT_URL, timeout=_TIMEOUT)
            resp.raise_for_status()
        except requests.RequestException as exc:
            self._error("CSV download failed: %s", exc)
            return []

        results = self._parse_csv(resp.text)

        self._log("Fetched %d IOC(s)", len(results))
        self._mark_updated()
        return results

    @staticmethod
    def _cell(row: List[str], index: int, default: str = "") -> str:
        """Return column *index* of *row* without padding/quotes, or *default*."""
        if index >= len(row):
            return default
        return row[index].strip().strip('"')

    @staticmethod
    def _strip_port(value: str) -> str:
        """Drop a trailing ``:port`` from an IP IOC value.

        ``host:port`` and ``[v6]:port`` lose everything after the last colon.
        A bare IPv6 address (several colons, no closing bracket) is returned
        unchanged, because cutting at its last colon would truncate the
        address itself.
        """
        host, sep, _port = value.rpartition(":")
        if not sep:
            return value
        if ":" in host and not host.endswith("]"):
            return value
        return host

    @staticmethod
    def _parse_csv(text: str) -> List[Dict[str, Any]]:
        """Convert the raw CSV export text into a list of IOC records."""
        # Blank lines and '#' comment/header lines carry no data.
        data_lines = [
            line
            for line in text.splitlines()
            if line.strip() and not line.startswith("#")
        ]
        # skipinitialspace=True lets the reader recognise a quoted field that
        # is preceded by a space after the comma. Without it the opening
        # quote is treated as ordinary text, so a comma inside a quoted field
        # (e.g. a list of malware aliases) splits the row and shifts every
        # later column.
        reader = csv.reader(
            io.StringIO("\n".join(data_lines)), skipinitialspace=True
        )

        cell = ThreatFoxFeed._cell
        records: List[Dict[str, Any]] = []
        for row in reader:
            # Columns 0-5 are required; later ones fall back to defaults.
            if len(row) < 6:
                continue
            # CSV: 0:first_seen, 1:ioc_id, 2:ioc_value, 3:ioc_type,
            #      4:threat_type, 5:malware, 6:malware_alias,
            #      7:malware_printable, 8:last_seen, 9:confidence,
            #      10:reference, 11:tags, 12:reporter
            ioc_value = cell(row, 2)
            if not ioc_value:
                continue

            ioc_type_raw = cell(row, 3).lower()
            threat_type = cell(row, 4)
            malware = cell(row, 5)
            malware_printable = cell(row, 7)
            confidence = cell(row, 9, "0")

            ioc_type = _classify_ioc(ioc_type_raw, ioc_value)
            threat_name = malware_printable or malware or "Unknown"

            if ioc_type == "hash":
                # Hash IOCs use the hash-record shape.
                records.append({
                    "hash": ioc_value.lower(),
                    "threat_name": threat_name,
                    "threat_type": "MALWARE",
                    "severity": "HIGH",
                    "source": "threatfox",
                    "details": f"threat={threat_type}, confidence={confidence}",
                })
                continue

            clean_value = ioc_value
            if ioc_type == "ip":
                clean_value = ThreatFoxFeed._strip_port(ioc_value)

            records.append({
                "ioc_type": ioc_type,
                "value": clean_value,
                "threat_name": threat_name,
                "type": threat_type or "C2",
                "source": "threatfox",
                # Non-numeric confidence values are recorded as 0.
                "confidence": int(confidence) if confidence.isdigit() else 0,
            })

        return records


def _classify_ioc(raw_type: str, value: str) -> str:
    if "ip" in raw_type:
        return "ip"
    if "domain" in raw_type:
        return "domain"
    if "url" in raw_type:
        return "url"
    if "hash" in raw_type or "sha256" in raw_type or "md5" in raw_type:
        return "hash"
    if value.startswith("http://") or value.startswith("https://"):
        return "url"
    if len(value) == 64 and all(c in "0123456789abcdef" for c in value.lower()):
        return "hash"
    if ":" in value and value.replace(".", "").replace(":", "").isdigit():
        return "ip"
    return "domain"