From 76adff71d767692929df0d94ffbf67a969a296d0 Mon Sep 17 00:00:00 2001 From: DerGrumpf Date: Sun, 26 Apr 2026 16:56:13 +0000 Subject: [PATCH] Init --- README.md | 159 +++++++++++++ pyproject.toml | 21 ++ xonotic_exporter/__init__.py | 2 + xonotic_exporter/cli.py | 276 +++++++++++++++++++++++ xonotic_exporter/config.py | 179 +++++++++++++++ xonotic_exporter/geoip.py | 154 +++++++++++++ xonotic_exporter/metrics_parser.py | 297 +++++++++++++++++++++++++ xonotic_exporter/prometheus.py | 287 ++++++++++++++++++++++++ xonotic_exporter/rcon.py | 298 +++++++++++++++++++++++++ xonotic_exporter/server.py | 343 +++++++++++++++++++++++++++++ 10 files changed, 2016 insertions(+) create mode 100644 README.md create mode 100644 pyproject.toml create mode 100644 xonotic_exporter/__init__.py create mode 100644 xonotic_exporter/cli.py create mode 100644 xonotic_exporter/config.py create mode 100644 xonotic_exporter/geoip.py create mode 100644 xonotic_exporter/metrics_parser.py create mode 100644 xonotic_exporter/prometheus.py create mode 100644 xonotic_exporter/rcon.py create mode 100644 xonotic_exporter/server.py diff --git a/README.md b/README.md new file mode 100644 index 0000000..2c5de94 --- /dev/null +++ b/README.md @@ -0,0 +1,159 @@ +# xonotic_exporter + +Prometheus exporter for [Xonotic](https://xonotic.org/) game servers. +Scrapes metrics via RCON (`sv_public` + `status 1`) and exposes them +in Prometheus exposition format. + +## Features + +- **Python 3.11+** (fully compatible with 3.13 — no deprecated asyncio `loop=` args) +- **TOML configuration** — one file, all servers, one systemd unit +- **All three RCON modes** — nonsecure, secure-time, secure-challenge (MD4) +- **Blackbox-style multi-target** — one exporter, many game servers +- **CLI test interface** — human-readable or raw Prometheus output +- **Zero-downtime config reload** — `kill -HUP $PID` or `POST /-/reload` + +## Installation + +```bash +python3 -m venv /opt/xonotic_exporter/venv +/opt/xonotic_exporter/venv/bin/pip install . +``` + +## Configuration + +Copy `examples/xonotic_exporter.toml` to `/etc/xonotic_exporter/xonotic_exporter.toml` +and fill in your `rcon_password` values. 
+ +```toml +[exporter] +host = "0.0.0.0" +port = 9260 + +[[servers]] +name = "vehicles" +host = "localhost" +port = 26010 +rcon_password = "secret" +rcon_mode = 2 # 2 = secure-challenge (MD4) — recommended + +[[servers]] +name = "resurrection" +host = "localhost" +port = 26015 +rcon_password = "secret" +rcon_mode = 2 + +[[servers]] +name = "insurrection" +host = "localhost" +port = 26016 +rcon_password = "secret" +rcon_mode = 2 +``` + +**rcon_mode values** match your server's `server.cfg`: + +| Value | Name | server.cfg setting | +|-------|-------------------|--------------------| +| `0` | nonsecure | `rcon_restricted 0` | +| `1` | secure-time | `rcon_secure 1` | +| `2` | secure-challenge | `rcon_secure 2` | + +## Running + +### As a systemd service + +```bash +sudo cp examples/xonotic_exporter.service /etc/systemd/system/ +sudo systemctl daemon-reload +sudo systemctl enable --now xonotic_exporter +``` + +Reload config without restarting: + +```bash +sudo systemctl reload xonotic_exporter +# or +curl -XPOST http://localhost:9260/-/reload +``` + +### Manually + +```bash +xonotic-exporter serve /etc/xonotic_exporter/xonotic_exporter.toml +``` + +## CLI Testing + +```bash +# Human-readable output (default) +xonotic-exporter query vehicles --config xonotic_exporter.toml + +# Raw Prometheus exposition +xonotic-exporter query vehicles --config xonotic_exporter.toml --prometheus + +# Ad-hoc (no config file needed) +xonotic-exporter query --host localhost --port 26010 --password secret --mode 2 + +# Ad-hoc with verbose logging +xonotic-exporter -v query --host localhost --port 26010 --password secret + +# Validate config +xonotic-exporter validate /etc/xonotic_exporter/xonotic_exporter.toml +``` + +Human-readable output example: + +``` + RCON mode : secure-challenge (MD4) +──────────────────────────────────────────────────── + Server : vehicles + Hostname : My Xonotic Server + Map : warfare + Public : yes (sv_public=1) +──────────────────────────────────────────────────── + Ping : 2.3 ms + CPU : 1.4 % + Lost : 0.0 % + Offset : avg=0.02ms max=1.20ms sdev=0.10ms +──────────────────────────────────────────────────── + Players : 4/16 + Active : 3 + Spectators : 1 + Bots : 0 +──────────────────────────────────────────────────── +``` + +## Prometheus Configuration + +See `examples/prometheus.yml` for a ready-to-use scrape config. + +## Metrics + +| Metric | Description | +|--------|-------------| +| `xonotic_up` | 1 if server reachable | +| `xonotic_sv_public` | Value of sv_public cvar | +| `xonotic_ping_seconds` | Round-trip time to server | +| `xonotic_timing_cpu_percent` | Server CPU usage % | +| `xonotic_timing_lost_percent` | Packet loss % | +| `xonotic_timing_offset_avg_ms` | Average timing offset ms | +| `xonotic_timing_offset_max_ms` | Max timing offset ms | +| `xonotic_timing_offset_sdev_ms` | Timing offset std dev ms | +| `xonotic_players_active` | Active (scoring) players | +| `xonotic_players_spectators` | Spectators | +| `xonotic_players_bots` | Bots | +| `xonotic_players_total` | Total connected | +| `xonotic_players_max` | Max player slots | + +All metrics carry an `instance` label set to the server name from TOML. 
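+
+Because this is a blackbox-style multi-target exporter, the Prometheus scrape
+config passes each server name as the `target` query parameter and rewrites the
+scrape address to point at the exporter itself. A minimal sketch (server names
+and exporter address taken from the examples above — `examples/prometheus.yml`
+is the ready-to-use reference):
+
+```yaml
+scrape_configs:
+  - job_name: "xonotic"
+    metrics_path: /metrics
+    honor_labels: true              # keep the exporter's own `instance` label
+    static_configs:
+      - targets: ["vehicles", "resurrection", "insurrection"]
+    relabel_configs:
+      - source_labels: [__address__]
+        target_label: __param_target
+      - target_label: __address__
+        replacement: "localhost:9260"   # host:port the exporter listens on
+```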
+ +## Endpoints + +| Endpoint | Method | Description | +|----------|--------|-------------| +| `/` | GET | HTML index with all configured servers | +| `/metrics?target=` | GET | Prometheus metrics for one server | +| `/-/reload` | POST | Reload config from disk | +| `/-/healthy` | GET | Liveness probe | diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..c62d573 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,21 @@ +[build-system] +requires = ["setuptools>=68", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "xonotic_exporter" +version = "1.0.0" +description = "Prometheus exporter for Xonotic game servers" +requires-python = ">=3.11" +license = { text = "MIT" } +dependencies = [ + "xrcon>=0.1", + "prometheus-client>=0.20", +] + +[project.scripts] +xonotic-exporter = "xonotic_exporter.cli:main" + +[tool.setuptools.packages.find] +where = ["."] +include = ["xonotic_exporter*"] diff --git a/xonotic_exporter/__init__.py b/xonotic_exporter/__init__.py new file mode 100644 index 0000000..5d38578 --- /dev/null +++ b/xonotic_exporter/__init__.py @@ -0,0 +1,2 @@ +"""Xonotic Prometheus Exporter""" +__version__ = "1.0.0" diff --git a/xonotic_exporter/cli.py b/xonotic_exporter/cli.py new file mode 100644 index 0000000..2d0eba6 --- /dev/null +++ b/xonotic_exporter/cli.py @@ -0,0 +1,276 @@ +""" +CLI entry point for xonotic-exporter. + +Sub-commands +──────────── +serve Start the HTTP exporter (default when no sub-command given). +query Scrape one server and print results (human-readable or raw Prometheus). +validate Check a config file for errors and exit. + +Examples +──────── + # Start the exporter + xonotic-exporter serve /etc/xonotic_exporter/config.toml + + # Query a server defined in a config file + xonotic-exporter query vehicles --config /etc/xonotic_exporter/config.toml + + # Query a server ad-hoc (no config file needed) + xonotic-exporter query --host localhost --port 26010 \\ + --password secret --mode 2 + + # Same but print raw Prometheus exposition format + xonotic-exporter query vehicles --config config.toml --prometheus + + # Validate config only + xonotic-exporter validate /etc/xonotic_exporter/config.toml +""" + +from __future__ import annotations + +import argparse +import asyncio +import logging +import sys +from typing import Optional + +from .config import ( + ConfigError, + ExporterConfig, + ServerConfig, + RCON_MODE_NAMES, + RCON_MODE_SECURE_CHALLENGE, + load_config, +) +from .metrics_parser import XonoticMetrics +from .prometheus import build_registry +from .rcon import RconError, scrape_server +from .server import run_server + + +# ── Logging setup ────────────────────────────────────────────────────────────── + +def _setup_logging(verbose: bool) -> None: + level = logging.DEBUG if verbose else logging.INFO + logging.basicConfig( + format="%(asctime)s %(levelname)-8s %(name)s: %(message)s", + level=level, + ) + + +# ── Human-readable output ────────────────────────────────────────────────────── + +_SEP = "─" * 52 + +def _print_human(server_name: str, metrics: XonoticMetrics) -> None: + mode_label = RCON_MODE_NAMES.get(0, "unknown") # filled per-server in query + + ping_ms = metrics.ping * 1000 + + print(_SEP) + print(f" Server : {server_name}") + print(f" Hostname : {metrics.hostname}") + print(f" Map : {metrics.map_name}") + print(f" Public : {'yes' if metrics.sv_public > 0 else 'no'} (sv_public={metrics.sv_public})") + print(_SEP) + print(f" Ping : {ping_ms:.1f} ms") + print(f" CPU : {metrics.timing_cpu:.1f} %") + print(f" Lost 
: {metrics.timing_lost:.1f} %") + print(f" Offset : avg={metrics.timing_offset_avg:.2f}ms " + f"max={metrics.timing_offset_max:.2f}ms " + f"sdev={metrics.timing_offset_sdev:.2f}ms") + print(_SEP) + total = metrics.players_active + metrics.players_spectators + metrics.players_bots + print(f" Players : {total}/{metrics.players_max}") + print(f" Active : {metrics.players_active}") + print(f" Spectators : {metrics.players_spectators}") + print(f" Bots : {metrics.players_bots}") + print(_SEP) + + +def _print_prometheus(server_name: str, metrics: XonoticMetrics) -> None: + _, raw = build_registry(server_name, metrics, up=True) + sys.stdout.buffer.write(raw) + + +# ── Sub-command: serve ──────────────────────────────────────────────────────── + +def _cmd_serve(args: argparse.Namespace) -> None: + try: + cfg = load_config(args.config) + except ConfigError as exc: + print(f"Error: {exc}", file=sys.stderr) + sys.exit(1) + + _setup_logging(args.verbose) + + # CLI overrides for listen address / port + if args.host: + cfg.host = args.host + if args.port: + cfg.port = args.port + + run_server(cfg, config_path=args.config) + + +# ── Sub-command: query ──────────────────────────────────────────────────────── + +def _cmd_query(args: argparse.Namespace) -> None: + _setup_logging(args.verbose) + + # Build a ServerConfig — either from config file or ad-hoc flags + server: ServerConfig + + if args.config and args.target: + # named server from config file + try: + cfg = load_config(args.config) + except ConfigError as exc: + print(f"Config error: {exc}", file=sys.stderr) + sys.exit(1) + server = cfg.get_server(args.target) + if server is None: + print( + f"Server {args.target!r} not found in config. " + f"Available: {cfg.server_names()}", + file=sys.stderr, + ) + sys.exit(1) + + elif args.host: + # ad-hoc connection from CLI flags + name = args.target or f"{args.host}:{args.port}" + try: + server = ServerConfig( + name=name, + host=args.host, + port=args.port, + rcon_password=args.password or "", + rcon_mode=args.mode, + ) + except ConfigError as exc: + print(f"Invalid arguments: {exc}", file=sys.stderr) + sys.exit(1) + + else: + print( + "Error: provide either --config + target name, or --host (with optional flags).", + file=sys.stderr, + ) + sys.exit(1) + + # Run the scrape + try: + metrics = asyncio.run(scrape_server(server, retries=args.retries, timeout=args.timeout)) + except RconError as exc: + print(f"Scrape failed: {exc}", file=sys.stderr) + sys.exit(2) + + rcon_mode_label = RCON_MODE_NAMES.get(server.rcon_mode, str(server.rcon_mode)) + + if args.prometheus: + _print_prometheus(server.name, metrics) + else: + print(f"\n RCON mode : {rcon_mode_label}") + _print_human(server.name, metrics) + + +# ── Sub-command: validate ───────────────────────────────────────────────────── + +def _cmd_validate(args: argparse.Namespace) -> None: + _setup_logging(False) + try: + cfg = load_config(args.config) + except ConfigError as exc: + print(f"Invalid: {exc}", file=sys.stderr) + sys.exit(1) + + print(f"OK — {len(cfg.servers)} server(s) configured:") + for s in cfg.servers: + mode_label = RCON_MODE_NAMES.get(s.rcon_mode, str(s.rcon_mode)) + print(f" {s.name:<20} {s.host}:{s.port} rcon_mode={s.rcon_mode} ({mode_label})") + + +# ── Argument parser ─────────────────────────────────────────────────────────── + +def _build_parser() -> argparse.ArgumentParser: + root = argparse.ArgumentParser( + prog="xonotic-exporter", + description="Xonotic Prometheus exporter — multi-server, TOML config", + 
formatter_class=argparse.RawDescriptionHelpFormatter, + ) + root.add_argument("-v", "--verbose", action="store_true", help="Debug logging") + + sub = root.add_subparsers(dest="command", metavar="") + + # ── serve ────────────────────────────────────────────────────────────────── + p_serve = sub.add_parser("serve", help="Start the HTTP exporter") + p_serve.add_argument("config", metavar="CONFIG.toml", help="Path to TOML config file") + p_serve.add_argument("-l", "--host", default=None, help="Override listen host") + p_serve.add_argument("-p", "--port", type=int, default=None, help="Override listen port") + + # ── query ────────────────────────────────────────────────────────────────── + p_query = sub.add_parser( + "query", + help="Scrape a server and print metrics (testing)", + description=( + "Scrape one server.\n\n" + "Use a name from a config file:\n" + " xonotic-exporter query vehicles --config config.toml\n\n" + "Or specify connection details ad-hoc:\n" + " xonotic-exporter query --host localhost --port 26010 --password s3cr3t --mode 2" + ), + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + p_query.add_argument( + "target", nargs="?", metavar="SERVER_NAME", + help="Name of the server as defined in config (requires --config)" + ) + p_query.add_argument("--config", metavar="CONFIG.toml", help="TOML config file") + # ad-hoc flags + p_query.add_argument("--host", help="Server hostname or IP") + p_query.add_argument("--port", type=int, default=26000, help="Server port (default 26000)") + p_query.add_argument("--password", default="", help="RCON password") + p_query.add_argument( + "--mode", type=int, default=RCON_MODE_SECURE_CHALLENGE, + choices=list(RCON_MODE_NAMES), + help=( + "RCON mode: " + + ", ".join(f"{k}={v}" for k, v in RCON_MODE_NAMES.items()) + + f" (default {RCON_MODE_SECURE_CHALLENGE})" + ), + ) + p_query.add_argument( + "--prometheus", action="store_true", + help="Output raw Prometheus exposition format instead of human-readable", + ) + p_query.add_argument("--retries", type=int, default=3, help="Scrape retry count (default 3)") + p_query.add_argument("--timeout", type=float, default=5.0, help="Per-attempt timeout seconds (default 5)") + + # ── validate ─────────────────────────────────────────────────────────────── + p_val = sub.add_parser("validate", help="Validate a config file and exit") + p_val.add_argument("config", metavar="CONFIG.toml") + + return root + + +# ── Entry point ─────────────────────────────────────────────────────────────── + +def main(argv: Optional[list[str]] = None) -> None: + parser = _build_parser() + args = parser.parse_args(argv) + + if args.command is None: + parser.print_help() + sys.exit(0) + + dispatch = { + "serve": _cmd_serve, + "query": _cmd_query, + "validate": _cmd_validate, + } + dispatch[args.command](args) + + +if __name__ == "__main__": + main() diff --git a/xonotic_exporter/config.py b/xonotic_exporter/config.py new file mode 100644 index 0000000..d32d83f --- /dev/null +++ b/xonotic_exporter/config.py @@ -0,0 +1,179 @@ +""" +Configuration loader for xonotic_exporter. 
+ +TOML format: + [exporter] + host = "0.0.0.0" + port = 9260 + + [[servers]] + name = "vehicles" + host = "localhost" + port = 26010 + rcon_password = "" + rcon_mode = 2 # 0=nonsecure, 1=secure-time, 2=secure-challenge (MD4) +""" + +from __future__ import annotations + +import tomllib +import os +from dataclasses import dataclass, field +from typing import Optional + + +# ── RCON mode constants ──────────────────────────────────────────────────────── + +RCON_MODE_NONSECURE = 0 +RCON_MODE_SECURE_TIME = 1 +RCON_MODE_SECURE_CHALLENGE = 2 # MD4 HMAC challenge/response + +RCON_MODE_NAMES = { + RCON_MODE_NONSECURE: "nonsecure", + RCON_MODE_SECURE_TIME: "secure-time", + RCON_MODE_SECURE_CHALLENGE: "secure-challenge (MD4)", +} + +VALID_RCON_MODES = set(RCON_MODE_NAMES) + + +# ── Data classes ─────────────────────────────────────────────────────────────── + +@dataclass +class ServerConfig: + name: str + host: str + port: int + rcon_password: str + rcon_mode: int + + def __post_init__(self) -> None: + if not self.name: + raise ConfigError("server 'name' must not be empty") + if not self.host: + raise ConfigError(f"[{self.name}] 'host' must not be empty") + if not (1 <= self.port <= 65535): + raise ConfigError(f"[{self.name}] 'port' must be 1-65535, got {self.port}") + if self.rcon_mode not in VALID_RCON_MODES: + raise ConfigError( + f"[{self.name}] 'rcon_mode' must be one of {sorted(VALID_RCON_MODES)}, " + f"got {self.rcon_mode}" + ) + + +@dataclass +class ExporterConfig: + host: str = "0.0.0.0" + port: int = 9260 + servers: list[ServerConfig] = field(default_factory=list) + + # derived index for O(1) lookup by name + _index: dict[str, ServerConfig] = field(default_factory=dict, init=False, repr=False) + + def __post_init__(self) -> None: + self._build_index() + + def _build_index(self) -> None: + self._index = {s.name: s for s in self.servers} + + def get_server(self, name: str) -> Optional[ServerConfig]: + return self._index.get(name) + + def server_names(self) -> list[str]: + return list(self._index) + + +# ── Exceptions ───────────────────────────────────────────────────────────────── + +class ConfigError(ValueError): + """Raised for any configuration problem.""" + + +# ── Loader ───────────────────────────────────────────────────────────────────── + +_EXPORTER_DEFAULTS: dict = { + "host": "0.0.0.0", + "port": 9260, +} + +_SERVER_DEFAULTS: dict = { + "port": 26000, + "rcon_mode": RCON_MODE_SECURE_CHALLENGE, +} + + +def load_config(path: str) -> ExporterConfig: + """Parse and validate *path* (a TOML file). 
Returns :class:`ExporterConfig`.""" + try: + with open(path, "rb") as fh: + raw = tomllib.load(fh) + except FileNotFoundError: + raise ConfigError(f"config file not found: {path}") + except tomllib.TOMLDecodeError as exc: + raise ConfigError(f"TOML parse error: {exc}") from exc + + return _parse_raw(raw) + + +def load_config_from_string(text: str) -> ExporterConfig: + """Parse TOML from *text* (used in tests / --validate).""" + try: + raw = tomllib.loads(text) + except tomllib.TOMLDecodeError as exc: + raise ConfigError(f"TOML parse error: {exc}") from exc + return _parse_raw(raw) + + +def _parse_raw(raw: dict) -> ExporterConfig: + exp_raw = raw.get("exporter", {}) + exp_host = exp_raw.get("host", _EXPORTER_DEFAULTS["host"]) + exp_port = exp_raw.get("port", _EXPORTER_DEFAULTS["port"]) + + if not isinstance(exp_port, int) or not (1 <= exp_port <= 65535): + raise ConfigError(f"[exporter] 'port' must be 1-65535, got {exp_port!r}") + + servers_raw = raw.get("servers", []) + if not isinstance(servers_raw, list): + raise ConfigError("'servers' must be an array of tables ([[servers]])") + if not servers_raw: + raise ConfigError("at least one [[servers]] entry is required") + + servers: list[ServerConfig] = [] + seen_names: set[str] = set() + + for idx, entry in enumerate(servers_raw, start=1): + if not isinstance(entry, dict): + raise ConfigError(f"servers[{idx}] must be a table") + + name = entry.get("name", "") + if not isinstance(name, str) or not name: + raise ConfigError(f"servers[{idx}] missing or empty 'name'") + if name in seen_names: + raise ConfigError(f"duplicate server name: {name!r}") + seen_names.add(name) + + host = entry.get("host", "") + if not isinstance(host, str): + raise ConfigError(f"[{name}] 'host' must be a string") + + port = entry.get("port", _SERVER_DEFAULTS["port"]) + if not isinstance(port, int): + raise ConfigError(f"[{name}] 'port' must be an integer") + + rcon_password = entry.get("rcon_password", "") + if not isinstance(rcon_password, str): + raise ConfigError(f"[{name}] 'rcon_password' must be a string") + + rcon_mode = entry.get("rcon_mode", _SERVER_DEFAULTS["rcon_mode"]) + if not isinstance(rcon_mode, int): + raise ConfigError(f"[{name}] 'rcon_mode' must be an integer (0, 1, or 2)") + + servers.append(ServerConfig( + name=name, + host=host, + port=port, + rcon_password=rcon_password, + rcon_mode=rcon_mode, + )) + + return ExporterConfig(host=exp_host, port=exp_port, servers=servers) diff --git a/xonotic_exporter/geoip.py b/xonotic_exporter/geoip.py new file mode 100644 index 0000000..2788719 --- /dev/null +++ b/xonotic_exporter/geoip.py @@ -0,0 +1,154 @@ +""" +GeoIP resolution via ip-api.com (free, no key required). 
+ +- Batch endpoint: up to 100 IPs per POST request +- Rate limit: 15 requests/min on the free tier (= 1500 IPs/min, plenty) +- Cache TTL: 24 hours (geo data for an IP never changes meaningfully) +- Private/loopback IPs are excluded entirely +""" + +from __future__ import annotations + +import asyncio +import ipaddress +import json +import logging +import time +from typing import Optional +from urllib.request import urlopen, Request + +log = logging.getLogger(__name__) + +TTL_SECONDS = 86400 # 24 hours + +# fields we request from ip-api.com +_FIELDS = "status,query,countryCode,city,lat,lon" +_BATCH_URL = f"http://ip-api.com/batch?fields={_FIELDS}" +_BATCH_SIZE = 100 # ip-api.com hard limit + + +def _is_private(ip_str: str) -> bool: + """Return True for loopback, private, link-local, or unspecified addresses.""" + try: + addr = ipaddress.ip_address(ip_str) + return ( + addr.is_private + or addr.is_loopback + or addr.is_link_local + or addr.is_unspecified + or addr.is_reserved + ) + except ValueError: + return True # unparseable → skip + + +class GeoIPCache: + """ + Thread-safe async GeoIP cache backed by ip-api.com batch endpoint. + + Usage: + cache = GeoIPCache() + results = await cache.lookup(["1.2.3.4", "5.6.7.8"]) + """ + + def __init__(self) -> None: + # ip -> (timestamp, GeoResult) + self._cache: dict[str, tuple[float, GeoResult]] = {} + self._lock = asyncio.Lock() + + async def lookup(self, ips: list[str]) -> dict[str, "GeoResult"]: + """ + Resolve a list of IPs. Returns a dict of ip -> GeoResult. + Private IPs are excluded from the result entirely. + Cache hits never trigger a network call. + """ + public_ips = [ip for ip in ips if not _is_private(ip)] + if not public_ips: + return {} + + now = time.monotonic() + results: dict[str, GeoResult] = {} + missing: list[str] = [] + + async with self._lock: + for ip in public_ips: + entry = self._cache.get(ip) + if entry and (now - entry[0]) < TTL_SECONDS: + results[ip] = entry[1] + else: + missing.append(ip) + + if missing: + log.debug("GeoIP cache miss for %d IP(s): %s", len(missing), missing) + fetched = await _fetch_batch(missing) + async with self._lock: + for ip, geo in fetched.items(): + self._cache[ip] = (time.monotonic(), geo) + results[ip] = geo + + return results + + def cache_size(self) -> int: + return len(self._cache) + + +class GeoResult: + __slots__ = ("ip", "country", "city", "lat", "lon") + + def __init__(self, ip: str, country: str, city: str, lat: float, lon: float) -> None: + self.ip = ip + self.country = country + self.city = city + self.lat = lat + self.lon = lon + + def __repr__(self) -> str: + return f"GeoResult({self.ip} → {self.city}, {self.country} [{self.lat},{self.lon}])" + + +async def _fetch_batch(ips: list[str]) -> dict[str, GeoResult]: + """ + POST up to 100 IPs to ip-api.com/batch and return parsed results. + Splits into multiple requests if needed (shouldn't happen in practice + for a game server, but correct to handle it). 
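+
+    Request/response shape of the batch endpoint (illustrative values — this is
+    what _post_batch() below sends and parses):
+
+        POST http://ip-api.com/batch?fields=status,query,countryCode,city,lat,lon
+        body:  ["203.0.113.7"]
+        reply: [{"status": "success", "query": "203.0.113.7",
+                 "countryCode": "DE", "city": "Berlin", "lat": 52.52, "lon": 13.41}]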
+ """ + results: dict[str, GeoResult] = {} + loop = asyncio.get_running_loop() + + for chunk_start in range(0, len(ips), _BATCH_SIZE): + chunk = ips[chunk_start:chunk_start + _BATCH_SIZE] + try: + geo_map = await loop.run_in_executor(None, _post_batch, chunk) + results.update(geo_map) + except Exception as exc: + log.warning("GeoIP batch fetch failed: %s", exc) + + return results + + +def _post_batch(ips: list[str]) -> dict[str, GeoResult]: + """Synchronous HTTP POST — runs in executor thread.""" + payload = json.dumps(ips).encode() + req = Request( + _BATCH_URL, + data=payload, + headers={"Content-Type": "application/json"}, + method="POST", + ) + with urlopen(req, timeout=10) as resp: + data = json.loads(resp.read()) + + results: dict[str, GeoResult] = {} + for entry in data: + if entry.get("status") != "success": + log.debug("GeoIP failed for %s: %s", entry.get("query"), entry.get("message")) + continue + ip = entry["query"] + results[ip] = GeoResult( + ip=ip, + country=entry.get("countryCode", ""), + city=entry.get("city", ""), + lat=float(entry.get("lat", 0.0)), + lon=float(entry.get("lon", 0.0)), + ) + return results diff --git a/xonotic_exporter/metrics_parser.py b/xonotic_exporter/metrics_parser.py new file mode 100644 index 0000000..6e028a1 --- /dev/null +++ b/xonotic_exporter/metrics_parser.py @@ -0,0 +1,297 @@ +""" +Parser for the combined output of: + sv_public + status 1 + +Both commands are sent in a single RCON packet separated by \\0. +The server sends them back as two separate UDP datagrams (or sometimes one). +We feed raw bytes in chunks until the state machine reaches DONE. +""" + +from __future__ import annotations + +import re +from dataclasses import dataclass, field +from enum import Enum, auto +from typing import Optional + + +class ParseError(ValueError): + """Raised when the parser receives unexpected input.""" + + +class _State(Enum): + SV_PUBLIC = auto() + HOSTNAME = auto() + VERSION = auto() + PROTOCOL = auto() + MAP = auto() + TIMING = auto() + PLAYERS_HEADER = auto() + STATUS_HEADER = auto() + PLAYER_ROW = auto() + DONE = auto() + + +@dataclass +class PlayerRow: + ip: str + port: str + ping: int + packetloss: int + time_seconds: int + frags: int + slot: str + name: str + is_bot: bool + is_spectator: bool + + +@dataclass +class XonoticMetrics: + """All metrics scraped from one server query.""" + # server identity + hostname: str = "" + map_name: str = "" + sv_public: int = 0 + + # timing + timing_cpu: float = 0.0 + timing_lost: float = 0.0 + timing_offset_avg: float = 0.0 + timing_offset_max: float = 0.0 + timing_offset_sdev: float = 0.0 + + # players + players: list = field(default_factory=list) # list[PlayerRow] + players_count: int = 0 # from "players: N active (M max)" + players_max: int = 0 + players_active: int = 0 # scored players + players_spectators: int = 0 + players_bots: int = 0 + + # network RTT (filled in by the protocol layer) + ping: float = 0.0 + + +# ── compiled regexes ─────────────────────────────────────────────────────────── + +_COLORS_RE = re.compile(rb"\^(?:\d|x[\dA-Fa-f]{3})") +_SV_PUBLIC_RE = re.compile(rb'sv_public\S*\s+is\s+"(-?\d+)') +_HOST_RE = re.compile(rb"^host:\s+(.+)$") +_MAP_RE = re.compile(rb"^map:\s+(\S+)") +_TIMING_RE = re.compile( + rb"^timing:\s+" + rb"(?P-?[\d.]+)%\s+CPU,\s+" + rb"(?P-?[\d.]+)%\s+lost,\s+" + rb"offset\s+avg\s+(?P-?[\d.]+)\s*ms,\s+" + rb"max\s+(?P-?[\d.]+)ms,\s+" + rb"sdev\s+(?P-?[\d.]+)ms" +) +_PLAYERS_RE = re.compile( + rb"players:\s+(?P\d+)\s+active\s+\((?P\d+)\s+max\)" +) +_STATUS_HDR_RE = 
re.compile(rb"^\^?2?IP\s") + + +class XonoticMetricsParser: + """ + Feed raw UDP payload bytes via :meth:`feed` until :attr:`done` is True. + Access the result via :attr:`metrics`. + """ + + def __init__(self) -> None: + self._buf: bytes = b"" + self._state: _State = _State.SV_PUBLIC + self._expected_players: int = 0 + self._seen_players: int = 0 + self.metrics: XonoticMetrics = XonoticMetrics() + + @property + def done(self) -> bool: + return self._state is _State.DONE + + # ── public feed ──────────────────────────────────────────────────────────── + + def feed(self, data: bytes) -> None: + """Append *data* and process all complete lines.""" + if self._state is _State.DONE: + return + + self._buf += data + while not self.done: + try: + line, self._buf = self._buf.split(b"\n", 1) + except ValueError: + break # wait for more data + self._process_line(line.rstrip(b"\r")) + + # ── dispatcher ──────────────────────────────────────────────────────────── + + def _process_line(self, line: bytes) -> None: + handlers = { + _State.SV_PUBLIC: self._parse_sv_public, + _State.HOSTNAME: self._parse_hostname, + _State.VERSION: self._parse_version, + _State.PROTOCOL: self._parse_protocol, + _State.MAP: self._parse_map, + _State.TIMING: self._parse_timing, + _State.PLAYERS_HEADER: self._parse_players_header, + _State.STATUS_HEADER: self._parse_status_header, + _State.PLAYER_ROW: self._parse_player_row, + } + handler = handlers.get(self._state) + if handler: + handler(line) + + # ── state handlers ──────────────────────────────────────────────────────── + + def _parse_sv_public(self, line: bytes) -> None: + m = _SV_PUBLIC_RE.search(_COLORS_RE.sub(b"", line)) + if m: + self.metrics.sv_public = int(m.group(1)) + self._state = _State.HOSTNAME + # ignore blank lines / stray output before sv_public + + def _parse_hostname(self, line: bytes) -> None: + m = _HOST_RE.match(line) + if m: + raw = _COLORS_RE.sub(b"", m.group(1).strip()) + self.metrics.hostname = raw.decode("utf-8", "replace") + self._state = _State.VERSION + + def _parse_version(self, line: bytes) -> None: + if line.startswith(b"version:"): + self._state = _State.PROTOCOL + + def _parse_protocol(self, line: bytes) -> None: + if line.startswith(b"protocol:"): + self._state = _State.MAP + + def _parse_map(self, line: bytes) -> None: + m = _MAP_RE.match(line) + if m: + raw = _COLORS_RE.sub(b"", m.group(1)) + self.metrics.map_name = raw.decode("utf-8", "replace") + self._state = _State.TIMING + + def _parse_timing(self, line: bytes) -> None: + m = _TIMING_RE.match(line) + if m: + d = m.groupdict() + self.metrics.timing_cpu = float(d["cpu"]) + self.metrics.timing_lost = float(d["lost"]) + self.metrics.timing_offset_avg = float(d["offset_avg"]) + self.metrics.timing_offset_max = float(d["max"]) + self.metrics.timing_offset_sdev = float(d["sdev"]) + self._state = _State.PLAYERS_HEADER + + def _parse_players_header(self, line: bytes) -> None: + m = _PLAYERS_RE.search(_COLORS_RE.sub(b"", line)) + if m: + self.metrics.players_count = int(m.group("count")) + self.metrics.players_max = int(m.group("max")) + self._state = _State.STATUS_HEADER + + def _parse_status_header(self, line: bytes) -> None: + # "IP " or "^2IP " depending on Xonotic version + if _STATUS_HDR_RE.match(line): + if self.metrics.players_count > 0: + self._expected_players = self.metrics.players_count + self._seen_players = 0 + self._state = _State.PLAYER_ROW + else: + self._state = _State.DONE + + def _parse_player_row(self, line: bytes) -> None: + clean = _COLORS_RE.sub(b"", line) + fields = 
clean.split() + if len(fields) < 5: + return + + self._seen_players += 1 + ip_raw = fields[0].decode("utf-8", "replace") + + # split ip:port + if ip_raw == "botclient": + ip, port = "botclient", "0" + is_bot = True + elif ":" in ip_raw: + if ip_raw.startswith("["): + # IPv6: [2a02:...]:port + bracket_end = ip_raw.rfind("]") + ip = ip_raw[1:bracket_end] + port = ip_raw[bracket_end+2:] if bracket_end+2 < len(ip_raw) else "0" + else: + # IPv4: 1.2.3.4:port + ip, _, port = ip_raw.rpartition(":") + is_bot = False + else: + ip, port = ip_raw, "0" + is_bot = False + + # packetloss — fields[1] + try: + packetloss = int(fields[1]) + except (ValueError, IndexError): + packetloss = 0 + + # ping — fields[2] + try: + ping = int(fields[2]) + except (ValueError, IndexError): + ping = 0 + + # time — fields[3], format is H:MM:SS or M:SS + try: + time_raw = fields[3].decode("utf-8", "replace") + parts = time_raw.split(":") + if len(parts) == 3: + time_seconds = int(parts[0]) * 3600 + int(parts[1]) * 60 + int(parts[2]) + elif len(parts) == 2: + time_seconds = int(parts[0]) * 60 + int(parts[1]) + else: + time_seconds = 0 + except (ValueError, IndexError): + time_seconds = 0 + + # frags — fields[4] + try: + frags = int(fields[4]) + except (ValueError, IndexError): + frags = 0 + + is_spectator = (frags == -666) and not is_bot + + slot = fields[5].decode("utf-8", "replace") if len(fields) > 5 else "" + name_parts = fields[6:] if len(fields) > 6 else [] + name = b" ".join(name_parts).decode("utf-8", "replace") + + if is_bot: + self.metrics.players_bots += 1 + elif is_spectator: + self.metrics.players_spectators += 1 + else: + self.metrics.players_active += 1 + + self.metrics.players.append(PlayerRow( + ip=ip, + port=port, + ping=ping, + packetloss=packetloss, + time_seconds=time_seconds, + frags=frags, + slot=slot, + name=name, + is_bot=is_bot, + is_spectator=is_spectator, + )) + + if self._seen_players >= self._expected_players: + self._state = _State.DONE + + # ── helpers ─────────────────────────────────────────────────────────────── + + @staticmethod + def strip_colors(data: bytes) -> bytes: + return _COLORS_RE.sub(b"", data) diff --git a/xonotic_exporter/prometheus.py b/xonotic_exporter/prometheus.py new file mode 100644 index 0000000..e499857 --- /dev/null +++ b/xonotic_exporter/prometheus.py @@ -0,0 +1,287 @@ +""" +Prometheus metric definitions and exposition helpers. + +We use the low-level prometheus_client primitives so we can build a *per-scrape* +registry (no global state) — this is the correct approach for a multi-target +exporter where each /metrics?target= request returns metrics for one server. 
+""" + +from __future__ import annotations + +from prometheus_client import ( + CollectorRegistry, + Gauge, + generate_latest, + CONTENT_TYPE_LATEST, +) + +from .metrics_parser import XonoticMetrics + + +# ── Metric definitions (names, help strings, labels) ───────────────────────── + +_METRIC_DEFS: list[tuple[str, str]] = [ + ("xonotic_up", + "1 if the server was reachable, 0 otherwise"), + + ("xonotic_sv_public", + "Value of sv_public cvar (1 = listed on master server)"), + + ("xonotic_ping_seconds", + "Round-trip time to the server in seconds"), + + ("xonotic_timing_cpu_percent", + "Server CPU usage percentage reported by status"), + + ("xonotic_timing_lost_percent", + "Percentage of packets lost reported by status"), + + ("xonotic_timing_offset_avg_ms", + "Average timing offset in milliseconds"), + + ("xonotic_timing_offset_max_ms", + "Maximum timing offset in milliseconds"), + + ("xonotic_timing_offset_sdev_ms", + "Standard deviation of timing offset in milliseconds"), + + ("xonotic_players_active", + "Number of active (scoring) players"), + + ("xonotic_players_spectators", + "Number of spectators"), + + ("xonotic_players_bots", + "Number of bots"), + + ("xonotic_players_total", + "Total players connected (active + spectators + bots)"), + + ("xonotic_players_max", + "Maximum player slots on the server"), +] + + +def build_registry( + server_name: str, + metrics: XonoticMetrics | None, + *, + up: bool, + extra_labels: dict[str, str] | None = None, +) -> tuple[CollectorRegistry, bytes]: + """ + Build a fresh :class:`CollectorRegistry` populated with *metrics* and + return ``(registry, exposition_bytes)``. + + Parameters + ---------- + server_name: + Value of the ``instance`` label attached to every metric. + metrics: + Scraped metrics object. Pass ``None`` when the server is unreachable + (only ``xonotic_up`` will be emitted, set to 0). + up: + Whether the scrape succeeded. + extra_labels: + Additional constant labels to attach to every metric (optional). 
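+
+    Example
+    -------
+    Minimal usage sketch (server name is illustrative)::
+
+        registry, payload = build_registry("vehicles", metrics, up=True)
+        # `payload` is the exposition body served for /metrics?target=vehicles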
+ """ + registry = CollectorRegistry(auto_describe=False) + labels = {"instance": server_name, **(extra_labels or {})} + + def _gauge(name: str, helpstr: str, value: float) -> None: + g = Gauge(name, helpstr, labelnames=list(labels), registry=registry) + g.labels(**labels).set(value) + + # always emit "up" + _gauge("xonotic_up", "1 if the server was reachable, 0 otherwise", 1.0 if up else 0.0) + + if metrics is not None and up: + _gauge("xonotic_sv_public", "Value of sv_public cvar", metrics.sv_public) + _gauge("xonotic_ping_seconds", "Round-trip time in seconds", metrics.ping) + _gauge("xonotic_timing_cpu_percent", "Server CPU usage %", metrics.timing_cpu) + _gauge("xonotic_timing_lost_percent", "Packet loss %", metrics.timing_lost) + _gauge("xonotic_timing_offset_avg_ms", "Avg timing offset ms", metrics.timing_offset_avg) + _gauge("xonotic_timing_offset_max_ms", "Max timing offset ms", metrics.timing_offset_max) + _gauge("xonotic_timing_offset_sdev_ms","Timing offset sdev ms", metrics.timing_offset_sdev) + _gauge("xonotic_players_active", "Active (scoring) players", metrics.players_active) + _gauge("xonotic_players_spectators", "Spectators", metrics.players_spectators) + _gauge("xonotic_players_bots", "Bots", metrics.players_bots) + _gauge( + "xonotic_players_total", + "Total connected (active + spectators + bots)", + metrics.players_active + metrics.players_spectators + metrics.players_bots, + ) + _gauge("xonotic_players_max", "Max player slots", metrics.players_max) + + raw = generate_latest(registry) + return registry, raw + + +CONTENT_TYPE = CONTENT_TYPE_LATEST + +def _safe_label(value: str) -> str: + """Keep only printable ASCII characters (32–126).""" + r = "".join(c for c in value if 32 <= ord(c) <= 126).strip() + if r: + return r + return "Anonymous" + +def build_player_geo_registry( + server_name: str, + players: list, # list[PlayerRow] + geo_results: dict, # dict[ip, GeoResult] +) -> bytes: + """ + Emits xonotic_player_info and xonotic_player_geo metrics. + One time series per connected public-IP player. 
+ """ + registry = CollectorRegistry(auto_describe=False) + + info_gauge = Gauge( + "xonotic_player_info", + "Connected player metadata", + labelnames=["instance", "ip", "port", "slot", "name", "country", "city"], + registry=registry, + ) + geo_gauge = Gauge( + "xonotic_player_geo", + "Connected player geolocation (lat/lon as label, value always 1)", + labelnames=["instance", "ip", "name", "country", "city", "lat", "lon", "ping"], + registry=registry, + ) + ping_gauge = Gauge( + "xonotic_player_ping", + "Per-player ping to the server in ms", + labelnames=["instance", "ip", "name"], + registry=registry, + ) + pl_gauge = Gauge( + "xonotic_player_packetloss_percent", + "Per-player packet loss percentage", + labelnames=["instance", "ip", "name"], + registry=registry, + ) + + for player in players: + if player.is_bot or player.ip == "botclient": + continue + + geo = geo_results.get(player.ip) + if geo is None: + continue # private IP — excluded + + country = geo.country + city = geo.city + lat = str(round(geo.lat, 4)) + lon = str(round(geo.lon, 4)) + + info_gauge.labels( + instance=server_name, + ip=player.ip, + port=player.port, + slot=player.slot, + name=_safe_label(player.name), + country=country, + city=city + ).set(1) + + geo_gauge.labels( + instance=server_name, + ip=player.ip, + name=_safe_label(player.name), + country=country, + city=city, + lat=lat, + lon=lon, + ping=str(player.ping) + ).set(1) + + ping_gauge.labels( + instance=server_name, + ip=player.ip, + name=_safe_label(player.name), + ).set(player.ping) + + pl_gauge.labels( + instance=server_name, + ip=player.ip, + name=_safe_label(player.name), + ).set(player.packetloss) + + return generate_latest(registry) + +def build_match_registry( + server_name: str, + metrics: XonoticMetrics, + match_meta: dict, +) -> bytes: + """ + Emits match-level and per-player metrics for the match dashboard. 
+ """ + registry = CollectorRegistry(auto_describe=False) + labels = {"instance": server_name} + + def _gauge(name, helpstr, value): + g = Gauge(name, helpstr, labelnames=list(labels), registry=registry) + g.labels(**labels).set(value) + + # match metadata + _gauge("xonotic_match_timelimit_seconds", + "Match time limit in seconds", + match_meta.get("timelimit", 0) * 60) + _gauge("xonotic_match_fraglimit", + "Frag limit for current match", + match_meta.get("fraglimit", 0)) + _gauge("xonotic_match_teamplay", + "Teamplay mode (0=FFA)", + match_meta.get("teamplay", 0)) + + # map name as a label on a info metric + map_gauge = Gauge( + "xonotic_match_info", + "Match info — map name as label", + labelnames=["instance", "map"], + registry=registry, + ) + map_gauge.labels(instance=server_name, map=metrics.map_name).set(1) + + # per-player metrics + frag_gauge = Gauge( + "xonotic_match_player_frags", + "Current frag count per player", + labelnames=["instance", "name", "slot"], + registry=registry, + ) + spec_gauge = Gauge( + "xonotic_match_spectator", + "1 if player is spectating", + labelnames=["instance", "name", "slot"], + registry=registry, + ) + conn_gauge = Gauge( + "xonotic_match_player_connection", + "Player connection stats", + labelnames=["instance", "name", "slot", "ping", "packetloss", "time_seconds"], + registry=registry, + ) + + for player in metrics.players: + if player.is_bot: + continue + safe_name = _safe_label(player.name) + plabels = dict(instance=server_name, name=safe_name, slot=player.slot) + + if player.is_spectator: + spec_gauge.labels(**plabels).set(1) + else: + frag_gauge.labels(**plabels).set(player.frags) + conn_gauge.labels( + instance=server_name, + name=safe_name, + slot=player.slot, + ping=str(player.ping), + packetloss=str(player.packetloss), + time_seconds=str(player.time_seconds), + ).set(1) + + return generate_latest(registry) diff --git a/xonotic_exporter/rcon.py b/xonotic_exporter/rcon.py new file mode 100644 index 0000000..169d48b --- /dev/null +++ b/xonotic_exporter/rcon.py @@ -0,0 +1,298 @@ +""" +Async UDP protocol for querying Xonotic servers via RCON. + +Supports all three RCON modes defined in config.py: + 0 – nonsecure (plain password in packet) + 1 – secure-time (HMAC-MD4 with timestamp) + 2 – secure-challenge (HMAC-MD4 with server challenge — recommended) + +""" + +from __future__ import annotations + +import asyncio +import logging +import time +from typing import Optional + +from xrcon import utils as xutils + +from .config import ( + RCON_MODE_NONSECURE, + RCON_MODE_SECURE_TIME, + RCON_MODE_SECURE_CHALLENGE, + ServerConfig, +) +from .metrics_parser import ParseError, XonoticMetrics, XonoticMetricsParser + + +log = logging.getLogger(__name__) + +# Quake-style ping +_PING_PACKET = b"\xFF\xFF\xFF\xFFping" +_PONG_PACKET = b"\xFF\xFF\xFF\xFFack" + + +class RconError(OSError): + """Raised when all retries for an RCON command are exhausted.""" + + +# ── Protocol implementation ──────────────────────────────────────────────────── + +class _XonoticProtocol(asyncio.DatagramProtocol): + """ + Low-level asyncio UDP protocol. + + One instance per connection (create_datagram_endpoint). 
+ """ + + def __init__(self, password: str, rcon_mode: int, timeout: float = 5.0) -> None: + self.password = password + self.rcon_mode = rcon_mode + self.timeout = timeout + + self.transport: Optional[asyncio.DatagramTransport] = None + self._remote_addr: Optional[tuple] = None + + # Futures set by datagram_received + self._ping_fut: Optional[asyncio.Future] = None + self._challenge_fut: Optional[asyncio.Future] = None + + # Queue for incoming RCON response chunks + self._rcon_queue: asyncio.Queue[bytes] = asyncio.Queue(maxsize=200) + + # Locks so concurrent callers serialise + self._ping_lock = asyncio.Lock() + self._challenge_lock = asyncio.Lock() + + # ── asyncio.DatagramProtocol callbacks ──────────────────────────────────── + + def connection_made(self, transport: asyncio.DatagramTransport) -> None: # type: ignore[override] + self.transport = transport + self._remote_addr = transport.get_extra_info("peername") + log.debug("UDP endpoint ready → %s", self._remote_addr) + + def datagram_received(self, data: bytes, addr: tuple) -> None: + if addr != self._remote_addr: + return # ignore stray packets + + if data == _PONG_PACKET: + fut = self._ping_fut + if fut and not fut.done(): + fut.set_result(time.monotonic()) + + elif data.startswith(xutils.CHALLENGE_RESPONSE_HEADER): + fut = self._challenge_fut + if fut and not fut.done(): + fut.set_result(xutils.parse_challenge_response(data)) + + elif data.startswith(xutils.RCON_RESPONSE_HEADER): + chunk = xutils.parse_rcon_response(data) + try: + self._rcon_queue.put_nowait(chunk) + except asyncio.QueueFull: + log.warning("RCON receive queue full — dropping packet from %s", addr) + + def error_received(self, exc: Exception) -> None: + log.debug("UDP error from %s: %s", self._remote_addr, exc) + + def connection_lost(self, exc: Optional[Exception]) -> None: + log.debug("UDP connection closed (%s)", self._remote_addr) + + # ── Higher-level helpers ────────────────────────────────────────────────── + + async def ping(self) -> float: + """Return round-trip time in seconds.""" + async with self._ping_lock: + self._ping_fut = asyncio.get_running_loop().create_future() + try: + t0 = time.monotonic() + self.transport.sendto(_PING_PACKET) + await asyncio.wait_for(asyncio.shield(self._ping_fut), self.timeout) + return time.monotonic() - t0 + finally: + self._ping_fut = None + + async def _getchallenge(self) -> bytes: + """Request and return the server's challenge string.""" + async with self._challenge_lock: + self._challenge_fut = asyncio.get_running_loop().create_future() + try: + self.transport.sendto(xutils.CHALLENGE_PACKET) + return await asyncio.wait_for( + asyncio.shield(self._challenge_fut), self.timeout + ) + finally: + self._challenge_fut = None + + async def rcon(self, command: str) -> None: + """Send *command* via RCON using the configured mode.""" + if self.rcon_mode == RCON_MODE_NONSECURE: + pkt = xutils.rcon_nosecure_packet(self.password, command) + self.transport.sendto(pkt) + + elif self.rcon_mode == RCON_MODE_SECURE_TIME: + pkt = xutils.rcon_secure_time_packet(self.password, command) + self.transport.sendto(pkt) + + elif self.rcon_mode == RCON_MODE_SECURE_CHALLENGE: + challenge = await self._getchallenge() + pkt = xutils.rcon_secure_challenge_packet(self.password, challenge, command) + self.transport.sendto(pkt) + + else: + raise ValueError(f"Unknown rcon_mode: {self.rcon_mode}") + + async def read_rcon_response(self) -> XonoticMetrics: + """ + + Uses an adaptive wait: first chunk establishes a baseline RTT, + subsequent waits are scaled 
to allow for multi-packet responses. + """ + parser = XonoticMetricsParser() + + # first chunk — use the full timeout + t0 = time.monotonic() + chunk = await asyncio.wait_for(self._rcon_queue.get(), self.timeout) + rtt = time.monotonic() - t0 + parser.feed(chunk) + + while not parser.done: + wait = max(rtt * 2.0, 0.3) + try: + t0 = time.monotonic() + chunk = await asyncio.wait_for(self._rcon_queue.get(), wait) + rtt = rtt * 0.8 + (time.monotonic() - t0) * 0.2 + parser.feed(chunk) + except asyncio.TimeoutError: + # No more data — if the parser is not done the server may + # have sent a truncated response; accept what we have. + log.debug("Timed out waiting for more RCON data (partial parse)") + break + + return parser.metrics + + +# ── Public scrape function ──────────────────────────────────────────────────── + +async def scrape_server( + server: ServerConfig, + retries: int = 3, + timeout: float = 5.0, +) -> XonoticMetrics: + """ + Open a UDP endpoint to *server*, send the RCON query, parse and return + :class:`~metrics_parser.XonoticMetrics`. + + Retries up to *retries* times on transient failures. + Always closes the transport before returning. + """ + last_exc: Exception = RconError("no attempts made") + + for attempt in range(1, retries + 1): + transport: Optional[asyncio.DatagramTransport] = None + try: + loop = asyncio.get_running_loop() + proto_factory = lambda: _XonoticProtocol( # noqa: E731 + password=server.rcon_password, + rcon_mode=server.rcon_mode, + timeout=timeout, + ) + transport, proto = await loop.create_datagram_endpoint( + proto_factory, + remote_addr=(server.host, server.port), + ) + + # fire ping and metrics query concurrently + ping_task = asyncio.create_task(proto.ping()) + + # send combined command: sv_public + status 1 + await proto.rcon("sv_public\x00status 1") + + metrics = await proto.read_rcon_response() + + # collect ping (it may finish before or after metrics) + try: + metrics.ping = await asyncio.wait_for(ping_task, timeout) + except asyncio.TimeoutError: + metrics.ping = 0.0 + log.debug("[%s] ping timed out on attempt %d", server.name, attempt) + + log.debug("[%s] scrape OK on attempt %d", server.name, attempt) + return metrics + + except (OSError, asyncio.TimeoutError, ParseError) as exc: + last_exc = exc + log.warning( + "[%s] scrape attempt %d/%d failed: %s", + server.name, attempt, retries, exc, + ) + + finally: + if transport is not None: + transport.close() + + raise RconError( + f"[{server.name}] all {retries} scrape attempts failed" + ) from last_exc + + +async def scrape_match( + server: ServerConfig, + retries: int = 3, + timeout: float = 5.0, +) -> dict: + """ + Scrape match metadata: timelimit, fraglimit, teamplay. + Returns a plain dict with keys: timelimit, fraglimit, teamplay. 
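+
+    Illustrative result (timelimit is in minutes, as reported by the cvar):
+
+        {"timelimit": 20, "fraglimit": 30, "teamplay": 0}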
+ """ + import re + _CVAR_RE = re.compile(rb'(\w+) is "(\d+)') + + last_exc: Exception = RconError("no attempts made") + + for attempt in range(1, retries + 1): + transport = None + try: + loop = asyncio.get_running_loop() + proto_factory = lambda: _XonoticProtocol( + password=server.rcon_password, + rcon_mode=server.rcon_mode, + timeout=timeout, + ) + transport, proto = await loop.create_datagram_endpoint( + proto_factory, + remote_addr=(server.host, server.port), + ) + + await proto.rcon("timelimit\x00fraglimit\x00teamplay") + + # collect response chunks + result = {} + deadline = asyncio.get_running_loop().time() + timeout + while asyncio.get_running_loop().time() < deadline: + try: + chunk = await asyncio.wait_for( + proto._rcon_queue.get(), 1.0 + ) + clean = re.sub(rb'\^\d', b'', chunk) + for m in _CVAR_RE.finditer(clean): + key = m.group(1).decode("utf-8", "replace") + val = int(m.group(2)) + result[key] = val + if len(result) >= 3: + break + except asyncio.TimeoutError: + break + + return result + + except (OSError, asyncio.TimeoutError) as exc: + last_exc = exc + log.warning("[%s] match scrape attempt %d/%d failed: %s", + server.name, attempt, retries, exc) + finally: + if transport is not None: + transport.close() + + raise RconError(f"[{server.name}] match scrape failed") from last_exc diff --git a/xonotic_exporter/server.py b/xonotic_exporter/server.py new file mode 100644 index 0000000..b4df4b7 --- /dev/null +++ b/xonotic_exporter/server.py @@ -0,0 +1,343 @@ +""" +Minimal async HTTP server for the exporter. + +Uses only the standard library (http.server + asyncio streams) — no aiohttp +dependency. Endpoints: + + GET / → HTML index listing configured servers + GET /metrics?target= → Prometheus exposition for server + GET /geo?target= → Prometheus player geolocation metrics for server + GET /match?target= → Current Match Data + POST /-/reload → Reload config from disk (SIGHUP also works) + GET /-/healthy → 200 OK liveness probe +""" + +from __future__ import annotations + +import asyncio +import logging +import signal +import sys +from typing import Optional +from urllib.parse import parse_qs, urlparse + +from .config import ExporterConfig, load_config, ConfigError +from .geoip import GeoIPCache +from .prometheus import CONTENT_TYPE, build_registry, build_player_geo_registry, build_match_registry +from .rcon import RconError, scrape_server, scrape_match + +log = logging.getLogger(__name__) + +_INDEX_HTML = """\ + + +Xonotic Exporter + + + +

+<h1>Xonotic Prometheus Exporter</h1>
+<p>Configured servers:</p>
+<pre>
+{items}
+</pre>
+<p><a href="/-/healthy">/-/healthy</a> &nbsp; <a href="/-/reload">/-/reload</a> (POST)</p>
+ +""" + + +# ── Request / Response helpers ───────────────────────────────────────────────── + +class _Request: + def __init__(self, method: str, path: str, query: dict, body: bytes) -> None: + self.method = method + self.path = path + self.query = query + self.body = body + + def qparam(self, key: str) -> Optional[str]: + vals = self.query.get(key) + return vals[0] if vals else None + + +def _response( + writer: asyncio.StreamWriter, + status: int, + content_type: str, + body: bytes | str, +) -> None: + if isinstance(body, str): + body = body.encode() + status_text = { + 200: "OK", + 400: "Bad Request", + 404: "Not Found", + 500: "Internal Server Error", + }.get(status, "Unknown") + header = ( + f"HTTP/1.1 {status} {status_text}\r\n" + f"Content-Type: {content_type}\r\n" + f"Content-Length: {len(body)}\r\n" + f"Connection: close\r\n" + f"\r\n" + ).encode() + writer.write(header + body) + + +# ── Main server class ────────────────────────────────────────────────────────── + +class XonoticExporterServer: + + def __init__(self, config: ExporterConfig, config_path: Optional[str] = None) -> None: + self._config = config + self._config_path = config_path + self._server: Optional[asyncio.Server] = None + # shared GeoIP cache — lives for the lifetime of the process + # so cached results survive across Prometheus scrapes + self._geo_cache = GeoIPCache() + + # ── Public API ───────────────────────────────────────────────────────────── + + async def start(self) -> None: + host = self._config.host + port = self._config.port + self._server = await asyncio.start_server( + self._handle_connection, host, port + ) + log.info("Xonotic exporter listening on http://%s:%d", host, port) + + async def serve_forever(self) -> None: + if self._server is None: + await self.start() + async with self._server: + await self._server.serve_forever() + + def reload(self) -> bool: + """Reload configuration from disk. 
Returns True on success.""" + if not self._config_path: + log.warning("No config path set — cannot reload") + return False + try: + new_cfg = load_config(self._config_path) + self._config = new_cfg + log.info("Configuration reloaded from %s", self._config_path) + return True + except ConfigError as exc: + log.error("Config reload failed: %s", exc) + return False + + # ── Connection handler ───────────────────────────────────────────────────── + + async def _handle_connection( + self, + reader: asyncio.StreamReader, + writer: asyncio.StreamWriter, + ) -> None: + try: + req = await self._parse_request(reader) + if req is None: + writer.close() + return + await self._dispatch(req, writer) + await writer.drain() + except Exception as exc: + log.debug("Connection error: %s", exc) + finally: + writer.close() + try: + await writer.wait_closed() + except Exception: + pass + + async def _parse_request(self, reader: asyncio.StreamReader) -> Optional[_Request]: + try: + request_line = await asyncio.wait_for(reader.readline(), 10.0) + except asyncio.TimeoutError: + return None + + if not request_line: + return None + + parts = request_line.decode(errors="replace").strip().split() + if len(parts) < 2: + return None + + method, raw_path = parts[0].upper(), parts[1] + + # read headers + headers: dict[str, str] = {} + while True: + line = await reader.readline() + stripped = line.strip() + if not stripped: + break + if b":" in stripped: + k, _, v = stripped.partition(b":") + headers[k.strip().lower().decode()] = v.strip().decode() + + # read body if present + content_length = int(headers.get("content-length", 0)) + body = b"" + if content_length > 0: + body = await reader.read(min(content_length, 8192)) + + parsed = urlparse(raw_path) + query = parse_qs(parsed.query) + return _Request(method, parsed.path, query, body) + + # ── Router ───────────────────────────────────────────────────────────────── + + async def _dispatch(self, req: _Request, writer: asyncio.StreamWriter) -> None: + if req.path == "/" and req.method == "GET": + self._handle_index(writer) + elif req.path == "/metrics" and req.method == "GET": + await self._handle_metrics(req, writer) + elif req.path == "/geo" and req.method == "GET": + await self._handle_geo(req, writer) + elif req.path == "/-/reload" and req.method == "POST": + self._handle_reload(writer) + elif req.path == "/-/healthy" and req.method == "GET": + _response(writer, 200, "text/plain", "OK\n") + elif req.path == "/match" and req.method == "GET": + await self._handle_match(req, writer) + else: + _response(writer, 404, "text/plain", "Not found\n") + + # ── Handlers ─────────────────────────────────────────────────────────────── + + def _handle_index(self, writer: asyncio.StreamWriter) -> None: + names = self._config.server_names() + items = "\n".join( + f'
  • '
+            f'<a href="/metrics?target={n}">{n} metrics</a> &nbsp; '
+            f'<a href="/geo?target={n}">{n} geo</a>'
+            f'
  • ' + for n in names + ) + _response(writer, 200, "text/html; charset=utf-8", _INDEX_HTML.format(items=items)) + + async def _handle_metrics(self, req: _Request, writer: asyncio.StreamWriter) -> None: + target = req.qparam("target") + if not target: + _response(writer, 400, "text/plain", "'target' query parameter required\n") + return + + server = self._config.get_server(target) + if server is None: + _response(writer, 400, "text/plain", + f"Unknown target: {target!r}. Known: {self._config.server_names()}\n") + return + + try: + metrics = await scrape_server(server) + _, raw = build_registry(server.name, metrics, up=True) + except (RconError, OSError) as exc: + log.error("[%s] scrape failed: %s", target, exc) + _, raw = build_registry(server.name, None, up=False) + + _response(writer, 200, CONTENT_TYPE, raw) + + async def _handle_geo(self, req: _Request, writer: asyncio.StreamWriter) -> None: + target = req.qparam("target") + if not target: + _response(writer, 400, "text/plain", "'target' query parameter required\n") + return + + server = self._config.get_server(target) + if server is None: + _response(writer, 400, "text/plain", + f"Unknown target: {target!r}. Known: {self._config.server_names()}\n") + return + + try: + metrics = await scrape_server(server) + except (RconError, OSError) as exc: + log.error("[%s] scrape failed for geo endpoint: %s", target, exc) + _response(writer, 200, CONTENT_TYPE, b"") + return + + # collect public IPs from non-bot players only + public_ips = [ + p.ip for p in metrics.players + if not p.is_bot + ] + + # resolve via cache — only misses hit ip-api.com + geo_results = await self._geo_cache.lookup(public_ips) + log.debug( + "[%s] geo: %d players, %d public IPs, %d resolved, cache size %d", + target, + len(metrics.players), + len(public_ips), + len(geo_results), + self._geo_cache.cache_size(), + ) + + try: + raw = build_player_geo_registry(server.name, metrics.players, geo_results) + except Exception as exc: + log.error("[%s] geo registry build failed: %s", target, exc, exc_info=True) + _response(writer, 200, CONTENT_TYPE, b"") + return + _response(writer, 200, CONTENT_TYPE, raw) + + def _handle_reload(self, writer: asyncio.StreamWriter) -> None: + ok = self.reload() + if ok: + _response(writer, 200, "text/plain", "Config reloaded\n") + else: + _response(writer, 500, "text/plain", "Config reload failed — check logs\n") + + async def _handle_match(self, req: _Request, writer: asyncio.StreamWriter) -> None: + target = req.qparam("target") + if not target: + _response(writer, 400, "text/plain", "'target' query parameter required\n") + return + + server = self._config.get_server(target) + if server is None: + _response(writer, 400, "text/plain", + f"Unknown target: {target!r}\n") + return + + try: + metrics, match_meta = await asyncio.gather( + scrape_server(server), + scrape_match(server), + ) + except (RconError, OSError) as exc: + log.error("[%s] match scrape failed: %s", target, exc) + _response(writer, 200, CONTENT_TYPE, b"") + return + + try: + raw = build_match_registry(server.name, metrics, match_meta) + except Exception as exc: + log.error("[%s] match registry build failed: %s", target, exc, exc_info=True) + _response(writer, 200, CONTENT_TYPE, b"") + return + + _response(writer, 200, CONTENT_TYPE, raw) + +# ── Entrypoint used by cli.py ────────────────────────────────────────────────── + +def run_server(config: ExporterConfig, config_path: Optional[str] = None) -> None: + """Blocking call — runs until SIGINT/SIGTERM.""" + server = 
XonoticExporterServer(config, config_path) + + async def _run() -> None: + await server.start() + + loop = asyncio.get_running_loop() + + # SIGHUP → reload config (Unix only) + if hasattr(signal, "SIGHUP") and sys.platform != "win32": + loop.add_signal_handler(signal.SIGHUP, server.reload) + log.info("Send SIGHUP to reload configuration") + + await server.serve_forever() + + try: + asyncio.run(_run()) + except KeyboardInterrupt: + log.info("Interrupted — shutting down")