rip out stats bloat

This commit is contained in:
Alex Cheema
2024-12-14 21:40:14 +00:00
parent cb4615c95d
commit 06c2e236b8
5 changed files with 0 additions and 68 deletions

View File

@@ -52,7 +52,6 @@ parser.add_argument("--models-seed-dir", type=str, default=None, help="Model see
# Networking / discovery flags.
parser.add_argument("--listen-port", type=int, default=5678, help="Listening port for discovery")
# Model-shard download behaviour.
parser.add_argument("--download-quick-check", action="store_true", help="Quick check local path for model shards download")
parser.add_argument("--max-parallel-downloads", type=int, default=4, help="Max parallel downloads for model shards download")
# Metrics are opt-in: None (the default) disables the Prometheus server entirely.
parser.add_argument("--prometheus-client-port", type=int, default=None, help="Prometheus client port")
# NOTE(review): --broadcast-port shares default 5678 with --listen-port — confirm intended.
parser.add_argument("--broadcast-port", type=int, default=5678, help="Broadcast port for discovery")
parser.add_argument("--discovery-module", type=str, choices=["udp", "tailscale", "manual"], default="udp", help="Discovery module to use")
parser.add_argument("--discovery-timeout", type=int, default=30, help="Discovery timeout in seconds")
@@ -170,10 +169,6 @@ def preemptively_start_download(request_id: str, opaque_status: str):
# When any peer reports a "start_download" status, begin fetching shards preemptively.
node.on_opaque_status.register("start_download").on_next(preemptively_start_download)

# Metrics are opt-in: only start the Prometheus exporter when a port was given.
if args.prometheus_client_port:
  # Imported lazily so prometheus_client is only required when metrics are enabled.
  from exo.stats.metrics import start_metrics_server
  start_metrics_server(node, args.prometheus_client_port)

last_broadcast_time = 0

View File

View File

@@ -1,27 +0,0 @@
# Local monitoring stack: Prometheus scrapes the exo node's metrics endpoint,
# Grafana visualises them (point Grafana's datasource at http://prometheus:9090).
version: '3.8'
services:
  prometheus:
    image: prom/prometheus:latest
    container_name: prometheus
    # Mount the scrape config into the image's default config location.
    volumes:
      - ./prometheus.yml:/etc/prometheus/prometheus.yml
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
    ports:
      - "9090:9090"
    networks:
      - monitoring
  grafana:
    image: grafana/grafana:latest
    container_name: grafana
    ports:
      - "3000:3000"
    networks:
      - monitoring
    # Ensure the datasource container is up before Grafana starts.
    depends_on:
      - prometheus
networks:
  monitoring:

View File

@@ -1,29 +0,0 @@
from exo.orchestration import Node
from prometheus_client import start_http_server, Counter, Histogram
import json

# Prometheus metrics, all labelled with the id of the node that reported the
# underlying status event, so a single exporter can aggregate a whole cluster.
# Create metrics to track time spent and requests made.
PROCESS_PROMPT_COUNTER = Counter("process_prompt_total", "Total number of prompts processed", ["node_id"])
PROCESS_TENSOR_COUNTER = Counter("process_tensor_total", "Total number of tensors processed", ["node_id"])
PROCESS_TENSOR_TIME = Histogram("process_tensor_seconds", "Time spent processing tensor", ["node_id"])
def start_metrics_server(node: Node, port: int):
  """Expose Prometheus metrics for *node* over HTTP on *port*.

  Starts prometheus_client's background HTTP server, then subscribes to the
  node's opaque-status event stream, incrementing per-node counters for
  processed prompts/tensors and recording tensor processing time.
  """
  start_http_server(port)

  def _on_opaque_status(request_id, opaque_status: str):
    # Opaque statuses are free-form strings from peers; a non-JSON payload
    # must not blow up the event callback, so ignore it explicitly.
    try:
      status_data = json.loads(opaque_status)
    except json.JSONDecodeError:
      return
    _type = status_data.get("type", "")
    if _type != "node_status":
      return
    node_id = status_data.get("node_id", "")
    status = status_data.get("status", "")
    if status == "end_process_prompt":
      PROCESS_PROMPT_COUNTER.labels(node_id=node_id).inc()
    elif status == "end_process_tensor":
      # elapsed_time_ns defaults to 0 when the peer omitted the timing field.
      elapsed_time_ns = status_data.get("elapsed_time_ns", 0)
      PROCESS_TENSOR_COUNTER.labels(node_id=node_id).inc()
      PROCESS_TENSOR_TIME.labels(node_id=node_id).observe(elapsed_time_ns/1e9)  # Convert ns to seconds

  node.on_opaque_status.register("stats").on_next(_on_opaque_status)

View File

@@ -1,7 +0,0 @@
# Prometheus scrape configuration for the exo node metrics exporter.
global:
  scrape_interval: 15s  # poll every target at this interval
scrape_configs:
  - job_name: 'exo-node'
    static_configs:
      # NOTE(review): assumes the exo node runs on the Docker host and serves
      # metrics on port 8005 (the value passed as --prometheus-client-port) —
      # confirm against the node's launch flags.
      - targets: ['host.docker.internal:8005']