mirror of
https://github.com/exo-explore/exo.git
synced 2025-10-23 02:57:14 +03:00
initial setup of manual networking config
This commit is contained in:
0
exo/networking/manual/__init__.py
Normal file
0
exo/networking/manual/__init__.py
Normal file
74
exo/networking/manual/manual_discovery.py
Normal file
74
exo/networking/manual/manual_discovery.py
Normal file
@@ -0,0 +1,74 @@
|
||||
import asyncio
|
||||
from exo.networking.discovery import Discovery
|
||||
from typing import Dict, List, Callable
|
||||
|
||||
from exo.topology.device_capabilities import DeviceCapabilities
|
||||
from exo.networking.manual.network_topology_config import NetworkTopology, PeerConfig
|
||||
from exo.helpers import DEBUG_DISCOVERY
|
||||
from exo.networking.peer_handle import PeerHandle
|
||||
|
||||
|
||||
class ManualDiscovery(Discovery):
    """Discovery backend driven by a static network config file.

    The set of peers is read once from a JSON file via
    ``NetworkTopology.from_path``. A background task then health-checks every
    configured peer other than this node at a fixed interval and keeps
    ``known_peers`` restricted to the peers whose last check succeeded.
    """

    def __init__(
        self,
        network_config_path: str,
        node_id: str,
        create_peer_handle: Callable[[str, str, DeviceCapabilities], PeerHandle],
        discovery_timeout: int = 30,
    ):
        """Load the topology and locate this node's own entry in it.

        Args:
            network_config_path: Path of the JSON network config file.
            node_id: Key of this node inside the config's peers mapping.
            create_peer_handle: Factory called as
                ``create_peer_handle(peer_id, "address:port", device_capabilities)``.
            discovery_timeout: Seconds slept between two polling rounds.

        Raises:
            KeyError: If ``node_id`` is not a key of the configured peers.
        """
        self.topology = NetworkTopology.from_path(network_config_path)
        self.node_id = node_id
        self.create_peer_handle = create_peer_handle
        self.discovery_timeout = discovery_timeout

        try:
            self.node = self.topology.peers[node_id]
        except KeyError:
            # Iterating a dict yields its keys, so list() is all that is needed
            # to show the valid ids (unpacking each key as a pair would raise
            # ValueError here and hide the KeyError).
            print(f"Node ID {node_id} not found in network config file {network_config_path}. Please run with `node_id` set to one of the keys in the config file: {list(self.topology.peers)}")
            raise

        self.node_port = self.node.port

        self.listen_task = None
        self.cleanup_task = None

        self.known_peers: Dict[str, PeerHandle] = {}
        # NOTE: this is the same dict object as self.topology.peers, so the pop
        # below also removes this node's entry from the topology.
        self.peers_in_network: Dict[str, PeerConfig] = self.topology.peers
        self.node_config = self.peers_in_network.pop(node_id)

    async def start(self) -> None:
        """Start the background task that polls the configured peers."""
        self.listen_task = asyncio.create_task(self.task_find_peers_from_config())

    async def stop(self) -> None:
        """Cancel the polling task and wait until it has actually finished."""
        if self.listen_task:
            self.listen_task.cancel()
            # return_exceptions=True collects the CancelledError as a result
            # instead of re-raising it into the caller of stop().
            await asyncio.gather(self.listen_task, return_exceptions=True)

    async def discover_peers(self, wait_for_peers: int = 0) -> List[PeerHandle]:
        """Return the handles of all currently known peers.

        Args:
            wait_for_peers: When greater than zero, poll every 0.1 s until at
                least this many peers are known before returning.
        """
        if DEBUG_DISCOVERY >= 2: print("Starting discovery...")
        if wait_for_peers > 0:
            while len(self.known_peers) < wait_for_peers:
                if DEBUG_DISCOVERY >= 2: print(f"Current peers: {len(self.known_peers)}/{wait_for_peers}. Waiting for more peers...")
                await asyncio.sleep(0.1)
        return list(self.known_peers.values())

    async def task_find_peers_from_config(self):
        """Poll every configured peer forever, sleeping ``discovery_timeout`` seconds between rounds.

        A peer is stored in ``known_peers`` only after a successful health
        check and is removed again as soon as a check fails or raises.
        """
        if DEBUG_DISCOVERY >= 2: print("Starting task to find peers from config...")
        while True:
            for peer_id, peer_config in self.peers_in_network.items():
                if DEBUG_DISCOVERY >= 2: print(f"Checking peer {peer_id=} at {peer_config.address}:{peer_config.port}")
                try:
                    peer = self.known_peers.get(peer_id)
                    if peer is None:
                        if DEBUG_DISCOVERY >= 2: print(f"{peer_id=} not found in known peers. Adding if healthy.")
                        peer = self.create_peer_handle(peer_id, f"{peer_config.address}:{peer_config.port}", peer_config.device_capabilities)
                    is_healthy = await peer.health_check()
                except Exception as e:
                    # One failing peer must not terminate the whole polling
                    # task; treat it as unhealthy and retry next round.
                    # (asyncio.CancelledError is a BaseException and still
                    # propagates, so stop() keeps working.)
                    if DEBUG_DISCOVERY >= 2: print(f"Exception occurred while checking {peer_id=}: {e}")
                    is_healthy = False

                if is_healthy:
                    # Registered only now, so discover_peers() never hands out
                    # a peer that has not passed a health check.
                    self.known_peers[peer_id] = peer
                    if DEBUG_DISCOVERY >= 2: print(f"{peer_id=} at {peer_config.address}:{peer_config.port} is healthy.")
                else:
                    if DEBUG_DISCOVERY >= 2: print(f"{peer_id=} at {peer_config.address}:{peer_config.port} is not healthy. Removing.")
                    self.known_peers.pop(peer_id, None)

            if DEBUG_DISCOVERY >= 2: print(f"Current known peers: {[peer.id() for peer in self.known_peers.values()]}")

            await asyncio.sleep(self.discovery_timeout)
|
||||
48
exo/networking/manual/network_topology_config.py
Normal file
48
exo/networking/manual/network_topology_config.py
Normal file
@@ -0,0 +1,48 @@
|
||||
from typing import Dict
|
||||
from dataclasses import dataclass
|
||||
|
||||
import json
|
||||
|
||||
from exo.topology.device_capabilities import DeviceCapabilities
|
||||
|
||||
|
||||
@dataclass
class PeerConfig:
    """Static description of a single peer as listed in the network config file."""

    # Host part of the peer's endpoint; joined with `port` as "address:port"
    # when a peer handle is created.
    address: str
    # Port part of the peer's endpoint.
    port: int
    # Capabilities declared for the peer in the config file.
    device_capabilities: DeviceCapabilities
|
||||
|
||||
|
||||
@dataclass
class NetworkTopology:
    """Configuration of the network. A collection outlining all nodes in the network, including the node this is running from."""

    # node_id to PeerConfig. The node_id is used to identify the peer in the
    # discovery process. The node that this is running from should be included
    # in this dict.
    peers: Dict[str, "PeerConfig"]

    @classmethod
    def from_path(cls, path: str) -> "NetworkTopology":
        """Build a NetworkTopology from the JSON config file at ``path``.

        The file must contain an object with a ``"peers"`` object that maps
        each node id to an object with ``"address"``, ``"port"`` and
        ``"device_capabilities"`` entries.

        Raises:
            FileNotFoundError: If no file exists at ``path``.
            json.JSONDecodeError: If the file is not valid JSON.
            KeyError: If a required key is missing from the config.
            TypeError: If the config does not have the expected shape.
        """
        try:
            # JSON text is UTF-8; do not depend on the platform's locale encoding.
            with open(path, "r", encoding="utf-8") as f:
                config = json.load(f)
        except FileNotFoundError as e:
            raise FileNotFoundError(f"Config file not found at {path}") from e
        except json.JSONDecodeError as e:
            raise json.JSONDecodeError(f"Error decoding JSON data from {path}: {e}", e.doc, e.pos) from e

        try:
            if not isinstance(config, dict):
                raise TypeError(f"top-level value must be an object, got {type(config).__name__}")
            peers_data = config["peers"]
            # Without this check a list/string/number here would escape as a
            # bare AttributeError from `.items()`.
            if not isinstance(peers_data, dict):
                raise TypeError(f"'peers' must be an object mapping node ids to peer configs, got {type(peers_data).__name__}")

            peers = {}
            for node_id, peer_data in peers_data.items():
                device_capabilities = DeviceCapabilities(**peer_data["device_capabilities"])
                peers[node_id] = PeerConfig(address=peer_data["address"], port=peer_data["port"], device_capabilities=device_capabilities)

            networking_config = cls(peers=peers)
        except KeyError as e:
            raise KeyError(f"Missing required key in config file {path}: {e}") from e
        except TypeError as e:
            raise TypeError(f"Error parsing networking config from {path}: {e}") from e

        return networking_config
|
||||
Reference in New Issue
Block a user