feat: new cluster forming protocoll

This commit is contained in:
2025-10-19 12:51:31 +02:00
parent b6b55c0a6f
commit d166b0b634
3 changed files with 149 additions and 139 deletions

View File

@@ -49,7 +49,7 @@ The backend now includes automatic UDP discovery for SPORE nodes on the network.
### 🚀 How It Works
1. **UDP Server**: The backend listens on port 4210 for UDP messages
2. **Discovery Message**: Nodes send `CLUSTER_DISCOVERY` messages to broadcast address `255.255.255.255:4210`
2. **Heartbeat Message**: Nodes send `CLUSTER_HEARTBEAT` messages to broadcast address `255.255.255.255:4210`
3. **Auto Configuration**: When a discovery message is received, the source IP is automatically used to configure the SporeApiClient
4. **Dynamic Updates**: The system automatically switches to the most recently seen node as the primary connection
5. **Health Monitoring**: Continuous monitoring of node availability with automatic failover
@@ -57,7 +57,7 @@ The backend now includes automatic UDP discovery for SPORE nodes on the network.
### 📡 Discovery Protocol
- **Port**: 4210 (configurable via `UDP_PORT` constant)
- **Message**: `CLUSTER_DISCOVERY` (configurable via `DISCOVERY_MESSAGE` constant)
- **Message**: `CLUSTER_HEARTBEAT` (configurable via `HEARTBEAT_MESSAGE` constant)
- **Broadcast**: `255.255.255.255:4210`
- **Protocol**: UDP broadcast listening
- **Auto-binding**: Automatically binds to the specified port on startup
@@ -72,27 +72,27 @@ npm start
# The server will automatically:
# - Start HTTP server on port 3001
# - Start UDP discovery server on port 4210
# - Wait for CLUSTER_DISCOVERY messages
# - Wait for CLUSTER_HEARTBEAT messages
```
#### Node Configuration
SPORE nodes should send discovery messages periodically:
SPORE nodes should send heartbeat messages periodically:
```bash
# Recommended: Send every 30-60 seconds
# Message format: "CLUSTER_DISCOVERY"
# Message format: "CLUSTER_HEARTBEAT:hostname"
# Target: 255.255.255.255:4210
```
### 🌐 Discovery Endpoints
### 🌐 Cluster Endpoints
#### Discovery Management
- `GET /api/discovery/nodes` - View all discovered nodes and current status
- `POST /api/discovery/refresh` - Manually trigger discovery refresh
#### Cluster Management
- `GET /api/discovery/nodes` - View all cluster nodes and current status
- `POST /api/discovery/refresh` - Manually trigger cluster refresh
- `POST /api/discovery/primary/:ip` - Manually set a specific node as primary
- `POST /api/discovery/random-primary` - Randomly select a new primary node
#### Health Monitoring
- `GET /api/health` - Comprehensive health check including discovery status
- `GET /api/health` - Comprehensive health check including cluster status
### 🧪 Testing & Development

250
index.js
View File

@@ -41,12 +41,11 @@ app.use(cors({
allowedHeaders: ['Content-Type', 'Authorization']
}));
// UDP discovery configuration
// UDP heartbeat-only configuration
const UDP_PORT = 4210;
const DISCOVERY_MESSAGE = 'CLUSTER_DISCOVERY';
const STALE_THRESHOLD_SECONDS = 3; // 3 seconds for faster detection
const STALE_THRESHOLD_SECONDS = 8; // 8 seconds to accommodate 5-second heartbeat interval
// Initialize UDP server for auto discovery
// Initialize UDP server for heartbeat-based cluster management
const udpServer = dgram.createSocket('udp4');
// Store discovered nodes and their IPs
@@ -69,106 +68,15 @@ udpServer.on('message', (msg, rinfo) => {
const sourceIp = rinfo.address;
const sourcePort = rinfo.port;
// Only log non-discovery messages to reduce noise
if (message !== DISCOVERY_MESSAGE) {
console.log(`📨 UDP message received from ${sourceIp}:${sourcePort}: "${message}"`);
}
console.log(`📨 UDP message received from ${sourceIp}:${sourcePort}: "${message}"`);
if (message === DISCOVERY_MESSAGE) {
//console.log(`Received CLUSTER_DISCOVERY from ${sourceIp}:${sourcePort}`);
// Store the discovered node
const now = Math.floor(Date.now() / 1000);
const nodeInfo = {
ip: sourceIp,
port: sourcePort,
status: 'active', // New nodes from discovery are active
discoveredAt: now,
lastSeen: now
};
const isNewNode = !discoveredNodes.has(sourceIp);
discoveredNodes.set(sourceIp, nodeInfo);
// Set as primary node if this is the first one or if we don't have one
if (!primaryNodeIp) {
primaryNodeIp = sourceIp;
console.log(`Set primary node to ${sourceIp}`);
// Immediately try to initialize the client
updateSporeClient();
}
// Update last seen timestamp
discoveredNodes.get(sourceIp).lastSeen = Math.floor(Date.now() / 1000);
// Broadcast discovery event if this is a new node
if (isNewNode) {
console.log(`🆕 NEW NODE DISCOVERED: ${sourceIp}:${sourcePort} via CLUSTER_DISCOVERY. Total nodes: ${discoveredNodes.size}`);
broadcastNodeDiscovery(sourceIp, 'discovered');
// Broadcast cluster update after a short delay to allow member data to be fetched
setTimeout(() => broadcastMemberListChange('new discovery'), 1000);
}
} else if (message.startsWith('CLUSTER_HEARTBEAT:')) {
// Handle heartbeat messages that also update member list
if (message.startsWith('CLUSTER_HEARTBEAT:')) {
// Handle heartbeat messages that update member list
const hostname = message.substring('CLUSTER_HEARTBEAT:'.length);
// Update or create node entry from heartbeat
const existingNode = discoveredNodes.get(sourceIp);
const now = Math.floor(Date.now() / 1000); // Use Unix timestamp for consistency
if (existingNode) {
// Update existing node
const wasStale = existingNode.status === 'inactive';
const oldHostname = existingNode.hostname;
existingNode.lastSeen = now;
existingNode.hostname = hostname;
existingNode.status = 'active'; // Mark as active when heartbeat received
logger.debug(`💓 Heartbeat from ${sourceIp}:${sourcePort} (${hostname}). Total nodes: ${discoveredNodes.size}`);
// Check if hostname changed
const hostnameChanged = oldHostname !== hostname;
if (hostnameChanged) {
console.log(`🔄 Hostname updated for ${sourceIp}: "${oldHostname}" -> "${hostname}"`);
}
// ALWAYS broadcast every heartbeat for immediate UI updates
// This ensures the UI gets real-time updates without delay
const reason = wasStale ? 'node became active' :
hostnameChanged ? 'hostname update' :
'active heartbeat';
logger.debug(`📡 Broadcasting heartbeat update: ${reason}`);
broadcastMemberListChange(reason);
} else {
// Create new node entry from heartbeat - NEW NODE DISCOVERED
const nodeInfo = {
ip: sourceIp,
port: sourcePort,
hostname: hostname,
status: 'active', // New nodes from heartbeat are active
discoveredAt: now,
lastSeen: now
};
discoveredNodes.set(sourceIp, nodeInfo);
console.log(`🆕 NEW NODE DISCOVERED: ${sourceIp}:${sourcePort} (${hostname}) via heartbeat. Total nodes: ${discoveredNodes.size}`);
// Set as primary node if this is the first one or if we don't have one
if (!primaryNodeIp) {
primaryNodeIp = sourceIp;
console.log(`Set primary node to ${sourceIp} (from heartbeat)`);
// Immediately try to initialize the client
updateSporeClient();
}
// Broadcast discovery event for new node from heartbeat
broadcastNodeDiscovery(sourceIp, 'discovered');
// Broadcast cluster update after a short delay to allow member data to be fetched
setTimeout(() => broadcastMemberListChange('new heartbeat discovery'), 1000);
}
updateNodeFromHeartbeat(sourceIp, sourcePort, hostname);
} else if (message.startsWith('NODE_UPDATE:')) {
// Handle node update messages that provide detailed node info
handleNodeUpdate(sourceIp, message);
} else if (!message.startsWith('RAW:')) {
console.log(`Received unknown message from ${sourceIp}:${sourcePort}: "${message}"`);
}
@@ -179,12 +87,12 @@ udpServer.on('message', (msg, rinfo) => {
udpServer.on('listening', () => {
const address = udpServer.address();
console.log(`UDP discovery server listening on ${address.address}:${address.port}`);
console.log(`UDP heartbeat server listening on ${address.address}:${address.port}`);
});
// Bind UDP server to listen for discovery messages
// Bind UDP server to listen for heartbeat messages
udpServer.bind(UDP_PORT, () => {
console.log(`UDP discovery server bound to port ${UDP_PORT}`);
console.log(`UDP heartbeat server bound to port ${UDP_PORT}`);
});
// Initialize the SPORE API client with dynamic IP
@@ -358,6 +266,109 @@ async function performWithFailover(operation) {
throw lastError || new Error('All discovered nodes failed');
}
// Function to update node from heartbeat message
function updateNodeFromHeartbeat(sourceIp, sourcePort, hostname) {
const existingNode = discoveredNodes.get(sourceIp);
const now = Math.floor(Date.now() / 1000);
if (existingNode) {
// Update existing node
const wasStale = existingNode.status === 'inactive';
const oldHostname = existingNode.hostname;
existingNode.lastSeen = now;
existingNode.hostname = hostname;
existingNode.status = 'active'; // Mark as active when heartbeat received
logger.debug(`💓 Heartbeat from ${sourceIp}:${sourcePort} (${hostname}). Total nodes: ${discoveredNodes.size}`);
// Check if hostname changed
const hostnameChanged = oldHostname !== hostname;
if (hostnameChanged) {
console.log(`🔄 Hostname updated for ${sourceIp}: "${oldHostname}" -> "${hostname}"`);
}
// Broadcast heartbeat update for immediate UI updates
const reason = wasStale ? 'node became active' :
hostnameChanged ? 'hostname update' :
'active heartbeat';
logger.debug(`📡 Broadcasting heartbeat update: ${reason}`);
broadcastMemberListChange(reason);
} else {
// Create new node entry from heartbeat - NEW NODE DISCOVERED
const nodeInfo = {
ip: sourceIp,
port: sourcePort,
hostname: hostname,
status: 'active',
discoveredAt: now,
lastSeen: now
};
discoveredNodes.set(sourceIp, nodeInfo);
console.log(`🆕 NEW NODE DISCOVERED: ${sourceIp}:${sourcePort} (${hostname}) via heartbeat. Total nodes: ${discoveredNodes.size}`);
// Set as primary node if this is the first one or if we don't have one
if (!primaryNodeIp) {
primaryNodeIp = sourceIp;
console.log(`Set primary node to ${sourceIp} (from heartbeat)`);
updateSporeClient();
}
// Broadcast discovery event for new node from heartbeat
broadcastNodeDiscovery(sourceIp, 'discovered');
// Broadcast cluster update after a short delay to allow member data to be fetched
setTimeout(() => broadcastMemberListChange('new heartbeat discovery'), 1000);
}
}
// Function to handle NODE_UPDATE messages
function handleNodeUpdate(sourceIp, message) {
// Message format: "NODE_UPDATE:hostname:{json}"
const parts = message.split(':');
if (parts.length < 3) {
console.warn(`Invalid NODE_UPDATE message format: ${message}`);
return;
}
const hostname = parts[1];
const jsonData = parts.slice(2).join(':');
try {
const nodeData = JSON.parse(jsonData);
// Update the specific node with the new information
const existingNode = discoveredNodes.get(sourceIp);
if (existingNode) {
// Update hostname if provided
if (nodeData.hostname) {
existingNode.hostname = nodeData.hostname;
}
// Update uptime if provided
if (nodeData.uptime) {
existingNode.uptime = nodeData.uptime;
}
// Update labels if provided
if (nodeData.labels) {
existingNode.labels = nodeData.labels;
}
existingNode.lastSeen = Math.floor(Date.now() / 1000);
existingNode.status = 'active';
console.log(`🔄 Updated node ${sourceIp} (${hostname}) from NODE_UPDATE`);
broadcastMemberListChange('node update');
} else {
console.warn(`Received NODE_UPDATE for unknown node: ${sourceIp} (${hostname})`);
}
} catch (error) {
console.error(`Error parsing NODE_UPDATE JSON: ${error.message}`);
}
}
// Set up periodic tasks
setInterval(() => {
markStaleNodes();
@@ -374,7 +385,7 @@ app.get('/', (req, res) => {
res.sendFile(path.join(__dirname, 'public', 'index.html'));
});
// API endpoint to get discovered nodes
// API endpoint to get cluster nodes (heartbeat-based)
app.get('/api/discovery/nodes', (req, res) => {
const nodes = Array.from(discoveredNodes.values()).map(node => ({
...node,
@@ -389,15 +400,14 @@ app.get('/api/discovery/nodes', (req, res) => {
nodes: nodes,
clientInitialized: !!sporeClient,
clientBaseUrl: sporeClient ? sporeClient.baseUrl : null,
discoveryStatus: {
clusterStatus: {
udpPort: UDP_PORT,
discoveryMessage: DISCOVERY_MESSAGE,
serverRunning: udpServer.listening
}
});
});
// API endpoint to manually trigger discovery refresh
// API endpoint to manually trigger cluster refresh
app.post('/api/discovery/refresh', (req, res) => {
try {
// Mark stale nodes as inactive
@@ -411,15 +421,15 @@ app.post('/api/discovery/refresh', (req, res) => {
res.json({
success: true,
message: 'Discovery refresh completed',
message: 'Cluster refresh completed',
primaryNode: primaryNodeIp,
totalNodes: discoveredNodes.size,
clientInitialized: !!sporeClient
});
} catch (error) {
console.error('Error during discovery refresh:', error);
console.error('Error during cluster refresh:', error);
res.status(500).json({
error: 'Discovery refresh failed',
error: 'Cluster refresh failed',
message: error.message
});
}
@@ -526,7 +536,7 @@ app.get('/api/cluster/members', async (req, res) => {
if (discoveredNodes.size === 0) {
return res.status(503).json({
error: 'Service unavailable',
message: 'No SPORE nodes discovered yet. Waiting for CLUSTER_DISCOVERY messages...',
message: 'No SPORE nodes discovered yet. Waiting for CLUSTER_HEARTBEAT messages...',
discoveredNodes: Array.from(discoveredNodes.keys())
});
}
@@ -564,7 +574,7 @@ app.get('/api/tasks/status', async (req, res) => {
if (discoveredNodes.size === 0) {
return res.status(503).json({
error: 'Service unavailable',
message: 'No SPORE nodes discovered yet. Waiting for CLUSTER_DISCOVERY messages...',
message: 'No SPORE nodes discovered yet. Waiting for CLUSTER_HEARTBEAT messages...',
discoveredNodes: Array.from(discoveredNodes.keys())
});
}
@@ -624,7 +634,7 @@ app.get('/api/node/endpoints', async (req, res) => {
if (discoveredNodes.size === 0) {
return res.status(503).json({
error: 'Service unavailable',
message: 'No SPORE nodes discovered yet. Waiting for CLUSTER_DISCOVERY messages...',
message: 'No SPORE nodes discovered yet. Waiting for CLUSTER_HEARTBEAT messages...',
discoveredNodes: Array.from(discoveredNodes.keys())
});
}
@@ -867,7 +877,7 @@ app.get('/api/health', (req, res) => {
udp: udpServer.listening,
sporeClient: !!sporeClient
},
discovery: {
cluster: {
totalNodes: discoveredNodes.size,
primaryNode: primaryNodeIp,
udpPort: UDP_PORT,
@@ -1106,20 +1116,20 @@ function initializeWebSocketServer(httpServer) {
const server = app.listen(PORT, '0.0.0.0', () => {
console.log(`Server is running on http://0.0.0.0:${PORT}`);
console.log(`Accessible from: http://YOUR_COMPUTER_IP:${PORT}`);
console.log(`UDP discovery server listening on port ${UDP_PORT}`);
console.log(`UDP heartbeat server listening on port ${UDP_PORT}`);
// Initialize WebSocket server after HTTP server is running
initializeWebSocketServer(server);
console.log('WebSocket server ready for real-time updates');
console.log('Waiting for CLUSTER_DISCOVERY messages from SPORE nodes...');
console.log('Waiting for CLUSTER_HEARTBEAT and NODE_UPDATE messages from SPORE nodes...');
});
// Graceful shutdown handling
process.on('SIGINT', () => {
console.log('\nReceived SIGINT. Shutting down gracefully...');
udpServer.close(() => {
console.log('UDP server closed.');
console.log('UDP heartbeat server closed.');
});
server.close(() => {
console.log('HTTP server closed.');
@@ -1130,7 +1140,7 @@ process.on('SIGINT', () => {
process.on('SIGTERM', () => {
console.log('\nReceived SIGTERM. Shutting down gracefully...');
udpServer.close(() => {
console.log('UDP server closed.');
console.log('UDP heartbeat server closed.');
});
server.close(() => {
console.log('HTTP server closed.');

View File

@@ -23,8 +23,8 @@ async function main() {
await runExamples(client);
} else {
console.log('❌ No nodes discovered yet.');
console.log('💡 Start the backend server and send CLUSTER_DISCOVERY messages');
console.log('💡 Use: npm run test-discovery broadcast');
console.log('💡 Start the backend server and send CLUSTER_HEARTBEAT messages');
console.log('💡 Use: npm run test-heartbeat broadcast');
return;
}
} catch (error) {