fix: primary node failover

This commit is contained in:
2025-08-29 19:06:12 +02:00
parent d7c70cf636
commit ae061bbbc9
3 changed files with 72 additions and 18 deletions

View File

@@ -195,6 +195,49 @@ function updateSporeClient() {
} }
} }
/**
 * Perform an operation against the current primary node, failing over to
 * the other discovered nodes when an attempt fails.
 *
 * Candidates are tried in order: the current primary first (only if it is
 * still present in `discoveredNodes`), then every other discovered node,
 * most recently seen first. On the first success against a node that is not
 * the current primary, that node and its client become the new
 * `primaryNodeIp` / `sporeClient`.
 *
 * @param {function(Object, string): Promise<*>} operation - Called as
 *   `operation(client, ip)`; its resolved value is returned to the caller.
 * @returns {Promise<*>} The result of the first successful attempt.
 * @throws {Error} If no nodes are discovered; otherwise rethrows the last
 *   failure, or a generic error when the last failure value was falsy.
 */
async function performWithFailover(operation) {
    // Build candidate list: current primary first, then others by most recently seen
    const candidateIps = [];
    if (primaryNodeIp && discoveredNodes.has(primaryNodeIp)) {
        candidateIps.push(primaryNodeIp);
    }
    const others = Array.from(discoveredNodes.values())
        .filter((node) => node.ip !== primaryNodeIp)
        // A missing lastSeen sorts as "oldest" instead of making the comparator return NaN.
        .sort((a, b) => (b.lastSeen || 0) - (a.lastSeen || 0))
        .map((node) => node.ip);
    candidateIps.push(...others);

    if (candidateIps.length === 0) {
        throw new Error('No SPORE nodes discovered');
    }

    let lastError = null;
    for (const ip of candidateIps) {
        try {
            // Reuse the existing client for the primary; any other node gets
            // whatever initializeSporeClient(ip) returns.
            const client = (sporeClient && ip === primaryNodeIp)
                ? sporeClient
                : initializeSporeClient(ip);
            if (!client) {
                throw new Error(`Failed to initialize client for ${ip}`);
            }

            const result = await operation(client, ip);

            // Success on a non-primary node promotes it to primary.
            if (ip !== primaryNodeIp) {
                primaryNodeIp = ip;
                sporeClient = client;
                console.log(`Failover: switched primary node to ${ip}`);
            }
            return result;
        } catch (err) {
            // A rejection value is not guaranteed to be an Error (it may even be
            // null/undefined). Reading `.message` blindly would throw from inside
            // this handler and abort the failover loop before other nodes are tried.
            const reason = (err && err.message) ? err.message : String(err);
            console.warn(`Primary attempt on ${ip} failed: ${reason}`);
            lastError = err;
        }
    }

    throw lastError || new Error('All discovered nodes failed');
}
// Set up periodic tasks // Set up periodic tasks
setInterval(() => { setInterval(() => {
cleanupStaleNodes(); cleanupStaleNodes();
@@ -335,19 +378,19 @@ app.post('/api/discovery/primary/:ip', (req, res) => {
// API endpoint to get cluster members // API endpoint to get cluster members
app.get('/api/cluster/members', async (req, res) => { app.get('/api/cluster/members', async (req, res) => {
try { try {
if (!sporeClient) { if (discoveredNodes.size === 0) {
return res.status(503).json({ return res.status(503).json({
error: 'Service unavailable', error: 'Service unavailable',
message: 'No SPORE nodes discovered yet. Waiting for CLUSTER_DISCOVERY messages...', message: 'No SPORE nodes discovered yet. Waiting for CLUSTER_DISCOVERY messages...',
discoveredNodes: Array.from(discoveredNodes.keys()) discoveredNodes: Array.from(discoveredNodes.keys())
}); });
} }
const members = await sporeClient.getClusterStatus(); const members = await performWithFailover((client) => client.getClusterStatus());
res.json(members); res.json(members);
} catch (error) { } catch (error) {
console.error('Error fetching cluster members:', error); console.error('Error fetching cluster members:', error);
res.status(500).json({ res.status(502).json({
error: 'Failed to fetch cluster members', error: 'Failed to fetch cluster members',
message: error.message message: error.message
}); });
@@ -373,19 +416,19 @@ app.get('/api/tasks/status', async (req, res) => {
} }
} }
if (!sporeClient) { if (discoveredNodes.size === 0) {
return res.status(503).json({ return res.status(503).json({
error: 'Service unavailable', error: 'Service unavailable',
message: 'No SPORE nodes discovered yet. Waiting for CLUSTER_DISCOVERY messages...', message: 'No SPORE nodes discovered yet. Waiting for CLUSTER_DISCOVERY messages...',
discoveredNodes: Array.from(discoveredNodes.keys()) discoveredNodes: Array.from(discoveredNodes.keys())
}); });
} }
const taskStatus = await sporeClient.getTaskStatus(); const taskStatus = await performWithFailover((client) => client.getTaskStatus());
res.json(taskStatus); res.json(taskStatus);
} catch (error) { } catch (error) {
console.error('Error fetching task status:', error); console.error('Error fetching task status:', error);
res.status(500).json({ res.status(502).json({
error: 'Failed to fetch task status', error: 'Failed to fetch task status',
message: error.message message: error.message
}); });
@@ -395,7 +438,7 @@ app.get('/api/tasks/status', async (req, res) => {
// API endpoint to get system status // API endpoint to get system status
app.get('/api/node/status', async (req, res) => { app.get('/api/node/status', async (req, res) => {
try { try {
if (!sporeClient) { if (discoveredNodes.size === 0) {
return res.status(503).json({ return res.status(503).json({
error: 'Service unavailable', error: 'Service unavailable',
message: 'No SPORE nodes discovered yet. Waiting for CLUSTER_DISCOVERY messages...', message: 'No SPORE nodes discovered yet. Waiting for CLUSTER_DISCOVERY messages...',
@@ -403,11 +446,11 @@ app.get('/api/node/status', async (req, res) => {
}); });
} }
const systemStatus = await sporeClient.getSystemStatus(); const systemStatus = await performWithFailover((client) => client.getSystemStatus());
res.json(systemStatus); res.json(systemStatus);
} catch (error) { } catch (error) {
console.error('Error fetching system status:', error); console.error('Error fetching system status:', error);
res.status(500).json({ res.status(502).json({
error: 'Failed to fetch system status', error: 'Failed to fetch system status',
message: error.message message: error.message
}); });
@@ -433,7 +476,7 @@ app.get('/api/capabilities', async (req, res) => {
} }
} }
if (!sporeClient) { if (discoveredNodes.size === 0) {
return res.status(503).json({ return res.status(503).json({
error: 'Service unavailable', error: 'Service unavailable',
message: 'No SPORE nodes discovered yet. Waiting for CLUSTER_DISCOVERY messages...', message: 'No SPORE nodes discovered yet. Waiting for CLUSTER_DISCOVERY messages...',
@@ -441,11 +484,11 @@ app.get('/api/capabilities', async (req, res) => {
}); });
} }
const caps = await sporeClient.getCapabilities(); const caps = await performWithFailover((client) => client.getCapabilities());
return res.json(caps); return res.json(caps);
} catch (error) { } catch (error) {
console.error('Error fetching capabilities:', error); console.error('Error fetching capabilities:', error);
return res.status(500).json({ return res.status(502).json({
error: 'Failed to fetch capabilities', error: 'Failed to fetch capabilities',
message: error.message message: error.message
}); });

View File

@@ -18,6 +18,7 @@ class PrimaryNodeComponent extends Component {
this.subscribeToProperty('primaryNode', this.render.bind(this)); this.subscribeToProperty('primaryNode', this.render.bind(this));
this.subscribeToProperty('clientInitialized', this.render.bind(this)); this.subscribeToProperty('clientInitialized', this.render.bind(this));
this.subscribeToProperty('totalNodes', this.render.bind(this)); this.subscribeToProperty('totalNodes', this.render.bind(this));
this.subscribeToProperty('onlineNodes', this.render.bind(this));
this.subscribeToProperty('error', this.render.bind(this)); this.subscribeToProperty('error', this.render.bind(this));
} }
@@ -25,6 +26,7 @@ class PrimaryNodeComponent extends Component {
const primaryNode = this.viewModel.get('primaryNode'); const primaryNode = this.viewModel.get('primaryNode');
const clientInitialized = this.viewModel.get('clientInitialized'); const clientInitialized = this.viewModel.get('clientInitialized');
const totalNodes = this.viewModel.get('totalNodes'); const totalNodes = this.viewModel.get('totalNodes');
const onlineNodes = this.viewModel.get('onlineNodes');
const error = this.viewModel.get('error'); const error = this.viewModel.get('error');
if (error) { if (error) {
@@ -44,7 +46,9 @@ class PrimaryNodeComponent extends Component {
} }
const status = clientInitialized ? '✅' : '⚠️'; const status = clientInitialized ? '✅' : '⚠️';
const nodeCount = totalNodes > 1 ? ` (${totalNodes} nodes)` : ''; const nodeCount = (onlineNodes && onlineNodes > 0)
? ` (${onlineNodes}/${totalNodes} online)`
: (totalNodes > 1 ? ` (${totalNodes} nodes)` : '');
this.setText('#primary-node-ip', `${status} ${primaryNode}${nodeCount}`); this.setText('#primary-node-ip', `${status} ${primaryNode}${nodeCount}`);
this.setClass('#primary-node-ip', 'error', false); this.setClass('#primary-node-ip', 'error', false);

View File

@@ -13,7 +13,8 @@ class ClusterViewModel extends ViewModel {
error: null, error: null,
expandedCards: new Map(), expandedCards: new Map(),
activeTabs: new Map(), // Store active tab for each node activeTabs: new Map(), // Store active tab for each node
lastUpdateTime: null lastUpdateTime: null,
onlineNodes: 0
}); });
// Initialize cluster status after a short delay to allow components to subscribe // Initialize cluster status after a short delay to allow components to subscribe
@@ -39,10 +40,16 @@ class ClusterViewModel extends ViewModel {
const response = await window.apiClient.getClusterMembers(); const response = await window.apiClient.getClusterMembers();
console.log('ClusterViewModel: Got response:', response); console.log('ClusterViewModel: Got response:', response);
const members = response.members || [];
const onlineNodes = Array.isArray(members)
? members.filter(m => m && m.status === 'active').length
: 0;
// Use batch update to preserve UI state // Use batch update to preserve UI state
this.batchUpdate({ this.batchUpdate({
members: response.members || [], members: members,
lastUpdateTime: new Date().toISOString() lastUpdateTime: new Date().toISOString(),
onlineNodes: onlineNodes
}, { preserveUIState: true }); }, { preserveUIState: true });
// Restore expanded cards and active tabs // Restore expanded cards and active tabs