fix: primary node failover
This commit is contained in:
71
index.js
71
index.js
@@ -195,6 +195,49 @@ function updateSporeClient() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Helper: perform an operation against the current primary node, failing
 * over to the other discovered nodes if needed.
 *
 * Candidates are tried in this order: the current primary first (only while
 * it is still present in `discoveredNodes`), then every other discovered node
 * sorted by most recent `lastSeen`. The first node on which `operation`
 * resolves is promoted: `primaryNodeIp` and `sporeClient` are updated to it.
 *
 * @param {function(object, string): Promise<*>} operation - Callback invoked
 *   with the client for a candidate node and that node's IP.
 * @returns {Promise<*>} The value `operation` resolved to on the first node
 *   that succeeded.
 * @throws {Error} 'No SPORE nodes discovered' when there is no candidate;
 *   otherwise the failure of the last candidate once all of them failed.
 */
async function performWithFailover(operation) {
    // Build candidate list: current primary first, then others by most recently seen
    const candidateIps = [];
    if (primaryNodeIp && discoveredNodes.has(primaryNodeIp)) {
        candidateIps.push(primaryNodeIp);
    }
    const others = Array.from(discoveredNodes.values())
        .filter((node) => node.ip !== primaryNodeIp)
        .sort((a, b) => b.lastSeen - a.lastSeen)
        .map((node) => node.ip);
    candidateIps.push(...others);

    if (candidateIps.length === 0) {
        throw new Error('No SPORE nodes discovered');
    }

    let lastError = null;
    for (const ip of candidateIps) {
        try {
            // Reuse the shared client only for the node it belongs to.
            const client = (sporeClient && ip === primaryNodeIp)
                ? sporeClient
                : initializeSporeClient(ip);
            if (!client) {
                throw new Error(`Failed to initialize client for ${ip}`);
            }

            const result = await operation(client, ip);

            if (ip !== primaryNodeIp) {
                primaryNodeIp = ip;
                sporeClient = client;
                console.log(`Failover: switched primary node to ${ip}`);
            } else if (!sporeClient) {
                // The primary answered through a freshly created client; keep
                // it so the next call does not have to initialize it again.
                sporeClient = client;
            }
            return result;
        } catch (err) {
            // A rejection reason may be any value (even undefined); reading
            // `.message` blindly would throw here and abort the failover.
            const isError = err instanceof Error;
            const reason = isError ? err.message : String(err);
            console.warn(`Primary attempt on ${ip} failed: ${reason}`);
            // Callers read `.message` on whatever is thrown, so normalize.
            lastError = isError ? err : new Error(reason);
        }
    }

    throw lastError || new Error('All discovered nodes failed');
}
|
||||||
|
|
||||||
// Set up periodic tasks
|
// Set up periodic tasks
|
||||||
setInterval(() => {
|
setInterval(() => {
|
||||||
cleanupStaleNodes();
|
cleanupStaleNodes();
|
||||||
@@ -335,19 +378,19 @@ app.post('/api/discovery/primary/:ip', (req, res) => {
|
|||||||
// API endpoint to get cluster members
|
// API endpoint to get cluster members
|
||||||
app.get('/api/cluster/members', async (req, res) => {
|
app.get('/api/cluster/members', async (req, res) => {
|
||||||
try {
|
try {
|
||||||
if (!sporeClient) {
|
if (discoveredNodes.size === 0) {
|
||||||
return res.status(503).json({
|
return res.status(503).json({
|
||||||
error: 'Service unavailable',
|
error: 'Service unavailable',
|
||||||
message: 'No SPORE nodes discovered yet. Waiting for CLUSTER_DISCOVERY messages...',
|
message: 'No SPORE nodes discovered yet. Waiting for CLUSTER_DISCOVERY messages...',
|
||||||
discoveredNodes: Array.from(discoveredNodes.keys())
|
discoveredNodes: Array.from(discoveredNodes.keys())
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
const members = await sporeClient.getClusterStatus();
|
const members = await performWithFailover((client) => client.getClusterStatus());
|
||||||
res.json(members);
|
res.json(members);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('Error fetching cluster members:', error);
|
console.error('Error fetching cluster members:', error);
|
||||||
res.status(500).json({
|
res.status(502).json({
|
||||||
error: 'Failed to fetch cluster members',
|
error: 'Failed to fetch cluster members',
|
||||||
message: error.message
|
message: error.message
|
||||||
});
|
});
|
||||||
@@ -373,19 +416,19 @@ app.get('/api/tasks/status', async (req, res) => {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!sporeClient) {
|
if (discoveredNodes.size === 0) {
|
||||||
return res.status(503).json({
|
return res.status(503).json({
|
||||||
error: 'Service unavailable',
|
error: 'Service unavailable',
|
||||||
message: 'No SPORE nodes discovered yet. Waiting for CLUSTER_DISCOVERY messages...',
|
message: 'No SPORE nodes discovered yet. Waiting for CLUSTER_DISCOVERY messages...',
|
||||||
discoveredNodes: Array.from(discoveredNodes.keys())
|
discoveredNodes: Array.from(discoveredNodes.keys())
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
const taskStatus = await sporeClient.getTaskStatus();
|
const taskStatus = await performWithFailover((client) => client.getTaskStatus());
|
||||||
res.json(taskStatus);
|
res.json(taskStatus);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('Error fetching task status:', error);
|
console.error('Error fetching task status:', error);
|
||||||
res.status(500).json({
|
res.status(502).json({
|
||||||
error: 'Failed to fetch task status',
|
error: 'Failed to fetch task status',
|
||||||
message: error.message
|
message: error.message
|
||||||
});
|
});
|
||||||
@@ -395,7 +438,7 @@ app.get('/api/tasks/status', async (req, res) => {
|
|||||||
// API endpoint to get system status
|
// API endpoint to get system status
|
||||||
app.get('/api/node/status', async (req, res) => {
|
app.get('/api/node/status', async (req, res) => {
|
||||||
try {
|
try {
|
||||||
if (!sporeClient) {
|
if (discoveredNodes.size === 0) {
|
||||||
return res.status(503).json({
|
return res.status(503).json({
|
||||||
error: 'Service unavailable',
|
error: 'Service unavailable',
|
||||||
message: 'No SPORE nodes discovered yet. Waiting for CLUSTER_DISCOVERY messages...',
|
message: 'No SPORE nodes discovered yet. Waiting for CLUSTER_DISCOVERY messages...',
|
||||||
@@ -403,11 +446,11 @@ app.get('/api/node/status', async (req, res) => {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
const systemStatus = await sporeClient.getSystemStatus();
|
const systemStatus = await performWithFailover((client) => client.getSystemStatus());
|
||||||
res.json(systemStatus);
|
res.json(systemStatus);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('Error fetching system status:', error);
|
console.error('Error fetching system status:', error);
|
||||||
res.status(500).json({
|
res.status(502).json({
|
||||||
error: 'Failed to fetch system status',
|
error: 'Failed to fetch system status',
|
||||||
message: error.message
|
message: error.message
|
||||||
});
|
});
|
||||||
@@ -433,7 +476,7 @@ app.get('/api/capabilities', async (req, res) => {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!sporeClient) {
|
if (discoveredNodes.size === 0) {
|
||||||
return res.status(503).json({
|
return res.status(503).json({
|
||||||
error: 'Service unavailable',
|
error: 'Service unavailable',
|
||||||
message: 'No SPORE nodes discovered yet. Waiting for CLUSTER_DISCOVERY messages...',
|
message: 'No SPORE nodes discovered yet. Waiting for CLUSTER_DISCOVERY messages...',
|
||||||
@@ -441,11 +484,11 @@ app.get('/api/capabilities', async (req, res) => {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
const caps = await sporeClient.getCapabilities();
|
const caps = await performWithFailover((client) => client.getCapabilities());
|
||||||
return res.json(caps);
|
return res.json(caps);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('Error fetching capabilities:', error);
|
console.error('Error fetching capabilities:', error);
|
||||||
return res.status(500).json({
|
return res.status(502).json({
|
||||||
error: 'Failed to fetch capabilities',
|
error: 'Failed to fetch capabilities',
|
||||||
message: error.message
|
message: error.message
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -18,6 +18,7 @@ class PrimaryNodeComponent extends Component {
|
|||||||
this.subscribeToProperty('primaryNode', this.render.bind(this));
|
this.subscribeToProperty('primaryNode', this.render.bind(this));
|
||||||
this.subscribeToProperty('clientInitialized', this.render.bind(this));
|
this.subscribeToProperty('clientInitialized', this.render.bind(this));
|
||||||
this.subscribeToProperty('totalNodes', this.render.bind(this));
|
this.subscribeToProperty('totalNodes', this.render.bind(this));
|
||||||
|
this.subscribeToProperty('onlineNodes', this.render.bind(this));
|
||||||
this.subscribeToProperty('error', this.render.bind(this));
|
this.subscribeToProperty('error', this.render.bind(this));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -25,6 +26,7 @@ class PrimaryNodeComponent extends Component {
|
|||||||
const primaryNode = this.viewModel.get('primaryNode');
|
const primaryNode = this.viewModel.get('primaryNode');
|
||||||
const clientInitialized = this.viewModel.get('clientInitialized');
|
const clientInitialized = this.viewModel.get('clientInitialized');
|
||||||
const totalNodes = this.viewModel.get('totalNodes');
|
const totalNodes = this.viewModel.get('totalNodes');
|
||||||
|
const onlineNodes = this.viewModel.get('onlineNodes');
|
||||||
const error = this.viewModel.get('error');
|
const error = this.viewModel.get('error');
|
||||||
|
|
||||||
if (error) {
|
if (error) {
|
||||||
@@ -44,7 +46,9 @@ class PrimaryNodeComponent extends Component {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const status = clientInitialized ? '✅' : '⚠️';
|
const status = clientInitialized ? '✅' : '⚠️';
|
||||||
const nodeCount = totalNodes > 1 ? ` (${totalNodes} nodes)` : '';
|
const nodeCount = (onlineNodes && onlineNodes > 0)
|
||||||
|
? ` (${onlineNodes}/${totalNodes} online)`
|
||||||
|
: (totalNodes > 1 ? ` (${totalNodes} nodes)` : '');
|
||||||
|
|
||||||
this.setText('#primary-node-ip', `${status} ${primaryNode}${nodeCount}`);
|
this.setText('#primary-node-ip', `${status} ${primaryNode}${nodeCount}`);
|
||||||
this.setClass('#primary-node-ip', 'error', false);
|
this.setClass('#primary-node-ip', 'error', false);
|
||||||
|
|||||||
@@ -13,7 +13,8 @@ class ClusterViewModel extends ViewModel {
|
|||||||
error: null,
|
error: null,
|
||||||
expandedCards: new Map(),
|
expandedCards: new Map(),
|
||||||
activeTabs: new Map(), // Store active tab for each node
|
activeTabs: new Map(), // Store active tab for each node
|
||||||
lastUpdateTime: null
|
lastUpdateTime: null,
|
||||||
|
onlineNodes: 0
|
||||||
});
|
});
|
||||||
|
|
||||||
// Initialize cluster status after a short delay to allow components to subscribe
|
// Initialize cluster status after a short delay to allow components to subscribe
|
||||||
@@ -39,10 +40,16 @@ class ClusterViewModel extends ViewModel {
|
|||||||
const response = await window.apiClient.getClusterMembers();
|
const response = await window.apiClient.getClusterMembers();
|
||||||
console.log('ClusterViewModel: Got response:', response);
|
console.log('ClusterViewModel: Got response:', response);
|
||||||
|
|
||||||
|
const members = response.members || [];
|
||||||
|
const onlineNodes = Array.isArray(members)
|
||||||
|
? members.filter(m => m && m.status === 'active').length
|
||||||
|
: 0;
|
||||||
|
|
||||||
// Use batch update to preserve UI state
|
// Use batch update to preserve UI state
|
||||||
this.batchUpdate({
|
this.batchUpdate({
|
||||||
members: response.members || [],
|
members: members,
|
||||||
lastUpdateTime: new Date().toISOString()
|
lastUpdateTime: new Date().toISOString(),
|
||||||
|
onlineNodes: onlineNodes
|
||||||
}, { preserveUIState: true });
|
}, { preserveUIState: true });
|
||||||
|
|
||||||
// Restore expanded cards and active tabs
|
// Restore expanded cards and active tabs
|
||||||
|
|||||||
Reference in New Issue
Block a user