fix: primary node failover
This commit is contained in:
67
index.js
67
index.js
@@ -195,6 +195,49 @@ function updateSporeClient() {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Perform an operation against the current primary node, failing over to the
 * other discovered nodes if it fails.
 *
 * Candidates are tried in order: the current primary (only if it is still in
 * `discoveredNodes`), then every other discovered node sorted by `lastSeen`
 * descending. The first candidate whose operation resolves wins; if it is not
 * the current primary, `primaryNodeIp` and `sporeClient` are switched to it.
 *
 * @param {function(object, string): Promise<*>} operation - Called with
 *   `(client, ip)` for each candidate until one call resolves.
 * @returns {Promise<*>} Whatever the first successful `operation` call resolves to.
 * @throws {Error} 'No SPORE nodes discovered' when there are no candidates.
 * @throws {TypeError} When `operation` is not a function.
 * @throws {*} The error from the last failed candidate when every candidate
 *   fails (or a generic Error if that rejection value was falsy).
 */
async function performWithFailover(operation) {
    // Build candidate list: current primary first, then others by most recently seen
    const candidateIps = [];
    if (primaryNodeIp && discoveredNodes.has(primaryNodeIp)) {
        candidateIps.push(primaryNodeIp);
    }
    const others = Array.from(discoveredNodes.values())
        .filter(n => n.ip !== primaryNodeIp)
        .sort((a, b) => b.lastSeen - a.lastSeen)
        .map(n => n.ip);
    candidateIps.push(...others);

    if (candidateIps.length === 0) {
        throw new Error('No SPORE nodes discovered');
    }

    // A non-callable operation is a caller bug, not a node failure: reject once
    // instead of "failing" on every node and logging a warning for each.
    if (typeof operation !== 'function') {
        throw new TypeError('performWithFailover requires an operation function');
    }

    let lastError = null;
    for (const ip of candidateIps) {
        try {
            // Reuse the existing client for the primary; other nodes need a fresh one.
            const client = (sporeClient && ip === primaryNodeIp)
                ? sporeClient
                : initializeSporeClient(ip);
            if (!client) {
                throw new Error(`Failed to initialize client for ${ip}`);
            }

            const result = await operation(client, ip);

            // Promote the node only after the operation has actually succeeded on it.
            if (ip !== primaryNodeIp) {
                primaryNodeIp = ip;
                sporeClient = client;
                console.log(`Failover: switched primary node to ${ip}`);
            }
            return result;
        } catch (err) {
            // Rejection values are not guaranteed to be Error objects; reading
            // `.message` off null/undefined here would abort the failover loop.
            const reason = (err && err.message) ? err.message : String(err);
            console.warn(`Primary attempt on ${ip} failed: ${reason}`);
            lastError = err;
        }
    }

    throw lastError || new Error('All discovered nodes failed');
}
|
||||
|
||||
// Set up periodic tasks
|
||||
setInterval(() => {
|
||||
cleanupStaleNodes();
|
||||
@@ -335,7 +378,7 @@ app.post('/api/discovery/primary/:ip', (req, res) => {
|
||||
// API endpoint to get cluster members
|
||||
app.get('/api/cluster/members', async (req, res) => {
|
||||
try {
|
||||
if (!sporeClient) {
|
||||
if (discoveredNodes.size === 0) {
|
||||
return res.status(503).json({
|
||||
error: 'Service unavailable',
|
||||
message: 'No SPORE nodes discovered yet. Waiting for CLUSTER_DISCOVERY messages...',
|
||||
@@ -343,11 +386,11 @@ app.get('/api/cluster/members', async (req, res) => {
|
||||
});
|
||||
}
|
||||
|
||||
const members = await sporeClient.getClusterStatus();
|
||||
const members = await performWithFailover((client) => client.getClusterStatus());
|
||||
res.json(members);
|
||||
} catch (error) {
|
||||
console.error('Error fetching cluster members:', error);
|
||||
res.status(500).json({
|
||||
res.status(502).json({
|
||||
error: 'Failed to fetch cluster members',
|
||||
message: error.message
|
||||
});
|
||||
@@ -373,7 +416,7 @@ app.get('/api/tasks/status', async (req, res) => {
|
||||
}
|
||||
}
|
||||
|
||||
if (!sporeClient) {
|
||||
if (discoveredNodes.size === 0) {
|
||||
return res.status(503).json({
|
||||
error: 'Service unavailable',
|
||||
message: 'No SPORE nodes discovered yet. Waiting for CLUSTER_DISCOVERY messages...',
|
||||
@@ -381,11 +424,11 @@ app.get('/api/tasks/status', async (req, res) => {
|
||||
});
|
||||
}
|
||||
|
||||
const taskStatus = await sporeClient.getTaskStatus();
|
||||
const taskStatus = await performWithFailover((client) => client.getTaskStatus());
|
||||
res.json(taskStatus);
|
||||
} catch (error) {
|
||||
console.error('Error fetching task status:', error);
|
||||
res.status(500).json({
|
||||
res.status(502).json({
|
||||
error: 'Failed to fetch task status',
|
||||
message: error.message
|
||||
});
|
||||
@@ -395,7 +438,7 @@ app.get('/api/tasks/status', async (req, res) => {
|
||||
// API endpoint to get system status
|
||||
app.get('/api/node/status', async (req, res) => {
|
||||
try {
|
||||
if (!sporeClient) {
|
||||
if (discoveredNodes.size === 0) {
|
||||
return res.status(503).json({
|
||||
error: 'Service unavailable',
|
||||
message: 'No SPORE nodes discovered yet. Waiting for CLUSTER_DISCOVERY messages...',
|
||||
@@ -403,11 +446,11 @@ app.get('/api/node/status', async (req, res) => {
|
||||
});
|
||||
}
|
||||
|
||||
const systemStatus = await sporeClient.getSystemStatus();
|
||||
const systemStatus = await performWithFailover((client) => client.getSystemStatus());
|
||||
res.json(systemStatus);
|
||||
} catch (error) {
|
||||
console.error('Error fetching system status:', error);
|
||||
res.status(500).json({
|
||||
res.status(502).json({
|
||||
error: 'Failed to fetch system status',
|
||||
message: error.message
|
||||
});
|
||||
@@ -433,7 +476,7 @@ app.get('/api/capabilities', async (req, res) => {
|
||||
}
|
||||
}
|
||||
|
||||
if (!sporeClient) {
|
||||
if (discoveredNodes.size === 0) {
|
||||
return res.status(503).json({
|
||||
error: 'Service unavailable',
|
||||
message: 'No SPORE nodes discovered yet. Waiting for CLUSTER_DISCOVERY messages...',
|
||||
@@ -441,11 +484,11 @@ app.get('/api/capabilities', async (req, res) => {
|
||||
});
|
||||
}
|
||||
|
||||
const caps = await sporeClient.getCapabilities();
|
||||
const caps = await performWithFailover((client) => client.getCapabilities());
|
||||
return res.json(caps);
|
||||
} catch (error) {
|
||||
console.error('Error fetching capabilities:', error);
|
||||
return res.status(500).json({
|
||||
return res.status(502).json({
|
||||
error: 'Failed to fetch capabilities',
|
||||
message: error.message
|
||||
});
|
||||
|
||||
@@ -18,6 +18,7 @@ class PrimaryNodeComponent extends Component {
|
||||
this.subscribeToProperty('primaryNode', this.render.bind(this));
|
||||
this.subscribeToProperty('clientInitialized', this.render.bind(this));
|
||||
this.subscribeToProperty('totalNodes', this.render.bind(this));
|
||||
this.subscribeToProperty('onlineNodes', this.render.bind(this));
|
||||
this.subscribeToProperty('error', this.render.bind(this));
|
||||
}
|
||||
|
||||
@@ -25,6 +26,7 @@ class PrimaryNodeComponent extends Component {
|
||||
const primaryNode = this.viewModel.get('primaryNode');
|
||||
const clientInitialized = this.viewModel.get('clientInitialized');
|
||||
const totalNodes = this.viewModel.get('totalNodes');
|
||||
const onlineNodes = this.viewModel.get('onlineNodes');
|
||||
const error = this.viewModel.get('error');
|
||||
|
||||
if (error) {
|
||||
@@ -44,7 +46,9 @@ class PrimaryNodeComponent extends Component {
|
||||
}
|
||||
|
||||
const status = clientInitialized ? '✅' : '⚠️';
|
||||
const nodeCount = totalNodes > 1 ? ` (${totalNodes} nodes)` : '';
|
||||
const nodeCount = (onlineNodes && onlineNodes > 0)
|
||||
? ` (${onlineNodes}/${totalNodes} online)`
|
||||
: (totalNodes > 1 ? ` (${totalNodes} nodes)` : '');
|
||||
|
||||
this.setText('#primary-node-ip', `${status} ${primaryNode}${nodeCount}`);
|
||||
this.setClass('#primary-node-ip', 'error', false);
|
||||
|
||||
@@ -13,7 +13,8 @@ class ClusterViewModel extends ViewModel {
|
||||
error: null,
|
||||
expandedCards: new Map(),
|
||||
activeTabs: new Map(), // Store active tab for each node
|
||||
lastUpdateTime: null
|
||||
lastUpdateTime: null,
|
||||
onlineNodes: 0
|
||||
});
|
||||
|
||||
// Initialize cluster status after a short delay to allow components to subscribe
|
||||
@@ -39,10 +40,16 @@ class ClusterViewModel extends ViewModel {
|
||||
const response = await window.apiClient.getClusterMembers();
|
||||
console.log('ClusterViewModel: Got response:', response);
|
||||
|
||||
const members = response.members || [];
|
||||
const onlineNodes = Array.isArray(members)
|
||||
? members.filter(m => m && m.status === 'active').length
|
||||
: 0;
|
||||
|
||||
// Use batch update to preserve UI state
|
||||
this.batchUpdate({
|
||||
members: response.members || [],
|
||||
lastUpdateTime: new Date().toISOString()
|
||||
members: members,
|
||||
lastUpdateTime: new Date().toISOString(),
|
||||
onlineNodes: onlineNodes
|
||||
}, { preserveUIState: true });
|
||||
|
||||
// Restore expanded cards and active tabs
|
||||
|
||||
Reference in New Issue
Block a user