feat: more logging
This commit is contained in:
@@ -366,16 +366,19 @@ func (hs *HTTPServer) setPrimaryNode(w http.ResponseWriter, r *http.Request) {
|
||||
|
||||
// GET /api/cluster/members
|
||||
func (hs *HTTPServer) getClusterMembers(w http.ResponseWriter, r *http.Request) {
|
||||
log.Debug("Fetching cluster members via API")
|
||||
|
||||
result, err := hs.performWithFailover(func(client *client.SporeClient) (interface{}, error) {
|
||||
return client.GetClusterStatus()
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
log.WithError(err).Error("Error fetching cluster members")
|
||||
log.WithError(err).Debug("Failed to fetch cluster members")
|
||||
http.Error(w, fmt.Sprintf(`{"error": "Failed to fetch cluster members", "message": "%s"}`, err.Error()), http.StatusBadGateway)
|
||||
return
|
||||
}
|
||||
|
||||
log.Debug("Successfully fetched cluster members via API")
|
||||
json.NewEncoder(w).Encode(result)
|
||||
}
|
||||
|
||||
@@ -417,42 +420,52 @@ func (hs *HTTPServer) getTaskStatus(w http.ResponseWriter, r *http.Request) {
|
||||
ip := r.URL.Query().Get("ip")
|
||||
|
||||
if ip != "" {
|
||||
log.WithField("node_ip", ip).Debug("Fetching task status from specific node")
|
||||
client := hs.getSporeClient(ip)
|
||||
result, err := client.GetTaskStatus()
|
||||
if err != nil {
|
||||
log.WithError(err).Error("Error fetching task status from specific node")
|
||||
log.WithFields(log.Fields{
|
||||
"node_ip": ip,
|
||||
"error": err.Error(),
|
||||
}).Debug("Failed to fetch task status from specific node")
|
||||
http.Error(w, fmt.Sprintf(`{"error": "Failed to fetch task status from node", "message": "%s"}`, err.Error()), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
log.WithField("node_ip", ip).Debug("Successfully fetched task status from specific node")
|
||||
json.NewEncoder(w).Encode(result)
|
||||
return
|
||||
}
|
||||
|
||||
log.Debug("Fetching task status via failover")
|
||||
result, err := hs.performWithFailover(func(client *client.SporeClient) (interface{}, error) {
|
||||
return client.GetTaskStatus()
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
log.WithError(err).Error("Error fetching task status")
|
||||
log.WithError(err).Debug("Failed to fetch task status via failover")
|
||||
http.Error(w, fmt.Sprintf(`{"error": "Failed to fetch task status", "message": "%s"}`, err.Error()), http.StatusBadGateway)
|
||||
return
|
||||
}
|
||||
|
||||
log.Debug("Successfully fetched task status via failover")
|
||||
json.NewEncoder(w).Encode(result)
|
||||
}
|
||||
|
||||
// GET /api/node/status
|
||||
func (hs *HTTPServer) getNodeStatus(w http.ResponseWriter, r *http.Request) {
|
||||
log.Debug("Fetching node system status via failover")
|
||||
|
||||
result, err := hs.performWithFailover(func(client *client.SporeClient) (interface{}, error) {
|
||||
return client.GetSystemStatus()
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
log.WithError(err).Error("Error fetching system status")
|
||||
log.WithError(err).Debug("Failed to fetch system status via failover")
|
||||
http.Error(w, fmt.Sprintf(`{"error": "Failed to fetch system status", "message": "%s"}`, err.Error()), http.StatusBadGateway)
|
||||
return
|
||||
}
|
||||
|
||||
log.Debug("Successfully fetched system status via failover")
|
||||
json.NewEncoder(w).Encode(result)
|
||||
}
|
||||
|
||||
@@ -461,14 +474,20 @@ func (hs *HTTPServer) getNodeStatusByIP(w http.ResponseWriter, r *http.Request)
|
||||
vars := mux.Vars(r)
|
||||
nodeIP := vars["ip"]
|
||||
|
||||
log.WithField("node_ip", nodeIP).Debug("Fetching system status from specific node")
|
||||
|
||||
client := hs.getSporeClient(nodeIP)
|
||||
result, err := client.GetSystemStatus()
|
||||
if err != nil {
|
||||
log.WithError(err).Error("Error fetching status from specific node")
|
||||
log.WithFields(log.Fields{
|
||||
"node_ip": nodeIP,
|
||||
"error": err.Error(),
|
||||
}).Debug("Failed to fetch status from specific node")
|
||||
http.Error(w, fmt.Sprintf(`{"error": "Failed to fetch status from node %s", "message": "%s"}`, nodeIP, err.Error()), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
log.WithField("node_ip", nodeIP).Debug("Successfully fetched status from specific node")
|
||||
json.NewEncoder(w).Encode(result)
|
||||
}
|
||||
|
||||
@@ -477,27 +496,34 @@ func (hs *HTTPServer) getNodeEndpoints(w http.ResponseWriter, r *http.Request) {
|
||||
ip := r.URL.Query().Get("ip")
|
||||
|
||||
if ip != "" {
|
||||
log.WithField("node_ip", ip).Debug("Fetching endpoints from specific node")
|
||||
client := hs.getSporeClient(ip)
|
||||
result, err := client.GetCapabilities()
|
||||
if err != nil {
|
||||
log.WithError(err).Error("Error fetching endpoints from specific node")
|
||||
log.WithFields(log.Fields{
|
||||
"node_ip": ip,
|
||||
"error": err.Error(),
|
||||
}).Debug("Failed to fetch endpoints from specific node")
|
||||
http.Error(w, fmt.Sprintf(`{"error": "Failed to fetch endpoints from node", "message": "%s"}`, err.Error()), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
log.WithField("node_ip", ip).Debug("Successfully fetched endpoints from specific node")
|
||||
json.NewEncoder(w).Encode(result)
|
||||
return
|
||||
}
|
||||
|
||||
log.Debug("Fetching capabilities via failover")
|
||||
result, err := hs.performWithFailover(func(client *client.SporeClient) (interface{}, error) {
|
||||
return client.GetCapabilities()
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
log.WithError(err).Error("Error fetching capabilities")
|
||||
log.WithError(err).Debug("Failed to fetch capabilities via failover")
|
||||
http.Error(w, fmt.Sprintf(`{"error": "Failed to fetch capabilities", "message": "%s"}`, err.Error()), http.StatusBadGateway)
|
||||
return
|
||||
}
|
||||
|
||||
log.Debug("Successfully fetched capabilities via failover")
|
||||
json.NewEncoder(w).Encode(result)
|
||||
}
|
||||
|
||||
@@ -849,18 +875,21 @@ type ClusterNodeVersionsResponse struct {
|
||||
|
||||
// GET /api/cluster/node/versions
|
||||
func (hs *HTTPServer) getClusterNodeVersions(w http.ResponseWriter, r *http.Request) {
|
||||
log.Debug("Fetching cluster node versions")
|
||||
|
||||
result, err := hs.performWithFailover(func(client *client.SporeClient) (interface{}, error) {
|
||||
return client.GetClusterStatus()
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
log.WithError(err).Error("Error fetching cluster members for versions")
|
||||
log.WithError(err).Debug("Failed to fetch cluster members for versions")
|
||||
http.Error(w, fmt.Sprintf(`{"error": "Failed to fetch cluster members", "message": "%s"}`, err.Error()), http.StatusBadGateway)
|
||||
return
|
||||
}
|
||||
|
||||
clusterStatus, ok := result.(*client.ClusterStatusResponse)
|
||||
if !ok {
|
||||
log.Debug("Invalid cluster status response type")
|
||||
http.Error(w, `{"error": "Invalid cluster status response"}`, http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
@@ -880,6 +909,8 @@ func (hs *HTTPServer) getClusterNodeVersions(w http.ResponseWriter, r *http.Requ
|
||||
})
|
||||
}
|
||||
|
||||
log.WithField("node_count", len(nodeVersions)).Debug("Successfully fetched cluster node versions")
|
||||
|
||||
response := ClusterNodeVersionsResponse{
|
||||
Nodes: nodeVersions,
|
||||
}
|
||||
@@ -956,12 +987,25 @@ func (hs *HTTPServer) nodeMatchesLabels(nodeLabels, rolloutLabels map[string]str
|
||||
|
||||
// processRollout handles the actual rollout process in the background
|
||||
func (hs *HTTPServer) processRollout(rolloutID string, nodes []NodeInfo, firmwareInfo FirmwareInfo) {
|
||||
log.WithField("rollout_id", rolloutID).Info("Starting background rollout process")
|
||||
log.WithFields(log.Fields{
|
||||
"rollout_id": rolloutID,
|
||||
"firmware": fmt.Sprintf("%s/%s", firmwareInfo.Name, firmwareInfo.Version),
|
||||
"node_count": len(nodes),
|
||||
}).Debug("Starting background rollout process")
|
||||
|
||||
// Download firmware from registry
|
||||
log.WithFields(log.Fields{
|
||||
"rollout_id": rolloutID,
|
||||
"firmware": fmt.Sprintf("%s/%s", firmwareInfo.Name, firmwareInfo.Version),
|
||||
}).Debug("Downloading firmware from registry for rollout")
|
||||
|
||||
firmwareData, err := hs.registryClient.DownloadFirmware(firmwareInfo.Name, firmwareInfo.Version)
|
||||
if err != nil {
|
||||
log.WithError(err).Error("Failed to download firmware for rollout")
|
||||
log.WithFields(log.Fields{
|
||||
"rollout_id": rolloutID,
|
||||
"firmware": fmt.Sprintf("%s/%s", firmwareInfo.Name, firmwareInfo.Version),
|
||||
"error": err.Error(),
|
||||
}).Error("Failed to download firmware for rollout")
|
||||
return
|
||||
}
|
||||
|
||||
@@ -970,7 +1014,7 @@ func (hs *HTTPServer) processRollout(rolloutID string, nodes []NodeInfo, firmwar
|
||||
"firmware": fmt.Sprintf("%s/%s", firmwareInfo.Name, firmwareInfo.Version),
|
||||
"size": len(firmwareData),
|
||||
"total_nodes": len(nodes),
|
||||
}).Info("Downloaded firmware for rollout")
|
||||
}).Debug("Successfully downloaded firmware for rollout")
|
||||
|
||||
// Process nodes in parallel using goroutines
|
||||
var wg sync.WaitGroup
|
||||
@@ -984,9 +1028,14 @@ func (hs *HTTPServer) processRollout(rolloutID string, nodes []NodeInfo, firmwar
|
||||
"rollout_id": rolloutID,
|
||||
"node_ip": node.IP,
|
||||
"progress": fmt.Sprintf("%d/%d", nodeIndex+1, len(nodes)),
|
||||
}).Info("Processing node in rollout")
|
||||
}).Debug("Processing node in rollout")
|
||||
|
||||
// Update version label on the node before upload
|
||||
log.WithFields(log.Fields{
|
||||
"rollout_id": rolloutID,
|
||||
"node_ip": node.IP,
|
||||
}).Debug("Getting SPORE client for node")
|
||||
|
||||
client := hs.getSporeClient(node.IP)
|
||||
|
||||
// Create updated labels with the new version
|
||||
|
||||
@@ -429,20 +429,36 @@ func (wss *WebSocketServer) calculateProgress(current, total int, status string)
|
||||
func (wss *WebSocketServer) getCurrentClusterMembers() ([]client.ClusterMember, error) {
|
||||
nodes := wss.nodeDiscovery.GetNodes()
|
||||
if len(nodes) == 0 {
|
||||
wss.logger.Debug("No nodes available for cluster member retrieval")
|
||||
return []client.ClusterMember{}, nil
|
||||
}
|
||||
|
||||
// Try to get real cluster data from primary node
|
||||
primaryNode := wss.nodeDiscovery.GetPrimaryNode()
|
||||
if primaryNode != "" {
|
||||
wss.logger.WithFields(log.Fields{
|
||||
"primary_node": primaryNode,
|
||||
"total_nodes": len(nodes),
|
||||
}).Debug("Fetching cluster members from primary node")
|
||||
|
||||
client := wss.getSporeClient(primaryNode)
|
||||
clusterStatus, err := client.GetClusterStatus()
|
||||
if err == nil {
|
||||
wss.logger.WithFields(log.Fields{
|
||||
"primary_node": primaryNode,
|
||||
"member_count": len(clusterStatus.Members),
|
||||
}).Debug("Successfully fetched cluster members from primary node")
|
||||
|
||||
// Update local node data with API information
|
||||
wss.updateLocalNodesWithAPI(clusterStatus.Members)
|
||||
return clusterStatus.Members, nil
|
||||
}
|
||||
wss.logger.WithError(err).Error("Failed to get cluster status from primary node")
|
||||
wss.logger.WithFields(log.Fields{
|
||||
"primary_node": primaryNode,
|
||||
"error": err.Error(),
|
||||
}).Debug("Failed to get cluster status from primary node, using fallback")
|
||||
} else {
|
||||
wss.logger.Debug("No primary node available, using fallback cluster members")
|
||||
}
|
||||
|
||||
// Fallback to local data if API fails
|
||||
|
||||
Reference in New Issue
Block a user