fix: websocket race-condition and firmware upload
This commit is contained in:
@@ -491,7 +491,9 @@ func (hs *HTTPServer) updateNodeFirmware(w http.ResponseWriter, r *http.Request)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if nodeIP == "" {
|
if nodeIP == "" {
|
||||||
http.Error(w, `{"error": "Node IP address is required", "message": "Please provide the target node IP address"}`, http.StatusBadRequest)
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
w.WriteHeader(http.StatusBadRequest)
|
||||||
|
w.Write([]byte(`{"error": "Node IP address is required", "message": "Please provide the target node IP address"}`))
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -499,14 +501,18 @@ func (hs *HTTPServer) updateNodeFirmware(w http.ResponseWriter, r *http.Request)
|
|||||||
err := r.ParseMultipartForm(50 << 20) // 50MB limit
|
err := r.ParseMultipartForm(50 << 20) // 50MB limit
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.WithError(err).Error("Error parsing multipart form")
|
log.WithError(err).Error("Error parsing multipart form")
|
||||||
http.Error(w, `{"error": "Failed to parse form", "message": "Error parsing multipart form data"}`, http.StatusBadRequest)
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
w.WriteHeader(http.StatusBadRequest)
|
||||||
|
w.Write([]byte(`{"error": "Failed to parse form", "message": "Error parsing multipart form data"}`))
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
file, fileHeader, err := r.FormFile("file")
|
file, fileHeader, err := r.FormFile("file")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.WithError(err).Error("No file found in form")
|
log.WithError(err).Error("No file found in form")
|
||||||
http.Error(w, `{"error": "No file data received", "message": "Please select a firmware file to upload"}`, http.StatusBadRequest)
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
w.WriteHeader(http.StatusBadRequest)
|
||||||
|
w.Write([]byte(`{"error": "No file data received", "message": "Please select a firmware file to upload"}`))
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
defer file.Close()
|
defer file.Close()
|
||||||
@@ -517,61 +523,93 @@ func (hs *HTTPServer) updateNodeFirmware(w http.ResponseWriter, r *http.Request)
|
|||||||
filename = "firmware.bin"
|
filename = "firmware.bin"
|
||||||
}
|
}
|
||||||
|
|
||||||
// Read file data
|
// Read file data efficiently
|
||||||
fileData := make([]byte, 0)
|
fileData, err := io.ReadAll(file)
|
||||||
buffer := make([]byte, 1024)
|
|
||||||
for {
|
|
||||||
n, err := file.Read(buffer)
|
|
||||||
if n > 0 {
|
|
||||||
fileData = append(fileData, buffer[:n]...)
|
|
||||||
}
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if err.Error() == "EOF" {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
log.WithError(err).Error("Error reading file data")
|
log.WithError(err).Error("Error reading file data")
|
||||||
http.Error(w, `{"error": "Failed to read file", "message": "Error reading uploaded file data"}`, http.StatusInternalServerError)
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
w.WriteHeader(http.StatusInternalServerError)
|
||||||
|
w.Write([]byte(`{"error": "Failed to read file", "message": "Error reading uploaded file data"}`))
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
log.WithFields(log.Fields{
|
log.WithFields(log.Fields{
|
||||||
"node_ip": nodeIP,
|
"node_ip": nodeIP,
|
||||||
"file_size": len(fileData),
|
"file_size": len(fileData),
|
||||||
}).Info("Firmware upload received")
|
}).Info("Firmware upload received")
|
||||||
|
|
||||||
client := hs.getSporeClient(nodeIP)
|
// Send immediate acknowledgment to client
|
||||||
result, err := client.UpdateFirmware(fileData, filename)
|
|
||||||
if err != nil {
|
|
||||||
log.WithError(err).Error("Error uploading firmware")
|
|
||||||
http.Error(w, fmt.Sprintf(`{"error": "Failed to upload firmware", "message": "%s"}`, err.Error()), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check if the device reported a failure
|
|
||||||
if result.Status == "FAIL" {
|
|
||||||
log.WithField("message", result.Message).Error("Device reported firmware update failure")
|
|
||||||
http.Error(w, fmt.Sprintf(`{"success": false, "error": "Firmware update failed", "message": "%s"}`, result.Message), http.StatusBadRequest)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
response := struct {
|
response := struct {
|
||||||
Success bool `json:"success"`
|
Success bool `json:"success"`
|
||||||
Message string `json:"message"`
|
Message string `json:"message"`
|
||||||
NodeIP string `json:"nodeIp"`
|
NodeIP string `json:"nodeIp"`
|
||||||
FileSize int `json:"fileSize"`
|
FileSize int `json:"fileSize"`
|
||||||
Filename string `json:"filename"`
|
Filename string `json:"filename"`
|
||||||
Result interface{} `json:"result"`
|
Status string `json:"status"`
|
||||||
}{
|
}{
|
||||||
Success: true,
|
Success: true,
|
||||||
Message: "Firmware uploaded successfully",
|
Message: "Firmware upload received, processing...",
|
||||||
NodeIP: nodeIP,
|
NodeIP: nodeIP,
|
||||||
FileSize: len(fileData),
|
FileSize: len(fileData),
|
||||||
Filename: filename,
|
Filename: filename,
|
||||||
Result: result,
|
Status: "processing",
|
||||||
}
|
}
|
||||||
|
|
||||||
json.NewEncoder(w).Encode(response)
|
log.WithFields(log.Fields{
|
||||||
|
"node_ip": nodeIP,
|
||||||
|
"file_size": len(fileData),
|
||||||
|
"filename": filename,
|
||||||
|
}).Info("Sending immediate acknowledgment to client")
|
||||||
|
|
||||||
|
// Set response headers to ensure immediate delivery
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
w.Header().Set("Cache-Control", "no-cache")
|
||||||
|
|
||||||
|
if err := json.NewEncoder(w).Encode(response); err != nil {
|
||||||
|
log.WithError(err).Error("Failed to encode firmware upload acknowledgment")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
log.WithFields(log.Fields{
|
||||||
|
"node_ip": nodeIP,
|
||||||
|
"status": "acknowledgment_sent",
|
||||||
|
"response": response,
|
||||||
|
}).Debug("Firmware upload acknowledgment sent to client")
|
||||||
|
|
||||||
|
// Flush the response to ensure it's sent immediately
|
||||||
|
if f, ok := w.(http.Flusher); ok {
|
||||||
|
f.Flush()
|
||||||
|
log.WithField("node_ip", nodeIP).Debug("Acknowledgment flushed to client")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now process the firmware upload in the background
|
||||||
|
go func() {
|
||||||
|
client := hs.getSporeClient(nodeIP)
|
||||||
|
result, err := client.UpdateFirmware(fileData, filename)
|
||||||
|
if err != nil {
|
||||||
|
log.WithFields(log.Fields{
|
||||||
|
"node_ip": nodeIP,
|
||||||
|
"error": err.Error(),
|
||||||
|
}).Error("Error uploading firmware to device")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if the device reported a failure
|
||||||
|
if result.Status == "FAIL" {
|
||||||
|
log.WithFields(log.Fields{
|
||||||
|
"node_ip": nodeIP,
|
||||||
|
"message": result.Message,
|
||||||
|
}).Error("Device reported firmware update failure")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
log.WithFields(log.Fields{
|
||||||
|
"node_ip": nodeIP,
|
||||||
|
"file_size": len(fileData),
|
||||||
|
"filename": filename,
|
||||||
|
"result": result.Status,
|
||||||
|
}).Info("Firmware upload completed successfully")
|
||||||
|
}()
|
||||||
}
|
}
|
||||||
|
|
||||||
// POST /api/proxy-call
|
// POST /api/proxy-call
|
||||||
|
|||||||
@@ -26,6 +26,7 @@ type WebSocketServer struct {
|
|||||||
sporeClients map[string]*client.SporeClient
|
sporeClients map[string]*client.SporeClient
|
||||||
clients map[*websocket.Conn]bool
|
clients map[*websocket.Conn]bool
|
||||||
mutex sync.RWMutex
|
mutex sync.RWMutex
|
||||||
|
writeMutex sync.Mutex // Mutex to serialize writes to WebSocket connections
|
||||||
logger *log.Logger
|
logger *log.Logger
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -214,8 +215,11 @@ func (wss *WebSocketServer) broadcastClusterUpdate() {
|
|||||||
"prep_time": broadcastTime.Sub(startTime),
|
"prep_time": broadcastTime.Sub(startTime),
|
||||||
}).Debug("Broadcasting cluster update to WebSocket clients")
|
}).Debug("Broadcasting cluster update to WebSocket clients")
|
||||||
|
|
||||||
// Send to all clients
|
// Send to all clients with write synchronization
|
||||||
var failedClients int
|
var failedClients int
|
||||||
|
wss.writeMutex.Lock()
|
||||||
|
defer wss.writeMutex.Unlock()
|
||||||
|
|
||||||
for _, client := range clients {
|
for _, client := range clients {
|
||||||
client.SetWriteDeadline(time.Now().Add(5 * time.Second))
|
client.SetWriteDeadline(time.Now().Add(5 * time.Second))
|
||||||
if err := client.WriteMessage(websocket.TextMessage, data); err != nil {
|
if err := client.WriteMessage(websocket.TextMessage, data); err != nil {
|
||||||
@@ -224,7 +228,7 @@ func (wss *WebSocketServer) broadcastClusterUpdate() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
totalTime := time.Now().Sub(startTime)
|
totalTime := time.Since(startTime)
|
||||||
wss.logger.WithFields(log.Fields{
|
wss.logger.WithFields(log.Fields{
|
||||||
"clients": len(clients),
|
"clients": len(clients),
|
||||||
"failed_clients": failedClients,
|
"failed_clients": failedClients,
|
||||||
@@ -263,6 +267,10 @@ func (wss *WebSocketServer) broadcastNodeDiscovery(nodeIP, action string) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Send to all clients with write synchronization
|
||||||
|
wss.writeMutex.Lock()
|
||||||
|
defer wss.writeMutex.Unlock()
|
||||||
|
|
||||||
for _, client := range clients {
|
for _, client := range clients {
|
||||||
client.SetWriteDeadline(time.Now().Add(5 * time.Second))
|
client.SetWriteDeadline(time.Now().Add(5 * time.Second))
|
||||||
if err := client.WriteMessage(websocket.TextMessage, data); err != nil {
|
if err := client.WriteMessage(websocket.TextMessage, data); err != nil {
|
||||||
@@ -302,7 +310,7 @@ func (wss *WebSocketServer) updateLocalNodesWithAPI(apiMembers []client.ClusterM
|
|||||||
wss.logger.WithField("members", len(apiMembers)).Debug("Updating local nodes with API data")
|
wss.logger.WithField("members", len(apiMembers)).Debug("Updating local nodes with API data")
|
||||||
|
|
||||||
for _, member := range apiMembers {
|
for _, member := range apiMembers {
|
||||||
if member.Labels != nil && len(member.Labels) > 0 {
|
if len(member.Labels) > 0 {
|
||||||
wss.logger.WithFields(log.Fields{
|
wss.logger.WithFields(log.Fields{
|
||||||
"ip": member.IP,
|
"ip": member.IP,
|
||||||
"labels": member.Labels,
|
"labels": member.Labels,
|
||||||
|
|||||||
@@ -225,20 +225,49 @@ func (c *SporeClient) UpdateFirmware(firmwareData []byte, filename string) (*Fir
|
|||||||
Timeout: 5 * time.Minute, // 5 minutes for firmware uploads
|
Timeout: 5 * time.Minute, // 5 minutes for firmware uploads
|
||||||
}
|
}
|
||||||
|
|
||||||
|
log.WithFields(log.Fields{
|
||||||
|
"node_ip": c.BaseURL,
|
||||||
|
"status": "sending_firmware",
|
||||||
|
}).Debug("Sending firmware to SPORE device")
|
||||||
|
|
||||||
resp, err := firmwareClient.Do(req)
|
resp, err := firmwareClient.Do(req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
log.WithFields(log.Fields{
|
||||||
|
"node_ip": c.BaseURL,
|
||||||
|
"error": err.Error(),
|
||||||
|
}).Error("Failed to send firmware request to SPORE device")
|
||||||
return nil, fmt.Errorf("failed to upload firmware: %w", err)
|
return nil, fmt.Errorf("failed to upload firmware: %w", err)
|
||||||
}
|
}
|
||||||
defer resp.Body.Close()
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
log.WithFields(log.Fields{
|
||||||
|
"node_ip": c.BaseURL,
|
||||||
|
"status_code": resp.StatusCode,
|
||||||
|
"headers": resp.Header,
|
||||||
|
}).Debug("Received response from SPORE device")
|
||||||
|
|
||||||
if resp.StatusCode != http.StatusOK {
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
// Only try to read body for error cases
|
||||||
body, _ := io.ReadAll(resp.Body)
|
body, _ := io.ReadAll(resp.Body)
|
||||||
|
log.WithFields(log.Fields{
|
||||||
|
"node_ip": c.BaseURL,
|
||||||
|
"status": resp.StatusCode,
|
||||||
|
"error_body": string(body),
|
||||||
|
}).Error("SPORE device reported firmware upload failure")
|
||||||
return nil, fmt.Errorf("firmware update failed with status %d: %s", resp.StatusCode, string(body))
|
return nil, fmt.Errorf("firmware update failed with status %d: %s", resp.StatusCode, string(body))
|
||||||
}
|
}
|
||||||
|
|
||||||
var updateResponse FirmwareUpdateResponse
|
// For successful firmware uploads, don't try to read the response body
|
||||||
if err := json.NewDecoder(resp.Body).Decode(&updateResponse); err != nil {
|
// The SPORE device restarts immediately after sending the response, so reading the body
|
||||||
return nil, fmt.Errorf("failed to decode firmware update response: %w", err)
|
// would cause the connection to hang or timeout
|
||||||
|
log.WithFields(log.Fields{
|
||||||
|
"node_ip": c.BaseURL,
|
||||||
|
"status": "success_no_body",
|
||||||
|
}).Info("Firmware upload completed successfully (device restarting)")
|
||||||
|
|
||||||
|
updateResponse := FirmwareUpdateResponse{
|
||||||
|
Status: "OK",
|
||||||
|
Message: "Firmware update completed successfully",
|
||||||
}
|
}
|
||||||
|
|
||||||
return &updateResponse, nil
|
return &updateResponse, nil
|
||||||
|
|||||||
Reference in New Issue
Block a user