Compare commits

9 Commits

20 changed files with 1816 additions and 81 deletions

6
.dockerignore Normal file
View File

@@ -0,0 +1,6 @@
.git
.gitignore
.cursor
*.md
spore-gateway

31
Dockerfile Normal file
View File

@@ -0,0 +1,31 @@
# Build stage: compile the gateway with the Go toolchain
FROM golang:1.24-alpine AS builder
WORKDIR /app
# Copy go mod files first so the dependency download layer stays cached
# until go.mod or go.sum change
COPY go.mod go.sum ./
RUN go mod download
# Copy source code
COPY . .
# Build the application with cgo disabled
RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -o spore-gateway ./main.go
# Runtime stage: minimal image that only carries the compiled binary
FROM alpine:latest
RUN apk --no-cache add ca-certificates
WORKDIR /root/
# Copy the binary from builder
COPY --from=builder /app/spore-gateway .
# Expose ports: 3001 is the HTTP server, 4210 is the UDP discovery port.
# EXPOSE defaults to TCP, so the UDP port has to be marked explicitly.
EXPOSE 3001/tcp 4210/udp
# Run the application
CMD ["./spore-gateway"]

59
Makefile Normal file
View File

@@ -0,0 +1,59 @@
# Every target here is a command, not a file, so all of them are declared phony
# (including fmt, lint and deps, which would otherwise be skipped if a file or
# directory with the same name ever appeared in the repository root).
.PHONY: build run clean fmt lint deps docker-build docker-run docker-push docker-build-multiarch docker-push-multiarch

# Build the application
build:
	go build -o spore-gateway main.go

# Run the application
run:
	go run main.go

# Clean build artifacts
clean:
	rm -f spore-gateway

# Format code
fmt:
	go fmt ./...

# Lint code (requires golangci-lint)
lint:
	golangci-lint run

# Install dependencies
deps:
	go mod download
	go mod tidy

# Docker variables
# DOCKER_REGISTRY is optional; when empty the image name is used without a
# registry prefix.
DOCKER_REGISTRY ?=
IMAGE_NAME = wirelos/spore-gateway
IMAGE_TAG ?= latest
FULL_IMAGE_NAME = $(if $(DOCKER_REGISTRY),$(DOCKER_REGISTRY)/$(IMAGE_NAME),$(IMAGE_NAME)):$(IMAGE_TAG)

# Build Docker image
docker-build:
	docker build -t $(FULL_IMAGE_NAME) .

# Run Docker container (host networking)
docker-run:
	docker run --network host --rm $(FULL_IMAGE_NAME)

# Push Docker image
docker-push:
	docker push $(FULL_IMAGE_NAME)

# Build multiarch Docker image (linux/amd64 and linux/arm64).
# The build passes --push, so the image is pushed as part of this target.
docker-build-multiarch:
	docker buildx build --platform linux/amd64,linux/arm64 \
		-t $(FULL_IMAGE_NAME) \
		--push \
		.

# Push multiarch Docker image.
# Alias for docker-build-multiarch, which already builds and pushes in one step.
docker-push-multiarch: docker-build-multiarch

View File

@@ -28,10 +28,42 @@ Options:
HTTP server port (default "3001")
-udp-port string
UDP discovery port (default "4210")
-mqtt string
Enable MQTT integration with server URL (e.g., tcp://localhost:1883)
-log-level string
Log level (debug, info, warn, error) (default "info")
```
### MQTT Integration
The gateway can integrate with an MQTT broker to subscribe to all MQTT topics and forward messages to connected WebSocket clients.
To enable MQTT integration:
```bash
# Basic usage
./spore-gateway -mqtt tcp://localhost:1883
# With authentication (using environment variables)
MQTT_USER=username MQTT_PASSWORD=password ./spore-gateway -mqtt tcp://broker.example.com:1883
```
When enabled, the gateway will:
- Connect to the specified MQTT broker
- Subscribe to all topics (`#`)
- Forward all received messages to connected WebSocket clients with the format:
```json
{
"topic": "sensor/temperature",
"data": "{\"value\": 23.5}",
"timestamp": "2024-01-15T10:30:00Z"
}
```
Environment variables:
- `MQTT_USER`: Username for MQTT broker authentication (optional)
- `MQTT_PASSWORD`: Password for MQTT broker authentication (optional)
## Integration
The spore-gateway works together with the SPORE UI frontend:
@@ -94,11 +126,19 @@ The application follows the same patterns as the original Node.js spore-ui serve
- HTTP middleware for CORS and logging
- WebSocket support for real-time updates
## Documentation
See the `docs/` directory for detailed documentation:
- [MQTT Integration](./docs/MQTT.md) - MQTT message forwarding and integration
- [Rollout Process](./docs/Rollout.md) - Firmware rollout orchestration
- [Testing Tools](./hack/README.md) - Local MQTT broker and testing scripts
## Architecture
- `main.go` - Application entry point
- `internal/discovery/` - UDP-based node discovery
- `internal/server/` - HTTP API server
- `internal/websocket/` - WebSocket server for real-time updates
- `internal/mqtt/` - MQTT client and message forwarding
- `pkg/client/` - SPORE API client
- `pkg/config/` - Configuration management

370
docs/MQTT.md Normal file
View File

@@ -0,0 +1,370 @@
# MQTT Integration
The SPORE Gateway includes optional MQTT integration that allows subscribing to MQTT brokers and forwarding messages to connected WebSocket clients. This enables integration with IoT devices, sensor networks, and other MQTT-based systems.
## Overview
When enabled, the gateway acts as an MQTT subscriber that:
- Connects to an MQTT broker
- Subscribes to all topics (`#`)
- Forwards received messages to WebSocket clients in real-time
This allows the SPORE UI to display MQTT events alongside SPORE cluster events.
## Features
- **Universal Topic Subscription**: Subscribes to `#` (all topics) to capture all messages
- **WebSocket Forwarding**: All MQTT messages are forwarded to connected WebSocket clients
- **Authentication Support**: Optional username/password authentication
- **Automatic Reconnection**: Handles connection failures and automatically reconnects
- **Structured Message Format**: Messages are formatted with topic, data, and timestamp
## Usage
### Basic Usage
Start the gateway with MQTT integration enabled:
```bash
./spore-gateway -mqtt tcp://localhost:1883
```
### With Authentication
If your MQTT broker requires authentication, use environment variables:
```bash
MQTT_USER=username MQTT_PASSWORD=password ./spore-gateway -mqtt tcp://broker.example.com:1883
```
### Complete Example
```bash
# Terminal 1: Start MQTT broker (optional, for testing)
cd hack
./mosquitto.sh
# Terminal 2: Start SPORE gateway with MQTT integration
cd ..
MQTT_USER=admin MQTT_PASSWORD=secret ./spore-gateway -mqtt tcp://localhost:1883
# Terminal 3: Publish test messages
cd hack
./mqtt-test.sh
```
## Message Format
MQTT messages received by the gateway are forwarded to WebSocket clients with the following JSON structure:
```json
{
"topic": "sensor/temperature/living-room",
"data": "{\"temperature\": 23.5, \"unit\": \"celsius\", \"timestamp\": \"2024-01-15T10:30:00Z\"}",
"timestamp": "2024-01-15T10:30:00Z"
}
```
### Fields
- **topic** (string): The MQTT topic the message was published to
- **data** (string): The raw message payload as a string (can be JSON, text, binary data encoded as string, etc.)
- **timestamp** (string): RFC3339 timestamp when the gateway received the message
### Message Payload Handling
The gateway treats all MQTT message payloads as raw data (byte arrays). When forwarding to WebSocket:
- Binary data is converted to string representation
- Text data is forwarded as-is
- JSON data remains as JSON string (not parsed)
This preserves the original message format while allowing the WebSocket client to parse or display it as needed.
## Configuration
### Command Line Flags
| Flag | Description | Example |
|------|-------------|---------|
| `-mqtt` | MQTT broker URL | `tcp://localhost:1883` |
### Environment Variables
| Variable | Description | Required |
|----------|-------------|----------|
| `MQTT_USER` | Username for MQTT authentication | No |
| `MQTT_PASSWORD` | Password for MQTT authentication | No |
### Broker URLs
Supported URL formats:
- `tcp://hostname:port` - Standard MQTT (e.g., `tcp://localhost:1883`)
- `tcp://hostname` - Uses default port 1883
- `tls://hostname:8883` - Secure MQTT with TLS
Note: TLS support may require additional configuration in the MQTT client.
## Architecture
### Components
1. **MQTT Client** (`internal/mqtt/mqtt.go`)
- Manages connection to MQTT broker
- Handles subscriptions and message reception
- Implements reconnection logic
2. **WebSocket Server** (`internal/websocket/websocket.go`)
- Broadcasting MQTT messages to connected clients
- Serialization and message formatting
3. **Main Application** (`main.go`)
- Coordinates MQTT client initialization
- Sets up message callback for WebSocket forwarding
### Data Flow
```
MQTT Broker → MQTT Client → Callback → HTTP Server → WebSocket Server → Client
```
1. MQTT broker publishes message to any topic
2. Gateway's MQTT client receives message
3. Message callback triggers
4. HTTP server broadcasts to WebSocket
5. WebSocket server forwards to all connected clients
## Testing
### Local Testing Setup
The `hack/` directory contains scripts for testing MQTT integration:
```bash
# Start a local MQTT broker
./hack/mosquitto.sh
# Run comprehensive test suite
./hack/mqtt-test.sh
```
### Test Messages
The test suite publishes 16 test messages covering the following categories:
- Simple text messages
- JSON sensor data (temperature, humidity)
- Device status updates
- System events and alerts
- Configuration updates
- Metrics and telemetry
- Node discovery events
- Firmware updates
- Task status
- Error logs
- Light control (SPORE-specific)
- Binary data
- Edge cases (empty messages, large payloads)
### Manual Testing
You can also publish messages manually using the Mosquitto client:
```bash
# Install mosquitto clients
# Ubuntu/Debian: apt-get install mosquitto-clients
# Or use Docker: docker run --rm -it --network host eclipse-mosquitto:latest mosquitto_pub
# Publish a test message
docker run --rm --network host eclipse-mosquitto:latest \
mosquitto_pub -h localhost -p 1883 -t "test/topic" -m "Hello World"
# Publish JSON message
docker run --rm --network host eclipse-mosquitto:latest \
mosquitto_pub -h localhost -p 1883 -t "sensor/data" \
-m '{"sensor": "temperature", "value": 25.5, "unit": "celsius"}'
```
## Integration with SPORE UI
When the SPORE UI connects to the gateway's WebSocket endpoint, it will automatically receive MQTT messages. The UI can handle these messages similarly to SPORE cluster events.
### WebSocket Event Types
The WebSocket receives different event types:
- **Cluster Events**: `cluster/update`, `node/discovery`, etc. (from SPORE nodes)
- **MQTT Events**: Any topic from MQTT (identified by the topic field)
Example WebSocket message from MQTT:
```json
{
"topic": "sensor/temperature",
"data": "23.5",
"timestamp": "2024-01-15T10:30:00Z"
}
```
Example WebSocket message from SPORE cluster:
```json
{
"topic": "cluster/update",
"members": [...],
"primaryNode": "192.168.1.100",
"totalNodes": 3,
"timestamp": "2024-01-15T10:30:00Z"
}
```
## Troubleshooting
### Connection Issues
**Problem**: Gateway fails to connect to MQTT broker
**Solutions**:
```bash
# Check if broker is running
docker ps | grep mqtt-broker
# Check broker logs
docker logs mqtt-broker
# Test connection manually
docker run --rm -it --network host eclipse-mosquitto:latest \
mosquitto_pub -h localhost -p 1883 -t "test" -m "test"
```
### Messages Not Forwarding
**Problem**: MQTT messages not appearing in WebSocket
**Solutions**:
1. Verify gateway is running with MQTT enabled
2. Check gateway logs for MQTT connection status
3. Verify WebSocket client is connected
4. Check MQTT broker logs for subscription confirmation
### Authentication Errors
**Problem**: "Connection refused" or authentication errors
**Solutions**:
```bash
# Ensure environment variables are set
export MQTT_USER=username
export MQTT_PASSWORD=password
# Verify broker allows connections
# Check mosquitto.conf for allow_anonymous or authentication settings
```
### High Message Volume
If receiving many MQTT messages:
- Gateway handles messages efficiently using Go concurrency
- WebSocket broadcasts are serialized to prevent race conditions
- Consider QoS levels if message delivery is critical
## Best Practices
### Topic Naming
Use hierarchical topic names for better organization:
```
sensor/temperature/living-room
sensor/humidity/bedroom
device/status/esp32-001
cluster/node/discovered
```
### Message Size
- Keep individual messages reasonably sized (< 10KB recommended)
- For large data, consider splitting into multiple messages
- Use compression if transmitting large JSON payloads
### Security
- Use authentication for production deployments
- Consider TLS for encrypted connections
- Restrict the subscription to the topics you actually need instead of `#` (this currently requires modifying the subscription in the gateway code)
- Implement rate limiting on message processing if needed
### Error Handling
The gateway includes automatic reconnection logic:
- Initial connection failures are logged
- Reconnection attempts every 10 seconds
- Connection state is tracked and logged
- WebSocket clients are notified via disconnect events
## Limitations
- **QoS Levels**: Currently uses QoS 0 (at most once delivery)
- **Topic Filtering**: Subscribes to all topics (`#`); no selective subscription
- **Message Retention**: Does not store messages; forwards only real-time events
- **Duplicate Handling**: Does not deduplicate messages
- **Ordering**: Maintains message order within individual WebSocket broadcasts
## Future Enhancements
Potential improvements:
- Configurable QoS levels per topic
- Selective topic subscription via configuration
- Message persistence and replay
- Metrics and monitoring for MQTT integration
- Support for MQTT 5.0 features
## Related Documentation
- [Main README](../README.md) - Overview of SPORE Gateway
- [Rollout Documentation](./Rollout.md) - Firmware rollout process
- [Hack Directory](../hack/README.md) - Testing tools and scripts
## Examples
### Example: IoT Sensor Integration
Connect temperature sensors to the gateway:
```bash
# Start gateway with MQTT
./spore-gateway -mqtt tcp://iot-broker.example.com:1883
# Sensors publish to topics like:
# - sensor/temperature/room1
# - sensor/humidity/room1
# - sensor/light/room1
```
### Example: Device Control
Control SPORE nodes via MQTT:
```bash
# Publish control commands
mosquitto_pub -h broker.example.com -t "spore/control" \
-m '{"node": "esp32-001", "action": "pattern", "pattern": "rainbow"}'
```
### Example: Monitoring Dashboard
Combine SPORE cluster events with external system events:
```bash
# Gateway receives both:
# 1. SPORE cluster events (from UDP discovery)
# 2. External system events (via MQTT)
# UI displays unified event stream
```
## Support
For issues or questions about MQTT integration:
- Check gateway logs for MQTT connection status
- Review MQTT broker configuration
- Use `hack/mqtt-test.sh` for testing
- See [troubleshooting section](#troubleshooting) above

73
docs/README.md Normal file
View File

@@ -0,0 +1,73 @@
# SPORE Gateway Documentation
Welcome to the SPORE Gateway documentation. This directory contains detailed documentation for various features and capabilities of the gateway.
## Available Documentation
### [MQTT Integration](./MQTT.md)
Comprehensive guide to the MQTT integration feature, including:
- Setting up MQTT integration
- Message format and handling
- Testing with local MQTT brokers
- Architecture and data flow
- Troubleshooting and best practices
### [Rollout Process](./Rollout.md)
Detailed documentation for the firmware rollout system:
- Parallel firmware updates across multiple nodes
- WebSocket progress updates
- Integration with spore-registry
- API endpoints and message formats
## Quick Links
- **Main README**: [../README.md](../README.md)
- **Hack Directory**: [../hack/README.md](../hack/README.md)
- **Testing Scripts**: [../hack/](../hack/)
## Feature Overview
### Core Features
- UDP-based node discovery
- Cluster management and primary node selection
- HTTP API server for cluster operations
- WebSocket real-time updates
- Failover logic for automatic primary switching
- Generic proxy calls to SPORE nodes
### Integration Features
- **MQTT Integration**: Subscribe to MQTT topics and forward messages to WebSocket clients
- **Firmware Rollout**: Orchestrated firmware updates across the cluster
- **Registry Proxy**: Proxy for spore-registry firmware management
## Getting Started
1. **Basic Setup**: See [Main README](../README.md) for installation and basic usage
2. **MQTT Integration**: See [MQTT.md](./MQTT.md) for MQTT setup and testing
3. **Testing**: See [Hack README](../hack/README.md) for local testing tools
## Development
The gateway is written in Go and follows modern Go best practices:
- Structured logging using logrus
- Graceful shutdown handling
- Concurrent-safe operations
- HTTP middleware for CORS and logging
- WebSocket support for real-time updates
## Contributing
When adding new features:
1. Update relevant documentation in this directory
2. Add examples to the `hack/` directory
3. Update the main README with feature highlights
4. Follow the existing documentation style and structure
## Support
For questions or issues:
- Check the relevant documentation in this directory
- Review gateway logs for error messages
- Use testing tools in the `hack/` directory
- Check the main README for troubleshooting tips

11
go.mod
View File

@@ -1,11 +1,18 @@
module spore-gateway
go 1.21
go 1.24.0
toolchain go1.24.3
require (
github.com/eclipse/paho.mqtt.golang v1.5.1
github.com/gorilla/mux v1.8.1
github.com/gorilla/websocket v1.5.3
github.com/sirupsen/logrus v1.9.3
)
require golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8 // indirect
require (
golang.org/x/net v0.44.0 // indirect
golang.org/x/sync v0.17.0 // indirect
golang.org/x/sys v0.36.0 // indirect
)

9
go.sum
View File

@@ -1,6 +1,8 @@
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/eclipse/paho.mqtt.golang v1.5.1 h1:/VSOv3oDLlpqR2Epjn1Q7b2bSTplJIeV2ISgCl2W7nE=
github.com/eclipse/paho.mqtt.golang v1.5.1/go.mod h1:1/yJCneuyOoCOzKSsOTUc0AJfpsItBGWvYpBLimhArU=
github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY=
github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ=
github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg=
@@ -12,8 +14,13 @@ github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVs
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8 h1:0A+M6Uqn+Eje4kHMK80dtF3JCXC4ykBgQG4Fe06QRhQ=
golang.org/x/net v0.44.0 h1:evd8IRDyfNBMBTTY5XRF1vaZlD+EmWx6x8PkhR04H/I=
golang.org/x/net v0.44.0/go.mod h1:ECOoLqd5U3Lhyeyo/QDCEVQ4sNgYsqvCZ722XogGieY=
golang.org/x/sync v0.17.0 h1:l60nONMj9l5drqw6jlhIELNv9I0A4OFgRsG9k2oT9Ug=
golang.org/x/sync v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.36.0 h1:KVRy2GtZBrk1cBYA7MKu5bEZFxQk4NIDV6RLVcC8o0k=
golang.org/x/sys v0.36.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

130
hack/README.md Normal file
View File

@@ -0,0 +1,130 @@
# Hack Directory
This directory contains utility scripts for testing and development of the SPORE Gateway.
## Scripts
### mosquitto.sh
Starts a local Mosquitto MQTT broker using Docker.
**Usage:**
```bash
./mosquitto.sh
```
This will:
- Start a Mosquitto broker on port 1883
- Use the configuration from `mosquitto.conf`
- Allow anonymous connections (no authentication required)
### mqtt-test.sh
Sends various test events to the local MQTT broker to test the gateway's MQTT integration.
**Usage:**
```bash
# Make sure the broker is running first
./mosquitto.sh # In terminal 1
# In another terminal, run the tests
./mqtt-test.sh
```
The script sends 16 test messages; the categories are listed under "Test Message Coverage" below.
## Test Message Coverage
The full `mqtt-test.sh` script will send 16 different test messages covering:
- Simple text messages
- JSON sensor data (temperature, humidity)
- Device status updates
- System events and alerts
- Configuration updates
- Metrics
- Cluster/node discovery events
- Firmware updates
- Task status
- Error logs
- Light control (SPORE nodes)
- Binary data
- Edge cases (empty messages, large payloads)
## Testing MQTT Integration
### Complete Test Workflow
1. **Start the MQTT broker:**
```bash
cd hack
./mosquitto.sh
```
2. **In a new terminal, start the SPORE gateway with MQTT enabled:**
```bash
cd /path/to/spore-gateway
./spore-gateway -mqtt tcp://localhost:1883
```
3. **In another terminal, run the test script:**
```bash
cd hack
./mqtt-test.sh
```
4. **Monitor the WebSocket connection** to see the events being forwarded.
You can use a WebSocket client or the SPORE UI to connect to `ws://localhost:3001/ws`.
### Expected Output
All MQTT messages will be forwarded through the WebSocket with this format:
```json
{
"topic": "sensor/temperature/living-room",
"data": "{\"temperature\": 23.5, \"unit\": \"celsius\", \"timestamp\": \"2024-01-15T10:30:00Z\"}",
"timestamp": "2024-01-15T10:30:00Z"
}
```
## Customization
### Using a Different MQTT Broker
You can change the broker URL using the `MQTT_BROKER` environment variable:
```bash
MQTT_BROKER=tcp://broker.example.com:1883 ./mqtt-test.sh
```
### Adding Your Own Test Messages
Edit `mqtt-test.sh` and add your custom test case:
```bash
# Test N: Your custom test
echo -e "${YELLOW}=== Test N: Your Description ===${NC}"
publish_json "your/topic" '{"your": "data"}'
```
## Troubleshooting
### Broker Not Starting
- Make sure Docker is running
- Check if port 1883 is already in use
- Verify the Mosquitto image is available: `docker pull eclipse-mosquitto:latest`
### Messages Not Being Received
- Verify the gateway is running with `-mqtt tcp://localhost:1883`
- Check the gateway logs for connection errors
- Ensure the WebSocket client is connected to `ws://localhost:3001/ws`
### Port Conflicts
If port 1883 is in use, modify `mosquitto.sh` to use a different port:
```bash
-p 1884:1883 # Maps host port 1884 to container port 1883
```
Then update your gateway command:
```bash
./spore-gateway -mqtt tcp://localhost:1884
```

10
hack/mosquitto.conf Normal file
View File

@@ -0,0 +1,10 @@
# -----------------------------
# Basic Mosquitto configuration
# -----------------------------
listener 1883
allow_anonymous true
# (Optional) WebSocket listener; also requires publishing port 9001 from the container (e.g. add `-p 9001:9001` to the docker run command in mosquitto.sh)
# listener 9001
# protocol websockets
# allow_anonymous true

7
hack/mosquitto.sh Executable file
View File

@@ -0,0 +1,7 @@
#!/usr/bin/env bash
# Starts a local Mosquitto MQTT broker in Docker, listening on port 1883 and
# configured by the mosquitto.conf that sits next to this script.

# Resolve the script's own directory so the config file is found regardless of
# the caller's working directory (e.g. when invoked as ./hack/mosquitto.sh).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# The mount path is quoted so directories containing spaces work.
docker run --rm -it \
    --name mqtt-broker \
    -p 1883:1883 \
    -v "${SCRIPT_DIR}/mosquitto.conf:/mosquitto/config/mosquitto.conf:ro" \
    eclipse-mosquitto:latest

136
hack/mqtt-test.sh Executable file
View File

@@ -0,0 +1,136 @@
#!/usr/bin/env bash
# MQTT Test Script for SPORE Gateway
# This script sends various test events to the local MQTT broker
set -e
# Configuration
MQTT_BROKER="${MQTT_BROKER:-tcp://localhost:1883}"
DOCKER_IMAGE="eclipse-mosquitto:latest"
# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color
# Function to publish an MQTT message
#
# Arguments:
#   $1 - topic to publish to
#   $2 - message payload
#   $3 - QoS level (optional, defaults to 0)
#
# The broker host and port are taken from MQTT_BROKER (scheme://host[:port]);
# the port defaults to 1883 when the URL does not carry one.
# Exits the script with status 1 if the message cannot be sent.
publish_message() {
    local topic="$1"
    local payload="$2"
    local qos="${3:-0}"

    # Split MQTT_BROKER into host and port so the configured broker is honored
    # instead of always publishing to localhost:1883.
    local broker="${MQTT_BROKER:-tcp://localhost:1883}"
    local hostport="${broker#*://}"   # drop the scheme
    hostport="${hostport%%/*}"        # drop any trailing path
    local host="${hostport%%:*}"
    local port="1883"
    if [[ "${hostport}" == *:* ]]; then
        port="${hostport##*:}"
    fi

    echo -e "${YELLOW}Publishing to topic: ${GREEN}${topic}${NC}"

    # Run the command inside the `if` so a failure reaches the error branch;
    # with `set -e` a separate `$?` check would never be executed on failure.
    if docker run --rm --network host \
        "${DOCKER_IMAGE}" \
        mosquitto_pub \
        -h "${host}" \
        -p "${port}" \
        -t "${topic}" \
        -m "${payload}" \
        -q "${qos}"; then
        echo -e "${GREEN}✓ Message sent successfully${NC}"
    else
        echo -e "${RED}✗ Failed to send message${NC}"
        exit 1
    fi
    echo ""
}
# Publish a JSON document to an MQTT topic.
#
# Arguments:
#   $1 - topic
#   $2 - JSON payload (sent as-is)
#   $3 - QoS level (optional, defaults to 0)
#
# Delegates directly to publish_message.
publish_json() {
    publish_message "$1" "$2" "${3:-0}"
}
# Main test execution
echo "============================================"
echo " SPORE Gateway MQTT Test Suite"
echo "============================================"
echo ""
echo "Using MQTT broker: ${MQTT_BROKER}"
echo ""
# Test 1: Simple text message
echo -e "${YELLOW}=== Test 1: Simple Text Message ===${NC}"
publish_message "test/hello" "Hello from MQTT test script!"
# Test 2: Temperature sensor reading
echo -e "${YELLOW}=== Test 2: Temperature Sensor Reading ===${NC}"
publish_json "sensor/temperature/living-room" '{"temperature": 23.5, "unit": "celsius", "timestamp": "2024-01-15T10:30:00Z"}'
# Test 3: Humidity sensor reading
echo -e "${YELLOW}=== Test 3: Humidity Sensor Reading ===${NC}"
publish_json "sensor/humidity/bedroom" '{"humidity": 45.2, "unit": "percent", "timestamp": "2024-01-15T10:30:05Z"}'
# Test 4: Device status
echo -e "${YELLOW}=== Test 4: Device Status Update ===${NC}"
publish_json "device/status/esp32-001" '{"id": "esp32-001", "status": "online", "uptime": 3600, "firmware": "v1.2.3"}'
# Test 5: System event
echo -e "${YELLOW}=== Test 5: System Event ===${NC}"
publish_json "system/event" '{"type": "startup", "message": "Gateway started successfully", "timestamp": "2024-01-15T10:30:10Z"}'
# Test 6: Alert message
echo -e "${YELLOW}=== Test 6: Alert Message ===${NC}"
publish_json "alert/high-temperature" '{"level": "warning", "message": "Temperature exceeded threshold", "value": 35.5, "threshold": 30.0}'
# Test 7: Configuration update
echo -e "${YELLOW}=== Test 7: Configuration Update ===${NC}"
publish_json "config/update" '{"section": "network", "key": "retry_count", "value": 3, "updated": "2024-01-15T10:30:15Z"}'
# Test 8: Metric data
echo -e "${YELLOW}=== Test 8: Metric Data ===${NC}"
publish_json "metrics/system" '{"cpu": 45.2, "memory": 62.5, "disk": 38.7, "timestamp": "2024-01-15T10:30:20Z"}'
# Test 9: Node discovery event
echo -e "${YELLOW}=== Test 9: Node Discovery Event ===${NC}"
publish_json "cluster/node/discovered" '{"ip": "192.168.1.100", "hostname": "node-001", "status": "online", "version": "1.0.0"}'
# Test 10: Firmware update event
echo -e "${YELLOW}=== Test 10: Firmware Update Event ===${NC}"
publish_json "firmware/update/esp32-001" '{"node": "esp32-001", "status": "completed", "version": "v1.3.0", "size": 1234567}'
# Test 11: Task status
echo -e "${YELLOW}=== Test 11: Task Status ===${NC}"
publish_json "task/sync/status" '{"id": "sync-001", "status": "running", "progress": 75, "estimated_completion": "2024-01-15T10:35:00Z"}'
# Test 12: Error log
echo -e "${YELLOW}=== Test 12: Error Log ===${NC}"
publish_json "log/error" '{"severity": "error", "component": "mqtt-client", "message": "Connection timeout", "code": 1001}'
# Test 13: Light control (for SPORE nodes)
echo -e "${YELLOW}=== Test 13: Light Control ===${NC}"
publish_json "light/control" '{"id": "neopixel-001", "brightness": 128, "color": {"r": 255, "g": 0, "b": 0}, "pattern": "solid"}'
# Test 14: Binary data (as hex string)
echo -e "${YELLOW}=== Test 14: Binary Data ===${NC}"
publish_message "data/binary" "48656c6c6f20576f726c64" # "Hello World" in hex
# Test 15: Empty message
echo -e "${YELLOW}=== Test 15: Empty Message ===${NC}"
publish_message "test/empty" ""
# Test 16: Large payload
echo -e "${YELLOW}=== Test 16: Large Payload ===${NC}"
LARGE_PAYLOAD='{"data": "'$(head -c 1000 < /dev/zero | tr '\0' 'A')'"}'
publish_message "test/large" "${LARGE_PAYLOAD}"
echo "============================================"
echo -e "${GREEN}All tests completed successfully!${NC}"
echo "============================================"
echo ""
echo "To monitor these messages, connect to the WebSocket at:"
echo " ws://localhost:3001/ws"
echo ""
echo "You should see all these events forwarded with the format:"
echo ' {"topic": "...", "data": "...", "timestamp": "..."}'
echo ""

View File

@@ -2,6 +2,7 @@ package discovery
import (
"context"
"encoding/json"
"fmt"
"net"
"strconv"
@@ -49,6 +50,9 @@ func (nd *NodeDiscovery) Shutdown(ctx context.Context) error {
return nil
}
// MessageHandler processes a specific UDP message type.
//
// payload is the message text that follows the first ":" separator (the part
// before it is the topic used to select the handler); remoteAddr is the UDP
// address the message was received from.
type MessageHandler func(payload string, remoteAddr *net.UDPAddr)
// handleUDPMessage processes incoming UDP messages
func (nd *NodeDiscovery) handleUDPMessage(message string, remoteAddr *net.UDPAddr) {
nd.logger.WithFields(log.Fields{
@@ -58,13 +62,42 @@ func (nd *NodeDiscovery) handleUDPMessage(message string, remoteAddr *net.UDPAdd
message = strings.TrimSpace(message)
if strings.HasPrefix(message, "CLUSTER_HEARTBEAT:") {
hostname := strings.TrimPrefix(message, "CLUSTER_HEARTBEAT:")
nd.updateNodeFromHeartbeat(remoteAddr.IP.String(), remoteAddr.Port, hostname)
} else if strings.HasPrefix(message, "NODE_UPDATE:") {
nd.handleNodeUpdate(remoteAddr.IP.String(), message)
} else if !strings.HasPrefix(message, "RAW:") {
nd.logger.WithField("message", message).Debug("Received unknown UDP message")
// Extract topic by splitting on first ":"
parts := strings.SplitN(message, ":", 2)
if len(parts) < 2 {
nd.logger.WithField("message", message).Debug("Invalid message format - missing ':' separator")
return
}
topic := parts[0]
payload := parts[1]
// Handler map for different message types
handlers := map[string]MessageHandler{
"cluster/heartbeat": func(payload string, remoteAddr *net.UDPAddr) {
nd.updateNodeFromHeartbeat(remoteAddr.IP.String(), remoteAddr.Port, payload)
},
"node/update": func(payload string, remoteAddr *net.UDPAddr) {
// Reconstruct full message for handleNodeUpdate which expects "node/update:hostname:{json}"
fullMessage := "node/update:" + payload
nd.handleNodeUpdate(remoteAddr.IP.String(), fullMessage)
},
"RAW": func(payload string, remoteAddr *net.UDPAddr) {
nd.logger.WithField("message", "RAW:"+payload).Debug("Received raw message")
},
"cluster/event": func(payload string, remoteAddr *net.UDPAddr) {
nd.handleClusterEvent(payload, remoteAddr)
},
"cluster/broadcast": func(payload string, remoteAddr *net.UDPAddr) {
nd.handleClusterBroadcast(payload, remoteAddr)
},
}
// Look up and execute handler
if handler, exists := handlers[topic]; exists {
handler(payload, remoteAddr)
} else {
nd.logger.WithField("topic", topic).Debug("Received unknown UDP message type")
}
}
@@ -138,9 +171,9 @@ func (nd *NodeDiscovery) updateNodeFromHeartbeat(sourceIP string, sourcePort int
}
}
// handleNodeUpdate processes NODE_UPDATE messages
// handleNodeUpdate processes NODE_UPDATE and node/update messages
func (nd *NodeDiscovery) handleNodeUpdate(sourceIP, message string) {
// Message format: "NODE_UPDATE:hostname:{json}"
// Message format: "NODE_UPDATE:hostname:{json}" or "node/update:hostname:{json}"
parts := strings.SplitN(message, ":", 3)
if len(parts) < 3 {
nd.logger.WithField("message", message).Warn("Invalid NODE_UPDATE message format")
@@ -344,6 +377,55 @@ func (nd *NodeDiscovery) AddCallback(callback NodeUpdateCallback) {
nd.callbacks = append(nd.callbacks, callback)
}
// SetClusterEventCallback registers the broadcaster that cluster events are
// forwarded to. The field is assigned while holding the write lock on
// nd.mutex.
func (nd *NodeDiscovery) SetClusterEventCallback(callback ClusterEventBroadcaster) {
	nd.mutex.Lock()
	nd.clusterEventCallback = callback
	nd.mutex.Unlock()
}
// handleClusterEvent processes cluster/event messages.
//
// The payload is logged at debug level and, when a broadcaster has been
// registered via SetClusterEventCallback, forwarded unchanged under the
// "cluster/event" topic. remoteAddr is only used for logging.
func (nd *NodeDiscovery) handleClusterEvent(payload string, remoteAddr *net.UDPAddr) {
	nd.logger.WithFields(log.Fields{
		"payload": payload,
		"from":    remoteAddr.String(),
	}).Debug("Received cluster/event message")

	// SetClusterEventCallback writes this field under nd.mutex, so take a
	// snapshot under the read lock. The lock is released before the callback
	// runs so a slow broadcaster cannot block other users of the mutex.
	nd.mutex.RLock()
	broadcaster := nd.clusterEventCallback
	nd.mutex.RUnlock()

	// Forward to websocket if callback is set
	if broadcaster == nil {
		return
	}
	broadcaster.BroadcastClusterEvent("cluster/event", payload)
}
// handleClusterBroadcast processes cluster/broadcast messages.
//
// The payload must be a JSON object with an "event" string and a "data"
// value. The event becomes the broadcast topic and data is passed through as
// decoded by encoding/json. Payloads that fail to parse, or that carry no
// event name, are logged and dropped. remoteAddr is only used for logging.
func (nd *NodeDiscovery) handleClusterBroadcast(payload string, remoteAddr *net.UDPAddr) {
	nd.logger.WithFields(log.Fields{
		"payload": payload,
		"from":    remoteAddr.String(),
	}).Debug("Received cluster/broadcast message")

	// Parse the payload JSON to extract nested event and data
	var payloadData struct {
		Event string      `json:"event"`
		Data  interface{} `json:"data"`
	}
	if err := json.Unmarshal([]byte(payload), &payloadData); err != nil {
		nd.logger.WithError(err).Error("Failed to parse cluster/broadcast payload")
		return
	}

	// Without an event name the message would be broadcast under an empty
	// topic, which receivers cannot route; drop it instead.
	if payloadData.Event == "" {
		nd.logger.WithField("from", remoteAddr.String()).Warn("Ignoring cluster/broadcast payload without an event name")
		return
	}

	nd.logger.WithFields(log.Fields{
		"event": payloadData.Event,
		"from":  remoteAddr.String(),
	}).Debug("Parsed cluster/broadcast payload")

	// SetClusterEventCallback writes this field under nd.mutex, so take a
	// snapshot under the read lock and release it before invoking the callback.
	nd.mutex.RLock()
	broadcaster := nd.clusterEventCallback
	nd.mutex.RUnlock()

	// Forward to websocket if callback is set, mapping event to topic and data to data
	if broadcaster == nil {
		return
	}
	broadcaster.BroadcastClusterEvent(payloadData.Event, payloadData.Data)
}
// GetClusterStatus returns current cluster status
func (nd *NodeDiscovery) GetClusterStatus() ClusterStatus {
nd.mutex.RLock()

View File

@@ -41,15 +41,21 @@ type ClusterStatus struct {
// NodeUpdateCallback is called when node information changes
type NodeUpdateCallback func(nodeIP string, action string)
// ClusterEventBroadcaster is implemented by anything that can publish a
// cluster event. NodeDiscovery stores one implementation (see
// SetClusterEventCallback) and calls it from its cluster/event and
// cluster/broadcast handlers.
type ClusterEventBroadcaster interface {
// BroadcastClusterEvent publishes data under the given topic.
BroadcastClusterEvent(topic string, data interface{})
}
// NodeDiscovery manages UDP-based node discovery
type NodeDiscovery struct {
udpPort string
discoveredNodes map[string]*NodeInfo
primaryNode string
mutex sync.RWMutex
callbacks []NodeUpdateCallback
staleThreshold time.Duration
logger *log.Logger
udpPort string
discoveredNodes map[string]*NodeInfo
primaryNode string
mutex sync.RWMutex
callbacks []NodeUpdateCallback
clusterEventCallback ClusterEventBroadcaster
staleThreshold time.Duration
logger *log.Logger
}
// NewNodeDiscovery creates a new node discovery instance
@@ -57,7 +63,7 @@ func NewNodeDiscovery(udpPort string) *NodeDiscovery {
return &NodeDiscovery{
udpPort: udpPort,
discoveredNodes: make(map[string]*NodeInfo),
staleThreshold: 10 * time.Second, // TODO make configurable
staleThreshold: 10 * time.Second, // Heartbeat timeout - mark nodes inactive after 10 seconds
logger: log.New(),
}
}

138
internal/mqtt/mqtt.go Normal file
View File

@@ -0,0 +1,138 @@
package mqtt
import (
"context"
"fmt"
"os"
"time"
mqtt "github.com/eclipse/paho.mqtt.golang"
log "github.com/sirupsen/logrus"
)
// MQTTClient represents an MQTT client for the gateway. It wraps a paho
// client, subscribes to every topic once connected, and hands each received
// message to an optional callback.
type MQTTClient struct {
// client is the underlying paho client; it is created in Connect.
client mqtt.Client
// serverURL is the broker address passed to the paho client options.
serverURL string
// username and password are only applied to the options when non-empty.
username string
password string
// connected is set to true by the on-connect handler and to false by the
// connection-lost handler and by Disconnect. No lock guards this field.
connected bool
logger *log.Logger
// messageCallback, when non-nil, is invoked by handleMessage with the
// topic and payload of every received message.
messageCallback func(topic string, data []byte)
}
// NewMQTTClient builds an MQTTClient for the given broker URL and credentials.
// The client gets its own logger; no connection is attempted until Connect is
// called.
func NewMQTTClient(serverURL, username, password string) *MQTTClient {
	mc := new(MQTTClient)
	mc.serverURL = serverURL
	mc.username = username
	mc.password = password
	mc.logger = log.New()
	return mc
}
// SetMessageCallback installs the function that handleMessage invokes with
// the topic and payload of each received MQTT message. Passing nil disables
// forwarding.
func (mc *MQTTClient) SetMessageCallback(callback func(topic string, data []byte)) {
	mc.messageCallback = callback
}
// Connect configures a paho client for mc.serverURL and connects to the
// broker, waiting for the connect token to complete. It returns a wrapped
// error when the token reports a failure.
func (mc *MQTTClient) Connect() error {
	clientID := fmt.Sprintf("spore-gateway-%d", time.Now().Unix())

	opts := mqtt.NewClientOptions().
		AddBroker(mc.serverURL).
		SetClientID(clientID).
		SetCleanSession(true).
		SetAutoReconnect(true).
		SetConnectRetry(true).
		SetConnectRetryInterval(10 * time.Second).
		SetKeepAlive(30 * time.Second).
		SetPingTimeout(10 * time.Second)

	// Credentials are optional; empty values are left unset.
	if mc.username != "" {
		opts.SetUsername(mc.username)
	}
	if mc.password != "" {
		opts.SetPassword(mc.password)
	}

	// Connection lifecycle callbacks.
	opts.SetOnConnectHandler(mc.onConnected)
	opts.SetConnectionLostHandler(mc.onConnectionLost)

	mc.client = mqtt.NewClient(opts)

	mc.logger.WithFields(log.Fields{
		"server":   mc.serverURL,
		"username": mc.username,
	}).Info("Connecting to MQTT broker")

	token := mc.client.Connect()
	if !token.Wait() {
		return nil
	}
	if err := token.Error(); err != nil {
		return fmt.Errorf("failed to connect to MQTT broker: %w", err)
	}
	return nil
}
// onConnected is the paho on-connect handler. It marks the client as
// connected and subscribes to the wildcard topic "#" at QoS 0, routing
// messages to handleMessage. A subscription failure is logged, not returned.
func (mc *MQTTClient) onConnected(client mqtt.Client) {
	mc.logger.Info("Successfully connected to MQTT broker")
	mc.connected = true

	// Subscribe to all topics
	token := mc.client.Subscribe("#", 0, mc.handleMessage)
	if token.Wait() && token.Error() != nil {
		mc.logger.WithError(token.Error()).Error("Failed to subscribe to MQTT topics")
		return
	}
	mc.logger.Info("Subscribed to all MQTT topics (#)")
}
// onConnectionLost is the paho connection-lost handler. It logs the error
// that caused the drop and clears the connected flag.
func (mc *MQTTClient) onConnectionLost(client mqtt.Client, err error) {
	entry := mc.logger.WithError(err)
	entry.Error("MQTT connection lost")
	mc.connected = false
}
// handleMessage is the subscription handler for incoming MQTT messages. It
// logs the topic and payload size at debug level and, when a message callback
// is installed, passes the topic and payload to it.
func (mc *MQTTClient) handleMessage(client mqtt.Client, msg mqtt.Message) {
	topic, payload := msg.Topic(), msg.Payload()

	fields := log.Fields{
		"topic":  topic,
		"length": len(payload),
	}
	mc.logger.WithFields(fields).Debug("Received MQTT message")

	// Nothing to forward to when no callback has been installed.
	if mc.messageCallback == nil {
		return
	}
	mc.messageCallback(topic, payload)
}
// Disconnect disconnects from the MQTT broker, giving in-flight work up to
// 250 ms to finish, and clears the connected flag. It is a no-op when Connect
// has never created a client.
func (mc *MQTTClient) Disconnect() {
	if mc.client == nil {
		return
	}

	// Do not gate on mc.connected: Connect enables auto-reconnect and
	// connect-retry, so after a lost connection (connected == false) the
	// underlying client may still be retrying in the background. It must be
	// told to disconnect in that state too, otherwise it keeps reconnecting
	// after shutdown.
	mc.logger.Info("Disconnecting from MQTT broker")
	mc.client.Disconnect(250)
	mc.connected = false
}
// Shutdown logs the shutdown and disconnects the MQTT client. The context is
// accepted for interface symmetry with the other components but is not
// consulted; the returned error is always nil.
func (mc *MQTTClient) Shutdown(ctx context.Context) error {
	mc.logger.Info("Shutting down MQTT client")

	mc.Disconnect()

	return nil
}
// IsConnected reports the value of the connected flag maintained by the
// connect and connection-lost handlers and by Disconnect.
func (mc *MQTTClient) IsConnected() bool {
	return mc.connected
}
// NewMQTTClientFromEnv creates an MQTT client for serverURL, taking the
// credentials from the MQTT_USER and MQTT_PASSWORD environment variables
// (empty when unset).
func NewMQTTClientFromEnv(serverURL string) *MQTTClient {
	return NewMQTTClient(
		serverURL,
		os.Getenv("MQTT_USER"),
		os.Getenv("MQTT_PASSWORD"),
	)
}

View File

@@ -38,6 +38,9 @@ func NewHTTPServer(port string, nodeDiscovery *discovery.NodeDiscovery) *HTTPSer
// Initialize registry client
registryClient := registry.NewRegistryClient("http://localhost:3002")
// Register WebSocket server as cluster event broadcaster
nodeDiscovery.SetClusterEventCallback(wsServer)
hs := &HTTPServer{
port: port,
router: mux.NewRouter(),
@@ -174,6 +177,11 @@ func (hs *HTTPServer) Start() error {
return hs.server.ListenAndServe()
}
// BroadcastMQTTMessage hands an MQTT message (topic and raw payload) to the
// server's WebSocket server for broadcasting.
func (hs *HTTPServer) BroadcastMQTTMessage(topic string, data []byte) {
	ws := hs.webSocketServer
	ws.BroadcastMQTTMessage(topic, data)
}
// Shutdown gracefully shuts down the HTTP server
func (hs *HTTPServer) Shutdown(ctx context.Context) error {
log.Info("Shutting down HTTP server")
@@ -366,16 +374,19 @@ func (hs *HTTPServer) setPrimaryNode(w http.ResponseWriter, r *http.Request) {
// GET /api/cluster/members
func (hs *HTTPServer) getClusterMembers(w http.ResponseWriter, r *http.Request) {
log.Debug("Fetching cluster members via API")
result, err := hs.performWithFailover(func(client *client.SporeClient) (interface{}, error) {
return client.GetClusterStatus()
})
if err != nil {
log.WithError(err).Error("Error fetching cluster members")
log.WithError(err).Debug("Failed to fetch cluster members")
http.Error(w, fmt.Sprintf(`{"error": "Failed to fetch cluster members", "message": "%s"}`, err.Error()), http.StatusBadGateway)
return
}
log.Debug("Successfully fetched cluster members via API")
json.NewEncoder(w).Encode(result)
}
@@ -417,42 +428,52 @@ func (hs *HTTPServer) getTaskStatus(w http.ResponseWriter, r *http.Request) {
ip := r.URL.Query().Get("ip")
if ip != "" {
log.WithField("node_ip", ip).Debug("Fetching task status from specific node")
client := hs.getSporeClient(ip)
result, err := client.GetTaskStatus()
if err != nil {
log.WithError(err).Error("Error fetching task status from specific node")
log.WithFields(log.Fields{
"node_ip": ip,
"error": err.Error(),
}).Debug("Failed to fetch task status from specific node")
http.Error(w, fmt.Sprintf(`{"error": "Failed to fetch task status from node", "message": "%s"}`, err.Error()), http.StatusInternalServerError)
return
}
log.WithField("node_ip", ip).Debug("Successfully fetched task status from specific node")
json.NewEncoder(w).Encode(result)
return
}
log.Debug("Fetching task status via failover")
result, err := hs.performWithFailover(func(client *client.SporeClient) (interface{}, error) {
return client.GetTaskStatus()
})
if err != nil {
log.WithError(err).Error("Error fetching task status")
log.WithError(err).Debug("Failed to fetch task status via failover")
http.Error(w, fmt.Sprintf(`{"error": "Failed to fetch task status", "message": "%s"}`, err.Error()), http.StatusBadGateway)
return
}
log.Debug("Successfully fetched task status via failover")
json.NewEncoder(w).Encode(result)
}
// GET /api/node/status
func (hs *HTTPServer) getNodeStatus(w http.ResponseWriter, r *http.Request) {
log.Debug("Fetching node system status via failover")
result, err := hs.performWithFailover(func(client *client.SporeClient) (interface{}, error) {
return client.GetSystemStatus()
})
if err != nil {
log.WithError(err).Error("Error fetching system status")
log.WithError(err).Debug("Failed to fetch system status via failover")
http.Error(w, fmt.Sprintf(`{"error": "Failed to fetch system status", "message": "%s"}`, err.Error()), http.StatusBadGateway)
return
}
log.Debug("Successfully fetched system status via failover")
json.NewEncoder(w).Encode(result)
}
@@ -461,14 +482,20 @@ func (hs *HTTPServer) getNodeStatusByIP(w http.ResponseWriter, r *http.Request)
vars := mux.Vars(r)
nodeIP := vars["ip"]
log.WithField("node_ip", nodeIP).Debug("Fetching system status from specific node")
client := hs.getSporeClient(nodeIP)
result, err := client.GetSystemStatus()
if err != nil {
log.WithError(err).Error("Error fetching status from specific node")
log.WithFields(log.Fields{
"node_ip": nodeIP,
"error": err.Error(),
}).Debug("Failed to fetch status from specific node")
http.Error(w, fmt.Sprintf(`{"error": "Failed to fetch status from node %s", "message": "%s"}`, nodeIP, err.Error()), http.StatusInternalServerError)
return
}
log.WithField("node_ip", nodeIP).Debug("Successfully fetched status from specific node")
json.NewEncoder(w).Encode(result)
}
@@ -477,27 +504,34 @@ func (hs *HTTPServer) getNodeEndpoints(w http.ResponseWriter, r *http.Request) {
ip := r.URL.Query().Get("ip")
if ip != "" {
log.WithField("node_ip", ip).Debug("Fetching endpoints from specific node")
client := hs.getSporeClient(ip)
result, err := client.GetCapabilities()
if err != nil {
log.WithError(err).Error("Error fetching endpoints from specific node")
log.WithFields(log.Fields{
"node_ip": ip,
"error": err.Error(),
}).Debug("Failed to fetch endpoints from specific node")
http.Error(w, fmt.Sprintf(`{"error": "Failed to fetch endpoints from node", "message": "%s"}`, err.Error()), http.StatusInternalServerError)
return
}
log.WithField("node_ip", ip).Debug("Successfully fetched endpoints from specific node")
json.NewEncoder(w).Encode(result)
return
}
log.Debug("Fetching capabilities via failover")
result, err := hs.performWithFailover(func(client *client.SporeClient) (interface{}, error) {
return client.GetCapabilities()
})
if err != nil {
log.WithError(err).Error("Error fetching capabilities")
log.WithError(err).Debug("Failed to fetch capabilities via failover")
http.Error(w, fmt.Sprintf(`{"error": "Failed to fetch capabilities", "message": "%s"}`, err.Error()), http.StatusBadGateway)
return
}
log.Debug("Successfully fetched capabilities via failover")
json.NewEncoder(w).Encode(result)
}
@@ -849,18 +883,21 @@ type ClusterNodeVersionsResponse struct {
// GET /api/cluster/node/versions
func (hs *HTTPServer) getClusterNodeVersions(w http.ResponseWriter, r *http.Request) {
log.Debug("Fetching cluster node versions")
result, err := hs.performWithFailover(func(client *client.SporeClient) (interface{}, error) {
return client.GetClusterStatus()
})
if err != nil {
log.WithError(err).Error("Error fetching cluster members for versions")
log.WithError(err).Debug("Failed to fetch cluster members for versions")
http.Error(w, fmt.Sprintf(`{"error": "Failed to fetch cluster members", "message": "%s"}`, err.Error()), http.StatusBadGateway)
return
}
clusterStatus, ok := result.(*client.ClusterStatusResponse)
if !ok {
log.Debug("Invalid cluster status response type")
http.Error(w, `{"error": "Invalid cluster status response"}`, http.StatusInternalServerError)
return
}
@@ -880,6 +917,8 @@ func (hs *HTTPServer) getClusterNodeVersions(w http.ResponseWriter, r *http.Requ
})
}
log.WithField("node_count", len(nodeVersions)).Debug("Successfully fetched cluster node versions")
response := ClusterNodeVersionsResponse{
Nodes: nodeVersions,
}
@@ -956,12 +995,25 @@ func (hs *HTTPServer) nodeMatchesLabels(nodeLabels, rolloutLabels map[string]str
// processRollout handles the actual rollout process in the background
func (hs *HTTPServer) processRollout(rolloutID string, nodes []NodeInfo, firmwareInfo FirmwareInfo) {
log.WithField("rollout_id", rolloutID).Info("Starting background rollout process")
log.WithFields(log.Fields{
"rollout_id": rolloutID,
"firmware": fmt.Sprintf("%s/%s", firmwareInfo.Name, firmwareInfo.Version),
"node_count": len(nodes),
}).Debug("Starting background rollout process")
// Download firmware from registry
log.WithFields(log.Fields{
"rollout_id": rolloutID,
"firmware": fmt.Sprintf("%s/%s", firmwareInfo.Name, firmwareInfo.Version),
}).Debug("Downloading firmware from registry for rollout")
firmwareData, err := hs.registryClient.DownloadFirmware(firmwareInfo.Name, firmwareInfo.Version)
if err != nil {
log.WithError(err).Error("Failed to download firmware for rollout")
log.WithFields(log.Fields{
"rollout_id": rolloutID,
"firmware": fmt.Sprintf("%s/%s", firmwareInfo.Name, firmwareInfo.Version),
"error": err.Error(),
}).Error("Failed to download firmware for rollout")
return
}
@@ -970,7 +1022,7 @@ func (hs *HTTPServer) processRollout(rolloutID string, nodes []NodeInfo, firmwar
"firmware": fmt.Sprintf("%s/%s", firmwareInfo.Name, firmwareInfo.Version),
"size": len(firmwareData),
"total_nodes": len(nodes),
}).Info("Downloaded firmware for rollout")
}).Debug("Successfully downloaded firmware for rollout")
// Process nodes in parallel using goroutines
var wg sync.WaitGroup
@@ -984,9 +1036,14 @@ func (hs *HTTPServer) processRollout(rolloutID string, nodes []NodeInfo, firmwar
"rollout_id": rolloutID,
"node_ip": node.IP,
"progress": fmt.Sprintf("%d/%d", nodeIndex+1, len(nodes)),
}).Info("Processing node in rollout")
}).Debug("Processing node in rollout")
// Update version label on the node before upload
log.WithFields(log.Fields{
"rollout_id": rolloutID,
"node_ip": node.IP,
}).Debug("Getting SPORE client for node")
client := hs.getSporeClient(node.IP)
// Create updated labels with the new version

View File

@@ -23,26 +23,34 @@ var upgrader = websocket.Upgrader{
// WebSocketServer manages WebSocket connections and broadcasts
type WebSocketServer struct {
nodeDiscovery *discovery.NodeDiscovery
sporeClients map[string]*client.SporeClient
clients map[*websocket.Conn]bool
mutex sync.RWMutex
writeMutex sync.Mutex // Mutex to serialize writes to WebSocket connections
logger *log.Logger
nodeDiscovery *discovery.NodeDiscovery
sporeClients map[string]*client.SporeClient
clients map[*websocket.Conn]bool
mutex sync.RWMutex
writeMutex sync.Mutex // Mutex to serialize writes to WebSocket connections
logger *log.Logger
clusterInfoTicker *time.Ticker
clusterInfoStopCh chan bool
clusterInfoInterval time.Duration
}
// NewWebSocketServer creates a new WebSocket server
func NewWebSocketServer(nodeDiscovery *discovery.NodeDiscovery) *WebSocketServer {
wss := &WebSocketServer{
nodeDiscovery: nodeDiscovery,
sporeClients: make(map[string]*client.SporeClient),
clients: make(map[*websocket.Conn]bool),
logger: log.New(),
nodeDiscovery: nodeDiscovery,
sporeClients: make(map[string]*client.SporeClient),
clients: make(map[*websocket.Conn]bool),
logger: log.New(),
clusterInfoStopCh: make(chan bool),
clusterInfoInterval: 5 * time.Second, // Fetch cluster info every 5 seconds
}
// Register callback for node updates
nodeDiscovery.AddCallback(wss.handleNodeUpdate)
// Start periodic cluster info fetching
go wss.startPeriodicClusterInfoFetching()
return wss
}
@@ -126,13 +134,13 @@ func (wss *WebSocketServer) sendCurrentClusterState(conn *websocket.Conn) {
}
message := struct {
Type string `json:"type"`
Topic string `json:"topic"`
Members []client.ClusterMember `json:"members"`
PrimaryNode string `json:"primaryNode"`
TotalNodes int `json:"totalNodes"`
Timestamp string `json:"timestamp"`
}{
Type: "cluster_update",
Topic: "cluster/update",
Members: clusterData,
PrimaryNode: wss.nodeDiscovery.GetPrimaryNode(),
TotalNodes: len(nodes),
@@ -151,17 +159,48 @@ func (wss *WebSocketServer) sendCurrentClusterState(conn *websocket.Conn) {
}
}
// startPeriodicClusterInfoFetching runs the periodic cluster-info loop. It
// creates a ticker with wss.clusterInfoInterval, triggers
// fetchAndBroadcastClusterInfo on every tick, and returns (stopping the
// ticker) once wss.clusterInfoStopCh delivers a value or is closed. It blocks,
// so callers run it in its own goroutine.
func (wss *WebSocketServer) startPeriodicClusterInfoFetching() {
	wss.clusterInfoTicker = time.NewTicker(wss.clusterInfoInterval)
	defer wss.clusterInfoTicker.Stop()

	wss.logger.
		WithField("interval", wss.clusterInfoInterval).
		Info("Starting periodic cluster info fetching")

	for {
		select {
		case <-wss.clusterInfoStopCh:
			wss.logger.Info("Stopping periodic cluster info fetching")
			return
		case <-wss.clusterInfoTicker.C:
			wss.fetchAndBroadcastClusterInfo()
		}
	}
}
// fetchAndBroadcastClusterInfo broadcasts a cluster update, but only when at
// least one WebSocket client is connected; with no clients it returns without
// doing any work.
func (wss *WebSocketServer) fetchAndBroadcastClusterInfo() {
	// Check the client set under the read lock.
	wss.mutex.RLock()
	hasClients := len(wss.clients) > 0
	wss.mutex.RUnlock()

	if !hasClients {
		return
	}

	wss.logger.Debug("Periodically fetching cluster info")
	wss.broadcastClusterUpdate()
}
// handleNodeUpdate is called when node information changes
func (wss *WebSocketServer) handleNodeUpdate(nodeIP, action string) {
wss.logger.WithFields(log.Fields{
"node_ip": nodeIP,
"action": action,
}).Debug("Node update received, broadcasting to WebSocket clients")
}).Debug("Node update received, broadcasting node discovery event")
// Broadcast cluster update to all clients
wss.broadcastClusterUpdate()
// Also broadcast node discovery event
// Only broadcast node discovery event, not cluster update
// Cluster updates are now handled by periodic fetching
wss.broadcastNodeDiscovery(nodeIP, action)
}
@@ -188,13 +227,13 @@ func (wss *WebSocketServer) broadcastClusterUpdate() {
}
message := struct {
Type string `json:"type"`
Topic string `json:"topic"`
Members []client.ClusterMember `json:"members"`
PrimaryNode string `json:"primaryNode"`
TotalNodes int `json:"totalNodes"`
Timestamp string `json:"timestamp"`
}{
Type: "cluster_update",
Topic: "cluster/update",
Members: clusterData,
PrimaryNode: wss.nodeDiscovery.GetPrimaryNode(),
TotalNodes: len(wss.nodeDiscovery.GetNodes()),
@@ -248,12 +287,12 @@ func (wss *WebSocketServer) broadcastNodeDiscovery(nodeIP, action string) {
}
message := struct {
Type string `json:"type"`
Topic string `json:"topic"`
Action string `json:"action"`
NodeIP string `json:"nodeIp"`
Timestamp string `json:"timestamp"`
}{
Type: "node_discovery",
Topic: "node/discovery",
Action: action,
NodeIP: nodeIP,
Timestamp: time.Now().Format(time.RFC3339),
@@ -291,14 +330,14 @@ func (wss *WebSocketServer) BroadcastFirmwareUploadStatus(nodeIP, status, filena
}
message := struct {
Type string `json:"type"`
Topic string `json:"topic"`
NodeIP string `json:"nodeIp"`
Status string `json:"status"`
Filename string `json:"filename"`
FileSize int `json:"fileSize"`
Timestamp string `json:"timestamp"`
}{
Type: "firmware_upload_status",
Topic: "firmware/upload/status",
NodeIP: nodeIP,
Status: status,
Filename: filename,
@@ -346,7 +385,7 @@ func (wss *WebSocketServer) BroadcastRolloutProgress(rolloutID, nodeIP, status s
}
message := struct {
Type string `json:"type"`
Topic string `json:"topic"`
RolloutID string `json:"rolloutId"`
NodeIP string `json:"nodeIp"`
Status string `json:"status"`
@@ -355,7 +394,7 @@ func (wss *WebSocketServer) BroadcastRolloutProgress(rolloutID, nodeIP, status s
Progress int `json:"progress"`
Timestamp string `json:"timestamp"`
}{
Type: "rollout_progress",
Topic: "rollout/progress",
RolloutID: rolloutID,
NodeIP: nodeIP,
Status: status,
@@ -429,20 +468,38 @@ func (wss *WebSocketServer) calculateProgress(current, total int, status string)
func (wss *WebSocketServer) getCurrentClusterMembers() ([]client.ClusterMember, error) {
nodes := wss.nodeDiscovery.GetNodes()
if len(nodes) == 0 {
wss.logger.Debug("No nodes available for cluster member retrieval")
return []client.ClusterMember{}, nil
}
// Try to get real cluster data from primary node
primaryNode := wss.nodeDiscovery.GetPrimaryNode()
if primaryNode != "" {
wss.logger.WithFields(log.Fields{
"primary_node": primaryNode,
"total_nodes": len(nodes),
}).Debug("Fetching cluster members from primary node")
client := wss.getSporeClient(primaryNode)
clusterStatus, err := client.GetClusterStatus()
if err == nil {
// Update local node data with API information
wss.logger.WithFields(log.Fields{
"primary_node": primaryNode,
"member_count": len(clusterStatus.Members),
}).Debug("Successfully fetched cluster members from primary node")
// Update local node data with API information but preserve heartbeat status
wss.updateLocalNodesWithAPI(clusterStatus.Members)
return clusterStatus.Members, nil
// Return merged data with heartbeat-based status override
return wss.mergeAPIWithHeartbeatStatus(clusterStatus.Members), nil
}
wss.logger.WithError(err).Error("Failed to get cluster status from primary node")
wss.logger.WithFields(log.Fields{
"primary_node": primaryNode,
"error": err.Error(),
}).Debug("Failed to get cluster status from primary node, using fallback")
} else {
wss.logger.Debug("No primary node available, using fallback cluster members")
}
// Fallback to local data if API fails
@@ -451,20 +508,62 @@ func (wss *WebSocketServer) getCurrentClusterMembers() ([]client.ClusterMember,
// updateLocalNodesWithAPI updates local node data with information from API
func (wss *WebSocketServer) updateLocalNodesWithAPI(apiMembers []client.ClusterMember) {
// This would update the local node discovery with fresh API data
// For now, we'll just log that we received the data
wss.logger.WithField("members", len(apiMembers)).Debug("Updating local nodes with API data")
for _, member := range apiMembers {
if len(member.Labels) > 0 {
wss.logger.WithFields(log.Fields{
"ip": member.IP,
"labels": member.Labels,
}).Debug("API member labels")
}
// Update local node with API data, but preserve heartbeat-based status
wss.updateNodeWithAPIData(member)
}
}
// updateNodeWithAPIData copies labels, resources and latency from an API
// cluster member onto the locally discovered node with the same IP. The
// hostname is replaced only when the API reports a non-empty, different one.
// The node's Status field is left untouched. Members unknown to local
// discovery are ignored.
func (wss *WebSocketServer) updateNodeWithAPIData(apiMember client.ClusterMember) {
	localNode, exists := wss.nodeDiscovery.GetNodes()[apiMember.IP]
	if !exists {
		return
	}

	// Refresh the API-sourced attributes; Status is deliberately not assigned.
	localNode.Labels = apiMember.Labels
	localNode.Resources = apiMember.Resources
	localNode.Latency = apiMember.Latency

	// Only update hostname if it's different and not empty
	if name := apiMember.Hostname; name != "" && name != localNode.Hostname {
		localNode.Hostname = name
	}

	wss.logger.WithFields(log.Fields{
		"ip":     apiMember.IP,
		"labels": apiMember.Labels,
		"status": localNode.Status, // Keep heartbeat-based status
	}).Debug("Updated node with API data, preserved heartbeat status")
}
// mergeAPIWithHeartbeatStatus returns a copy of apiMembers in which every
// member that is also known to local discovery has its Status and LastSeen
// replaced by the locally tracked values. Members without a local entry are
// copied through unchanged. The input slice is not modified.
func (wss *WebSocketServer) mergeAPIWithHeartbeatStatus(apiMembers []client.ClusterMember) []client.ClusterMember {
	localNodes := wss.nodeDiscovery.GetNodes()
	merged := make([]client.ClusterMember, 0, len(apiMembers))

	for i := range apiMembers {
		member := apiMembers[i] // work on a copy so the caller's slice stays intact

		local, known := localNodes[member.IP]
		if !known {
			merged = append(merged, member)
			continue
		}

		// Remember what the API said before overriding it, for the log line.
		apiStatus := member.Status
		member.Status = string(local.Status)
		member.LastSeen = local.LastSeen.Unix()

		wss.logger.WithFields(log.Fields{
			"ip":               member.IP,
			"api_status":       apiStatus,
			"heartbeat_status": local.Status,
		}).Debug("Overriding API status with heartbeat status")

		merged = append(merged, member)
	}

	return merged
}
// getFallbackClusterMembers returns local node data as fallback
func (wss *WebSocketServer) getFallbackClusterMembers() []client.ClusterMember {
nodes := wss.nodeDiscovery.GetNodes()
@@ -503,10 +602,106 @@ func (wss *WebSocketServer) GetClientCount() int {
return len(wss.clients)
}
// BroadcastClusterEvent sends a cluster event to every connected WebSocket
// client as a JSON text message with "topic", "data" and an RFC 3339
// "timestamp". It returns without work when no clients are connected.
// Marshal and per-client write failures are logged and not returned.
func (wss *WebSocketServer) BroadcastClusterEvent(topic string, data interface{}) {
	// Snapshot the connection set so the client lock is not held while writing.
	wss.mutex.RLock()
	conns := make([]*websocket.Conn, 0, len(wss.clients))
	for conn := range wss.clients {
		conns = append(conns, conn)
	}
	wss.mutex.RUnlock()

	if len(conns) == 0 {
		return
	}

	type envelope struct {
		Topic     string      `json:"topic"`
		Data      interface{} `json:"data"`
		Timestamp string      `json:"timestamp"`
	}
	encoded, err := json.Marshal(envelope{
		Topic:     topic,
		Data:      data,
		Timestamp: time.Now().Format(time.RFC3339),
	})
	if err != nil {
		wss.logger.WithError(err).Error("Failed to marshal cluster event")
		return
	}

	wss.logger.WithFields(log.Fields{
		"topic":   topic,
		"clients": len(conns),
	}).Debug("Broadcasting cluster event to WebSocket clients")

	// Writes to the connections are serialized through writeMutex.
	wss.writeMutex.Lock()
	defer wss.writeMutex.Unlock()

	for _, conn := range conns {
		conn.SetWriteDeadline(time.Now().Add(5 * time.Second))
		err := conn.WriteMessage(websocket.TextMessage, encoded)
		if err != nil {
			wss.logger.WithError(err).Error("Failed to send cluster event to client")
		}
	}
}
// BroadcastMQTTMessage sends an MQTT message to every connected WebSocket
// client as a JSON text message with "topic", "data" (the payload converted
// to a string) and an RFC 3339 "timestamp". It returns without work when no
// clients are connected. Marshal and per-client write failures are logged and
// not returned.
func (wss *WebSocketServer) BroadcastMQTTMessage(topic string, data []byte) {
	// Snapshot the connection set so the client lock is not held while writing.
	wss.mutex.RLock()
	conns := make([]*websocket.Conn, 0, len(wss.clients))
	for conn := range wss.clients {
		conns = append(conns, conn)
	}
	wss.mutex.RUnlock()

	if len(conns) == 0 {
		return
	}

	type envelope struct {
		Topic     string `json:"topic"`
		Data      string `json:"data"`
		Timestamp string `json:"timestamp"`
	}
	encoded, err := json.Marshal(envelope{
		Topic:     topic,
		Data:      string(data),
		Timestamp: time.Now().Format(time.RFC3339),
	})
	if err != nil {
		wss.logger.WithError(err).Error("Failed to marshal MQTT message")
		return
	}

	wss.logger.WithFields(log.Fields{
		"topic":   topic,
		"clients": len(conns),
		"length":  len(data),
	}).Debug("Broadcasting MQTT message to WebSocket clients")

	// Writes to the connections are serialized through writeMutex.
	wss.writeMutex.Lock()
	defer wss.writeMutex.Unlock()

	for _, conn := range conns {
		conn.SetWriteDeadline(time.Now().Add(5 * time.Second))
		err := conn.WriteMessage(websocket.TextMessage, encoded)
		if err != nil {
			wss.logger.WithError(err).Error("Failed to send MQTT message to client")
		}
	}
}
// Shutdown gracefully shuts down the WebSocket server
func (wss *WebSocketServer) Shutdown(ctx context.Context) error {
wss.logger.Info("Shutting down WebSocket server")
// Stop periodic cluster info fetching
close(wss.clusterInfoStopCh)
wss.mutex.Lock()
clients := make([]*websocket.Conn, 0, len(wss.clients))
for client := range wss.clients {

31
main.go
View File

@@ -10,6 +10,7 @@ import (
"time"
"spore-gateway/internal/discovery"
"spore-gateway/internal/mqtt"
"spore-gateway/internal/server"
"spore-gateway/pkg/config"
@@ -21,6 +22,7 @@ func main() {
configFile := flag.String("config", "", "Path to configuration file")
port := flag.String("port", "3001", "HTTP server port")
udpPort := flag.String("udp-port", "4210", "UDP discovery port")
mqttServer := flag.String("mqtt", "", "Enable MQTT integration with server URL (e.g., tcp://localhost:1883)")
logLevel := flag.String("log-level", "info", "Log level (debug, info, warn, error)")
flag.Parse()
@@ -61,6 +63,28 @@ func main() {
// Initialize HTTP server
httpServer := server.NewHTTPServer(cfg.HTTPPort, nodeDiscovery)
// Initialize MQTT client if enabled
var mqttClient *mqtt.MQTTClient
// Check for MQTT server from flag or environment variable
mqttServerURL := *mqttServer
if mqttServerURL == "" {
mqttServerURL = os.Getenv("MQTT_SERVER")
}
if mqttServerURL != "" {
log.WithField("server", mqttServerURL).Info("Initializing MQTT client")
mqttClient = mqtt.NewMQTTClientFromEnv(mqttServerURL)
// Set callback to forward MQTT messages to WebSocket
mqttClient.SetMessageCallback(func(topic string, data []byte) {
httpServer.BroadcastMQTTMessage(topic, data)
})
if err := mqttClient.Connect(); err != nil {
log.WithError(err).Fatal("Failed to connect to MQTT broker")
}
}
// Setup graceful shutdown
ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
defer stop()
@@ -90,6 +114,13 @@ func main() {
shutdownCtx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
defer cancel()
// Shutdown MQTT client
if mqttClient != nil {
if err := mqttClient.Shutdown(shutdownCtx); err != nil {
log.WithError(err).Error("MQTT client shutdown error")
}
}
// Shutdown HTTP server
if err := httpServer.Shutdown(shutdownCtx); err != nil {
log.WithError(err).Error("HTTP server shutdown error")

View File

@@ -117,21 +117,43 @@ type FirmwareUpdateResponse struct {
// GetClusterStatus fetches /api/cluster/members from the node at c.BaseURL
// and decodes the JSON body into a ClusterStatusResponse. It returns an error
// when the request fails, the status is not 200 OK, or the body cannot be
// decoded. Each outcome is also logged at debug level.
func (c *SporeClient) GetClusterStatus() (*ClusterStatusResponse, error) {
	target := fmt.Sprintf("%s/api/cluster/members", c.BaseURL)

	log.WithFields(log.Fields{
		"node_url": c.BaseURL,
		"endpoint": "/api/cluster/members",
	}).Debug("Fetching cluster status from SPORE node")

	resp, err := c.HTTPClient.Get(target)
	if err != nil {
		log.WithFields(log.Fields{
			"node_url": c.BaseURL,
			"error":    err.Error(),
		}).Debug("Failed to fetch cluster status from SPORE node")
		return nil, fmt.Errorf("failed to get cluster status: %w", err)
	}
	defer resp.Body.Close()

	if code := resp.StatusCode; code != http.StatusOK {
		log.WithFields(log.Fields{
			"node_url":    c.BaseURL,
			"status_code": code,
		}).Debug("Cluster status request returned non-OK status")
		return nil, fmt.Errorf("cluster status request failed with status %d", code)
	}

	status := new(ClusterStatusResponse)
	if err := json.NewDecoder(resp.Body).Decode(status); err != nil {
		log.WithFields(log.Fields{
			"node_url": c.BaseURL,
			"error":    err.Error(),
		}).Debug("Failed to decode cluster status response")
		return nil, fmt.Errorf("failed to decode cluster status response: %w", err)
	}

	log.WithFields(log.Fields{
		"node_url":     c.BaseURL,
		"member_count": len(status.Members),
	}).Debug("Successfully fetched cluster status from SPORE node")

	return status, nil
}
@@ -139,21 +161,44 @@ func (c *SporeClient) GetClusterStatus() (*ClusterStatusResponse, error) {
// GetTaskStatus fetches /api/tasks/status from the node at c.BaseURL and
// decodes the JSON body into a TaskStatusResponse. It returns an error when
// the request fails, the status is not 200 OK, or the body cannot be decoded.
// Each outcome is also logged at debug level.
func (c *SporeClient) GetTaskStatus() (*TaskStatusResponse, error) {
	target := fmt.Sprintf("%s/api/tasks/status", c.BaseURL)

	log.WithFields(log.Fields{
		"node_url": c.BaseURL,
		"endpoint": "/api/tasks/status",
	}).Debug("Fetching task status from SPORE node")

	resp, err := c.HTTPClient.Get(target)
	if err != nil {
		log.WithFields(log.Fields{
			"node_url": c.BaseURL,
			"error":    err.Error(),
		}).Debug("Failed to fetch task status from SPORE node")
		return nil, fmt.Errorf("failed to get task status: %w", err)
	}
	defer resp.Body.Close()

	if code := resp.StatusCode; code != http.StatusOK {
		log.WithFields(log.Fields{
			"node_url":    c.BaseURL,
			"status_code": code,
		}).Debug("Task status request returned non-OK status")
		return nil, fmt.Errorf("task status request failed with status %d", code)
	}

	status := new(TaskStatusResponse)
	if err := json.NewDecoder(resp.Body).Decode(status); err != nil {
		log.WithFields(log.Fields{
			"node_url": c.BaseURL,
			"error":    err.Error(),
		}).Debug("Failed to decode task status response")
		return nil, fmt.Errorf("failed to decode task status response: %w", err)
	}

	log.WithFields(log.Fields{
		"node_url":     c.BaseURL,
		"total_tasks":  status.Summary.TotalTasks,
		"active_tasks": status.Summary.ActiveTasks,
	}).Debug("Successfully fetched task status from SPORE node")

	return status, nil
}
@@ -161,21 +206,44 @@ func (c *SporeClient) GetTaskStatus() (*TaskStatusResponse, error) {
// GetSystemStatus fetches /api/node/status from the node at c.BaseURL and
// decodes the JSON body into a SystemStatusResponse. It returns an error when
// the request fails, the status is not 200 OK, or the body cannot be decoded.
// Each outcome is also logged at debug level.
func (c *SporeClient) GetSystemStatus() (*SystemStatusResponse, error) {
	target := fmt.Sprintf("%s/api/node/status", c.BaseURL)

	log.WithFields(log.Fields{
		"node_url": c.BaseURL,
		"endpoint": "/api/node/status",
	}).Debug("Fetching system status from SPORE node")

	resp, err := c.HTTPClient.Get(target)
	if err != nil {
		log.WithFields(log.Fields{
			"node_url": c.BaseURL,
			"error":    err.Error(),
		}).Debug("Failed to fetch system status from SPORE node")
		return nil, fmt.Errorf("failed to get system status: %w", err)
	}
	defer resp.Body.Close()

	if code := resp.StatusCode; code != http.StatusOK {
		log.WithFields(log.Fields{
			"node_url":    c.BaseURL,
			"status_code": code,
		}).Debug("System status request returned non-OK status")
		return nil, fmt.Errorf("system status request failed with status %d", code)
	}

	status := new(SystemStatusResponse)
	if err := json.NewDecoder(resp.Body).Decode(status); err != nil {
		log.WithFields(log.Fields{
			"node_url": c.BaseURL,
			"error":    err.Error(),
		}).Debug("Failed to decode system status response")
		return nil, fmt.Errorf("failed to decode system status response: %w", err)
	}

	log.WithFields(log.Fields{
		"node_url":  c.BaseURL,
		"free_heap": status.FreeHeap,
		"chip_id":   status.ChipID,
	}).Debug("Successfully fetched system status from SPORE node")

	return status, nil
}
@@ -183,21 +251,43 @@ func (c *SporeClient) GetSystemStatus() (*SystemStatusResponse, error) {
// GetCapabilities queries the node's /api/node/endpoints endpoint and decodes
// the JSON reply into a CapabilitiesResponse.
//
// Every step emits a debug log entry tagged with the node URL; the success
// entry also records how many endpoints were reported. An error is returned
// when the request fails, when the node answers with anything other than
// 200 OK, or when the body cannot be decoded into the response type.
func (c *SporeClient) GetCapabilities() (*CapabilitiesResponse, error) {
	const endpoint = "/api/node/endpoints"
	target := fmt.Sprintf("%s%s", c.BaseURL, endpoint)

	// All log entries of this call share the node URL field.
	nodeLog := log.WithFields(log.Fields{"node_url": c.BaseURL})
	nodeLog.WithFields(log.Fields{"endpoint": endpoint}).Debug("Fetching capabilities from SPORE node")

	resp, err := c.HTTPClient.Get(target)
	if err != nil {
		nodeLog.WithFields(log.Fields{"error": err.Error()}).Debug("Failed to fetch capabilities from SPORE node")
		return nil, fmt.Errorf("failed to get capabilities: %w", err)
	}
	defer resp.Body.Close()

	if code := resp.StatusCode; code != http.StatusOK {
		nodeLog.WithFields(log.Fields{"status_code": code}).Debug("Capabilities request returned non-OK status")
		return nil, fmt.Errorf("capabilities request failed with status %d", code)
	}

	caps := new(CapabilitiesResponse)
	if err = json.NewDecoder(resp.Body).Decode(caps); err != nil {
		nodeLog.WithFields(log.Fields{"error": err.Error()}).Debug("Failed to decode capabilities response")
		return nil, fmt.Errorf("failed to decode capabilities response: %w", err)
	}

	nodeLog.WithFields(log.Fields{
		"endpoint_count": len(caps.Endpoints),
	}).Debug("Successfully fetched capabilities from SPORE node")
	return caps, nil
}
@@ -205,16 +295,30 @@ func (c *SporeClient) GetCapabilities() (*CapabilitiesResponse, error) {
func (c *SporeClient) UpdateFirmware(firmwareData []byte, filename string) (*FirmwareUpdateResponse, error) {
url := fmt.Sprintf("%s/api/node/update", c.BaseURL)
log.WithFields(log.Fields{
"node_url": c.BaseURL,
"endpoint": "/api/node/update",
"filename": filename,
"data_size": len(firmwareData),
}).Debug("Preparing firmware upload to SPORE node")
// Create multipart form
var requestBody bytes.Buffer
contentType := createMultipartForm(&requestBody, firmwareData, filename)
if contentType == "" {
log.WithFields(log.Fields{
"node_url": c.BaseURL,
}).Debug("Failed to create multipart form for firmware upload")
return nil, fmt.Errorf("failed to create multipart form")
}
req, err := http.NewRequest("POST", url, &requestBody)
if err != nil {
log.WithFields(log.Fields{
"node_url": c.BaseURL,
"error": err.Error(),
}).Debug("Failed to create firmware update request")
return nil, fmt.Errorf("failed to create firmware update request: %w", err)
}
@@ -226,9 +330,10 @@ func (c *SporeClient) UpdateFirmware(firmwareData []byte, filename string) (*Fir
}
log.WithFields(log.Fields{
"node_ip": c.BaseURL,
"status": "sending_firmware",
}).Debug("Sending firmware to SPORE device")
"node_url": c.BaseURL,
"filename": filename,
"data_size": len(firmwareData),
}).Debug("Uploading firmware to SPORE node")
resp, err := firmwareClient.Do(req)
if err != nil {
@@ -277,9 +382,19 @@ func (c *SporeClient) UpdateFirmware(firmwareData []byte, filename string) (*Fir
func (c *SporeClient) UpdateNodeLabels(labels map[string]string) error {
targetURL := fmt.Sprintf("%s/api/node/config", c.BaseURL)
log.WithFields(log.Fields{
"node_url": c.BaseURL,
"endpoint": "/api/node/config",
"labels": labels,
}).Debug("Updating node labels on SPORE node")
// Convert labels to JSON
labelsJSON, err := json.Marshal(labels)
if err != nil {
log.WithFields(log.Fields{
"node_url": c.BaseURL,
"error": err.Error(),
}).Debug("Failed to marshal labels")
return fmt.Errorf("failed to marshal labels: %w", err)
}
@@ -289,6 +404,10 @@ func (c *SporeClient) UpdateNodeLabels(labels map[string]string) error {
req, err := http.NewRequest("POST", targetURL, strings.NewReader(data.Encode()))
if err != nil {
log.WithFields(log.Fields{
"node_url": c.BaseURL,
"error": err.Error(),
}).Debug("Failed to create labels update request")
return fmt.Errorf("failed to create labels update request: %w", err)
}
@@ -296,19 +415,28 @@ func (c *SporeClient) UpdateNodeLabels(labels map[string]string) error {
resp, err := c.HTTPClient.Do(req)
if err != nil {
log.WithFields(log.Fields{
"node_url": c.BaseURL,
"error": err.Error(),
}).Debug("Failed to update node labels")
return fmt.Errorf("failed to update node labels: %w", err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
body, _ := io.ReadAll(resp.Body)
log.WithFields(log.Fields{
"node_url": c.BaseURL,
"status_code": resp.StatusCode,
"error_body": string(body),
}).Debug("Node labels update returned non-OK status")
return fmt.Errorf("node labels update failed with status %d: %s", resp.StatusCode, string(body))
}
log.WithFields(log.Fields{
"node_ip": c.BaseURL,
"labels": labels,
}).Info("Node labels updated successfully")
"node_url": c.BaseURL,
"labels": labels,
}).Debug("Successfully updated node labels on SPORE node")
return nil
}
@@ -318,17 +446,43 @@ func (c *SporeClient) ProxyCall(method, uri string, params map[string]interface{
// Build target URL
targetURL := fmt.Sprintf("%s%s", c.BaseURL, uri)
log.WithFields(log.Fields{
"node_url": c.BaseURL,
"method": method,
"endpoint": uri,
"param_count": len(params),
}).Debug("Making proxy call to SPORE node")
// Parse parameters and build request
req, err := c.buildProxyRequest(method, targetURL, params)
if err != nil {
log.WithFields(log.Fields{
"node_url": c.BaseURL,
"method": method,
"endpoint": uri,
"error": err.Error(),
}).Debug("Failed to build proxy request")
return nil, fmt.Errorf("failed to build proxy request: %w", err)
}
resp, err := c.HTTPClient.Do(req)
if err != nil {
log.WithFields(log.Fields{
"node_url": c.BaseURL,
"method": method,
"endpoint": uri,
"error": err.Error(),
}).Debug("Proxy call failed")
return nil, fmt.Errorf("proxy call failed: %w", err)
}
log.WithFields(log.Fields{
"node_url": c.BaseURL,
"method": method,
"endpoint": uri,
"status_code": resp.StatusCode,
}).Debug("Proxy call completed successfully")
return resp, nil
}

View File

@@ -69,21 +69,42 @@ func (c *RegistryClient) FindFirmwareByNameAndVersion(name, version string) (*Fi
// GetHealth calls the registry's /health endpoint and returns the decoded
// JSON document as a generic map.
//
// Every step emits a debug log entry tagged with the registry URL. An error
// is returned when the request fails, when the registry answers with anything
// other than 200 OK, or when the body cannot be decoded as JSON into the map.
func (c *RegistryClient) GetHealth() (map[string]interface{}, error) {
	const endpoint = "/health"
	target := fmt.Sprintf("%s%s", c.BaseURL, endpoint)

	// All log entries of this call share the registry URL field.
	regLog := log.WithFields(log.Fields{"registry_url": c.BaseURL})
	regLog.WithFields(log.Fields{"endpoint": endpoint}).Debug("Checking registry health")

	resp, err := c.HTTPClient.Get(target)
	if err != nil {
		regLog.WithFields(log.Fields{"error": err.Error()}).Debug("Failed to check registry health")
		return nil, fmt.Errorf("failed to get registry health: %w", err)
	}
	defer resp.Body.Close()

	if code := resp.StatusCode; code != http.StatusOK {
		regLog.WithFields(log.Fields{"status_code": code}).Debug("Registry health check returned non-OK status")
		return nil, fmt.Errorf("registry health check failed with status %d", code)
	}

	var report map[string]interface{}
	if err = json.NewDecoder(resp.Body).Decode(&report); err != nil {
		regLog.WithFields(log.Fields{"error": err.Error()}).Debug("Failed to decode health response")
		return nil, fmt.Errorf("failed to decode health response: %w", err)
	}

	regLog.Debug("Successfully checked registry health")
	return report, nil
}
@@ -91,6 +112,13 @@ func (c *RegistryClient) GetHealth() (map[string]interface{}, error) {
func (c *RegistryClient) UploadFirmware(metadata FirmwareMetadata, firmwareFile io.Reader) (map[string]interface{}, error) {
url := fmt.Sprintf("%s/firmware", c.BaseURL)
log.WithFields(log.Fields{
"registry_url": c.BaseURL,
"endpoint": "/firmware",
"name": metadata.Name,
"version": metadata.Version,
}).Debug("Uploading firmware to registry")
// Create multipart form data
body := &bytes.Buffer{}
writer := multipart.NewWriter(body)
@@ -98,11 +126,19 @@ func (c *RegistryClient) UploadFirmware(metadata FirmwareMetadata, firmwareFile
// Add metadata
metadataJSON, err := json.Marshal(metadata)
if err != nil {
log.WithFields(log.Fields{
"registry_url": c.BaseURL,
"error": err.Error(),
}).Debug("Failed to marshal firmware metadata")
return nil, fmt.Errorf("failed to marshal metadata: %w", err)
}
metadataPart, err := writer.CreateFormField("metadata")
if err != nil {
log.WithFields(log.Fields{
"registry_url": c.BaseURL,
"error": err.Error(),
}).Debug("Failed to create metadata field")
return nil, fmt.Errorf("failed to create metadata field: %w", err)
}
metadataPart.Write(metadataJSON)
@@ -110,10 +146,18 @@ func (c *RegistryClient) UploadFirmware(metadata FirmwareMetadata, firmwareFile
// Add firmware file
firmwarePart, err := writer.CreateFormFile("firmware", fmt.Sprintf("%s-%s.bin", metadata.Name, metadata.Version))
if err != nil {
log.WithFields(log.Fields{
"registry_url": c.BaseURL,
"error": err.Error(),
}).Debug("Failed to create firmware field")
return nil, fmt.Errorf("failed to create firmware field: %w", err)
}
if _, err := io.Copy(firmwarePart, firmwareFile); err != nil {
log.WithFields(log.Fields{
"registry_url": c.BaseURL,
"error": err.Error(),
}).Debug("Failed to copy firmware data")
return nil, fmt.Errorf("failed to copy firmware data: %w", err)
}
@@ -121,6 +165,10 @@ func (c *RegistryClient) UploadFirmware(metadata FirmwareMetadata, firmwareFile
req, err := http.NewRequest("POST", url, body)
if err != nil {
log.WithFields(log.Fields{
"registry_url": c.BaseURL,
"error": err.Error(),
}).Debug("Failed to create upload request")
return nil, fmt.Errorf("failed to create request: %w", err)
}
@@ -128,20 +176,43 @@ func (c *RegistryClient) UploadFirmware(metadata FirmwareMetadata, firmwareFile
resp, err := c.HTTPClient.Do(req)
if err != nil {
log.WithFields(log.Fields{
"registry_url": c.BaseURL,
"name": metadata.Name,
"version": metadata.Version,
"error": err.Error(),
}).Debug("Failed to upload firmware to registry")
return nil, fmt.Errorf("failed to upload firmware: %w", err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK && resp.StatusCode != http.StatusCreated {
body, _ := io.ReadAll(resp.Body)
log.WithFields(log.Fields{
"registry_url": c.BaseURL,
"name": metadata.Name,
"version": metadata.Version,
"status_code": resp.StatusCode,
"error_body": string(body),
}).Debug("Firmware upload returned non-OK status")
return nil, fmt.Errorf("firmware upload failed with status %d: %s", resp.StatusCode, string(body))
}
var result map[string]interface{}
if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
log.WithFields(log.Fields{
"registry_url": c.BaseURL,
"error": err.Error(),
}).Debug("Failed to decode upload response")
return nil, fmt.Errorf("failed to decode upload response: %w", err)
}
log.WithFields(log.Fields{
"registry_url": c.BaseURL,
"name": metadata.Name,
"version": metadata.Version,
}).Debug("Successfully uploaded firmware to registry")
return result, nil
}
@@ -149,13 +220,32 @@ func (c *RegistryClient) UploadFirmware(metadata FirmwareMetadata, firmwareFile
func (c *RegistryClient) UpdateFirmwareMetadata(name, version string, metadata FirmwareMetadata) (map[string]interface{}, error) {
url := fmt.Sprintf("%s/firmware/%s/%s", c.BaseURL, name, version)
log.WithFields(log.Fields{
"registry_url": c.BaseURL,
"endpoint": fmt.Sprintf("/firmware/%s/%s", name, version),
"name": name,
"version": version,
}).Debug("Updating firmware metadata in registry")
metadataJSON, err := json.Marshal(metadata)
if err != nil {
log.WithFields(log.Fields{
"registry_url": c.BaseURL,
"name": name,
"version": version,
"error": err.Error(),
}).Debug("Failed to marshal metadata")
return nil, fmt.Errorf("failed to marshal metadata: %w", err)
}
req, err := http.NewRequest("PUT", url, bytes.NewBuffer(metadataJSON))
if err != nil {
log.WithFields(log.Fields{
"registry_url": c.BaseURL,
"name": name,
"version": version,
"error": err.Error(),
}).Debug("Failed to create update request")
return nil, fmt.Errorf("failed to create request: %w", err)
}
@@ -163,20 +253,45 @@ func (c *RegistryClient) UpdateFirmwareMetadata(name, version string, metadata F
resp, err := c.HTTPClient.Do(req)
if err != nil {
log.WithFields(log.Fields{
"registry_url": c.BaseURL,
"name": name,
"version": version,
"error": err.Error(),
}).Debug("Failed to update firmware metadata in registry")
return nil, fmt.Errorf("failed to update firmware metadata: %w", err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
body, _ := io.ReadAll(resp.Body)
log.WithFields(log.Fields{
"registry_url": c.BaseURL,
"name": name,
"version": version,
"status_code": resp.StatusCode,
"error_body": string(body),
}).Debug("Firmware metadata update returned non-OK status")
return nil, fmt.Errorf("firmware metadata update failed with status %d: %s", resp.StatusCode, string(body))
}
var result map[string]interface{}
if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
log.WithFields(log.Fields{
"registry_url": c.BaseURL,
"name": name,
"version": version,
"error": err.Error(),
}).Debug("Failed to decode update response")
return nil, fmt.Errorf("failed to decode update response: %w", err)
}
log.WithFields(log.Fields{
"registry_url": c.BaseURL,
"name": name,
"version": version,
}).Debug("Successfully updated firmware metadata in registry")
return result, nil
}
@@ -221,21 +336,43 @@ func (c *RegistryClient) firmwareMatchesLabels(firmwareLabels, rolloutLabels map
// ListFirmware retrieves the registry's /firmware listing and decodes it into
// a slice of GroupedFirmware.
//
// Every step emits a debug log entry tagged with the registry URL; the
// success entry also records the number of decoded list elements. An error is
// returned when the request fails, when the registry answers with anything
// other than 200 OK, or when the body cannot be decoded into the slice.
func (c *RegistryClient) ListFirmware() ([]GroupedFirmware, error) {
	const endpoint = "/firmware"
	target := fmt.Sprintf("%s%s", c.BaseURL, endpoint)

	// All log entries of this call share the registry URL field.
	regLog := log.WithFields(log.Fields{"registry_url": c.BaseURL})
	regLog.WithFields(log.Fields{"endpoint": endpoint}).Debug("Fetching firmware list from registry")

	resp, err := c.HTTPClient.Get(target)
	if err != nil {
		regLog.WithFields(log.Fields{"error": err.Error()}).Debug("Failed to fetch firmware list from registry")
		return nil, fmt.Errorf("failed to get firmware list: %w", err)
	}
	defer resp.Body.Close()

	if code := resp.StatusCode; code != http.StatusOK {
		regLog.WithFields(log.Fields{"status_code": code}).Debug("Firmware list request returned non-OK status")
		return nil, fmt.Errorf("firmware list request failed with status %d", code)
	}

	var groups []GroupedFirmware
	if err = json.NewDecoder(resp.Body).Decode(&groups); err != nil {
		regLog.WithFields(log.Fields{"error": err.Error()}).Debug("Failed to decode firmware list response")
		return nil, fmt.Errorf("failed to decode firmware list response: %w", err)
	}

	regLog.WithFields(log.Fields{
		"firmware_count": len(groups),
	}).Debug("Successfully fetched firmware list from registry")
	return groups, nil
}
@@ -243,26 +380,52 @@ func (c *RegistryClient) ListFirmware() ([]GroupedFirmware, error) {
func (c *RegistryClient) DownloadFirmware(name, version string) ([]byte, error) {
url := fmt.Sprintf("%s/firmware/%s/%s", c.BaseURL, name, version)
log.WithFields(log.Fields{
"registry_url": c.BaseURL,
"endpoint": fmt.Sprintf("/firmware/%s/%s", name, version),
"name": name,
"version": version,
}).Debug("Downloading firmware from registry")
resp, err := c.HTTPClient.Get(url)
if err != nil {
log.WithFields(log.Fields{
"registry_url": c.BaseURL,
"name": name,
"version": version,
"error": err.Error(),
}).Debug("Failed to download firmware from registry")
return nil, fmt.Errorf("failed to download firmware: %w", err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
log.WithFields(log.Fields{
"registry_url": c.BaseURL,
"name": name,
"version": version,
"status_code": resp.StatusCode,
}).Debug("Firmware download request returned non-OK status")
return nil, fmt.Errorf("firmware download request failed with status %d", resp.StatusCode)
}
data, err := io.ReadAll(resp.Body)
if err != nil {
log.WithFields(log.Fields{
"registry_url": c.BaseURL,
"name": name,
"version": version,
"error": err.Error(),
}).Debug("Failed to read firmware data from registry")
return nil, fmt.Errorf("failed to read firmware data: %w", err)
}
log.WithFields(log.Fields{
"name": name,
"version": version,
"size": len(data),
}).Info("Downloaded firmware from registry")
"registry_url": c.BaseURL,
"name": name,
"version": version,
"size": len(data),
}).Debug("Successfully downloaded firmware from registry")
return data, nil
}
@@ -271,31 +434,64 @@ func (c *RegistryClient) DownloadFirmware(name, version string) ([]byte, error)
func (c *RegistryClient) DeleteFirmware(name, version string) (map[string]interface{}, error) {
url := fmt.Sprintf("%s/firmware/%s/%s", c.BaseURL, name, version)
log.WithFields(log.Fields{
"registry_url": c.BaseURL,
"endpoint": fmt.Sprintf("/firmware/%s/%s", name, version),
"name": name,
"version": version,
}).Debug("Deleting firmware from registry")
req, err := http.NewRequest(http.MethodDelete, url, nil)
if err != nil {
log.WithFields(log.Fields{
"registry_url": c.BaseURL,
"name": name,
"version": version,
"error": err.Error(),
}).Debug("Failed to create delete request")
return nil, fmt.Errorf("failed to create delete request: %w", err)
}
resp, err := c.HTTPClient.Do(req)
if err != nil {
log.WithFields(log.Fields{
"registry_url": c.BaseURL,
"name": name,
"version": version,
"error": err.Error(),
}).Debug("Failed to delete firmware from registry")
return nil, fmt.Errorf("failed to delete firmware: %w", err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
body, _ := io.ReadAll(resp.Body)
log.WithFields(log.Fields{
"registry_url": c.BaseURL,
"name": name,
"version": version,
"status_code": resp.StatusCode,
"error_body": string(body),
}).Debug("Firmware delete returned non-OK status")
return nil, fmt.Errorf("firmware delete request failed with status %d: %s", resp.StatusCode, string(body))
}
var result map[string]interface{}
if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
log.WithFields(log.Fields{
"registry_url": c.BaseURL,
"name": name,
"version": version,
"error": err.Error(),
}).Debug("Failed to decode delete response")
return nil, fmt.Errorf("failed to decode delete response: %w", err)
}
log.WithFields(log.Fields{
"name": name,
"version": version,
}).Info("Deleted firmware from registry")
"registry_url": c.BaseURL,
"name": name,
"version": version,
}).Debug("Successfully deleted firmware from registry")
return result, nil
}