feat(epic1): complete OpenTelemetry integration and add verification documentation

Story 1.6: OpenTelemetry Distributed Tracing
- Implemented tracer initialization with stdout (dev) and OTLP (prod) exporters
- Added HTTP request instrumentation via Gin middleware
- Integrated trace ID correlation in structured logs
- Added tracing configuration to config files
- Registered tracer provider in DI container

Documentation and Setup:
- Created Docker Compose setup for PostgreSQL database
- Added comprehensive Epic 1 summary with verification instructions
- Added Epic 0 summary with verification instructions
- Linked summaries in documentation index and epic READMEs
- Included detailed database testing instructions
- Added Docker Compose commands and troubleshooting guide

All Epic 1 stories (1.1-1.6) are now complete. Story 1.7 depends on Epic 2.
This commit is contained in:
2025-11-05 18:20:15 +01:00
parent 30320304f6
commit fde01bfc73
13 changed files with 873 additions and 54 deletions

View File

@@ -13,10 +13,12 @@ import (
"git.dcentral.systems/toolz/goplt/internal/infra/database"
loggerimpl "git.dcentral.systems/toolz/goplt/internal/logger"
"git.dcentral.systems/toolz/goplt/internal/metrics"
"git.dcentral.systems/toolz/goplt/internal/observability"
"git.dcentral.systems/toolz/goplt/internal/server"
"git.dcentral.systems/toolz/goplt/pkg/config"
"git.dcentral.systems/toolz/goplt/pkg/errorbus"
"git.dcentral.systems/toolz/goplt/pkg/logger"
"go.opentelemetry.io/otel/trace"
"go.uber.org/fx"
)
@@ -156,6 +158,54 @@ func ProvideMetrics() fx.Option {
})
}
// ProvideTracer returns an FX option that registers a constructor for the
// OpenTelemetry trace.TracerProvider.
//
// The constructor reads its settings from the configuration provider:
//   - "tracing.enabled": when false, a no-op provider is returned immediately.
//   - "tracing.service_name": defaults to "platform" when empty.
//   - "tracing.service_version": defaults to "1.0.0" when empty.
//   - "environment": defaults to "development" when empty.
//   - "tracing.otlp_endpoint": passed through unchanged.
//
// When tracing is enabled, an OnStop lifecycle hook is appended so the
// provider is shut down together with the application.
func ProvideTracer() fx.Option {
	newProvider := func(cfg config.ConfigProvider, lc fx.Lifecycle) (trace.TracerProvider, error) {
		if !cfg.GetBool("tracing.enabled") {
			// Tracing is switched off: hand out a provider that does nothing.
			return trace.NewNoopTracerProvider(), nil
		}

		// stringOr reads a string setting and substitutes fallback when the
		// configured value is empty.
		stringOr := func(key, fallback string) string {
			if value := cfg.GetString(key); value != "" {
				return value
			}
			return fallback
		}

		tracerCfg := observability.Config{
			Enabled:        true, // only reached when tracing.enabled is true
			ServiceName:    stringOr("tracing.service_name", "platform"),
			ServiceVersion: stringOr("tracing.service_version", "1.0.0"),
			Environment:    stringOr("environment", "development"),
			OTLPEndpoint:   cfg.GetString("tracing.otlp_endpoint"),
		}

		provider, err := observability.InitTracer(context.Background(), tracerCfg)
		if err != nil {
			return nil, fmt.Errorf("failed to initialize tracer: %w", err)
		}

		// Shut the tracer provider down when the application stops.
		stopHook := fx.Hook{
			OnStop: func(stopCtx context.Context) error {
				return observability.ShutdownTracer(stopCtx, provider)
			},
		}
		lc.Append(stopHook)

		return provider, nil
	}

	return fx.Provide(newProvider)
}
// ProvideHTTPServer creates an FX option that provides the HTTP server.
func ProvideHTTPServer() fx.Option {
return fx.Provide(func(
@@ -164,9 +214,10 @@ func ProvideHTTPServer() fx.Option {
healthRegistry *health.Registry,
metricsRegistry *metrics.Metrics,
errorBus errorbus.ErrorPublisher,
tracer trace.TracerProvider,
lc fx.Lifecycle,
) (*server.Server, error) {
srv, err := server.NewServer(cfg, log, healthRegistry, metricsRegistry, errorBus)
srv, err := server.NewServer(cfg, log, healthRegistry, metricsRegistry, errorBus, tracer)
if err != nil {
return nil, fmt.Errorf("failed to create HTTP server: %w", err)
}
@@ -194,7 +245,7 @@ func ProvideHTTPServer() fx.Option {
}
// CoreModule returns an FX option that provides all core services.
// This includes configuration, logging, database, error bus, health checks, metrics, and HTTP server.
// This includes configuration, logging, database, error bus, health checks, metrics, tracing, and HTTP server.
func CoreModule() fx.Option {
return fx.Options(
ProvideConfig(),
@@ -203,6 +254,7 @@ func CoreModule() fx.Option {
ProvideErrorBus(),
ProvideHealthRegistry(),
ProvideMetrics(),
ProvideTracer(),
ProvideHTTPServer(),
)
}

View File

@@ -4,6 +4,7 @@ import (
"context"
"git.dcentral.systems/toolz/goplt/pkg/logger"
"go.opentelemetry.io/otel/trace"
"go.uber.org/zap"
"go.uber.org/zap/zapcore"
)
@@ -87,6 +88,19 @@ func (zl *zapLogger) WithContext(ctx context.Context) logger.Logger {
fields = append(fields, zap.String("user_id", userID))
}
// Extract trace ID from OpenTelemetry context
span := trace.SpanFromContext(ctx)
if span.SpanContext().IsValid() {
traceID := span.SpanContext().TraceID().String()
spanID := span.SpanContext().SpanID().String()
if traceID != "" {
fields = append(fields, zap.String("trace_id", traceID))
}
if spanID != "" {
fields = append(fields, zap.String("span_id", spanID))
}
}
if len(fields) == 0 {
return zl
}

View File

@@ -0,0 +1,94 @@
package observability
import (
"context"
"fmt"
"os"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp"
"go.opentelemetry.io/otel/exporters/stdout/stdouttrace"
"go.opentelemetry.io/otel/propagation"
"go.opentelemetry.io/otel/sdk/resource"
sdktrace "go.opentelemetry.io/otel/sdk/trace"
semconv "go.opentelemetry.io/otel/semconv/v1.26.0"
"go.opentelemetry.io/otel/trace"
)
// Config carries the settings consumed by InitTracer when setting up
// OpenTelemetry tracing.
type Config struct {
	// Enabled toggles tracing; when false InitTracer returns a no-op provider.
	Enabled bool

	// ServiceName is recorded as the service name resource attribute.
	ServiceName string

	// ServiceVersion is recorded as the service version resource attribute.
	ServiceVersion string

	// Environment is recorded as the deployment environment resource
	// attribute. The value "production", combined with a non-empty
	// OTLPEndpoint, selects the OTLP exporter in InitTracer.
	Environment string

	// OTLPEndpoint is the endpoint handed to the OTLP HTTP exporter.
	OTLPEndpoint string
}
// InitTracer builds the tracer provider described by cfg.
//
// When cfg.Enabled is false it returns a no-op provider and touches no global
// state. Otherwise it:
//  1. creates a resource carrying the service name, service version and
//     deployment environment attributes;
//  2. picks a span exporter: OTLP over HTTP when cfg.Environment is
//     "production" and cfg.OTLPEndpoint is non-empty, pretty-printed stdout
//     in every other case;
//  3. creates an SDK tracer provider that batches spans to that exporter and
//     samples every trace;
//  4. installs the provider and a composite TraceContext + Baggage propagator
//     as the OpenTelemetry globals.
//
// An error is returned when the resource or the exporter cannot be created.
func InitTracer(ctx context.Context, cfg Config) (trace.TracerProvider, error) {
	if !cfg.Enabled {
		// Tracing disabled: return a no-op tracer provider.
		return trace.NewNoopTracerProvider(), nil
	}

	// Describe this service so every exported span carries its identity.
	serviceAttrs := resource.WithAttributes(
		semconv.ServiceNameKey.String(cfg.ServiceName),
		semconv.ServiceVersionKey.String(cfg.ServiceVersion),
		semconv.DeploymentEnvironmentKey.String(cfg.Environment),
	)
	res, err := resource.New(ctx, serviceAttrs)
	if err != nil {
		return nil, fmt.Errorf("failed to create resource: %w", err)
	}

	var exporter sdktrace.SpanExporter
	useOTLP := cfg.Environment == "production" && cfg.OTLPEndpoint != ""
	switch {
	case useOTLP:
		// Production: export to the OTLP collector.
		otlpExporter, otlpErr := otlptracehttp.New(ctx,
			otlptracehttp.WithEndpoint(cfg.OTLPEndpoint),
			otlptracehttp.WithInsecure(), // Use WithTLSClientConfig for secure connections
		)
		if otlpErr != nil {
			return nil, fmt.Errorf("failed to create OTLP exporter: %w", otlpErr)
		}
		exporter = otlpExporter
	default:
		// Development (or no endpoint configured): export to stdout.
		stdoutExporter, stdoutErr := stdouttrace.New(
			stdouttrace.WithPrettyPrint(),
			stdouttrace.WithWriter(os.Stdout),
		)
		if stdoutErr != nil {
			return nil, fmt.Errorf("failed to create stdout exporter: %w", stdoutErr)
		}
		exporter = stdoutExporter
	}

	// Assemble the SDK provider: batched export, service resource, and a
	// sampler that keeps every trace (can be adjusted for production).
	provider := sdktrace.NewTracerProvider(
		sdktrace.WithBatcher(exporter),
		sdktrace.WithResource(res),
		sdktrace.WithSampler(sdktrace.AlwaysSample()),
	)

	// Publish the provider globally so otel.Tracer(...) callers pick it up.
	otel.SetTracerProvider(provider)

	// Publish the global propagator used to carry trace context and baggage.
	propagator := propagation.NewCompositeTextMapPropagator(
		propagation.TraceContext{},
		propagation.Baggage{},
	)
	otel.SetTextMapPropagator(propagator)

	return provider, nil
}
// ShutdownTracer shuts down tp when it is an SDK tracer provider, returning
// whatever error the provider's Shutdown reports. Any other provider
// implementation (for example the no-op provider) is left untouched and nil
// is returned.
func ShutdownTracer(ctx context.Context, tp trace.TracerProvider) error {
	sdkProvider, isSDK := tp.(*sdktrace.TracerProvider)
	if !isSDK {
		// Nothing to release for non-SDK providers.
		return nil
	}
	return sdkProvider.Shutdown(ctx)
}

View File

@@ -7,6 +7,8 @@ import (
"time"
"github.com/gin-gonic/gin"
"go.opentelemetry.io/contrib/instrumentation/github.com/gin-gonic/gin/otelgin"
"go.opentelemetry.io/otel/trace"
"git.dcentral.systems/toolz/goplt/internal/health"
"git.dcentral.systems/toolz/goplt/internal/metrics"
"git.dcentral.systems/toolz/goplt/pkg/config"
@@ -27,6 +29,7 @@ func NewServer(
healthRegistry *health.Registry,
metricsRegistry *metrics.Metrics,
errorBus errorbus.ErrorPublisher,
tracer trace.TracerProvider,
) (*Server, error) {
// Set Gin mode
env := cfg.GetString("environment")
@@ -37,6 +40,10 @@ func NewServer(
router := gin.New()
// Add middleware (order matters!)
// OpenTelemetry tracing should be first to capture all requests
if tracer != nil {
router.Use(otelgin.Middleware("platform", otelgin.WithTracerProvider(tracer)))
}
router.Use(RequestIDMiddleware())
router.Use(LoggingMiddleware(log))
router.Use(PanicRecoveryMiddleware(errorBus))