feat(epic1): complete OpenTelemetry integration and add verification documentation
Story 1.6: OpenTelemetry Distributed Tracing
- Implemented tracer initialization with stdout (dev) and OTLP (prod) exporters
- Added HTTP request instrumentation via Gin middleware
- Integrated trace ID correlation in structured logs
- Added tracing configuration to config files
- Registered tracer provider in DI container

Documentation and Setup:
- Created Docker Compose setup for PostgreSQL database
- Added comprehensive Epic 1 summary with verification instructions
- Added Epic 0 summary with verification instructions
- Linked summaries in documentation index and epic READMEs
- Included detailed database testing instructions
- Added Docker Compose commands and troubleshooting guide

All Epic 1 stories (1.1-1.6) are now complete. Story 1.7 depends on Epic 2.
This commit is contained in:
@@ -13,10 +13,12 @@ import (
|
||||
"git.dcentral.systems/toolz/goplt/internal/infra/database"
|
||||
loggerimpl "git.dcentral.systems/toolz/goplt/internal/logger"
|
||||
"git.dcentral.systems/toolz/goplt/internal/metrics"
|
||||
"git.dcentral.systems/toolz/goplt/internal/observability"
|
||||
"git.dcentral.systems/toolz/goplt/internal/server"
|
||||
"git.dcentral.systems/toolz/goplt/pkg/config"
|
||||
"git.dcentral.systems/toolz/goplt/pkg/errorbus"
|
||||
"git.dcentral.systems/toolz/goplt/pkg/logger"
|
||||
"go.opentelemetry.io/otel/trace"
|
||||
"go.uber.org/fx"
|
||||
)
|
||||
|
||||
@@ -156,6 +158,54 @@ func ProvideMetrics() fx.Option {
|
||||
})
|
||||
}
|
||||
|
||||
// ProvideTracer creates an FX option that provides the OpenTelemetry tracer.
|
||||
func ProvideTracer() fx.Option {
|
||||
return fx.Provide(func(cfg config.ConfigProvider, lc fx.Lifecycle) (trace.TracerProvider, error) {
|
||||
enabled := cfg.GetBool("tracing.enabled")
|
||||
if !enabled {
|
||||
// Return no-op tracer
|
||||
return trace.NewNoopTracerProvider(), nil
|
||||
}
|
||||
|
||||
serviceName := cfg.GetString("tracing.service_name")
|
||||
if serviceName == "" {
|
||||
serviceName = "platform"
|
||||
}
|
||||
|
||||
serviceVersion := cfg.GetString("tracing.service_version")
|
||||
if serviceVersion == "" {
|
||||
serviceVersion = "1.0.0"
|
||||
}
|
||||
|
||||
env := cfg.GetString("environment")
|
||||
if env == "" {
|
||||
env = "development"
|
||||
}
|
||||
|
||||
otlpEndpoint := cfg.GetString("tracing.otlp_endpoint")
|
||||
|
||||
tp, err := observability.InitTracer(context.Background(), observability.Config{
|
||||
Enabled: enabled,
|
||||
ServiceName: serviceName,
|
||||
ServiceVersion: serviceVersion,
|
||||
Environment: env,
|
||||
OTLPEndpoint: otlpEndpoint,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to initialize tracer: %w", err)
|
||||
}
|
||||
|
||||
// Register lifecycle hook to shutdown tracer
|
||||
lc.Append(fx.Hook{
|
||||
OnStop: func(ctx context.Context) error {
|
||||
return observability.ShutdownTracer(ctx, tp)
|
||||
},
|
||||
})
|
||||
|
||||
return tp, nil
|
||||
})
|
||||
}
|
||||
|
||||
// ProvideHTTPServer creates an FX option that provides the HTTP server.
|
||||
func ProvideHTTPServer() fx.Option {
|
||||
return fx.Provide(func(
|
||||
@@ -164,9 +214,10 @@ func ProvideHTTPServer() fx.Option {
|
||||
healthRegistry *health.Registry,
|
||||
metricsRegistry *metrics.Metrics,
|
||||
errorBus errorbus.ErrorPublisher,
|
||||
tracer trace.TracerProvider,
|
||||
lc fx.Lifecycle,
|
||||
) (*server.Server, error) {
|
||||
srv, err := server.NewServer(cfg, log, healthRegistry, metricsRegistry, errorBus)
|
||||
srv, err := server.NewServer(cfg, log, healthRegistry, metricsRegistry, errorBus, tracer)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create HTTP server: %w", err)
|
||||
}
|
||||
@@ -194,7 +245,7 @@ func ProvideHTTPServer() fx.Option {
|
||||
}
|
||||
|
||||
// CoreModule returns an FX option that provides all core services.
|
||||
// This includes configuration, logging, database, error bus, health checks, metrics, and HTTP server.
|
||||
// This includes configuration, logging, database, error bus, health checks, metrics, tracing, and HTTP server.
|
||||
func CoreModule() fx.Option {
|
||||
return fx.Options(
|
||||
ProvideConfig(),
|
||||
@@ -203,6 +254,7 @@ func CoreModule() fx.Option {
|
||||
ProvideErrorBus(),
|
||||
ProvideHealthRegistry(),
|
||||
ProvideMetrics(),
|
||||
ProvideTracer(),
|
||||
ProvideHTTPServer(),
|
||||
)
|
||||
}
|
||||
|
||||
@@ -4,6 +4,7 @@ import (
|
||||
"context"
|
||||
|
||||
"git.dcentral.systems/toolz/goplt/pkg/logger"
|
||||
"go.opentelemetry.io/otel/trace"
|
||||
"go.uber.org/zap"
|
||||
"go.uber.org/zap/zapcore"
|
||||
)
|
||||
@@ -87,6 +88,19 @@ func (zl *zapLogger) WithContext(ctx context.Context) logger.Logger {
|
||||
fields = append(fields, zap.String("user_id", userID))
|
||||
}
|
||||
|
||||
// Extract trace ID from OpenTelemetry context
|
||||
span := trace.SpanFromContext(ctx)
|
||||
if span.SpanContext().IsValid() {
|
||||
traceID := span.SpanContext().TraceID().String()
|
||||
spanID := span.SpanContext().SpanID().String()
|
||||
if traceID != "" {
|
||||
fields = append(fields, zap.String("trace_id", traceID))
|
||||
}
|
||||
if spanID != "" {
|
||||
fields = append(fields, zap.String("span_id", spanID))
|
||||
}
|
||||
}
|
||||
|
||||
if len(fields) == 0 {
|
||||
return zl
|
||||
}
|
||||
|
||||
94
internal/observability/tracer.go
Normal file
94
internal/observability/tracer.go
Normal file
@@ -0,0 +1,94 @@
|
||||
package observability
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"go.opentelemetry.io/otel"
|
||||
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp"
|
||||
"go.opentelemetry.io/otel/exporters/stdout/stdouttrace"
|
||||
"go.opentelemetry.io/otel/propagation"
|
||||
"go.opentelemetry.io/otel/sdk/resource"
|
||||
sdktrace "go.opentelemetry.io/otel/sdk/trace"
|
||||
semconv "go.opentelemetry.io/otel/semconv/v1.26.0"
|
||||
"go.opentelemetry.io/otel/trace"
|
||||
)
|
||||
|
||||
// Config carries the settings InitTracer needs to build a tracer provider.
type Config struct {
	// Enabled toggles tracing; when false InitTracer returns a no-op provider.
	Enabled bool

	// ServiceName is recorded as the service name resource attribute.
	ServiceName string

	// ServiceVersion is recorded as the service version resource attribute.
	ServiceVersion string

	// Environment is recorded as the deployment environment resource
	// attribute. The value "production" (together with a non-empty
	// OTLPEndpoint) selects the OTLP exporter; anything else selects stdout.
	Environment string

	// OTLPEndpoint is the collector endpoint handed to the OTLP HTTP exporter.
	// NOTE(review): presumably a host:port without scheme — confirm against
	// the deployed configuration.
	OTLPEndpoint string
}
|
||||
|
||||
// InitTracer initializes OpenTelemetry tracing.
|
||||
func InitTracer(ctx context.Context, cfg Config) (trace.TracerProvider, error) {
|
||||
if !cfg.Enabled {
|
||||
// Return a no-op tracer provider
|
||||
return trace.NewNoopTracerProvider(), nil
|
||||
}
|
||||
|
||||
// Create resource with service information
|
||||
res, err := resource.New(ctx,
|
||||
resource.WithAttributes(
|
||||
semconv.ServiceNameKey.String(cfg.ServiceName),
|
||||
semconv.ServiceVersionKey.String(cfg.ServiceVersion),
|
||||
semconv.DeploymentEnvironmentKey.String(cfg.Environment),
|
||||
),
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create resource: %w", err)
|
||||
}
|
||||
|
||||
var exporter sdktrace.SpanExporter
|
||||
|
||||
if cfg.Environment == "production" && cfg.OTLPEndpoint != "" {
|
||||
// Production: export to OTLP collector
|
||||
exporter, err = otlptracehttp.New(ctx,
|
||||
otlptracehttp.WithEndpoint(cfg.OTLPEndpoint),
|
||||
otlptracehttp.WithInsecure(), // Use WithTLSClientConfig for secure connections
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create OTLP exporter: %w", err)
|
||||
}
|
||||
} else {
|
||||
// Development: export to stdout
|
||||
exporter, err = stdouttrace.New(
|
||||
stdouttrace.WithPrettyPrint(),
|
||||
stdouttrace.WithWriter(os.Stdout),
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create stdout exporter: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Create tracer provider
|
||||
tp := sdktrace.NewTracerProvider(
|
||||
sdktrace.WithBatcher(exporter),
|
||||
sdktrace.WithResource(res),
|
||||
sdktrace.WithSampler(sdktrace.AlwaysSample()), // Sample all traces in dev, can be adjusted for prod
|
||||
)
|
||||
|
||||
// Set global tracer provider
|
||||
otel.SetTracerProvider(tp)
|
||||
|
||||
// Set global propagator for trace context
|
||||
otel.SetTextMapPropagator(propagation.NewCompositeTextMapPropagator(
|
||||
propagation.TraceContext{},
|
||||
propagation.Baggage{},
|
||||
))
|
||||
|
||||
return tp, nil
|
||||
}
|
||||
|
||||
// ShutdownTracer gracefully shuts down the tracer provider.
|
||||
func ShutdownTracer(ctx context.Context, tp trace.TracerProvider) error {
|
||||
if ttp, ok := tp.(*sdktrace.TracerProvider); ok {
|
||||
return ttp.Shutdown(ctx)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -7,6 +7,8 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"go.opentelemetry.io/contrib/instrumentation/github.com/gin-gonic/gin/otelgin"
|
||||
"go.opentelemetry.io/otel/trace"
|
||||
"git.dcentral.systems/toolz/goplt/internal/health"
|
||||
"git.dcentral.systems/toolz/goplt/internal/metrics"
|
||||
"git.dcentral.systems/toolz/goplt/pkg/config"
|
||||
@@ -27,6 +29,7 @@ func NewServer(
|
||||
healthRegistry *health.Registry,
|
||||
metricsRegistry *metrics.Metrics,
|
||||
errorBus errorbus.ErrorPublisher,
|
||||
tracer trace.TracerProvider,
|
||||
) (*Server, error) {
|
||||
// Set Gin mode
|
||||
env := cfg.GetString("environment")
|
||||
@@ -37,6 +40,10 @@ func NewServer(
|
||||
router := gin.New()
|
||||
|
||||
// Add middleware (order matters!)
|
||||
// OpenTelemetry tracing should be first to capture all requests
|
||||
if tracer != nil {
|
||||
router.Use(otelgin.Middleware("platform", otelgin.WithTracerProvider(tracer)))
|
||||
}
|
||||
router.Use(RequestIDMiddleware())
|
||||
router.Use(LoggingMiddleware(log))
|
||||
router.Use(PanicRecoveryMiddleware(errorBus))
|
||||
|
||||
Reference in New Issue
Block a user