- Docker: Multi-stage Dockerfile with security hardening, docker-compose for production and development environments
- Environment: Comprehensive .env.example with all config options, lib/config/env.ts for typed environment validation
- Logging: Structured JSON logging with request/response middleware
- Monitoring: Prometheus metrics endpoint, Grafana dashboard, health checks (liveness/readiness probes)
- Security: Security headers, rate limiting, CORS middleware
- CI/CD: GitHub Actions workflows for CI, production deploy, and preview deployments
- Error tracking: Sentry integration foundation

Files created:
- Docker: Dockerfile, docker-compose.yml, docker-compose.dev.yml, .dockerignore
- Config: lib/config/env.ts, lib/config/index.ts
- Logging: lib/logging/logger.ts, lib/logging/middleware.ts
- Monitoring: lib/monitoring/sentry.ts, lib/monitoring/metrics.ts, lib/monitoring/health.ts
- Security: lib/security/headers.ts, lib/security/rateLimit.ts, lib/security/cors.ts
- API: pages/api/health/*, pages/api/metrics.ts
- Infra: infra/prometheus/prometheus.yml, infra/grafana/*
182 lines
4.4 KiB
TypeScript
182 lines
4.4 KiB
TypeScript
/**
 * Health Check Utilities
 * Agent 4: Production Deployment
 *
 * Provides health check functionality for the application.
 */
|
|
|
|
import { env } from '../config';
|
|
|
|
/**
 * Outcome reported by a single health checker.
 */
interface HealthCheckResult {
  /** Health state reported by the checker. */
  status: 'healthy' | 'unhealthy' | 'degraded';
  /** Optional human-readable detail about the reported state. */
  message?: string;
  /**
   * Optional latency in milliseconds. When omitted, the registry's `runAll`
   * substitutes the elapsed time it measured for the check.
   */
  latencyMs?: number;
}
|
|
|
|
/**
 * Per-check entry in the `checks` list of a health report: the outcome of
 * one registered checker together with the name it was registered under.
 */
interface ComponentHealth {
  /** Name the check was registered under. */
  name: string;
  /** Health state of this component. */
  status: 'healthy' | 'unhealthy' | 'degraded';
  /** Optional human-readable detail (for failed checks, the error message). */
  message?: string;
  /** Optional latency of the check in milliseconds. */
  latencyMs?: number;
}
|
|
|
|
/**
 * Aggregated health report produced by running every registered check.
 */
interface HealthStatus {
  /** Overall state derived from the individual check results. */
  status: 'healthy' | 'unhealthy' | 'degraded';
  /** Application version string. */
  version: string;
  /** ISO-8601 timestamp of when the report was produced. */
  timestamp: string;
  /** Whole seconds elapsed since the registry was created. */
  uptime: number;
  /** Environment name taken from the application configuration. */
  environment: string;
  /** One entry per executed check. */
  checks: ComponentHealth[];
}
|
|
|
|
/**
 * Signature of a health check callback: it takes no arguments and resolves
 * to a HealthCheckResult.
 */
type HealthChecker = () => Promise<HealthCheckResult>;
|
|
|
|
/**
 * Health check registry
 *
 * Holds named health checkers and runs them on demand to build an
 * aggregated health report. Also records its own creation time so that
 * uptime can be reported.
 */
class HealthCheckRegistry {
  /** Maximum time a single check may take before it is reported as failed. */
  private static readonly CHECK_TIMEOUT_MS = 5000;

  private checks: Map<string, HealthChecker> = new Map();
  private startTime: number = Date.now();

  /**
   * Register a health check
   *
   * @param name - Key for the check; registering an existing name replaces
   *   the previous checker.
   * @param checker - Callback that produces the check result.
   */
  register(name: string, checker: HealthChecker): void {
    this.checks.set(name, checker);
  }

  /**
   * Unregister a health check
   *
   * @param name - Name of the check to remove; unknown names are ignored.
   */
  unregister(name: string): void {
    this.checks.delete(name);
  }

  /**
   * Run all health checks
   *
   * Checks run one after another in registration order. A check that throws,
   * rejects, or exceeds the timeout is recorded as `unhealthy` with the
   * error message. The overall status is `unhealthy` if any check is
   * unhealthy, otherwise `degraded` if any check is degraded, otherwise
   * `healthy`.
   *
   * @returns The aggregated health report.
   */
  async runAll(): Promise<HealthStatus> {
    const results: ComponentHealth[] = [];
    let overallStatus: 'healthy' | 'unhealthy' | 'degraded' = 'healthy';

    for (const [name, checker] of this.checks) {
      const start = Date.now();
      try {
        const result = await this.runWithTimeout(checker);

        results.push({
          name,
          status: result.status,
          message: result.message,
          // Prefer the latency the checker measured itself, if any.
          latencyMs: result.latencyMs ?? (Date.now() - start),
        });

        if (result.status === 'unhealthy') {
          overallStatus = 'unhealthy';
        } else if (result.status === 'degraded' && overallStatus !== 'unhealthy') {
          // Never let a degraded check mask an earlier unhealthy one.
          overallStatus = 'degraded';
        }
      } catch (error) {
        results.push({
          name,
          status: 'unhealthy',
          message: error instanceof Error ? error.message : 'Unknown error',
          latencyMs: Date.now() - start,
        });
        overallStatus = 'unhealthy';
      }
    }

    return {
      status: overallStatus,
      version: process.env.npm_package_version || '1.0.0',
      timestamp: new Date().toISOString(),
      uptime: this.getUptime(),
      environment: env.nodeEnv,
      checks: results,
    };
  }

  /**
   * Run liveness check (is the process alive?)
   *
   * @returns Always `{ status: 'ok' }`; no registered checks are consulted.
   */
  async checkLiveness(): Promise<{ status: 'ok' | 'error' }> {
    return { status: 'ok' };
  }

  /**
   * Run readiness check (is the application ready to serve traffic?)
   *
   * @returns The same report as `runAll`.
   */
  async checkReadiness(): Promise<HealthStatus> {
    return this.runAll();
  }

  /**
   * Get uptime in seconds
   *
   * @returns Whole seconds elapsed since this registry was constructed.
   */
  getUptime(): number {
    return Math.floor((Date.now() - this.startTime) / 1000);
  }

  /**
   * Runs one checker, rejecting with `Error('Timeout')` if it has not
   * settled within the timeout. The timer is always cleared afterwards so
   * that a finished check does not leave a pending timer behind.
   */
  private async runWithTimeout(checker: HealthChecker): Promise<HealthCheckResult> {
    let timer: ReturnType<typeof setTimeout> | undefined;
    const timeout = new Promise<never>((_, reject) => {
      timer = setTimeout(
        () => reject(new Error('Timeout')),
        HealthCheckRegistry.CHECK_TIMEOUT_MS,
      );
    });

    try {
      return await Promise.race([checker(), timeout]);
    } finally {
      clearTimeout(timer);
    }
  }
}
|
|
|
|
/**
 * Create singleton instance
 *
 * Shared registry for the module; the default checks below are registered
 * on it at import time.
 */
export const healthChecks = new HealthCheckRegistry();
|
|
|
|
// Register default checks

/**
 * 'memory' check: reports heap usage as a share of the currently allocated
 * heap (heapUsed / heapTotal). Above 90% is unhealthy, above 75% is
 * degraded, otherwise healthy.
 *
 * NOTE(review): heapTotal is the heap allocated so far rather than a
 * configured limit, so this ratio may run high under normal load — confirm
 * the thresholds behave as intended in production.
 */
healthChecks.register('memory', async () => {
  const { heapUsed, heapTotal } = process.memoryUsage();
  const heapUsedMB = Math.round(heapUsed / 1024 / 1024);
  const heapTotalMB = Math.round(heapTotal / 1024 / 1024);
  const heapUsagePercent = (heapUsed / heapTotal) * 100;

  // Shared tail of every message, e.g. "120MB / 200MB (60.0%)".
  const usage = `${heapUsedMB}MB / ${heapTotalMB}MB (${heapUsagePercent.toFixed(1)}%)`;

  if (heapUsagePercent > 90) {
    return {
      status: 'unhealthy',
      message: `High memory usage: ${usage}`,
    };
  }

  if (heapUsagePercent > 75) {
    return {
      status: 'degraded',
      message: `Elevated memory usage: ${usage}`,
    };
  }

  return {
    status: 'healthy',
    message: `Memory usage: ${usage}`,
  };
});
|
|
|
|
/**
 * 'eventloop' check: measures how long a `setImmediate` callback takes to
 * run and reports that delay as event loop lag. Above 100 ms is unhealthy,
 * above 50 ms is degraded, otherwise healthy. The lag is also returned as
 * the check's latency.
 */
healthChecks.register('eventloop', async () => {
  const start = Date.now();
  await new Promise((resolve) => setImmediate(resolve));
  const lag = Date.now() - start;

  // Message and latency are the same for every state; only the status varies.
  const status = lag > 100 ? 'unhealthy' : lag > 50 ? 'degraded' : 'healthy';

  return {
    status,
    message: `Event loop lag: ${lag}ms`,
    latencyMs: lag,
  };
});
|
|
|
|
// Export types (type-only re-export; nothing is emitted at runtime)
export type { HealthCheckResult, ComponentHealth, HealthStatus, HealthChecker };
|