Skip to content

Commit 30a72d3

Browse files
committed
fix(observability): remove OTEL from links, normalize logging in links+uptime
Links was still sending full OTEL traces (child spans flooding the dataset with empty rows). Replaced with evlog-only, matching basket/api pattern. Added normalizeWideEventForAxiom, parseDurationMs, and enrichers to both links and uptime drains. Fixed error string collision across all process handlers.
1 parent 260b02b commit 30a72d3

9 files changed

Lines changed: 165 additions & 319 deletions

File tree

apps/links/package.json

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,7 @@
1212
"@databuddy/db": "workspace:*",
1313
"@databuddy/redis": "workspace:*",
1414
"@databuddy/shared": "workspace:*",
15-
"@elysiajs/opentelemetry": "^1.4.10",
1615
"@maxmind/geoip2-node": "^6.3.4",
17-
"@opentelemetry/api": "^1.9.0",
18-
"@opentelemetry/exporter-trace-otlp-proto": "^0.210.0",
19-
"@opentelemetry/resources": "^2.0.0",
20-
"@opentelemetry/sdk-node": "^0.210.0",
21-
"@opentelemetry/sdk-trace-node": "^2.4.0",
22-
"@opentelemetry/semantic-conventions": "^1.34.0",
2316
"elysia": "^1.4.22",
2417
"evlog": "^2.8.0",
2518
"kafkajs": "^2.2.4",

apps/links/src/index.ts

Lines changed: 8 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,15 @@
1-
import { opentelemetry } from "@elysiajs/opentelemetry";
2-
import { OTLPTraceExporter } from "@opentelemetry/exporter-trace-otlp-proto";
3-
import { BatchSpanProcessor } from "@opentelemetry/sdk-trace-node";
41
import { Elysia, redirect } from "elysia";
52
import { initLogger, log } from "evlog";
63
import { evlog } from "evlog/elysia";
7-
import { flushBatchedLinksDrain, linksLoggerDrain } from "./lib/evlog-links";
4+
import {
5+
enrichLinksWideEvent,
6+
flushBatchedLinksDrain,
7+
linksLoggerDrain,
8+
} from "./lib/evlog-links";
89
import { disconnectProducer } from "./lib/producer";
9-
import { shutdownTracing } from "./lib/tracing";
1010
import { expiredRoute } from "./routes/expired";
1111
import { redirectRoute } from "./routes/redirect";
1212

13-
const exporter = new OTLPTraceExporter({
14-
url: "https://api.axiom.co/v1/traces",
15-
headers: {
16-
Authorization: `Bearer ${process.env.AXIOM_TOKEN}`,
17-
"X-Axiom-Dataset": process.env.AXIOM_DATASET ?? "links",
18-
},
19-
});
20-
21-
const batchSpanProcessor = new BatchSpanProcessor(exporter, {
22-
scheduledDelayMillis: 1000,
23-
exportTimeoutMillis: 30_000,
24-
maxExportBatchSize: 512,
25-
maxQueueSize: 2048,
26-
});
27-
2813
initLogger({
2914
env: { service: "links" },
3015
drain: linksLoggerDrain,
@@ -34,13 +19,7 @@ initLogger({
3419
});
3520

3621
const app = new Elysia()
37-
.use(evlog())
38-
.use(
39-
opentelemetry({
40-
spanProcessor: batchSpanProcessor,
41-
serviceName: "links",
42-
})
43-
)
22+
.use(evlog({ enrich: enrichLinksWideEvent }))
4423
.get("/", function rootRedirect() {
4524
return redirect("https://databuddy.cc", 302);
4625
})
@@ -88,10 +67,10 @@ async function gracefulShutdown(signal: string) {
8867
await flushBatchedLinksDrain().catch((error) =>
8968
log.error({
9069
lifecycle: "drainFlush",
91-
error: error instanceof Error ? error.message : String(error),
70+
error_message: error instanceof Error ? error.message : String(error),
9271
})
9372
);
94-
await Promise.all([disconnectProducer(), shutdownTracing()]);
73+
await disconnectProducer();
9574
process.exit(0);
9675
}
9776

apps/links/src/lib/evlog-links.ts

Lines changed: 62 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,12 @@
11
import { dirname, join } from "node:path";
22
import { fileURLToPath } from "node:url";
3-
import type { DrainContext } from "evlog";
3+
import type { DrainContext, EnrichContext } from "evlog";
44
import { createAxiomDrain } from "evlog/axiom";
5+
import {
6+
createRequestSizeEnricher,
7+
createTraceContextEnricher,
8+
createUserAgentEnricher,
9+
} from "evlog/enrichers";
510
import { createFsDrain } from "evlog/fs";
611
import { createDrainPipeline } from "evlog/pipeline";
712

@@ -29,17 +34,69 @@ const devFsDrain = useLocalEvlogFiles
2934
? createFsDrain({ dir: devFsLogsDir, pretty: false })
3035
: null;
3136

32-
/**
33-
* In development, writes NDJSON wide events to `apps/links/.evlog/logs/`
34-
* and still sends to Axiom via the batched pipeline. Production: Axiom only.
35-
*/
37+
const DURATION_MS_REGEX = /^([\d.]+)(ms|s)$/;
38+
39+
function normalizeWideEventForAxiom(event: Record<string, unknown>): void {
40+
if (typeof event.error === "string") {
41+
event.error_message = event.error;
42+
event.error = undefined;
43+
}
44+
45+
if (event.level !== "error") {
46+
return;
47+
}
48+
49+
const err = event.error;
50+
if (!err || typeof err !== "object" || Array.isArray(err)) {
51+
return;
52+
}
53+
54+
const status = (err as { status?: number }).status;
55+
if (typeof status === "number" && status >= 400 && status < 500) {
56+
event.level = "warn";
57+
event.client_http_error = true;
58+
}
59+
}
60+
61+
function parseDurationMs(duration: unknown): number | undefined {
62+
if (typeof duration !== "string") {
63+
return undefined;
64+
}
65+
const match = duration.match(DURATION_MS_REGEX);
66+
if (!match?.[1]) {
67+
return undefined;
68+
}
69+
return match[2] === "s"
70+
? Math.round(Number.parseFloat(match[1]) * 1000)
71+
: Math.round(Number.parseFloat(match[1]));
72+
}
73+
3674
export async function linksLoggerDrain(ctx: DrainContext): Promise<void> {
75+
normalizeWideEventForAxiom(ctx.event as Record<string, unknown>);
76+
77+
const durationMs = parseDurationMs(ctx.event.duration);
78+
if (durationMs !== undefined) {
79+
ctx.event.duration_ms = durationMs;
80+
}
81+
3782
if (devFsDrain) {
3883
await devFsDrain(ctx);
3984
}
4085
batchedAxiomDrain(ctx);
4186
}
4287

88+
const enrichers = [
89+
createUserAgentEnricher(),
90+
createRequestSizeEnricher(),
91+
createTraceContextEnricher(),
92+
] as const;
93+
94+
export function enrichLinksWideEvent(ctx: EnrichContext): void {
95+
for (const enricher of enrichers) {
96+
enricher(ctx);
97+
}
98+
}
99+
43100
export async function flushBatchedLinksDrain(): Promise<void> {
44101
await batchedAxiomDrain.flush();
45102
}

apps/links/src/lib/tracing.ts

Lines changed: 42 additions & 154 deletions
Original file line numberDiff line numberDiff line change
@@ -1,178 +1,66 @@
1-
import { type Span, SpanStatusCode, trace } from "@opentelemetry/api";
2-
import { OTLPTraceExporter } from "@opentelemetry/exporter-trace-otlp-proto";
3-
import { resourceFromAttributes } from "@opentelemetry/resources";
4-
import { NodeSDK } from "@opentelemetry/sdk-node";
5-
import { BatchSpanProcessor } from "@opentelemetry/sdk-trace-node";
6-
import {
7-
ATTR_SERVICE_NAME,
8-
ATTR_SERVICE_VERSION,
9-
} from "@opentelemetry/semantic-conventions";
101
import { log } from "evlog";
112
import { useLogger as getRequestLogger } from "evlog/elysia";
12-
import pkg from "../../package.json";
133

14-
let sdk: NodeSDK | null = null;
15-
16-
export function initTracing(): void {
17-
if (sdk) {
18-
return;
19-
}
20-
21-
const exporter = new OTLPTraceExporter({
22-
url: "https://api.axiom.co/v1/traces",
23-
headers: {
24-
Authorization: `Bearer ${process.env.AXIOM_TOKEN}`,
25-
"X-Axiom-Dataset": process.env.AXIOM_DATASET ?? "links",
26-
},
27-
});
28-
29-
sdk = new NodeSDK({
30-
resource: resourceFromAttributes({
31-
[ATTR_SERVICE_NAME]: "links",
32-
[ATTR_SERVICE_VERSION]: pkg.version,
33-
}),
34-
spanProcessor: new BatchSpanProcessor(exporter, {
35-
scheduledDelayMillis: 1000,
36-
exportTimeoutMillis: 30_000,
37-
maxExportBatchSize: 512,
38-
maxQueueSize: 2048,
39-
}),
40-
});
41-
42-
sdk.start();
43-
}
44-
45-
export async function shutdownTracing(): Promise<void> {
46-
if (sdk) {
47-
await sdk.shutdown();
48-
sdk = null;
49-
}
50-
}
51-
52-
function getTracer() {
53-
return trace.getTracer("links");
54-
}
55-
56-
export function record<T>(name: string, fn: () => Promise<T> | T): Promise<T> {
57-
const tracer = getTracer();
58-
return tracer.startActiveSpan(name, async (span) => {
59-
const startTime = Date.now();
60-
try {
61-
const result = await fn();
62-
const duration = Date.now() - startTime;
63-
span.setAttribute("duration_ms", duration);
64-
65-
if (duration > 100) {
66-
span.setAttribute("slow", true);
67-
}
68-
69-
span.setStatus({ code: SpanStatusCode.OK });
70-
return result;
71-
} catch (error) {
72-
const duration = Date.now() - startTime;
73-
span.setAttribute("duration_ms", duration);
74-
span.setStatus({
75-
code: SpanStatusCode.ERROR,
76-
message: error instanceof Error ? error.message : String(error),
77-
});
78-
span.recordException(
79-
error instanceof Error ? error : new Error(String(error))
80-
);
81-
throw error;
82-
} finally {
83-
span.end();
84-
}
85-
});
4+
/**
5+
* Run a named operation. Request-level timing and HTTP metadata are emitted by
6+
* evlog on the wide event.
7+
*/
8+
export function record<T>(_name: string, fn: () => Promise<T> | T): Promise<T> {
9+
return Promise.resolve().then(() => fn());
8610
}
8711

12+
/**
13+
* Merge structured fields into the active request wide event (evlog).
14+
*/
8815
export function mergeWideEvent(
8916
fields: Record<string, string | number | boolean>
9017
): void {
91-
setAttributes(fields);
9218
try {
9319
getRequestLogger().set(fields as Record<string, unknown>);
9420
} catch {
95-
// Outside request context — OTel attributes already set above
96-
}
97-
}
98-
99-
export function captureError(
100-
error: unknown,
101-
attributes?: Record<string, string | number | boolean>
102-
): void {
103-
const errorObj = error instanceof Error ? error : new Error(String(error));
104-
105-
if (attributes?.error_step != null) {
106-
mergeWideEvent({ request_error: true, ...attributes });
107-
}
108-
109-
log.error({
110-
links: "captureError",
111-
error: errorObj.message,
112-
stack: errorObj.stack,
113-
...(attributes ?? {}),
114-
});
115-
116-
const span = trace.getActiveSpan();
117-
if (!span) {
118-
return;
119-
}
120-
121-
span.recordException(errorObj);
122-
span.setStatus({ code: SpanStatusCode.ERROR });
123-
124-
if (attributes) {
125-
for (const [key, value] of Object.entries(attributes)) {
126-
span.setAttribute(key, value);
127-
}
21+
// Outside request context
12822
}
12923
}
13024

25+
/**
26+
* Merge structured fields, filtering out null/undefined values.
27+
*/
13128
export function setAttributes(
13229
attributes: Record<string, string | number | boolean | null | undefined>
13330
): void {
134-
const span = trace.getActiveSpan();
135-
if (span) {
136-
for (const [key, value] of Object.entries(attributes)) {
137-
if (value !== null && value !== undefined) {
138-
span.setAttribute(key, value);
139-
}
31+
const filtered: Record<string, string | number | boolean> = {};
32+
for (const [key, value] of Object.entries(attributes)) {
33+
if (value !== null && value !== undefined) {
34+
filtered[key] = value;
14035
}
14136
}
37+
mergeWideEvent(filtered);
14238
}
14339

144-
export function startRequestSpan(
145-
method: string,
146-
path: string,
147-
route?: string
148-
): Span {
149-
const tracer = getTracer();
150-
return tracer.startSpan(`${method} ${route ?? path}`, {
151-
kind: 1,
152-
attributes: {
153-
http_method: method,
154-
http_route: route ?? path,
155-
http_target: path,
156-
},
157-
});
158-
}
159-
160-
export function endRequestSpan(
161-
span: Span,
162-
statusCode: number,
163-
startTime: number
40+
/**
41+
* Attach an error to the active request wide event when inside the evlog
42+
* middleware; otherwise emit a global structured log line.
43+
*/
44+
export function captureError(
45+
error: unknown,
46+
attributes?: Record<string, string | number | boolean>
16447
): void {
165-
const duration = Date.now() - startTime;
166-
span.setAttribute("http_status_code", statusCode);
167-
span.setAttribute("http_response_duration_ms", duration);
168-
169-
if (duration > 100) {
170-
span.setAttribute("http_slow", true);
48+
const err = error instanceof Error ? error : new Error(String(error));
49+
if (attributes?.error_step != null) {
50+
mergeWideEvent({ request_error: true, ...attributes });
51+
}
52+
try {
53+
const requestLog = getRequestLogger();
54+
if (attributes) {
55+
requestLog.error(err, attributes as Record<string, unknown>);
56+
} else {
57+
requestLog.error(err);
58+
}
59+
} catch {
60+
log.error({
61+
service: "links",
62+
error_message: err.message,
63+
...(attributes ?? {}),
64+
});
17165
}
172-
173-
span.setStatus({
174-
code: statusCode >= 400 ? SpanStatusCode.ERROR : SpanStatusCode.OK,
175-
message: statusCode >= 400 ? `HTTP ${statusCode}` : undefined,
176-
});
177-
span.end();
17866
}

apps/links/src/utils/geo.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ async function lookupGeoLocation(ip: string): Promise<GeoResult> {
122122
}
123123
log.error({
124124
links: "geoip_lookup",
125-
error: err instanceof Error ? err.message : String(err),
125+
error_message: err instanceof Error ? err.message : String(err),
126126
});
127127
return EMPTY_GEO;
128128
}

0 commit comments

Comments
 (0)