Skip to content

Commit 448f5ca

Browse files
feat(vrl): add native parse_ddtags function
Add a VRL function `parse_ddtags(value, multivalue: true)` that parses Datadog tag strings (comma-separated `key:value` pairs) into objects.

In multivalue mode (the default), each value is an array, so duplicate keys are preserved. In single-value mode, the first occurrence of a key wins. The function handles standalone keys (no colon), colons embedded in values (it splits on the first colon only), whitespace trimming, and empty segments.

Benchmarked against the equivalent pure-VRL implementation (for_each + split + get/set/push) on a realistic 50-tag input:
- native: 10.1 µs/iter
- pure VRL: 320.8 µs/iter (~32x slower)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 02f281d commit 448f5ca

4 files changed

Lines changed: 452 additions & 0 deletions

File tree

benches/remap.rs

Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,59 @@ use vector::{
1212
};
1313
use vrl::{event_path, prelude::*};
1414

15+
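// 50 realistic Datadog tags, including long values, values that contain colons (ARNs, image references, timestamps), and duplicate keys (`compliance`, `feature_flag`).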
16+
const DDTAGS_BENCH_INPUT: &str = "\
17+
env:production,\
18+
service:payment-gateway-service,\
19+
version:4.12.7-rc3,\
20+
host:ip-10-42-137-29.us-east-1.compute.internal,\
21+
instance-type:m5.2xlarge,\
22+
availability-zone:us-east-1c,\
23+
region:us-east-1,\
24+
cluster:eks-prod-main-useast1-2024,\
25+
namespace:payments,\
26+
pod_name:payment-gateway-service-7f8b9c6d4f-x2k9m,\
27+
container_name:payment-gateway,\
28+
image_tag:registry.internal.example.com/payments/gateway:4.12.7-rc3-sha-a1b2c3d4,\
29+
team:platform-payments,\
30+
cost_center:cc-payments-12345,\
31+
owner:payments-oncall@example.com,\
32+
pagerduty:payments-p1,\
33+
slo:payments-availability-99.99,\
34+
tier:tier-0-critical,\
35+
compliance:pci-dss-v4,\
36+
compliance:soc2-type2,\
37+
compliance:gdpr,\
38+
datacenter:us-east-1-primary,\
39+
network:vpc-0a1b2c3d4e5f67890,\
40+
subnet:subnet-private-us-east-1c-payments,\
41+
security_group:sg-payment-gateway-prod,\
42+
load_balancer:arn:aws:elasticloadbalancing:us-east-1:123456789012:loadbalancer/app/payment-gw-prod/50dc6c495c0c9188,\
43+
target_group:arn:aws:elasticloadbalancing:us-east-1:123456789012:targetgroup/payment-gw-tg/73e2d6bc24d8a067,\
44+
dns:payment-gateway.internal.prod.example.com,\
45+
port:8443,\
46+
protocol:https,\
47+
framework:spring-boot-3.2.1,\
48+
runtime:openjdk-21.0.2+13,\
49+
orchestrator:kubernetes-1.29,\
50+
deploy_pipeline:argo-cd,\
51+
deploy_sha:a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0,\
52+
deploy_timestamp:2024-11-15T14:32:07Z,\
53+
canary:false,\
54+
feature_flag:new-checkout-flow-v2,\
55+
feature_flag:payment-retry-logic-v3,\
56+
feature_flag:fraud-detection-ml-model-2024q4,\
57+
circuit_breaker:downstream-bank-api,\
58+
rate_limit_tier:premium,\
59+
db_pool:payments-primary-rds-cluster.cluster-abc123def456.us-east-1.rds.amazonaws.com,\
60+
cache_cluster:payments-redis-prod-001.abc123.0001.use1.cache.amazonaws.com,\
61+
message_queue:arn:aws:sqs:us-east-1:123456789012:payment-events-prod,\
62+
trace_sample_rate:0.15,\
63+
log_level:info,\
64+
custom_metric_prefix:payments.gateway,\
65+
git_repository:github.com/example-org/payment-gateway-service,\
66+
oncall_schedule:payments-primary-rotation-2024";
67+
1568
criterion_group!(
1669
name = benches;
1770
// encapsulates CI noise we saw in
@@ -205,4 +258,107 @@ fn benchmark_remap(c: &mut Criterion) {
205258
BatchSize::SmallInput,
206259
);
207260
});
261+
262+
let parse_ddtags_runner = |tform: &mut Box<dyn SyncTransform>, event: Event| {
263+
let mut outputs = TransformOutputsBuf::new_with_capacity(
264+
vec![TransformOutput::new(DataType::all_bits(), HashMap::new())],
265+
1,
266+
);
267+
tform.transform(event, &mut outputs);
268+
let result = outputs.take_primary();
269+
let output_1 = result.first().unwrap().as_log();
270+
271+
debug_assert!(output_1.get(event_path!("parsed")).is_some());
272+
273+
result
274+
};
275+
276+
group.bench_function("parse_ddtags/native", |b| {
277+
let mut tform: Box<dyn SyncTransform> = Box::new(
278+
Remap::new_ast(
279+
RemapConfig {
280+
source: Some(
281+
r#".parsed = parse_ddtags!(string!(.ddtags))"#.to_string(),
282+
),
283+
file: None,
284+
timezone: None,
285+
drop_on_error: true,
286+
drop_on_abort: true,
287+
..Default::default()
288+
},
289+
&Default::default(),
290+
)
291+
.unwrap()
292+
.0,
293+
);
294+
295+
let event = {
296+
let mut event = Event::Log(LogEvent::from("parse ddtags"));
297+
event
298+
.as_mut_log()
299+
.insert(event_path!("ddtags"), DDTAGS_BENCH_INPUT.to_owned());
300+
event
301+
};
302+
303+
b.iter_batched(
304+
|| event.clone(),
305+
|event| parse_ddtags_runner(&mut tform, event),
306+
BatchSize::SmallInput,
307+
);
308+
});
309+
310+
group.bench_function("parse_ddtags/pure_vrl", |b| {
311+
let mut tform: Box<dyn SyncTransform> = Box::new(
312+
Remap::new_ast(
313+
RemapConfig {
314+
source: Some(
315+
indoc! {r#"
316+
tags = split!(string!(.ddtags), ",")
317+
result = {}
318+
for_each(tags) -> |_i, tag| {
319+
parts = split(tag, ":", limit: 2)
320+
key = strip_whitespace!(to_string!(get!(parts, [0])))
321+
val_raw = get(parts, [1]) ?? null
322+
val = if val_raw != null {
323+
strip_whitespace!(to_string!(val_raw))
324+
} else {
325+
true
326+
}
327+
existing = get(result, [key]) ?? null
328+
if existing == null {
329+
result = set!(result, [key], [val])
330+
} else {
331+
result = set!(result, [key], push!(array!(existing), val))
332+
}
333+
}
334+
.parsed = result
335+
"#}
336+
.to_string(),
337+
),
338+
file: None,
339+
timezone: None,
340+
drop_on_error: true,
341+
drop_on_abort: true,
342+
..Default::default()
343+
},
344+
&Default::default(),
345+
)
346+
.unwrap()
347+
.0,
348+
);
349+
350+
let event = {
351+
let mut event = Event::Log(LogEvent::from("parse ddtags"));
352+
event
353+
.as_mut_log()
354+
.insert(event_path!("ddtags"), DDTAGS_BENCH_INPUT.to_owned());
355+
event
356+
};
357+
358+
b.iter_batched(
359+
|| event.clone(),
360+
|event| parse_ddtags_runner(&mut tform, event),
361+
BatchSize::SmallInput,
362+
);
363+
});
208364
}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
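Added a `parse_ddtags` VRL function that parses Datadog tag strings (comma-separated `key:value` pairs) into objects. The `multivalue` parameter controls how duplicate keys are handled: when `true` (the default), each key maps to an array of all its values; when `false`, each key maps to a single value and the first occurrence wins.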

lib/vector-vrl/functions/src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
use vrl::{compiler::Function, path::OwnedTargetPath};
1313

1414
pub mod get_secret;
15+
pub mod parse_ddtags;
1516
pub mod remove_secret;
1617
pub mod set_secret;
1718
pub mod set_semantic_meaning;
@@ -30,6 +31,7 @@ pub fn secret_functions() -> Vec<Box<dyn Function>> {
3031
vec![
3132
Box::new(set_semantic_meaning::SetSemanticMeaning) as _,
3233
Box::new(get_secret::GetSecret) as _,
34+
Box::new(parse_ddtags::ParseDdtags) as _,
3335
Box::new(remove_secret::RemoveSecret) as _,
3436
Box::new(set_secret::SetSecret) as _,
3537
]

0 commit comments

Comments
 (0)