Skip to content

Commit 6bebd48

Browse files
authored
Merge pull request ClickHouse#357 from caetanosauer/hyper-parquet
2 parents e4e021b + 9456e8c commit 6bebd48

5 files changed

Lines changed: 108 additions & 100 deletions

File tree

hyper-parquet/benchmark.sh

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,17 @@
22

33
sudo apt-get update
44
sudo apt-get install -y python3-pip
5-
pip install --break-system-packages tableauhyperapi
65

7-
seq 0 99 | xargs -P100 -I{} bash -c 'wget --continue https://datasets.clickhouse.com/hits_compatible/athena_partitioned/hits_{}.parquet'
6+
PIP_MAJOR=$(echo $(pip --version | awk '{print $2}') | cut -d. -f1)
7+
if [ $PIP_MAJOR -ge 23 ]; then
8+
pip install --break-system-packages tableauhyperapi
9+
else
10+
pip install tableauhyperapi
11+
fi
12+
13+
if [ ! -f hits_0.parquet ]; then
14+
seq 0 99 | xargs -P100 -I{} bash -c 'wget --continue https://datasets.clickhouse.com/hits_compatible/athena_partitioned/hits_{}.parquet'
15+
fi
816

917
./run.sh | tee log.txt
1018

hyper-parquet/create.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,4 +101,4 @@ for array[
101101
'hits_99.parquet',
102102
'hits_9.parquet'
103103
]
104-
with (format => 'parquet');
104+
with (format => 'parquet', binary_as_text => true);

hyper-parquet/queries.sql

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,23 +18,23 @@ SELECT "UserID", "SearchPhrase", COUNT(*) FROM hits GROUP BY "UserID", "SearchPh
1818
SELECT "UserID", "SearchPhrase", COUNT(*) FROM hits GROUP BY "UserID", "SearchPhrase" LIMIT 10;
1919
SELECT "UserID", extract(minute FROM to_timestamp("EventTime")) AS m, "SearchPhrase", COUNT(*) FROM hits GROUP BY "UserID", m, "SearchPhrase" ORDER BY COUNT(*) DESC LIMIT 10;
2020
SELECT "UserID" FROM hits WHERE "UserID" = 435090932899640449;
21-
SELECT COUNT(*) FROM hits WHERE cast("URL" as text) LIKE '%google%';
22-
SELECT "SearchPhrase", MIN("URL"), COUNT(*) AS c FROM hits WHERE cast("URL" as text) LIKE '%google%' AND "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10;
23-
SELECT "SearchPhrase", MIN("URL"), MIN("Title"), COUNT(*) AS c, COUNT(DISTINCT "UserID") FROM hits WHERE cast("Title" as text) LIKE '%Google%' AND cast("URL" as text) NOT LIKE '%.google.%' AND "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10;
24-
SELECT * FROM hits WHERE cast("URL" as text) LIKE '%google%' ORDER BY "EventTime" LIMIT 10;
21+
SELECT COUNT(*) FROM hits WHERE "URL" LIKE '%google%';
22+
SELECT "SearchPhrase", MIN("URL"), COUNT(*) AS c FROM hits WHERE "URL" LIKE '%google%' AND "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10;
23+
SELECT "SearchPhrase", MIN("URL"), MIN("Title"), COUNT(*) AS c, COUNT(DISTINCT "UserID") FROM hits WHERE "Title" LIKE '%Google%' AND "URL" NOT LIKE '%.google.%' AND "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10;
24+
SELECT * FROM hits WHERE "URL" LIKE '%google%' ORDER BY "EventTime" LIMIT 10;
2525
SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY "EventTime" LIMIT 10;
2626
SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY "SearchPhrase" LIMIT 10;
2727
SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY "EventTime", "SearchPhrase" LIMIT 10;
28-
SELECT "CounterID", AVG(length(cast("URL" as text))) AS l, COUNT(*) AS c FROM hits WHERE cast("URL" as text) <> '' GROUP BY "CounterID" HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25;
29-
SELECT REGEXP_REPLACE(cast("Referer" as text), '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length("Referer")) AS l, COUNT(*) AS c, MIN("Referer") FROM hits WHERE "Referer" <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25;
28+
SELECT "CounterID", AVG(length("URL")) AS l, COUNT(*) AS c FROM hits WHERE "URL" <> '' GROUP BY "CounterID" HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25;
29+
SELECT REGEXP_REPLACE("Referer", '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length("Referer")) AS l, COUNT(*) AS c, MIN("Referer") FROM hits WHERE "Referer" <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25;
3030
SELECT SUM("ResolutionWidth"::bigint), SUM("ResolutionWidth"::bigint + 1), SUM("ResolutionWidth"::bigint + 2), SUM("ResolutionWidth"::bigint + 3), SUM("ResolutionWidth"::bigint + 4), SUM("ResolutionWidth"::bigint + 5), SUM("ResolutionWidth"::bigint + 6), SUM("ResolutionWidth"::bigint + 7), SUM("ResolutionWidth"::bigint + 8), SUM("ResolutionWidth"::bigint + 9), SUM("ResolutionWidth"::bigint + 10), SUM("ResolutionWidth"::bigint + 11), SUM("ResolutionWidth"::bigint + 12), SUM("ResolutionWidth"::bigint + 13), SUM("ResolutionWidth"::bigint + 14), SUM("ResolutionWidth"::bigint + 15), SUM("ResolutionWidth"::bigint + 16), SUM("ResolutionWidth"::bigint + 17), SUM("ResolutionWidth"::bigint + 18), SUM("ResolutionWidth"::bigint + 19), SUM("ResolutionWidth"::bigint + 20), SUM("ResolutionWidth"::bigint + 21), SUM("ResolutionWidth"::bigint + 22), SUM("ResolutionWidth"::bigint + 23), SUM("ResolutionWidth"::bigint + 24), SUM("ResolutionWidth"::bigint + 25), SUM("ResolutionWidth"::bigint + 26), SUM("ResolutionWidth"::bigint + 27), SUM("ResolutionWidth"::bigint + 28), SUM("ResolutionWidth"::bigint + 29), SUM("ResolutionWidth"::bigint + 30), SUM("ResolutionWidth"::bigint + 31), SUM("ResolutionWidth"::bigint + 32), SUM("ResolutionWidth"::bigint + 33), SUM("ResolutionWidth"::bigint + 34), SUM("ResolutionWidth"::bigint + 35), SUM("ResolutionWidth"::bigint + 36), SUM("ResolutionWidth"::bigint + 37), SUM("ResolutionWidth"::bigint + 38), SUM("ResolutionWidth"::bigint + 39), SUM("ResolutionWidth"::bigint + 40), SUM("ResolutionWidth"::bigint + 41), SUM("ResolutionWidth"::bigint + 42), SUM("ResolutionWidth"::bigint + 43), SUM("ResolutionWidth"::bigint + 44), SUM("ResolutionWidth"::bigint + 45), SUM("ResolutionWidth"::bigint + 46), SUM("ResolutionWidth"::bigint + 47), SUM("ResolutionWidth"::bigint + 48), SUM("ResolutionWidth"::bigint + 49), SUM("ResolutionWidth"::bigint + 50), SUM("ResolutionWidth"::bigint + 51), SUM("ResolutionWidth"::bigint + 52), SUM("ResolutionWidth"::bigint + 53), SUM("ResolutionWidth"::bigint + 54), SUM("ResolutionWidth"::bigint + 55), SUM("ResolutionWidth"::bigint + 56), SUM("ResolutionWidth"::bigint + 57), SUM("ResolutionWidth"::bigint + 58), SUM("ResolutionWidth"::bigint + 59), SUM("ResolutionWidth"::bigint + 60), SUM("ResolutionWidth"::bigint + 61), SUM("ResolutionWidth"::bigint + 62), SUM("ResolutionWidth"::bigint + 63), SUM("ResolutionWidth"::bigint + 64), SUM("ResolutionWidth"::bigint + 65), SUM("ResolutionWidth"::bigint + 66), SUM("ResolutionWidth"::bigint + 67), SUM("ResolutionWidth"::bigint + 68), SUM("ResolutionWidth"::bigint + 69), SUM("ResolutionWidth"::bigint + 70), SUM("ResolutionWidth"::bigint + 71), SUM("ResolutionWidth"::bigint + 72), SUM("ResolutionWidth"::bigint + 73), SUM("ResolutionWidth"::bigint + 74), SUM("ResolutionWidth"::bigint + 75), SUM("ResolutionWidth"::bigint + 76), SUM("ResolutionWidth"::bigint + 77), SUM("ResolutionWidth"::bigint + 78), SUM("ResolutionWidth"::bigint + 79), SUM("ResolutionWidth"::bigint + 80), SUM("ResolutionWidth"::bigint + 81), SUM("ResolutionWidth"::bigint + 82), SUM("ResolutionWidth"::bigint + 83), SUM("ResolutionWidth"::bigint + 84), SUM("ResolutionWidth"::bigint + 85), SUM("ResolutionWidth"::bigint + 86), SUM("ResolutionWidth"::bigint + 87), SUM("ResolutionWidth"::bigint + 88), SUM("ResolutionWidth"::bigint + 89) FROM hits;
3131
SELECT "SearchEngineID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"), AVG("ResolutionWidth") FROM hits WHERE "SearchPhrase" <> '' GROUP BY "SearchEngineID", "ClientIP" ORDER BY c DESC LIMIT 10;
3232
SELECT "WatchID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"), AVG("ResolutionWidth") FROM hits WHERE "SearchPhrase" <> '' GROUP BY "WatchID", "ClientIP" ORDER BY c DESC LIMIT 10;
3333
SELECT "WatchID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"), AVG("ResolutionWidth") FROM hits GROUP BY "WatchID", "ClientIP" ORDER BY c DESC LIMIT 10;
34-
SELECT cast("URL" as text), COUNT(*) AS c FROM hits GROUP BY cast("URL" as text) ORDER BY c DESC LIMIT 10;
35-
SELECT 1, cast("URL" as text), COUNT(*) AS c FROM hits GROUP BY 1, cast("URL" as text) ORDER BY c DESC LIMIT 10;
34+
SELECT "URL", COUNT(*) AS c FROM hits GROUP BY "URL" ORDER BY c DESC LIMIT 10;
35+
SELECT 1, "URL", COUNT(*) AS c FROM hits GROUP BY 1, "URL" ORDER BY c DESC LIMIT 10;
3636
SELECT "ClientIP", "ClientIP" - 1, "ClientIP" - 2, "ClientIP" - 3, COUNT(*) AS c FROM hits GROUP BY "ClientIP", "ClientIP" - 1, "ClientIP" - 2, "ClientIP" - 3 ORDER BY c DESC LIMIT 10;
37-
SELECT "URL", COUNT(*) AS "PageViews" FROM hits WHERE "CounterID" = 62 AND (date '1970-01-01' + "EventDate") >= '2013-07-01' AND (date '1970-01-01' + "EventDate") <= '2013-07-31' AND "DontCountHits" = 0 AND "IsRefresh" = 0 AND cast("URL" as text) <> '' GROUP BY "URL" ORDER BY "PageViews" DESC LIMIT 10;
37+
SELECT "URL", COUNT(*) AS "PageViews" FROM hits WHERE "CounterID" = 62 AND (date '1970-01-01' + "EventDate") >= '2013-07-01' AND (date '1970-01-01' + "EventDate") <= '2013-07-31' AND "DontCountHits" = 0 AND "IsRefresh" = 0 AND "URL" <> '' GROUP BY "URL" ORDER BY "PageViews" DESC LIMIT 10;
3838
SELECT "Title", COUNT(*) AS "PageViews" FROM hits WHERE "CounterID" = 62 AND (date '1970-01-01' + "EventDate") >= '2013-07-01' AND (date '1970-01-01' + "EventDate") <= '2013-07-31' AND "DontCountHits" = 0 AND "IsRefresh" = 0 AND "Title" <> '' GROUP BY "Title" ORDER BY "PageViews" DESC LIMIT 10;
3939
SELECT "URL", COUNT(*) AS "PageViews" FROM hits WHERE "CounterID" = 62 AND (date '1970-01-01' + "EventDate") >= '2013-07-01' AND (date '1970-01-01' + "EventDate") <= '2013-07-31' AND "IsRefresh" = 0 AND "IsLink" <> 0 AND "IsDownload" = 0 GROUP BY "URL" ORDER BY "PageViews" DESC LIMIT 10 OFFSET 1000;
4040
SELECT "TraficSourceID", "SearchEngineID", "AdvEngineID", CASE WHEN ("SearchEngineID" = 0 AND "AdvEngineID" = 0) THEN "Referer" ELSE '' END AS Src, "URL" AS Dst, COUNT(*) AS "PageViews" FROM hits WHERE "CounterID" = 62 AND (date '1970-01-01' + "EventDate") >= '2013-07-01' AND (date '1970-01-01' + "EventDate") <= '2013-07-31' AND "IsRefresh" = 0 GROUP BY "TraficSourceID", "SearchEngineID", "AdvEngineID", Src, Dst ORDER BY "PageViews" DESC LIMIT 10 OFFSET 1000;
Lines changed: 44 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"system": "Salesforce Hyper (Parquet)",
3-
"date": "2025-03-01",
3+
"date": "2025-05-03",
44
"machine": "c6a.4xlarge, 500gb gp2",
55
"cluster_size": 1,
66

@@ -9,48 +9,48 @@
99
"load_time": 0,
1010
"data_size": 14737666736,
1111
"result": [
12-
[0.6985541139999896,0.027216920000000755,0.02855089299998781],
13-
[0.6940044039999975,0.04860826000003726,0.049151961999996274],
14-
[0.8971933960000342,0.17664967700000034,0.1702298420000261],
15-
[1.6075812670000005,0.12920780600006765,0.12882554700001947],
16-
[2.0165824840000823,0.724728339999956,0.742897871000082],
17-
[2.1868178959999796,0.42228560900002776,0.41470915599995806],
18-
[0.7550829959999419,0.1037783890000128,0.10456691800004592],
19-
[0.7008819010000025,0.05105827900001714,0.04610908899996957],
20-
[2.3398681300000135,0.9372953990000497,0.9594750869999871],
21-
[2.9709995289999824,1.1059870939999428,1.0968814620000558],
22-
[1.79800228900001,0.13538952600003995,0.12795187599999736],
23-
[1.804587988000094,0.15226294699994014,0.15126008899994758],
24-
[2.22624754200001,0.4370266180000044,0.44781985700001314],
25-
[4.178947770000036,1.3337199710000505,1.3380890769999496],
26-
[2.2655287640000097,0.49567368199996054,0.48954709800000273],
27-
[2.067545822999932,0.7794534509999949,0.7788017039999886],
28-
[3.8150172480000037,1.124171179999962,1.13999706900006],
29-
[3.7510353590000705,1.0424183430000085,1.0304538410000532],
30-
[6.23214877700002,2.625024096000061,2.6283640130000094],
31-
[1.3714859949999436,0.058160605000011856,0.05800351100003809],
32-
[10.62139656699992,1.6995983519999527,1.7134582910000518],
33-
[11.258012755000095,1.7273341959999016,1.7680606559999887],
34-
[14.178312829000106,1.711714819000008,1.720642923000014],
35-
[26.960378805000005,1.8026549179999165,1.796680316999982],
36-
[3.8539238529999693,0.2625974090000227,0.26623333700001695],
37-
[2.005475039999965,0.18303656499995213,0.1786459960000002],
38-
[3.8546983010000986,0.27258231700000124,0.27733553299992764],
39-
[10.876402912999993,3.0302669819999437,3.0771695339999496],
40-
[9.971713970999986,4.639474280000059,4.58498229099996],
41-
[5.143137952000075,4.482304221000163,4.589473950999945],
42-
[3.8281821759999275,0.5546492420000959,0.5367887190000147],
43-
[7.5201330689999395,0.8570178140000735,0.8579126929998893],
44-
[8.042442435000112,4.483592042000055,4.425752202000012],
45-
[11.182898210000076,3.1866615869998896,3.1669316750001144],
46-
[11.228322655999818,3.178803332000143,3.1693062730000747],
47-
[1.6784054899999319,0.7295663160000458,0.7088829439999245],
48-
[0.8270137200001955,0.13063479800007372,0.11806330200010962],
49-
[0.7335704600000099,0.0433581879999565,0.04540310099991984],
50-
[0.7780583579999529,0.03800044800004798,0.03654643000004398],
51-
[0.9612860100000944,0.1585096279998197,0.16010026000003563],
52-
[0.6888319909999154,0.0291583170001104,0.02186890799998764],
53-
[0.8061543610001536,0.02377491800007192,0.024389062999944144],
54-
[0.6877752000000328,0.04160301900014929,0.04005347999986952]
12+
[0.6701854999999881,0.026543314999997847,0.025958850000009193],
13+
[0.7003911710000068,0.04786652099998889,0.0481495380000041],
14+
[0.926679878999991,0.1684335360000091,0.16737685900000088],
15+
[1.6070435240000052,0.1316950779999928,0.12427742900000283],
16+
[2.051801427000001,0.7240740839999944,0.7326129430000066],
17+
[2.228009202999999,0.6961326320000012,0.6926506739999923],
18+
[0.7338797419999992,0.1050219979999838,0.10613471100000993],
19+
[0.6971376909999947,0.048933579000021155,0.0479032130000121],
20+
[2.4079687820000117,0.9778794709999943,0.973062289000012],
21+
[2.966952063000008,1.1319956590000118,1.1385847820000095],
22+
[1.804526702000004,0.1305894520000095,0.12912899699998093],
23+
[1.8239010439999959,0.15489390599998387,0.15120102600002383],
24+
[2.245710525999982,0.6564460120000035,0.6927424650000091],
25+
[4.198124936999989,1.5505265689999987,1.5647105470000042],
26+
[2.290384840999991,0.7098389349999934,0.7031432739999843],
27+
[2.0624858170000095,0.7564595419999876,0.7647100700000067],
28+
[3.8589001449999785,1.34895337399999,1.3480098370000064],
29+
[3.7923260530000107,1.258077061000023,1.2380704850000086],
30+
[6.245484983000011,2.7453418019999845,2.7942323929999873],
31+
[1.3749370449999958,0.057798742000017,0.05783219199997802],
32+
[10.662096385000012,1.015954244999989,1.0371129399999859],
33+
[12.167588365,1.066495257000014,1.1062760350000076],
34+
[20.652561296999977,3.427336275000016,3.353315316000021],
35+
[48.880955812000025,4.189816081000004,4.112222158000009],
36+
[3.900662878999981,0.47990315400005557,0.47800477399999863],
37+
[2.0068144600000437,0.4193776219999563,0.4120854039999813],
38+
[3.8853593000000046,0.48367124499998226,0.5078486190000149],
39+
[10.795704075000003,1.2074172540000063,1.2257957819999774],
40+
[9.216421991000004,3.3965484020000076,3.3944443460000002],
41+
[5.09241250599996,4.465492724000001,4.450456777999989],
42+
[3.8766013779999753,0.7378201340000032,0.7380425509999782],
43+
[7.546830616999955,0.9763103290000004,0.9519056240000054],
44+
[7.957974240999988,4.232503436000002,4.200480980000009],
45+
[11.220359082000016,2.2355791449999742,2.2768837419999954],
46+
[11.12195684599999,2.2672269330000177,2.2741048989999513],
47+
[1.7048903779999591,0.7105519049999884,0.7146498820000033],
48+
[0.8249535709999805,0.09656561599996394,0.10543269699996927],
49+
[0.7409439529999986,0.10049716599996827,0.10203461099996503],
50+
[0.7255527350000079,0.043147469000018646,0.03804827699997304],
51+
[0.9524546800000167,0.19220473899997614,0.19758111499999131],
52+
[0.6795033200000375,0.02268315999998549,0.021795916000030502],
53+
[0.7876791399999661,0.022187652000013713,0.025524295000025177],
54+
[0.6931175519999897,0.038866935999976704,0.03911651900000379]
5555
]
5656
}

0 commit comments

Comments
 (0)