From 46cd25584cba93db8b81f6170d6d405eaffb4c9e Mon Sep 17 00:00:00 2001 From: qsliu Date: Thu, 22 Jan 2026 15:06:02 +0800 Subject: [PATCH 1/3] add pg_ducklake --- pg_ducklake/benchmark.sh | 22 ++++++ pg_ducklake/create.sql | 108 +++++++++++++++++++++++++++ pg_ducklake/queries.sql | 43 +++++++++++ pg_ducklake/results/c6a.4xlarge.json | 57 ++++++++++++++ pg_ducklake/results/c6a.xlarge.json | 57 ++++++++++++++ pg_ducklake/run.sh | 14 ++++ pg_ducklake/template.json | 11 +++ 7 files changed, 312 insertions(+) create mode 100755 pg_ducklake/benchmark.sh create mode 100644 pg_ducklake/create.sql create mode 100644 pg_ducklake/queries.sql create mode 100644 pg_ducklake/results/c6a.4xlarge.json create mode 100644 pg_ducklake/results/c6a.xlarge.json create mode 100755 pg_ducklake/run.sh create mode 100644 pg_ducklake/template.json diff --git a/pg_ducklake/benchmark.sh b/pg_ducklake/benchmark.sh new file mode 100755 index 000000000..19212c7c6 --- /dev/null +++ b/pg_ducklake/benchmark.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +set -e + +sudo apt-get update -y +sudo apt-get install -y docker.io postgresql-client + +wget --continue --progress=dot:giga https://datasets.clickhouse.com/hits_compatible/athena/hits.parquet +sudo docker run -d --name pgduck -p 5432:5432 -e POSTGRES_PASSWORD=duckdb -v ./hits.parquet:/tmp/hits.parquet pgducklake/pgducklake:18-main -c duckdb.max_memory=10GB + +sleep 5 # wait for pgducklake start up + +echo -n "Load time: " +command time -f '%e' psql postgres://postgres:duckdb@localhost:5432/postgres -f create.sql 2>&1 + +./run.sh 2>&1 | tee log.txt + +echo -n "Data size: " +sudo docker exec -i pgduck du -bcs /var/lib/postgresql/ | grep total + +cat log.txt | grep -oP 'Time: \d+\.\d+ ms|psql: error' | sed -r -e 's/Time: ([0-9]+\.[0-9]+) ms/\1/; s/^.*psql: error.*$/null/' | + awk '{ if (i % 3 == 0) { printf "[" }; if ($1 == "null") { printf $1 } else { printf $1 / 1000 }; if (i % 3 != 2) { printf "," } else { print "]," }; ++i; }' diff --git 
a/pg_ducklake/create.sql b/pg_ducklake/create.sql new file mode 100644 index 000000000..a7b150172 --- /dev/null +++ b/pg_ducklake/create.sql @@ -0,0 +1,108 @@ +create table hits using ducklake as +select + r['WatchID'] AS WatchID, + r['JavaEnable'] AS JavaEnable, + r['Title']::text AS Title, + r['GoodEvent'] AS GoodEvent, + ('epoch'::timestamp + (r['EventTime'] * interval '1 second'))::timestamp AS EventTime, + (DATE '1970-01-01' + (r['EventDate'] * interval '1 day'))::date AS EventDate, + r['CounterID'] AS CounterID, + r['ClientIP'] AS ClientIP, + r['RegionID'] AS RegionID, + r['UserID'] AS UserID, + r['CounterClass'] AS CounterClass, + r['OS'] AS OS, + r['UserAgent'] AS UserAgent, + r['URL']::text AS URL, + r['Referer']::text AS Referer, + r['IsRefresh'] AS IsRefresh, + r['RefererCategoryID'] AS RefererCategoryID, + r['RefererRegionID'] AS RefererRegionID, + r['URLCategoryID'] AS URLCategoryID, + r['URLRegionID'] AS URLRegionID, + r['ResolutionWidth'] AS ResolutionWidth, + r['ResolutionHeight'] AS ResolutionHeight, + r['ResolutionDepth'] AS ResolutionDepth, + r['FlashMajor'] AS FlashMajor, + r['FlashMinor'] AS FlashMinor, + r['FlashMinor2'] AS FlashMinor2, + r['NetMajor'] AS NetMajor, + r['NetMinor'] AS NetMinor, + r['UserAgentMajor'] AS UserAgentMajor, + r['UserAgentMinor'] AS UserAgentMinor, + r['CookieEnable'] AS CookieEnable, + r['JavascriptEnable'] AS JavascriptEnable, + r['IsMobile'] AS IsMobile, + r['MobilePhone'] AS MobilePhone, + r['MobilePhoneModel'] AS MobilePhoneModel, + r['Params'] AS Params, + r['IPNetworkID'] AS IPNetworkID, + r['TraficSourceID'] AS TraficSourceID, + r['SearchEngineID'] AS SearchEngineID, + r['SearchPhrase'] AS SearchPhrase, + r['AdvEngineID'] AS AdvEngineID, + r['IsArtifical'] AS IsArtifical, + r['WindowClientWidth'] AS WindowClientWidth, + r['WindowClientHeight'] AS WindowClientHeight, + r['ClientTimeZone'] AS ClientTimeZone, + ('epoch'::timestamp + (r['ClientEventTime'] * interval '1 second'))::timestamp AS ClientEventTime, + 
r['SilverlightVersion1'] AS SilverlightVersion1, + r['SilverlightVersion2'] AS SilverlightVersion2, + r['SilverlightVersion3'] AS SilverlightVersion3, + r['SilverlightVersion4'] AS SilverlightVersion4, + r['PageCharset'] AS PageCharset, + r['CodeVersion'] AS CodeVersion, + r['IsLink'] AS IsLink, + r['IsDownload'] AS IsDownload, + r['IsNotBounce'] AS IsNotBounce, + r['FUniqID'] AS FUniqID, + r['OriginalURL'] AS OriginalURL, + r['HID'] AS HID, + r['IsOldCounter'] AS IsOldCounter, + r['IsEvent'] AS IsEvent, + r['IsParameter'] AS IsParameter, + r['DontCountHits'] AS DontCountHits, + r['WithHash'] AS WithHash, + r['HitColor'] AS HitColor, + ('epoch'::timestamp + (r['LocalEventTime'] * interval '1 second'))::timestamp AS LocalEventTime, + r['Age'] AS Age, + r['Sex'] AS Sex, + r['Income'] AS Income, + r['Interests'] AS Interests, + r['Robotness'] AS Robotness, + r['RemoteIP'] AS RemoteIP, + r['WindowName'] AS WindowName, + r['OpenerName'] AS OpenerName, + r['HistoryLength'] AS HistoryLength, + r['BrowserLanguage'] AS BrowserLanguage, + r['BrowserCountry'] AS BrowserCountry, + r['SocialNetwork'] AS SocialNetwork, + r['SocialAction'] AS SocialAction, + r['HTTPError'] AS HTTPError, + r['SendTiming'] AS SendTiming, + r['DNSTiming'] AS DNSTiming, + r['ConnectTiming'] AS ConnectTiming, + r['ResponseStartTiming'] AS ResponseStartTiming, + r['ResponseEndTiming'] AS ResponseEndTiming, + r['FetchTiming'] AS FetchTiming, + r['SocialSourceNetworkID'] AS SocialSourceNetworkID, + r['SocialSourcePage'] AS SocialSourcePage, + r['ParamPrice'] AS ParamPrice, + r['ParamOrderID'] AS ParamOrderID, + r['ParamCurrency'] AS ParamCurrency, + r['ParamCurrencyID'] AS ParamCurrencyID, + r['OpenstatServiceName'] AS OpenstatServiceName, + r['OpenstatCampaignID'] AS OpenstatCampaignID, + r['OpenstatAdID'] AS OpenstatAdID, + r['OpenstatSourceID'] AS OpenstatSourceID, + r['UTMSource'] AS UTMSource, + r['UTMMedium'] AS UTMMedium, + r['UTMCampaign'] AS UTMCampaign, + r['UTMContent'] AS UTMContent, + 
r['UTMTerm'] AS UTMTerm, + r['FromTag'] AS FromTag, + r['HasGCLID'] AS HasGCLID, + r['RefererHash'] AS RefererHash, + r['URLHash'] AS URLHash, + r['CLID'] AS CLID +from read_parquet('/tmp/hits.parquet', binary_as_string => true) r; diff --git a/pg_ducklake/queries.sql b/pg_ducklake/queries.sql new file mode 100644 index 000000000..31f65fc89 --- /dev/null +++ b/pg_ducklake/queries.sql @@ -0,0 +1,43 @@ +SELECT COUNT(*) FROM hits; +SELECT COUNT(*) FROM hits WHERE AdvEngineID <> 0; +SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM hits; +SELECT AVG(UserID) FROM hits; +SELECT COUNT(DISTINCT UserID) FROM hits; +SELECT COUNT(DISTINCT SearchPhrase) FROM hits; +SELECT MIN(EventDate), MAX(EventDate) FROM hits; +SELECT AdvEngineID, COUNT(*) FROM hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC; +SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10; +SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10; +SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10; +SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10; +SELECT SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; +SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10; +SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; 
+SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10; +SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; +SELECT UserID FROM hits WHERE UserID = 435090932899640449; +SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%'; +SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10; +SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10; +SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10; +SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10; +SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; +SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; +SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), 
SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits; +SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits 
WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10; +SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; +SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; +SELECT URL, COUNT(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10; +SELECT 1, URL, COUNT(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10; +SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10; +SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10; +SELECT Title, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10; +SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; +SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; +SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) 
AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 10 OFFSET 100; +SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10 OFFSET 10000; +SELECT DATE_TRUNC('minute', EventTime) AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime) LIMIT 10 OFFSET 1000; diff --git a/pg_ducklake/results/c6a.4xlarge.json b/pg_ducklake/results/c6a.4xlarge.json new file mode 100644 index 000000000..eab1565a9 --- /dev/null +++ b/pg_ducklake/results/c6a.4xlarge.json @@ -0,0 +1,57 @@ +{ + "system": "pg_ducklake", + "date": "2026-01-22", + "machine": "c6a.4xlarge", + "cluster_size": 1, + "proprietary": "no", + "hardware": "cpu", + "tuned": "no", + "tags": ["column-oriented","PostgreSQL compatible", "lukewarm-cold-run"], + "load_time": 93.95, + "data_size": 14378439891, + "result": [ + [0.358329,0.003243,0.002577], + [0.304912,0.032367,0.030995], + [0.419534,0.053724,0.053974], + [1.20638,0.047637,0.04709], + [1.70042,0.247668,0.248871], + [1.73253,0.44234,0.440345], + [0.16756,0.033926,0.03317], + [0.383286,0.034359,0.032727], + [1.48776,0.406932,0.410867], + [1.88431,0.520919,0.52289], + [1.30133,0.118989,0.115565], + [1.67178,0.144067,0.140077], + [1.97717,0.462893,0.465399], + [3.67377,0.798237,0.820332], + [1.79265,0.523758,0.531904], + [1.099,0.312754,0.31557], + [3.83149,0.891766,0.912247], + [3.36577,0.702824,0.703113], + [7.26021,1.76622,1.78438], + [0.212289,0.029212,0.028365], + [10.3132,0.865901,0.863253], + [12.2893,0.939393,0.966405], + [20.1503,1.62257,1.61551], + [3.11453,0.369581,0.371386], + 
[0.386905,0.092639,0.079938], + [2.10451,0.2287,0.230152], + [0.434909,0.064355,0.063202], + [10.4124,1.00012,1.00744], + [9.32583,8.36457,8.31132], + [0.460624,0.063022,0.062375], + [3.71484,0.490669,0.499561], + [7.80084,0.579667,0.574134], + [6.42594,1.80026,1.87264], + [10.3995,2.16111,2.15899], + [10.4117,2.22497,2.14273], + [0.667533,0.450148,0.456124], + [0.158975,0.070187,0.067082], + [0.318069,0.029968,0.029922], + [0.407992,0.048538,0.042037], + [0.210574,0.104541,0.086629], + [0.152496,0.02389,0.024677], + [0.352524,0.028008,0.026809], + [0.355746,0.032128,0.029448] +] +} diff --git a/pg_ducklake/results/c6a.xlarge.json b/pg_ducklake/results/c6a.xlarge.json new file mode 100644 index 000000000..704669870 --- /dev/null +++ b/pg_ducklake/results/c6a.xlarge.json @@ -0,0 +1,57 @@ +{ + "system": "pg_ducklake", + "date": "2026-01-21", + "machine": "c6a.xlarge", + "cluster_size": 1, + "proprietary": "no", + "hardware": "cpu", + "tuned": "no", + "tags": ["column-oriented","PostgreSQL compatible", "lukewarm-cold-run"], + "load_time": 131.89, + "data_size": 14403222390, + "result": [ + [0.419685,0.00347,0.002752], + [0.549026,0.075349,0.075537], + [1.00263,0.193674,0.192751], + [1.16848,0.163853,0.163433], + [1.72052,0.828793,0.838691], + [2.32957,1.3953,1.39133], + [0.595009,0.114611,0.114151], + [0.569755,0.084823,0.083809], + [2.23208,1.25616,1.26362], + [3.06032,1.67818,1.69383], + [1.5518,0.377411,0.375841], + [1.81538,0.474402,0.472184], + [2.37542,1.43768,1.43485], + [4.28577,2.26273,2.28311], + [2.80034,1.6257,1.63986], + [1.89932,0.967983,0.983002], + [4.39881,2.96853,2.99695], + [3.59435,2.31012,2.31644], + [8.33636,5.9488,6.45869], + [0.716877,0.075185,0.074564], + [10.1201,3.00122,3.38025], + [12.3118,3.01709,3.01119], + [19.9692,5.54399,5.48446], + [2.66708,0.918497,0.907774], + [0.801723,0.22768,0.237503], + [1.73319,0.861207,0.856386], + [0.727712,0.192882,0.206855], + [10.0341,3.52107,3.50371], + [33.2059,31.806,31.8046], + 
[0.853564,0.178584,0.177118], + [3.809,1.6408,1.64184], + [7.97836,1.79472,1.80199], + [8.92026,7.42491,7.33177], + [11.1781,8.04339,7.93158], + [11.2136,8.59746,8.59502], + [2.08724,1.4724,1.46332], + [0.456639,0.094725,0.096047], + [0.379498,0.047462,0.046218], + [0.40735,0.046603,0.049856], + [0.56051,0.1574,0.154484], + [0.37359,0.026685,0.026118], + [0.363551,0.029908,0.028155], + [0.372222,0.040061,0.039667] +] +} diff --git a/pg_ducklake/run.sh b/pg_ducklake/run.sh new file mode 100755 index 000000000..68e37d403 --- /dev/null +++ b/pg_ducklake/run.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +TRIES=3 + +cat queries.sql | while read -r query; do + sync + echo 3 | sudo tee /proc/sys/vm/drop_caches + + echo "$query" + ( + echo '\timing' + yes "$query" | head -n $TRIES + ) | psql --no-psqlrc --tuples-only postgres://postgres:duckdb@localhost:5432/postgres 2>&1 +done diff --git a/pg_ducklake/template.json b/pg_ducklake/template.json new file mode 100644 index 000000000..2ff1aa4e0 --- /dev/null +++ b/pg_ducklake/template.json @@ -0,0 +1,11 @@ +{ + "system": "pg_ducklake", + "proprietary": "no", + "hardware": "cpu", + "tuned": "no", + "tags": [ + "column-oriented", + "PostgreSQL compatible", + "lukewarm-cold-run" + ] +} From 096366cebd15006b0a6acad4717783f5718c943a Mon Sep 17 00:00:00 2001 From: qsliu Date: Thu, 29 Jan 2026 14:43:17 +0800 Subject: [PATCH 2/3] address comments --- pg_ducklake/benchmark.sh | 4 +- pg_ducklake/results/c6a.4xlarge.json | 88 ++++++++++++++-------------- pg_ducklake/results/c6a.xlarge.json | 88 ++++++++++++++-------------- pg_ducklake/run.sh | 2 + 4 files changed, 92 insertions(+), 90 deletions(-) diff --git a/pg_ducklake/benchmark.sh b/pg_ducklake/benchmark.sh index 19212c7c6..791866089 100755 --- a/pg_ducklake/benchmark.sh +++ b/pg_ducklake/benchmark.sh @@ -6,7 +6,7 @@ sudo apt-get update -y sudo apt-get install -y docker.io postgresql-client wget --continue --progress=dot:giga https://datasets.clickhouse.com/hits_compatible/athena/hits.parquet 
-sudo docker run -d --name pgduck -p 5432:5432 -e POSTGRES_PASSWORD=duckdb -v ./hits.parquet:/tmp/hits.parquet pgducklake/pgducklake:18-main -c duckdb.max_memory=10GB +docker run -d --name pgduck -p 5432:5432 -e POSTGRES_PASSWORD=duckdb -v ./hits.parquet:/tmp/hits.parquet pgducklake/pgducklake:18-main sleep 5 # wait for pgducklake start up @@ -16,7 +16,7 @@ command time -f '%e' psql postgres://postgres:duckdb@localhost:5432/postgres -f ./run.sh 2>&1 | tee log.txt echo -n "Data size: " -sudo docker exec -i pgduck du -bcs /var/lib/postgresql/ | grep total +docker exec -i pgduck du -bcs /var/lib/postgresql/ | grep total cat log.txt | grep -oP 'Time: \d+\.\d+ ms|psql: error' | sed -r -e 's/Time: ([0-9]+\.[0-9]+) ms/\1/; s/^.*psql: error.*$/null/' | awk '{ if (i % 3 == 0) { printf "[" }; if ($1 == "null") { printf $1 } else { printf $1 / 1000 }; if (i % 3 != 2) { printf "," } else { print "]," }; ++i; }' diff --git a/pg_ducklake/results/c6a.4xlarge.json b/pg_ducklake/results/c6a.4xlarge.json index eab1565a9..705ca020a 100644 --- a/pg_ducklake/results/c6a.4xlarge.json +++ b/pg_ducklake/results/c6a.4xlarge.json @@ -6,52 +6,52 @@ "proprietary": "no", "hardware": "cpu", "tuned": "no", - "tags": ["column-oriented","PostgreSQL compatible", "lukewarm-cold-run"], + "tags": ["column-oriented", "PostgreSQL compatible", "cold-run"], "load_time": 93.95, "data_size": 14378439891, "result": [ - [0.358329,0.003243,0.002577], - [0.304912,0.032367,0.030995], - [0.419534,0.053724,0.053974], - [1.20638,0.047637,0.04709], - [1.70042,0.247668,0.248871], - [1.73253,0.44234,0.440345], - [0.16756,0.033926,0.03317], - [0.383286,0.034359,0.032727], - [1.48776,0.406932,0.410867], - [1.88431,0.520919,0.52289], - [1.30133,0.118989,0.115565], - [1.67178,0.144067,0.140077], - [1.97717,0.462893,0.465399], - [3.67377,0.798237,0.820332], - [1.79265,0.523758,0.531904], - [1.099,0.312754,0.31557], - [3.83149,0.891766,0.912247], - [3.36577,0.702824,0.703113], - [7.26021,1.76622,1.78438], - 
[0.212289,0.029212,0.028365], - [10.3132,0.865901,0.863253], - [12.2893,0.939393,0.966405], - [20.1503,1.62257,1.61551], - [3.11453,0.369581,0.371386], - [0.386905,0.092639,0.079938], - [2.10451,0.2287,0.230152], - [0.434909,0.064355,0.063202], - [10.4124,1.00012,1.00744], - [9.32583,8.36457,8.31132], - [0.460624,0.063022,0.062375], - [3.71484,0.490669,0.499561], - [7.80084,0.579667,0.574134], - [6.42594,1.80026,1.87264], - [10.3995,2.16111,2.15899], - [10.4117,2.22497,2.14273], - [0.667533,0.450148,0.456124], - [0.158975,0.070187,0.067082], - [0.318069,0.029968,0.029922], - [0.407992,0.048538,0.042037], - [0.210574,0.104541,0.086629], - [0.152496,0.02389,0.024677], - [0.352524,0.028008,0.026809], - [0.355746,0.032128,0.029448] + [0.426926,0.003319,0.002914], + [0.394652,0.03203,0.030735], + [0.639309,0.054248,0.053677], + [1.21937,0.047571,0.046974], + [1.29005,0.246164,0.248109], + [1.76258,0.432908,0.431297], + [0.462966,0.034642,0.033639], + [0.472952,0.03385,0.032233], + [1.58045,0.399257,0.400574], + [1.94988,0.504819,0.502837], + [1.40278,0.114715,0.112335], + [1.43525,0.141494,0.137127], + [1.85154,0.464813,0.457713], + [3.98416,0.783954,0.797456], + [1.88476,0.526653,0.525194], + [1.44308,0.326332,0.327343], + [3.97642,0.97331,0.956446], + [3.49258,0.71838,0.717693], + [7.52156,1.73769,1.78831], + [0.662547,0.028819,0.028901], + [9.9086,0.857202,0.877521], + [12.3596,0.979099,0.973662], + [19.9268,1.60558,1.57848], + [3.59571,0.392464,0.390463], + [0.51121,0.090815,0.087726], + [1.70483,0.23157,0.227687], + [0.467531,0.075129,0.064566], + [10.0688,0.947417,0.955145], + [9.35775,8.32927,8.311], + [0.479059,0.062975,0.061786], + [3.79271,0.48596,0.491309], + [7.88898,0.577122,0.579216], + [6.51295,1.8782,1.87676], + [10.3878,2.05736,2.11211], + [10.3986,2.1743,2.1748], + [0.741235,0.458409,0.463944], + [0.229121,0.060227,0.059358], + [0.390184,0.029766,0.029438], + [0.456341,0.043897,0.044554], + [0.521704,0.10282,0.085958], + [0.450722,0.024453,0.023007], + 
[0.448706,0.026684,0.025538], + [0.456184,0.032159,0.028639] ] } diff --git a/pg_ducklake/results/c6a.xlarge.json b/pg_ducklake/results/c6a.xlarge.json index 704669870..c3042ec30 100644 --- a/pg_ducklake/results/c6a.xlarge.json +++ b/pg_ducklake/results/c6a.xlarge.json @@ -6,52 +6,52 @@ "proprietary": "no", "hardware": "cpu", "tuned": "no", - "tags": ["column-oriented","PostgreSQL compatible", "lukewarm-cold-run"], + "tags": ["column-oriented", "PostgreSQL compatible", "cold-run"], "load_time": 131.89, "data_size": 14403222390, "result": [ - [0.419685,0.00347,0.002752], - [0.549026,0.075349,0.075537], - [1.00263,0.193674,0.192751], - [1.16848,0.163853,0.163433], - [1.72052,0.828793,0.838691], - [2.32957,1.3953,1.39133], - [0.595009,0.114611,0.114151], - [0.569755,0.084823,0.083809], - [2.23208,1.25616,1.26362], - [3.06032,1.67818,1.69383], - [1.5518,0.377411,0.375841], - [1.81538,0.474402,0.472184], - [2.37542,1.43768,1.43485], - [4.28577,2.26273,2.28311], - [2.80034,1.6257,1.63986], - [1.89932,0.967983,0.983002], - [4.39881,2.96853,2.99695], - [3.59435,2.31012,2.31644], - [8.33636,5.9488,6.45869], - [0.716877,0.075185,0.074564], - [10.1201,3.00122,3.38025], - [12.3118,3.01709,3.01119], - [19.9692,5.54399,5.48446], - [2.66708,0.918497,0.907774], - [0.801723,0.22768,0.237503], - [1.73319,0.861207,0.856386], - [0.727712,0.192882,0.206855], - [10.0341,3.52107,3.50371], - [33.2059,31.806,31.8046], - [0.853564,0.178584,0.177118], - [3.809,1.6408,1.64184], - [7.97836,1.79472,1.80199], - [8.92026,7.42491,7.33177], - [11.1781,8.04339,7.93158], - [11.2136,8.59746,8.59502], - [2.08724,1.4724,1.46332], - [0.456639,0.094725,0.096047], - [0.379498,0.047462,0.046218], - [0.40735,0.046603,0.049856], - [0.56051,0.1574,0.154484], - [0.37359,0.026685,0.026118], - [0.363551,0.029908,0.028155], - [0.372222,0.040061,0.039667] + [0.477732,0.003461,0.00267], + [0.618581,0.075816,0.075637], + [1.08492,0.192979,0.192646], + [1.24486,0.163356,0.163539], + [1.81359,0.838475,0.852681], + 
[2.42226,1.41158,1.42799], + [0.662021,0.11521,0.115072], + [0.660119,0.085456,0.084751], + [2.33907,1.27704,1.28281], + [3.19921,1.86175,1.90005], + [1.72465,0.469343,0.411195], + [1.91126,0.477352,0.477078], + [2.48784,1.44973,1.47575], + [4.35646,2.29366,2.3409], + [2.95192,1.65996,1.66574], + [2.0015,0.996522,1.00631], + [4.47985,2.96021,2.99019], + [3.68747,2.28343,2.30684], + [8.94438,7.23794,7.60675], + [0.77622,0.061502,0.061404], + [9.92958,2.94398,2.96968], + [12.35,3.0043,2.95032], + [20.1086,5.65846,5.55607], + [3.7057,1.12305,1.12733], + [0.829869,0.274687,0.257188], + [1.75419,0.862735,0.865698], + [0.796771,0.21117,0.198525], + [10.1187,3.45327,3.55118], + [33.2923,31.8397,31.8817], + [0.929163,0.178708,0.17813], + [3.9396,1.66853,1.65704], + [8.05231,1.80466,1.82885], + [14.3065,11.584,12.43], + [17.7537,15.2959,15.2644], + [18.3914,16.2462,16.5392], + [2.25075,1.51645,1.54148], + [0.538219,0.0926,0.092042], + [0.462589,0.045791,0.04397], + [0.498257,0.051134,0.050942], + [0.648039,0.162797,0.150939], + [0.460737,0.027639,0.02789], + [0.451258,0.030167,0.029397], + [0.458958,0.041236,0.040506] ] } diff --git a/pg_ducklake/run.sh b/pg_ducklake/run.sh index 68e37d403..1e2036f1d 100755 --- a/pg_ducklake/run.sh +++ b/pg_ducklake/run.sh @@ -5,6 +5,8 @@ TRIES=3 cat queries.sql | while read -r query; do sync echo 3 | sudo tee /proc/sys/vm/drop_caches + docker restart pgduck + timeout 60 sh -c 'until pg_isready -q -h localhost -p 5432; do sleep 1; done' # wait until the restarted server accepts connections (a fixed sleep can lose the race); give up after 60 s echo "$query" ( From 9b1f51c9bbdac98a95ed62ae57ef43c9582c2ce5 Mon Sep 17 00:00:00 2001 From: qsliu Date: Thu, 29 Jan 2026 14:46:13 +0800 Subject: [PATCH 3/3] chore --- pg_ducklake/template.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pg_ducklake/template.json b/pg_ducklake/template.json index 2ff1aa4e0..5804333c7 100644 --- a/pg_ducklake/template.json +++ b/pg_ducklake/template.json @@ -6,6 +6,6 @@ "tags": [ "column-oriented", "PostgreSQL compatible", - "lukewarm-cold-run" + "cold-run" ] }