diff --git a/.github/scripts/compare-jmh.py b/.github/scripts/compare-jmh.py
index e0a6bd17d..5010938f1 100644
--- a/.github/scripts/compare-jmh.py
+++ b/.github/scripts/compare-jmh.py
@@ -317,6 +317,70 @@ def compute_discriminators(
return out
+# Visual markers used in rendered output. "Performance went up" =
+# improvement (less time / less memory per op).
+ARROW_REGRESS = "\u2b07\ufe0f" # ⬇️
+ARROW_IMPROVE = "\u2b06\ufe0f" # ⬆️
+ARROW_NOISE = "\u2796" # ➖
+
+DOT_REGRESS = "\U0001f534" # 🔴
+DOT_IMPROVE = "\U0001f7e2" # 🟢
+
+
+def _row_arrow(r: Row, threshold: float) -> str:
+ if any(d.regression(threshold) for d in r.deltas):
+ return ARROW_REGRESS
+ if any(d.improvement(threshold) for d in r.deltas):
+ return ARROW_IMPROVE
+ return ARROW_NOISE
+
+
+def _short_delta(d: MetricDelta, threshold: float) -> str:
+ """One-line metric delta for the brief bullet list.
+
+ Returns "" for noise (caller drops it). Regressions are 🔴-marked and
+ bold so they stand out on a packed PR comment; improvements are
+ 🟢-marked but unbolded.
+ """
+ if d.delta_pct is None:
+ return ""
+ label = d.metric.label
+ delta = fmt_delta(d.delta_pct)
+ if d.regression(threshold):
+ return f"{DOT_REGRESS} **{label} {delta}**"
+ if d.improvement(threshold):
+ return f"{DOT_IMPROVE} {label} {delta}"
+ return ""
+
+
+def _stats_cell(r: Row, threshold: float) -> str:
+ """Render the joined Stats cell for one row in the detail table.
+
+    Each metric occupies a `<br>`-separated line. Regressions are
+ bolded and 🔴-tagged; improvements are 🟢-tagged. Metrics with no
+ baseline/current data are rendered grey ("—") so the cell still
+ shows which dimension is missing.
+ """
+ lines: List[str] = []
+ for d in r.deltas:
+ label = d.metric.label
+ if d.delta_pct is None:
+ base = fmt_score(d.baseline, d.baseline_err, d.unit)
+ curr = fmt_score(d.current, d.current_err, d.unit)
+ lines.append(f"{label} {base} → {curr} (—)")
+ continue
+ base = fmt_score(d.baseline, d.baseline_err, d.unit)
+ curr = fmt_score(d.current, d.current_err, d.unit)
+ delta = fmt_delta(d.delta_pct)
+ if d.regression(threshold):
+ lines.append(f"{DOT_REGRESS} **{label}** {base} → {curr} (**{delta}**)")
+ elif d.improvement(threshold):
+ lines.append(f"{DOT_IMPROVE} **{label}** {base} → {curr} ({delta})")
+ else:
+ lines.append(f"{label} {base} → {curr} ({delta})")
+    return "<br>".join(lines)
+
+
def build_markdown(
rows: List[Row],
only_current: List[Key],
@@ -341,14 +405,14 @@ def build_markdown(
out: List[str] = [""]
if regressions:
out.append(
- f"## ❌ JMH benchmark comparison — {regressions} regression(s) over {threshold:g}%"
+ f"## {DOT_REGRESS} JMH benchmark comparison — {regressions} regression(s) over {threshold:g}%"
)
elif improvements:
out.append(
- f"## ✅ JMH benchmark comparison — no regressions, {improvements} improvement(s) over {threshold:g}%"
+ f"## {DOT_IMPROVE} JMH benchmark comparison — no regressions, {improvements} improvement(s) over {threshold:g}%"
)
else:
- out.append(f"## ✅ JMH benchmark comparison — no changes over {threshold:g}%")
+ out.append(f"## {DOT_IMPROVE} JMH benchmark comparison — no changes over {threshold:g}%")
out.append("")
if repo and baseline_run_id and current_run_id:
@@ -376,21 +440,13 @@ def bucket(r: Row) -> int:
for r in rows:
bench, _ = r.key
disc = discriminators.get(r.key, "")
- b = bucket(r)
- icon = "❌" if b == 0 else "✅"
-
- # In the brief view, only mention metrics that actually crossed
- # the threshold — keeps noisy rows to a single line.
- bits: List[str] = []
- for d in r.deltas:
- if d.delta_pct is None:
- continue
- if d.regression(threshold):
- bits.append(f"**{d.metric.label} {fmt_delta(d.delta_pct)}**")
- elif d.improvement(threshold):
- bits.append(f"{d.metric.label} {fmt_delta(d.delta_pct)}")
-
- line = f"- {icon} `{short_bench(bench)}`"
+ arrow = _row_arrow(r, threshold)
+
+ # Only mention metrics that actually crossed the threshold
+ # in the brief view — keeps noisy rows to a single line.
+ bits = [s for s in (_short_delta(d, threshold) for d in r.deltas) if s]
+
+ line = f"- {arrow} `{short_bench(bench)}`"
if disc:
line += f" `[{disc}]`"
if bits:
@@ -410,16 +466,14 @@ def bucket(r: Row) -> int:
""
)
out.append("")
- header = "| Benchmark | Params | " + " | ".join(m.label for m in METRICS) + " | Status |"
- sep = "|---|---|" + "|".join(["---"] * len(METRICS)) + "|---|"
- out.append(header)
- out.append(sep)
+ out.append("| Benchmark | Stats |")
+ out.append("|---|---|")
for r in rows:
bench, params = r.key
- cells = " | ".join(d.cell() for d in r.deltas)
- out.append(
- f"| `{short_bench(bench)}` | {params or '—'} | {cells} | {r.status(threshold)} |"
- )
+ bench_cell = f"`{short_bench(bench)}`"
+ if params:
+        bench_cell += f"<br>{params}"
+ out.append(f"| {bench_cell} | {_stats_cell(r, threshold)} |")
out.append("")
out.append("")
out.append("")
diff --git a/.github/scripts/test_compare_jmh.py b/.github/scripts/test_compare_jmh.py
new file mode 100644
index 000000000..d426c1ef2
--- /dev/null
+++ b/.github/scripts/test_compare_jmh.py
@@ -0,0 +1,217 @@
+#!/usr/bin/env python3
+"""End-to-end tests for `compare-jmh.py`.
+
+Runs `compare-jmh.py` against every scenario in `test_data/` via
+subprocess and asserts on both the rendered markdown and the
+`--summary-output` counters. Designed to be run locally
+(`python3 .github/scripts/test_compare_jmh.py`) and from CI without
+extra dependencies — only the standard library is used.
+"""
+
+from __future__ import annotations
+
+import os
+import shutil
+import subprocess
+import sys
+import tempfile
+import unittest
+from pathlib import Path
+from typing import Dict, List
+
+HERE = Path(__file__).resolve().parent
+SCRIPT = HERE / "compare-jmh.py"
+DATA = HERE / "test_data"
+
+
+def _parse_summary(path: Path) -> Dict[str, str]:
+ out: Dict[str, str] = {}
+ for line in path.read_text(encoding="utf-8").splitlines():
+ if "=" in line:
+ k, v = line.split("=", 1)
+ out[k.strip()] = v.strip()
+ return out
+
+
+def _run_compare(
+ case: str,
+ *,
+ threshold: float = 10.0,
+ extra_args: List[str] | None = None,
+) -> Dict[str, object]:
+ """Invoke compare-jmh.py against a fixture; return its outputs.
+
+ Returns a dict with: returncode, stdout, stderr, markdown, summary.
+ """
+ case_dir = DATA / case
+ assert case_dir.is_dir(), f"missing test fixture: {case_dir}"
+
+ tmp = Path(tempfile.mkdtemp(prefix=f"cmp-jmh-{case}-"))
+ try:
+ out_md = tmp / "out.md"
+ summary = tmp / "summary.env"
+ cmd = [
+ sys.executable,
+ str(SCRIPT),
+ "--baseline",
+ str(case_dir / "baseline"),
+ "--current",
+ str(case_dir / "current"),
+ "--threshold-pct",
+ str(threshold),
+ "--output",
+ str(out_md),
+ "--summary-output",
+ str(summary),
+ ]
+ if extra_args:
+ cmd.extend(extra_args)
+ proc = subprocess.run(cmd, capture_output=True, text=True)
+ result: Dict[str, object] = {
+ "returncode": proc.returncode,
+ "stdout": proc.stdout,
+ "stderr": proc.stderr,
+ "markdown": out_md.read_text(encoding="utf-8") if out_md.exists() else "",
+ "summary": _parse_summary(summary) if summary.exists() else {},
+ }
+ return result
+ finally:
+ shutil.rmtree(tmp, ignore_errors=True)
+
+
+# Pre-computed marker strings — keep in sync with compare-jmh.py.
+ARROW_REGRESS = "\u2b07\ufe0f"
+ARROW_IMPROVE = "\u2b06\ufe0f"
+ARROW_NOISE = "\u2796"
+DOT_REGRESS = "\U0001f534"
+DOT_IMPROVE = "\U0001f7e2"
+
+
+class CompareJmhTest(unittest.TestCase):
+ """Each case in test_data/ has a dedicated test asserting on the
+ headline shape, the bullet markers, and the summary counters."""
+
+ def test_all_improvements(self) -> None:
+ r = _run_compare("all_improvements")
+ self.assertEqual(r["returncode"], 0)
+ md = r["markdown"]
+ self.assertIn("no regressions", md)
+ self.assertIn(ARROW_IMPROVE, md)
+ self.assertIn(DOT_IMPROVE, md)
+ self.assertNotIn(ARROW_REGRESS, md)
+ self.assertNotIn(DOT_REGRESS, md)
+ s = r["summary"]
+ self.assertEqual(s.get("regressions"), "0")
+ self.assertEqual(s.get("improvements"), "2")
+ self.assertEqual(s.get("matched"), "2")
+
+ def test_all_regressions(self) -> None:
+ r = _run_compare("all_regressions")
+ self.assertEqual(r["returncode"], 0)
+ md = r["markdown"]
+ self.assertIn("regression(s) over", md)
+ self.assertIn(ARROW_REGRESS, md)
+ self.assertIn(DOT_REGRESS, md)
+ # Bold markdown around the metric label.
+ self.assertIn("**Time", md)
+ s = r["summary"]
+ self.assertEqual(s.get("regressions"), "2")
+ # No row "purely improved" → improvements should be 0.
+ self.assertEqual(s.get("improvements"), "0")
+
+ def test_mixed(self) -> None:
+ r = _run_compare("mixed")
+ self.assertEqual(r["returncode"], 0)
+ md = r["markdown"]
+ # Both directions present.
+ self.assertIn(ARROW_REGRESS, md)
+ self.assertIn(ARROW_IMPROVE, md)
+ self.assertIn(DOT_REGRESS, md)
+ self.assertIn(DOT_IMPROVE, md)
+ # Discriminator suffix `[limit=…]` appears for the two
+ # `queryV2` variants of the same benchmark.
+ self.assertIn("[limit=10000]", md)
+ self.assertIn("[limit=100000]", md)
+ # The unique-named benchmark must NOT get a discriminator.
+ self.assertNotIn("`JDBCQuery.selectJDBCV2` `[", md)
+ s = r["summary"]
+ self.assertEqual(s.get("matched"), "4")
+ self.assertGreater(int(s.get("regressions", "0")), 0)
+
+ def test_no_alloc(self) -> None:
+ # No `gc.alloc.rate.norm` present anywhere → script must
+ # still compare Time and emit a diagnostic on stderr.
+ r = _run_compare("no_alloc")
+ self.assertEqual(r["returncode"], 0)
+ self.assertIn("no `gc.alloc.rate.norm`", r["stderr"])
+ md = r["markdown"]
+ # Time regression should still be detected and 🔴-tagged…
+ self.assertIn(DOT_REGRESS, md)
+ # …but no Alloc/op metric is ever 🟢/🔴 because we have no
+ # baseline/current data for it.
+ self.assertIn("Alloc/op", md)
+ # The detail-table cell should fall back to "(—)" for alloc.
+ self.assertIn("Alloc/op — → — (—)", md)
+ s = r["summary"]
+ self.assertEqual(s.get("matched"), "2")
+
+ def test_noise_only(self) -> None:
+ r = _run_compare("noise_only")
+ self.assertEqual(r["returncode"], 0)
+ md = r["markdown"]
+ self.assertIn("no changes over 10%", md)
+ # Every row should be on the noise arrow…
+ self.assertIn(ARROW_NOISE, md)
+ # …and there should be no red/green dots anywhere.
+ self.assertNotIn(DOT_REGRESS, md)
+ # 🟢 *is* in the header for the OK case, so don't assert on
+ # DOT_IMPROVE alone.
+ s = r["summary"]
+ self.assertEqual(s.get("regressions"), "0")
+ self.assertEqual(s.get("improvements"), "0")
+
+ def test_only_in_pr(self) -> None:
+ r = _run_compare("only_in_pr")
+ self.assertEqual(r["returncode"], 0)
+ md = r["markdown"]
+ self.assertIn("Benchmarks only in PR run", md)
+ self.assertIn("QueryClient.queryV3New", md)
+ s = r["summary"]
+ # one shared row matched.
+ self.assertEqual(s.get("matched"), "1")
+
+ def test_only_in_baseline(self) -> None:
+ r = _run_compare("only_in_baseline")
+ self.assertEqual(r["returncode"], 0)
+ md = r["markdown"]
+ self.assertIn("Benchmarks only in baseline run", md)
+ self.assertIn("QueryClient.queryV0Removed", md)
+ s = r["summary"]
+ self.assertEqual(s.get("matched"), "1")
+
+ def test_empty_intersection(self) -> None:
+ r = _run_compare("empty_intersection")
+ self.assertEqual(r["returncode"], 0)
+ md = r["markdown"]
+ self.assertIn("_No benchmarks matched between baseline and PR._", md)
+    # Both unique-side sections still appear as <details> blocks.
+ self.assertIn("Benchmarks only in PR run", md)
+ self.assertIn("Benchmarks only in baseline run", md)
+ s = r["summary"]
+ self.assertEqual(s.get("matched"), "0")
+ self.assertEqual(s.get("regressions"), "0")
+ self.assertEqual(s.get("improvements"), "0")
+
+ def test_threshold_knob(self) -> None:
+ # The same fixture flips from "regression" to "ok" when the
+ # threshold is widened past the largest delta.
+ strict = _run_compare("all_regressions", threshold=10.0)
+ lenient = _run_compare("all_regressions", threshold=200.0)
+ self.assertGreater(int(strict["summary"]["regressions"]), 0)
+ self.assertEqual(lenient["summary"]["regressions"], "0")
+ self.assertIn("no changes", lenient["markdown"])
+
+
+if __name__ == "__main__":
+ # `-v` prints each scenario name so failures are obvious in CI logs.
+ unittest.main(verbosity=2)
diff --git a/.github/scripts/test_data/README.md b/.github/scripts/test_data/README.md
new file mode 100644
index 000000000..4a0938f09
--- /dev/null
+++ b/.github/scripts/test_data/README.md
@@ -0,0 +1,33 @@
+# `compare-jmh.py` test fixtures
+
+Each subdirectory is a self-contained scenario for `compare-jmh.py`. The
+layout is always:
+
+```
+<case>/
+ baseline/jmh-results-baseline.json
+ current/jmh-results-current.json
+```
+
+`compare-jmh.py` discovers result files by globbing for
+`jmh-results-*.json` under the `--baseline` and `--current` directories,
+so any filename starting with `jmh-results-` works.
+
+JSON records mirror the structure produced by JMH 1.37's
+`ResultFormatType.JSON`: an array of objects with `benchmark`, `params`,
+`primaryMetric.{score,scoreError,scoreUnit}`, and optionally
+`secondaryMetrics["gc.alloc.rate.norm"]`.
+
+| Case | What it covers |
+|---|---|
+| `all_improvements` | Multiple benchmarks where both Time and Alloc/op fall well below the threshold; report should be all ⬆️ / 🟢 with no failure. |
+| `all_regressions` | Multiple benchmarks where Time and/or Alloc/op rise well above the threshold; report should be ⬇️ / 🔴, script flags every row as `REGRESSION`, summary `regressions > 0`. |
+| `mixed` | A blend of regressions, improvements, and within-noise rows including multiple variants of the same benchmark — verifies the bucket ordering and the param-discriminator (`[limit=…]`) logic. |
+| `no_alloc` | Records with no `gc.alloc.rate.norm` at all; verifies the script falls back to Time-only and doesn't render `🔴`/`🟢` markers in the absence of the key (plus prints the diagnostic warning). |
+| `noise_only` | All deltas are inside ±10%; report should be the ✅ "no changes" header and every row should carry the ➖ neutral arrow. |
+| `only_in_pr` | A benchmark appears in `current` but not in `baseline`; verifies the "Benchmarks only in PR run" `<details>` block. |
+| `only_in_baseline` | The mirror case — verifies the "Benchmarks only in baseline run" block. |
+| `empty_intersection` | `baseline` and `current` contain different sets of benchmarks so no rows are matched; verifies the "_No benchmarks matched_" path. |
+
+The companion runner `test_compare_jmh.py` exercises every case and
+checks both the rendered markdown and the `--summary-output` counters.
diff --git a/.github/scripts/test_data/all_improvements/baseline/jmh-results-baseline.json b/.github/scripts/test_data/all_improvements/baseline/jmh-results-baseline.json
new file mode 100644
index 000000000..ff9ab2a4d
--- /dev/null
+++ b/.github/scripts/test_data/all_improvements/baseline/jmh-results-baseline.json
@@ -0,0 +1,20 @@
+[
+ {
+ "benchmark": "com.clickhouse.benchmark.clients.QueryClient.queryV2",
+ "mode": "sample",
+ "params": {"limit": "10000"},
+ "primaryMetric": {"score": 20.0, "scoreError": 0.5, "scoreUnit": "ms/op"},
+ "secondaryMetrics": {
+ "gc.alloc.rate.norm": {"score": 2048.0, "scoreError": 12.0, "scoreUnit": "B/op"}
+ }
+ },
+ {
+ "benchmark": "com.clickhouse.benchmark.clients.InsertClient.insertV2",
+ "mode": "sample",
+ "params": {"limit": "10000"},
+ "primaryMetric": {"score": 80.0, "scoreError": 1.5, "scoreUnit": "ms/op"},
+ "secondaryMetrics": {
+ "gc.alloc.rate.norm": {"score": 4096.0, "scoreError": 24.0, "scoreUnit": "B/op"}
+ }
+ }
+]
diff --git a/.github/scripts/test_data/all_improvements/current/jmh-results-current.json b/.github/scripts/test_data/all_improvements/current/jmh-results-current.json
new file mode 100644
index 000000000..034fa71f0
--- /dev/null
+++ b/.github/scripts/test_data/all_improvements/current/jmh-results-current.json
@@ -0,0 +1,20 @@
+[
+ {
+ "benchmark": "com.clickhouse.benchmark.clients.QueryClient.queryV2",
+ "mode": "sample",
+ "params": {"limit": "10000"},
+ "primaryMetric": {"score": 16.0, "scoreError": 0.4, "scoreUnit": "ms/op"},
+ "secondaryMetrics": {
+ "gc.alloc.rate.norm": {"score": 1536.0, "scoreError": 10.0, "scoreUnit": "B/op"}
+ }
+ },
+ {
+ "benchmark": "com.clickhouse.benchmark.clients.InsertClient.insertV2",
+ "mode": "sample",
+ "params": {"limit": "10000"},
+ "primaryMetric": {"score": 64.0, "scoreError": 1.3, "scoreUnit": "ms/op"},
+ "secondaryMetrics": {
+ "gc.alloc.rate.norm": {"score": 3200.0, "scoreError": 20.0, "scoreUnit": "B/op"}
+ }
+ }
+]
diff --git a/.github/scripts/test_data/all_regressions/baseline/jmh-results-baseline.json b/.github/scripts/test_data/all_regressions/baseline/jmh-results-baseline.json
new file mode 100644
index 000000000..ff9ab2a4d
--- /dev/null
+++ b/.github/scripts/test_data/all_regressions/baseline/jmh-results-baseline.json
@@ -0,0 +1,20 @@
+[
+ {
+ "benchmark": "com.clickhouse.benchmark.clients.QueryClient.queryV2",
+ "mode": "sample",
+ "params": {"limit": "10000"},
+ "primaryMetric": {"score": 20.0, "scoreError": 0.5, "scoreUnit": "ms/op"},
+ "secondaryMetrics": {
+ "gc.alloc.rate.norm": {"score": 2048.0, "scoreError": 12.0, "scoreUnit": "B/op"}
+ }
+ },
+ {
+ "benchmark": "com.clickhouse.benchmark.clients.InsertClient.insertV2",
+ "mode": "sample",
+ "params": {"limit": "10000"},
+ "primaryMetric": {"score": 80.0, "scoreError": 1.5, "scoreUnit": "ms/op"},
+ "secondaryMetrics": {
+ "gc.alloc.rate.norm": {"score": 4096.0, "scoreError": 24.0, "scoreUnit": "B/op"}
+ }
+ }
+]
diff --git a/.github/scripts/test_data/all_regressions/current/jmh-results-current.json b/.github/scripts/test_data/all_regressions/current/jmh-results-current.json
new file mode 100644
index 000000000..f6f1aff82
--- /dev/null
+++ b/.github/scripts/test_data/all_regressions/current/jmh-results-current.json
@@ -0,0 +1,20 @@
+[
+ {
+ "benchmark": "com.clickhouse.benchmark.clients.QueryClient.queryV2",
+ "mode": "sample",
+ "params": {"limit": "10000"},
+ "primaryMetric": {"score": 26.0, "scoreError": 0.6, "scoreUnit": "ms/op"},
+ "secondaryMetrics": {
+ "gc.alloc.rate.norm": {"score": 3300.0, "scoreError": 18.0, "scoreUnit": "B/op"}
+ }
+ },
+ {
+ "benchmark": "com.clickhouse.benchmark.clients.InsertClient.insertV2",
+ "mode": "sample",
+ "params": {"limit": "10000"},
+ "primaryMetric": {"score": 100.0, "scoreError": 1.8, "scoreUnit": "ms/op"},
+ "secondaryMetrics": {
+ "gc.alloc.rate.norm": {"score": 5400.0, "scoreError": 30.0, "scoreUnit": "B/op"}
+ }
+ }
+]
diff --git a/.github/scripts/test_data/empty_intersection/baseline/jmh-results-baseline.json b/.github/scripts/test_data/empty_intersection/baseline/jmh-results-baseline.json
new file mode 100644
index 000000000..e2483a43c
--- /dev/null
+++ b/.github/scripts/test_data/empty_intersection/baseline/jmh-results-baseline.json
@@ -0,0 +1,11 @@
+[
+ {
+ "benchmark": "com.clickhouse.benchmark.clients.QueryClient.queryOld",
+ "mode": "sample",
+ "params": {"limit": "10000"},
+ "primaryMetric": {"score": 20.0, "scoreError": 0.5, "scoreUnit": "ms/op"},
+ "secondaryMetrics": {
+ "gc.alloc.rate.norm": {"score": 2048.0, "scoreError": 12.0, "scoreUnit": "B/op"}
+ }
+ }
+]
diff --git a/.github/scripts/test_data/empty_intersection/current/jmh-results-current.json b/.github/scripts/test_data/empty_intersection/current/jmh-results-current.json
new file mode 100644
index 000000000..40b2e0d86
--- /dev/null
+++ b/.github/scripts/test_data/empty_intersection/current/jmh-results-current.json
@@ -0,0 +1,11 @@
+[
+ {
+ "benchmark": "com.clickhouse.benchmark.clients.QueryClient.queryNew",
+ "mode": "sample",
+ "params": {"limit": "10000"},
+ "primaryMetric": {"score": 18.0, "scoreError": 0.5, "scoreUnit": "ms/op"},
+ "secondaryMetrics": {
+ "gc.alloc.rate.norm": {"score": 1900.0, "scoreError": 11.0, "scoreUnit": "B/op"}
+ }
+ }
+]
diff --git a/.github/scripts/test_data/mixed/baseline/jmh-results-baseline.json b/.github/scripts/test_data/mixed/baseline/jmh-results-baseline.json
new file mode 100644
index 000000000..c967a2f66
--- /dev/null
+++ b/.github/scripts/test_data/mixed/baseline/jmh-results-baseline.json
@@ -0,0 +1,38 @@
+[
+ {
+ "benchmark": "com.clickhouse.benchmark.clients.QueryClient.queryV2",
+ "mode": "sample",
+ "params": {"datasetSourceName": "file://default.csv", "limit": "10000"},
+ "primaryMetric": {"score": 20.0, "scoreError": 0.5, "scoreUnit": "ms/op"},
+ "secondaryMetrics": {
+ "gc.alloc.rate.norm": {"score": 2048.0, "scoreError": 12.0, "scoreUnit": "B/op"}
+ }
+ },
+ {
+ "benchmark": "com.clickhouse.benchmark.clients.QueryClient.queryV2",
+ "mode": "sample",
+ "params": {"datasetSourceName": "file://default.csv", "limit": "100000"},
+ "primaryMetric": {"score": 180.0, "scoreError": 2.0, "scoreUnit": "ms/op"},
+ "secondaryMetrics": {
+ "gc.alloc.rate.norm": {"score": 16384.0, "scoreError": 96.0, "scoreUnit": "B/op"}
+ }
+ },
+ {
+ "benchmark": "com.clickhouse.benchmark.clients.InsertClient.insertV2",
+ "mode": "sample",
+ "params": {"limit": "10000"},
+ "primaryMetric": {"score": 80.0, "scoreError": 1.5, "scoreUnit": "ms/op"},
+ "secondaryMetrics": {
+ "gc.alloc.rate.norm": {"score": 4096.0, "scoreError": 24.0, "scoreUnit": "B/op"}
+ }
+ },
+ {
+ "benchmark": "com.clickhouse.benchmark.clients.JDBCQuery.selectJDBCV2",
+ "mode": "sample",
+ "params": {"limit": "10000"},
+ "primaryMetric": {"score": 30.0, "scoreError": 0.7, "scoreUnit": "ms/op"},
+ "secondaryMetrics": {
+ "gc.alloc.rate.norm": {"score": 6144.0, "scoreError": 36.0, "scoreUnit": "B/op"}
+ }
+ }
+]
diff --git a/.github/scripts/test_data/mixed/current/jmh-results-current.json b/.github/scripts/test_data/mixed/current/jmh-results-current.json
new file mode 100644
index 000000000..a115b036b
--- /dev/null
+++ b/.github/scripts/test_data/mixed/current/jmh-results-current.json
@@ -0,0 +1,38 @@
+[
+ {
+ "benchmark": "com.clickhouse.benchmark.clients.QueryClient.queryV2",
+ "mode": "sample",
+ "params": {"datasetSourceName": "file://default.csv", "limit": "10000"},
+ "primaryMetric": {"score": 24.0, "scoreError": 0.6, "scoreUnit": "ms/op"},
+ "secondaryMetrics": {
+ "gc.alloc.rate.norm": {"score": 2100.0, "scoreError": 13.0, "scoreUnit": "B/op"}
+ }
+ },
+ {
+ "benchmark": "com.clickhouse.benchmark.clients.QueryClient.queryV2",
+ "mode": "sample",
+ "params": {"datasetSourceName": "file://default.csv", "limit": "100000"},
+ "primaryMetric": {"score": 184.0, "scoreError": 2.0, "scoreUnit": "ms/op"},
+ "secondaryMetrics": {
+ "gc.alloc.rate.norm": {"score": 24576.0, "scoreError": 110.0, "scoreUnit": "B/op"}
+ }
+ },
+ {
+ "benchmark": "com.clickhouse.benchmark.clients.InsertClient.insertV2",
+ "mode": "sample",
+ "params": {"limit": "10000"},
+ "primaryMetric": {"score": 60.0, "scoreError": 1.2, "scoreUnit": "ms/op"},
+ "secondaryMetrics": {
+ "gc.alloc.rate.norm": {"score": 3200.0, "scoreError": 20.0, "scoreUnit": "B/op"}
+ }
+ },
+ {
+ "benchmark": "com.clickhouse.benchmark.clients.JDBCQuery.selectJDBCV2",
+ "mode": "sample",
+ "params": {"limit": "10000"},
+ "primaryMetric": {"score": 31.0, "scoreError": 0.7, "scoreUnit": "ms/op"},
+ "secondaryMetrics": {
+ "gc.alloc.rate.norm": {"score": 6200.0, "scoreError": 36.0, "scoreUnit": "B/op"}
+ }
+ }
+]
diff --git a/.github/scripts/test_data/no_alloc/baseline/jmh-results-baseline.json b/.github/scripts/test_data/no_alloc/baseline/jmh-results-baseline.json
new file mode 100644
index 000000000..1358026eb
--- /dev/null
+++ b/.github/scripts/test_data/no_alloc/baseline/jmh-results-baseline.json
@@ -0,0 +1,20 @@
+[
+ {
+ "benchmark": "com.clickhouse.benchmark.clients.QueryClient.queryV2",
+ "mode": "sample",
+ "params": {"limit": "10000"},
+ "primaryMetric": {"score": 20.0, "scoreError": 0.5, "scoreUnit": "ms/op"},
+ "secondaryMetrics": {
+ "p0.50": {"score": 19.5, "scoreError": 0.0, "scoreUnit": "ms/op"}
+ }
+ },
+ {
+ "benchmark": "com.clickhouse.benchmark.clients.InsertClient.insertV2",
+ "mode": "sample",
+ "params": {"limit": "10000"},
+ "primaryMetric": {"score": 80.0, "scoreError": 1.5, "scoreUnit": "ms/op"},
+ "secondaryMetrics": {
+ "p0.50": {"score": 79.0, "scoreError": 0.0, "scoreUnit": "ms/op"}
+ }
+ }
+]
diff --git a/.github/scripts/test_data/no_alloc/current/jmh-results-current.json b/.github/scripts/test_data/no_alloc/current/jmh-results-current.json
new file mode 100644
index 000000000..76629e32a
--- /dev/null
+++ b/.github/scripts/test_data/no_alloc/current/jmh-results-current.json
@@ -0,0 +1,20 @@
+[
+ {
+ "benchmark": "com.clickhouse.benchmark.clients.QueryClient.queryV2",
+ "mode": "sample",
+ "params": {"limit": "10000"},
+ "primaryMetric": {"score": 26.0, "scoreError": 0.6, "scoreUnit": "ms/op"},
+ "secondaryMetrics": {
+ "p0.50": {"score": 25.0, "scoreError": 0.0, "scoreUnit": "ms/op"}
+ }
+ },
+ {
+ "benchmark": "com.clickhouse.benchmark.clients.InsertClient.insertV2",
+ "mode": "sample",
+ "params": {"limit": "10000"},
+ "primaryMetric": {"score": 82.0, "scoreError": 1.4, "scoreUnit": "ms/op"},
+ "secondaryMetrics": {
+ "p0.50": {"score": 81.0, "scoreError": 0.0, "scoreUnit": "ms/op"}
+ }
+ }
+]
diff --git a/.github/scripts/test_data/noise_only/baseline/jmh-results-baseline.json b/.github/scripts/test_data/noise_only/baseline/jmh-results-baseline.json
new file mode 100644
index 000000000..ff9ab2a4d
--- /dev/null
+++ b/.github/scripts/test_data/noise_only/baseline/jmh-results-baseline.json
@@ -0,0 +1,20 @@
+[
+ {
+ "benchmark": "com.clickhouse.benchmark.clients.QueryClient.queryV2",
+ "mode": "sample",
+ "params": {"limit": "10000"},
+ "primaryMetric": {"score": 20.0, "scoreError": 0.5, "scoreUnit": "ms/op"},
+ "secondaryMetrics": {
+ "gc.alloc.rate.norm": {"score": 2048.0, "scoreError": 12.0, "scoreUnit": "B/op"}
+ }
+ },
+ {
+ "benchmark": "com.clickhouse.benchmark.clients.InsertClient.insertV2",
+ "mode": "sample",
+ "params": {"limit": "10000"},
+ "primaryMetric": {"score": 80.0, "scoreError": 1.5, "scoreUnit": "ms/op"},
+ "secondaryMetrics": {
+ "gc.alloc.rate.norm": {"score": 4096.0, "scoreError": 24.0, "scoreUnit": "B/op"}
+ }
+ }
+]
diff --git a/.github/scripts/test_data/noise_only/current/jmh-results-current.json b/.github/scripts/test_data/noise_only/current/jmh-results-current.json
new file mode 100644
index 000000000..1b54b5bef
--- /dev/null
+++ b/.github/scripts/test_data/noise_only/current/jmh-results-current.json
@@ -0,0 +1,20 @@
+[
+ {
+ "benchmark": "com.clickhouse.benchmark.clients.QueryClient.queryV2",
+ "mode": "sample",
+ "params": {"limit": "10000"},
+ "primaryMetric": {"score": 20.6, "scoreError": 0.5, "scoreUnit": "ms/op"},
+ "secondaryMetrics": {
+ "gc.alloc.rate.norm": {"score": 2080.0, "scoreError": 12.0, "scoreUnit": "B/op"}
+ }
+ },
+ {
+ "benchmark": "com.clickhouse.benchmark.clients.InsertClient.insertV2",
+ "mode": "sample",
+ "params": {"limit": "10000"},
+ "primaryMetric": {"score": 78.5, "scoreError": 1.5, "scoreUnit": "ms/op"},
+ "secondaryMetrics": {
+ "gc.alloc.rate.norm": {"score": 4170.0, "scoreError": 24.0, "scoreUnit": "B/op"}
+ }
+ }
+]
diff --git a/.github/scripts/test_data/only_in_baseline/baseline/jmh-results-baseline.json b/.github/scripts/test_data/only_in_baseline/baseline/jmh-results-baseline.json
new file mode 100644
index 000000000..32c7d3f81
--- /dev/null
+++ b/.github/scripts/test_data/only_in_baseline/baseline/jmh-results-baseline.json
@@ -0,0 +1,20 @@
+[
+ {
+ "benchmark": "com.clickhouse.benchmark.clients.QueryClient.queryV2",
+ "mode": "sample",
+ "params": {"limit": "10000"},
+ "primaryMetric": {"score": 20.0, "scoreError": 0.5, "scoreUnit": "ms/op"},
+ "secondaryMetrics": {
+ "gc.alloc.rate.norm": {"score": 2048.0, "scoreError": 12.0, "scoreUnit": "B/op"}
+ }
+ },
+ {
+ "benchmark": "com.clickhouse.benchmark.clients.QueryClient.queryV0Removed",
+ "mode": "sample",
+ "params": {"limit": "10000"},
+ "primaryMetric": {"score": 30.0, "scoreError": 0.7, "scoreUnit": "ms/op"},
+ "secondaryMetrics": {
+ "gc.alloc.rate.norm": {"score": 3072.0, "scoreError": 18.0, "scoreUnit": "B/op"}
+ }
+ }
+]
diff --git a/.github/scripts/test_data/only_in_baseline/current/jmh-results-current.json b/.github/scripts/test_data/only_in_baseline/current/jmh-results-current.json
new file mode 100644
index 000000000..e9611885b
--- /dev/null
+++ b/.github/scripts/test_data/only_in_baseline/current/jmh-results-current.json
@@ -0,0 +1,11 @@
+[
+ {
+ "benchmark": "com.clickhouse.benchmark.clients.QueryClient.queryV2",
+ "mode": "sample",
+ "params": {"limit": "10000"},
+ "primaryMetric": {"score": 20.4, "scoreError": 0.5, "scoreUnit": "ms/op"},
+ "secondaryMetrics": {
+ "gc.alloc.rate.norm": {"score": 2050.0, "scoreError": 12.0, "scoreUnit": "B/op"}
+ }
+ }
+]
diff --git a/.github/scripts/test_data/only_in_pr/baseline/jmh-results-baseline.json b/.github/scripts/test_data/only_in_pr/baseline/jmh-results-baseline.json
new file mode 100644
index 000000000..5c589d4c2
--- /dev/null
+++ b/.github/scripts/test_data/only_in_pr/baseline/jmh-results-baseline.json
@@ -0,0 +1,11 @@
+[
+ {
+ "benchmark": "com.clickhouse.benchmark.clients.QueryClient.queryV2",
+ "mode": "sample",
+ "params": {"limit": "10000"},
+ "primaryMetric": {"score": 20.0, "scoreError": 0.5, "scoreUnit": "ms/op"},
+ "secondaryMetrics": {
+ "gc.alloc.rate.norm": {"score": 2048.0, "scoreError": 12.0, "scoreUnit": "B/op"}
+ }
+ }
+]
diff --git a/.github/scripts/test_data/only_in_pr/current/jmh-results-current.json b/.github/scripts/test_data/only_in_pr/current/jmh-results-current.json
new file mode 100644
index 000000000..3551830f2
--- /dev/null
+++ b/.github/scripts/test_data/only_in_pr/current/jmh-results-current.json
@@ -0,0 +1,20 @@
+[
+ {
+ "benchmark": "com.clickhouse.benchmark.clients.QueryClient.queryV2",
+ "mode": "sample",
+ "params": {"limit": "10000"},
+ "primaryMetric": {"score": 20.4, "scoreError": 0.5, "scoreUnit": "ms/op"},
+ "secondaryMetrics": {
+ "gc.alloc.rate.norm": {"score": 2050.0, "scoreError": 12.0, "scoreUnit": "B/op"}
+ }
+ },
+ {
+ "benchmark": "com.clickhouse.benchmark.clients.QueryClient.queryV3New",
+ "mode": "sample",
+ "params": {"limit": "10000"},
+ "primaryMetric": {"score": 15.0, "scoreError": 0.4, "scoreUnit": "ms/op"},
+ "secondaryMetrics": {
+ "gc.alloc.rate.norm": {"score": 1500.0, "scoreError": 10.0, "scoreUnit": "B/op"}
+ }
+ }
+]
diff --git a/CHANGELOG.md b/CHANGELOG.md
index f2fae7798..c89550cbf 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,4 @@
-## 0.9.9
+## 0.10.9
### New Features