From d765dc16510af8496cb4fcb4359abee4051aa33c Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Tue, 5 May 2026 19:07:25 +0200 Subject: [PATCH 1/3] re._compiler: bytearray slice fill in _optimize_charset --- Lib/re/_compiler.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Lib/re/_compiler.py b/Lib/re/_compiler.py index c2ca8e25abe34d..ec1f9c3788d89e 100644 --- a/Lib/re/_compiler.py +++ b/Lib/re/_compiler.py @@ -280,8 +280,11 @@ def _optimize_charset(charset, iscased=None, fixup=None, fixes=None): if not hascased: hascased = any(map(iscased, r)) else: - for i in r: - charmap[i] = 1 + end = av[1] + 1 + if end > len(charmap): + # Trigger the IndexError growth path below. + raise IndexError + charmap[av[0]:end] = b'\x01' * (end - av[0]) elif op is NEGATE: out.append((op, av)) elif op is CATEGORY and tail and (CATEGORY, CH_NEGATE[av]) in tail: From e7ca3efb32c45cc1250dbc4c380d87dc9ef17fd7 Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Tue, 5 May 2026 20:40:29 +0200 Subject: [PATCH 2/3] blurb --- .../Library/2026-05-05-18-39-52.gh-issue-149427.0ggedQ.rst | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2026-05-05-18-39-52.gh-issue-149427.0ggedQ.rst diff --git a/Misc/NEWS.d/next/Library/2026-05-05-18-39-52.gh-issue-149427.0ggedQ.rst b/Misc/NEWS.d/next/Library/2026-05-05-18-39-52.gh-issue-149427.0ggedQ.rst new file mode 100644 index 00000000000000..38d6c21f1ef988 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-05-05-18-39-52.gh-issue-149427.0ggedQ.rst @@ -0,0 +1,3 @@ +Speed up :func:`re.compile` of patterns with character ranges by replacing +the per-byte loop in :mod:`!re._compiler` with a single bytearray slice +fill. From 56e25cd1f854d9397c0bf5791f021a969401d98f Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Tue, 19 May 2026 09:33:46 +0200 Subject: [PATCH 3/3] review comment --- Lib/re/_compiler.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Lib/re/_compiler.py b/Lib/re/_compiler.py index ec1f9c3788d89e..0c76cef5f600be 100644 --- a/Lib/re/_compiler.py +++ b/Lib/re/_compiler.py @@ -266,7 +266,9 @@ def _optimize_charset(charset, iscased=None, fixup=None, fixes=None): else: charmap[av] = 1 elif op is RANGE: - r = range(av[0], av[1]+1) + start, end = av + end += 1 + r = range(start, end) if fixup: # IGNORECASE and not LOCALE if fixes: for i in map(fixup, r): @@ -280,11 +282,10 @@ def _optimize_charset(charset, iscased=None, fixup=None, fixes=None): if not hascased: hascased = any(map(iscased, r)) else: - end = av[1] + 1 if end > len(charmap): # Trigger the IndexError growth path below. raise IndexError - charmap[av[0]:end] = b'\x01' * (end - av[0]) + charmap[start:end] = b'\x01' * (end - start) elif op is NEGATE: out.append((op, av)) elif op is CATEGORY and tail and (CATEGORY, CH_NEGATE[av]) in tail: