diff --git a/.drone.jsonnet b/.drone.jsonnet
index 4aaae745..6e443018 100644
--- a/.drone.jsonnet
+++ b/.drone.jsonnet
@@ -1,8 +1,9 @@
 # Copyright 2022, 2023 Peter Dimov
+# Copyright 2025 - 2026 Matt Borland
 # Distributed under the Boost Software License, Version 1.0.
 # https://www.boost.org/LICENSE_1_0.txt
 
-local library = "decimal";
+local library = "int128";
 
 local triggers =
 {
@@ -23,9 +24,8 @@ local linux_pipeline(name, image, environment, packages = "", sources = [], arch
         os: "linux",
         arch: arch
     },
-    clone:
-    {
-        retries: 5,
+    "clone": {
+       "retries": 5
     },
     steps:
     [
@@ -37,7 +37,9 @@ local linux_pipeline(name, image, environment, packages = "", sources = [], arch
             commands:
             [
                 'set -e',
-                'wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add -',
+                'echo $DRONE_STAGE_MACHINE',
+                'uname -a',
+                'curl -sSL --retry 5 https://apt.llvm.org/llvm-snapshot.gpg.key | sudo gpg --dearmor -o /etc/apt/trusted.gpg.d/llvm-snapshot.gpg',
             ] +
             (if sources != [] then [ ('apt-add-repository "' + source + '"') for source in sources ] else []) +
             (if packages != "" then [ 'apt-get update', 'apt-get -y install ' + packages ] else []) +
@@ -267,6 +269,34 @@ local windows_pipeline(name, image, environment, arch = "amd64") =
         "g++-14-multilib",
     ),
 
+    linux_pipeline(
+        "Linux 26.04 GCC 15 32",
+        "cppalliance/droneubuntu2604:1",
+        { TOOLSET: 'gcc', COMPILER: 'g++-15', CXXSTD: '03,11,14,17,20,23', ADDRMD: '32', CXXFLAGS: "-fexcess-precision=fast" },
+        "g++-15-multilib",
+    ),
+
+    linux_pipeline(
+        "Linux 26.04 GCC 15 64",
+        "cppalliance/droneubuntu2604:1",
+        { TOOLSET: 'gcc', COMPILER: 'g++-15', CXXSTD: '03,11,14,17,20,23', ADDRMD: '64', CXXFLAGS: "-fexcess-precision=fast" },
+        "g++-15-multilib",
+    ),
+
+    linux_pipeline(
+        "Linux 26.04 GCC 16 32",
+        "cppalliance/droneubuntu2604:1",
+        { TOOLSET: 'gcc', COMPILER: 'g++-16', CXXSTD: '03,11,14,17,20,23', ADDRMD: '32', CXXFLAGS: "-fexcess-precision=fast" },
+        "g++-16-multilib",
+    ),
+
+    linux_pipeline(
+        "Linux 26.04 GCC 16 64",
+        "cppalliance/droneubuntu2604:1",
+        { TOOLSET: 'gcc', COMPILER: 'g++-16', CXXSTD: '03,11,14,17,20,23', ADDRMD: '64', CXXFLAGS: "-fexcess-precision=fast" },
+        "g++-16-multilib libabsl-dev",
+    ),
+
     linux_pipeline(
         "Linux 18.04 Clang 5.0",
         "cppalliance/droneubuntu1804:1",
@@ -386,17 +416,27 @@ local windows_pipeline(name, image, environment, arch = "amd64") =
     ),
 
     linux_pipeline(
-        "Linux 24.04 Clang 20 ASAN",
+        "Linux 24.04 Clang 21",
         "cppalliance/droneubuntu2404:1",
-        { TOOLSET: 'clang', COMPILER: 'clang++-20', CXXSTD: '03,11,14,17,20,23,2c' } + asan,
-        "clang-20",
-        ["deb http://apt.llvm.org/noble/ llvm-toolchain-noble-20 main"],
+        { TOOLSET: 'clang', COMPILER: 'clang++-21', CXXSTD: '17,20,2b' },
+        "clang-21",
+        ["deb http://apt.llvm.org/noble/ llvm-toolchain-noble-21 main"],
+    ),
+
+    linux_pipeline(
+        "Linux 24.04 Clang 21 UBSAN",
+        "cppalliance/droneubuntu2404:1",
+        { TOOLSET: 'clang', COMPILER: 'clang++-21', CXXSTD: '17,20,2b' } + ubsan,
+        "clang-21",
+        ["deb http://apt.llvm.org/noble/ llvm-toolchain-noble-21 main"],
     ),
 
-    macos_pipeline(
-        "MacOS Xcode 14.3.1",
-        { TOOLSET: 'clang', COMPILER: 'clang++', CXXSTD: '03,11,14,17,20,2b' } + asan,
-        xcode_version = "14.3.1", osx_version = "sonoma", arch = "arm64",
+    linux_pipeline(
+        "Linux 24.04 Clang 21 ASAN",
+        "cppalliance/droneubuntu2404:1",
+        { TOOLSET: 'clang', COMPILER: 'clang++-21', CXXSTD: '17,20,2b' } + asan,
+        "clang-21",
+        ["deb http://apt.llvm.org/noble/ llvm-toolchain-noble-21 main"],
     ),
 
     windows_pipeline(
diff --git a/doc/modules/ROOT/images/i128_graphs/linux/ARM32_benchmarks.png b/doc/modules/ROOT/images/i128_graphs/linux/ARM32_benchmarks.png
index dc6a7617..c3bcc0cb 100644
Binary files a/doc/modules/ROOT/images/i128_graphs/linux/ARM32_benchmarks.png and b/doc/modules/ROOT/images/i128_graphs/linux/ARM32_benchmarks.png differ
diff --git a/doc/modules/ROOT/images/i128_graphs/linux/ARM32_relative_performance.png b/doc/modules/ROOT/images/i128_graphs/linux/ARM32_relative_performance.png
index 83c8c583..f415d0f3 100644
Binary files a/doc/modules/ROOT/images/i128_graphs/linux/ARM32_relative_performance.png and b/doc/modules/ROOT/images/i128_graphs/linux/ARM32_relative_performance.png differ
diff --git a/doc/modules/ROOT/images/i128_graphs/linux/ARM64_benchmarks.png b/doc/modules/ROOT/images/i128_graphs/linux/ARM64_benchmarks.png
index 24535136..9763188a 100644
Binary files a/doc/modules/ROOT/images/i128_graphs/linux/ARM64_benchmarks.png and b/doc/modules/ROOT/images/i128_graphs/linux/ARM64_benchmarks.png differ
diff --git a/doc/modules/ROOT/images/i128_graphs/linux/ARM64_relative_performance.png b/doc/modules/ROOT/images/i128_graphs/linux/ARM64_relative_performance.png
index 53de90ef..a0850e12 100644
Binary files a/doc/modules/ROOT/images/i128_graphs/linux/ARM64_relative_performance.png and b/doc/modules/ROOT/images/i128_graphs/linux/ARM64_relative_performance.png differ
diff --git a/doc/modules/ROOT/images/i128_graphs/linux/ppc64le_benchmarks.png b/doc/modules/ROOT/images/i128_graphs/linux/ppc64le_benchmarks.png
index fcda4936..be27885c 100644
Binary files a/doc/modules/ROOT/images/i128_graphs/linux/ppc64le_benchmarks.png and b/doc/modules/ROOT/images/i128_graphs/linux/ppc64le_benchmarks.png differ
diff --git a/doc/modules/ROOT/images/i128_graphs/linux/ppc64le_relative_performance.png b/doc/modules/ROOT/images/i128_graphs/linux/ppc64le_relative_performance.png
index d3882063..9bc0fa08 100644
Binary files a/doc/modules/ROOT/images/i128_graphs/linux/ppc64le_relative_performance.png and b/doc/modules/ROOT/images/i128_graphs/linux/ppc64le_relative_performance.png differ
diff --git a/doc/modules/ROOT/images/i128_graphs/linux/s390x_benchmarks.png b/doc/modules/ROOT/images/i128_graphs/linux/s390x_benchmarks.png
index 4c6e4a0b..a4acb9e8 100644
Binary files a/doc/modules/ROOT/images/i128_graphs/linux/s390x_benchmarks.png and b/doc/modules/ROOT/images/i128_graphs/linux/s390x_benchmarks.png differ
diff --git a/doc/modules/ROOT/images/i128_graphs/linux/s390x_relative_performance.png b/doc/modules/ROOT/images/i128_graphs/linux/s390x_relative_performance.png
index 6e4afd1f..48b13255 100644
Binary files a/doc/modules/ROOT/images/i128_graphs/linux/s390x_relative_performance.png and b/doc/modules/ROOT/images/i128_graphs/linux/s390x_relative_performance.png differ
diff --git a/doc/modules/ROOT/images/i128_graphs/linux/x64_benchmarks.png b/doc/modules/ROOT/images/i128_graphs/linux/x64_benchmarks.png
index 04006510..a617985c 100644
Binary files a/doc/modules/ROOT/images/i128_graphs/linux/x64_benchmarks.png and b/doc/modules/ROOT/images/i128_graphs/linux/x64_benchmarks.png differ
diff --git a/doc/modules/ROOT/images/i128_graphs/linux/x64_relative_performance.png b/doc/modules/ROOT/images/i128_graphs/linux/x64_relative_performance.png
index b0c804dc..ff8ea077 100644
Binary files a/doc/modules/ROOT/images/i128_graphs/linux/x64_relative_performance.png and b/doc/modules/ROOT/images/i128_graphs/linux/x64_relative_performance.png differ
diff --git a/doc/modules/ROOT/images/i128_graphs/linux/x86_benchmarks.png b/doc/modules/ROOT/images/i128_graphs/linux/x86_benchmarks.png
index 21d31774..ba00ac99 100644
Binary files a/doc/modules/ROOT/images/i128_graphs/linux/x86_benchmarks.png and b/doc/modules/ROOT/images/i128_graphs/linux/x86_benchmarks.png differ
diff --git a/doc/modules/ROOT/images/i128_graphs/linux/x86_relative_performance.png b/doc/modules/ROOT/images/i128_graphs/linux/x86_relative_performance.png
index 426e25e2..070f0ec9 100644
Binary files a/doc/modules/ROOT/images/i128_graphs/linux/x86_relative_performance.png and b/doc/modules/ROOT/images/i128_graphs/linux/x86_relative_performance.png differ
diff --git a/doc/modules/ROOT/images/i128_graphs/macos/ARM64_benchmarks.png b/doc/modules/ROOT/images/i128_graphs/macos/ARM64_benchmarks.png
index 7c489191..a87c426d 100644
Binary files a/doc/modules/ROOT/images/i128_graphs/macos/ARM64_benchmarks.png and b/doc/modules/ROOT/images/i128_graphs/macos/ARM64_benchmarks.png differ
diff --git a/doc/modules/ROOT/images/i128_graphs/macos/ARM64_relative_performance.png b/doc/modules/ROOT/images/i128_graphs/macos/ARM64_relative_performance.png
index 623bb465..19eb0bd6 100644
Binary files a/doc/modules/ROOT/images/i128_graphs/macos/ARM64_relative_performance.png and b/doc/modules/ROOT/images/i128_graphs/macos/ARM64_relative_performance.png differ
diff --git a/doc/modules/ROOT/images/i128_graphs/macos/x64_benchmarks.png b/doc/modules/ROOT/images/i128_graphs/macos/x64_benchmarks.png
deleted file mode 100644
index e377a803..00000000
Binary files a/doc/modules/ROOT/images/i128_graphs/macos/x64_benchmarks.png and /dev/null differ
diff --git a/doc/modules/ROOT/images/i128_graphs/macos/x64_relative_performance.png b/doc/modules/ROOT/images/i128_graphs/macos/x64_relative_performance.png
deleted file mode 100644
index f33de2ca..00000000
Binary files a/doc/modules/ROOT/images/i128_graphs/macos/x64_relative_performance.png and /dev/null differ
diff --git a/doc/modules/ROOT/images/i128_graphs/windows/ARM64_benchmarks.png b/doc/modules/ROOT/images/i128_graphs/windows/ARM64_benchmarks.png
index 136cbe9e..1771b3af 100644
Binary files a/doc/modules/ROOT/images/i128_graphs/windows/ARM64_benchmarks.png and b/doc/modules/ROOT/images/i128_graphs/windows/ARM64_benchmarks.png differ
diff --git a/doc/modules/ROOT/images/i128_graphs/windows/ARM64_relative_performance.png b/doc/modules/ROOT/images/i128_graphs/windows/ARM64_relative_performance.png
index 02fb8df2..28156d8b 100644
Binary files a/doc/modules/ROOT/images/i128_graphs/windows/ARM64_relative_performance.png and b/doc/modules/ROOT/images/i128_graphs/windows/ARM64_relative_performance.png differ
diff --git a/doc/modules/ROOT/images/i128_graphs/windows/x64_benchmarks.png b/doc/modules/ROOT/images/i128_graphs/windows/x64_benchmarks.png
index 811ed34b..d12d4ad9 100644
Binary files a/doc/modules/ROOT/images/i128_graphs/windows/x64_benchmarks.png and b/doc/modules/ROOT/images/i128_graphs/windows/x64_benchmarks.png differ
diff --git a/doc/modules/ROOT/images/i128_graphs/windows/x64_relative_performance.png b/doc/modules/ROOT/images/i128_graphs/windows/x64_relative_performance.png
index 6d8d4b7b..44cab7da 100644
Binary files a/doc/modules/ROOT/images/i128_graphs/windows/x64_relative_performance.png and b/doc/modules/ROOT/images/i128_graphs/windows/x64_relative_performance.png differ
diff --git a/doc/modules/ROOT/images/i128_graphs/windows/x86_benchmarks.png b/doc/modules/ROOT/images/i128_graphs/windows/x86_benchmarks.png
index f267154e..f6061bff 100644
Binary files a/doc/modules/ROOT/images/i128_graphs/windows/x86_benchmarks.png and b/doc/modules/ROOT/images/i128_graphs/windows/x86_benchmarks.png differ
diff --git a/doc/modules/ROOT/images/i128_graphs/windows/x86_relative_performance.png b/doc/modules/ROOT/images/i128_graphs/windows/x86_relative_performance.png
index aadf4d5f..7b05c54e 100644
Binary files a/doc/modules/ROOT/images/i128_graphs/windows/x86_relative_performance.png and b/doc/modules/ROOT/images/i128_graphs/windows/x86_relative_performance.png differ
diff --git a/doc/modules/ROOT/images/u128_graphs/linux/ARM32_benchmarks.png b/doc/modules/ROOT/images/u128_graphs/linux/ARM32_benchmarks.png
index 35b756ef..656c35ad 100644
Binary files a/doc/modules/ROOT/images/u128_graphs/linux/ARM32_benchmarks.png and b/doc/modules/ROOT/images/u128_graphs/linux/ARM32_benchmarks.png differ
diff --git a/doc/modules/ROOT/images/u128_graphs/linux/ARM32_relative_performance.png b/doc/modules/ROOT/images/u128_graphs/linux/ARM32_relative_performance.png
index ab77fb2b..43f03412 100644
Binary files a/doc/modules/ROOT/images/u128_graphs/linux/ARM32_relative_performance.png and b/doc/modules/ROOT/images/u128_graphs/linux/ARM32_relative_performance.png differ
diff --git a/doc/modules/ROOT/images/u128_graphs/linux/ARM64_benchmarks.png b/doc/modules/ROOT/images/u128_graphs/linux/ARM64_benchmarks.png
index 7144e238..60353279 100644
Binary files a/doc/modules/ROOT/images/u128_graphs/linux/ARM64_benchmarks.png and b/doc/modules/ROOT/images/u128_graphs/linux/ARM64_benchmarks.png differ
diff --git a/doc/modules/ROOT/images/u128_graphs/linux/ARM64_relative_performance.png b/doc/modules/ROOT/images/u128_graphs/linux/ARM64_relative_performance.png
index 706a4de8..c86d7035 100644
Binary files a/doc/modules/ROOT/images/u128_graphs/linux/ARM64_relative_performance.png and b/doc/modules/ROOT/images/u128_graphs/linux/ARM64_relative_performance.png differ
diff --git a/doc/modules/ROOT/images/u128_graphs/linux/ppc64le_benchmarks.png b/doc/modules/ROOT/images/u128_graphs/linux/ppc64le_benchmarks.png
index ad886c80..a2142d97 100644
Binary files a/doc/modules/ROOT/images/u128_graphs/linux/ppc64le_benchmarks.png and b/doc/modules/ROOT/images/u128_graphs/linux/ppc64le_benchmarks.png differ
diff --git a/doc/modules/ROOT/images/u128_graphs/linux/ppc64le_relative_performance.png b/doc/modules/ROOT/images/u128_graphs/linux/ppc64le_relative_performance.png
index b3b87b3b..6950202b 100644
Binary files a/doc/modules/ROOT/images/u128_graphs/linux/ppc64le_relative_performance.png and b/doc/modules/ROOT/images/u128_graphs/linux/ppc64le_relative_performance.png differ
diff --git a/doc/modules/ROOT/images/u128_graphs/linux/s390x_benchmarks.png b/doc/modules/ROOT/images/u128_graphs/linux/s390x_benchmarks.png
index e99ab249..1f8be96e 100644
Binary files a/doc/modules/ROOT/images/u128_graphs/linux/s390x_benchmarks.png and b/doc/modules/ROOT/images/u128_graphs/linux/s390x_benchmarks.png differ
diff --git a/doc/modules/ROOT/images/u128_graphs/linux/s390x_relative_performance.png b/doc/modules/ROOT/images/u128_graphs/linux/s390x_relative_performance.png
index 08ed1a3c..db03e704 100644
Binary files a/doc/modules/ROOT/images/u128_graphs/linux/s390x_relative_performance.png and b/doc/modules/ROOT/images/u128_graphs/linux/s390x_relative_performance.png differ
diff --git a/doc/modules/ROOT/images/u128_graphs/linux/x64_benchmarks.png b/doc/modules/ROOT/images/u128_graphs/linux/x64_benchmarks.png
index 5d6194f9..9dfc748e 100644
Binary files a/doc/modules/ROOT/images/u128_graphs/linux/x64_benchmarks.png and b/doc/modules/ROOT/images/u128_graphs/linux/x64_benchmarks.png differ
diff --git a/doc/modules/ROOT/images/u128_graphs/linux/x64_relative_performance.png b/doc/modules/ROOT/images/u128_graphs/linux/x64_relative_performance.png
index ed9cbc71..eb200f34 100644
Binary files a/doc/modules/ROOT/images/u128_graphs/linux/x64_relative_performance.png and b/doc/modules/ROOT/images/u128_graphs/linux/x64_relative_performance.png differ
diff --git a/doc/modules/ROOT/images/u128_graphs/linux/x86_benchmarks.png b/doc/modules/ROOT/images/u128_graphs/linux/x86_benchmarks.png
index d3567a5e..93b48c97 100644
Binary files a/doc/modules/ROOT/images/u128_graphs/linux/x86_benchmarks.png and b/doc/modules/ROOT/images/u128_graphs/linux/x86_benchmarks.png differ
diff --git a/doc/modules/ROOT/images/u128_graphs/linux/x86_relative_performance.png b/doc/modules/ROOT/images/u128_graphs/linux/x86_relative_performance.png
index 3780492b..5b498e0e 100644
Binary files a/doc/modules/ROOT/images/u128_graphs/linux/x86_relative_performance.png and b/doc/modules/ROOT/images/u128_graphs/linux/x86_relative_performance.png differ
diff --git a/doc/modules/ROOT/images/u128_graphs/macos/ARM64_benchmarks.png b/doc/modules/ROOT/images/u128_graphs/macos/ARM64_benchmarks.png
index 989c040f..756dc31a 100644
Binary files a/doc/modules/ROOT/images/u128_graphs/macos/ARM64_benchmarks.png and b/doc/modules/ROOT/images/u128_graphs/macos/ARM64_benchmarks.png differ
diff --git a/doc/modules/ROOT/images/u128_graphs/macos/ARM64_relative_performance.png b/doc/modules/ROOT/images/u128_graphs/macos/ARM64_relative_performance.png
index 15f49776..36047908 100644
Binary files a/doc/modules/ROOT/images/u128_graphs/macos/ARM64_relative_performance.png and b/doc/modules/ROOT/images/u128_graphs/macos/ARM64_relative_performance.png differ
diff --git a/doc/modules/ROOT/images/u128_graphs/macos/x64_benchmarks.png b/doc/modules/ROOT/images/u128_graphs/macos/x64_benchmarks.png
deleted file mode 100644
index fd5c43e1..00000000
Binary files a/doc/modules/ROOT/images/u128_graphs/macos/x64_benchmarks.png and /dev/null differ
diff --git a/doc/modules/ROOT/images/u128_graphs/macos/x64_relative_performance.png b/doc/modules/ROOT/images/u128_graphs/macos/x64_relative_performance.png
deleted file mode 100644
index f8123403..00000000
Binary files a/doc/modules/ROOT/images/u128_graphs/macos/x64_relative_performance.png and /dev/null differ
diff --git a/doc/modules/ROOT/images/u128_graphs/windows/ARM64_benchmarks.png b/doc/modules/ROOT/images/u128_graphs/windows/ARM64_benchmarks.png
new file mode 100644
index 00000000..0ccdcf58
Binary files /dev/null and b/doc/modules/ROOT/images/u128_graphs/windows/ARM64_benchmarks.png differ
diff --git a/doc/modules/ROOT/images/u128_graphs/windows/ARM64_relative_performance.png b/doc/modules/ROOT/images/u128_graphs/windows/ARM64_relative_performance.png
new file mode 100644
index 00000000..75ef018b
Binary files /dev/null and b/doc/modules/ROOT/images/u128_graphs/windows/ARM64_relative_performance.png differ
diff --git a/doc/modules/ROOT/images/u128_graphs/windows/arm64_benchmarks.png b/doc/modules/ROOT/images/u128_graphs/windows/arm64_benchmarks.png
deleted file mode 100644
index 7f6b0ff7..00000000
Binary files a/doc/modules/ROOT/images/u128_graphs/windows/arm64_benchmarks.png and /dev/null differ
diff --git a/doc/modules/ROOT/images/u128_graphs/windows/arm64_relative_performance.png b/doc/modules/ROOT/images/u128_graphs/windows/arm64_relative_performance.png
deleted file mode 100644
index 3338a211..00000000
Binary files a/doc/modules/ROOT/images/u128_graphs/windows/arm64_relative_performance.png and /dev/null differ
diff --git a/doc/modules/ROOT/images/u128_graphs/windows/x64_benchmarks.png b/doc/modules/ROOT/images/u128_graphs/windows/x64_benchmarks.png
index 9c6fba5b..aa3d9c30 100644
Binary files a/doc/modules/ROOT/images/u128_graphs/windows/x64_benchmarks.png and b/doc/modules/ROOT/images/u128_graphs/windows/x64_benchmarks.png differ
diff --git a/doc/modules/ROOT/images/u128_graphs/windows/x64_relative_performance.png b/doc/modules/ROOT/images/u128_graphs/windows/x64_relative_performance.png
index 514ce6f3..5dc1c090 100644
Binary files a/doc/modules/ROOT/images/u128_graphs/windows/x64_relative_performance.png and b/doc/modules/ROOT/images/u128_graphs/windows/x64_relative_performance.png differ
diff --git a/doc/modules/ROOT/images/u128_graphs/windows/x86_benchmarks.png b/doc/modules/ROOT/images/u128_graphs/windows/x86_benchmarks.png
index 1841898b..038ff287 100644
Binary files a/doc/modules/ROOT/images/u128_graphs/windows/x86_benchmarks.png and b/doc/modules/ROOT/images/u128_graphs/windows/x86_benchmarks.png differ
diff --git a/doc/modules/ROOT/images/u128_graphs/windows/x86_relative_performance.png b/doc/modules/ROOT/images/u128_graphs/windows/x86_relative_performance.png
index 9352658a..2446939c 100644
Binary files a/doc/modules/ROOT/images/u128_graphs/windows/x86_relative_performance.png and b/doc/modules/ROOT/images/u128_graphs/windows/x86_relative_performance.png differ
diff --git a/doc/modules/ROOT/nav.adoc b/doc/modules/ROOT/nav.adoc
index f1b1df9b..27492ba9 100644
--- a/doc/modules/ROOT/nav.adoc
+++ b/doc/modules/ROOT/nav.adoc
@@ -7,6 +7,7 @@
 ** xref:examples.adoc#examples_bit[`<bit>` support]
 ** xref:examples.adoc#examples_numeric[`<numeric>` support (Saturating Arithmetic)]
 ** xref:examples.adoc#examples_numeric_algorithms[`<numeric>` support (Numeric Algorithms)]
+** xref:examples.adoc#examples_checked[Checked Arithmetic]
 ** xref:examples.adoc#examples_mixed_sign[Mixed Signedness Arithmetic]
 ** xref:examples.adoc#examples_to_string[String Conversion (to_string)]
 ** xref:examples.adoc#examples_boost_math_random[Boost Math and Random Integration]
@@ -23,10 +24,12 @@
 *** xref:api_reference.adoc#api_cstdlib[`<cstdlib>`]
 *** xref:api_reference.adoc#api_charconv[`<charconv>`]
 *** xref:api_reference.adoc#api_cmath[`<cmath>`]
+*** xref:api_reference.adoc#api_functional[`<functional>`]
 *** xref:api_reference.adoc#api_iostream[`<iostream>`]
 *** xref:api_reference.adoc#api_ios[`<ios>`]
 *** xref:api_reference.adoc#api_numeric[`<numeric>`]
 *** xref:api_reference.adoc#api_string[`<string>`]
+*** xref:api_reference.adoc#api_utilities[Utilities]
 ** xref:api_reference.adoc#api_macros[Macros]
 *** xref:api_reference.adoc#api_macro_literals[Literals]
 *** xref:api_reference.adoc#api_macro_configuration[Configuration]
@@ -53,8 +56,14 @@
 * xref:cstdlib.adoc[]
 * xref:charconv.adoc[]
 * xref:stream.adoc[]
+* xref:hash.adoc[]
 * xref:numeric.adoc[]
 * xref:string.adoc[]
+* xref:utilities.adoc[]
+** xref:utilities.adoc#powm[Modular Exponentiation]
+** xref:utilities.adoc#ipow[Integer Power]
+** xref:utilities.adoc#isqrt[Integer Square Root]
+** xref:utilities.adoc#checked[Checked Arithmetic]
 * Benchmarks
 ** xref:u128_benchmarks.adoc[]
 *** xref:u128_benchmarks.adoc#u128_linux[Linux]
@@ -64,5 +73,6 @@
 *** xref:i128_benchmarks.adoc#i128_linux[Linux]
 *** xref:i128_benchmarks.adoc#i128_windows[Windows]
 *** xref:i128_benchmarks.adoc#i128_mac[macOS]
+* xref:comp_to_multiprecision.adoc[]
 * xref:reference.adoc[]
 * xref:copyright.adoc[]
diff --git a/doc/modules/ROOT/pages/api_reference.adoc b/doc/modules/ROOT/pages/api_reference.adoc
index af870252..4a55dc25 100644
--- a/doc/modules/ROOT/pages/api_reference.adoc
+++ b/doc/modules/ROOT/pages/api_reference.adoc
@@ -72,6 +72,12 @@ https://www.boost.org/LICENSE_1_0.txt
 | https://en.cppreference.com/w/cpp/types/numeric_limits[`std::numeric_limits<int128_t>`]
 | Numeric limits specialization for `int128_t`
 
+| xref:hash.adoc[`std::hash<uint128_t>`]
+| Hash specialization for `uint128_t`
+
+| xref:hash.adoc[`std::hash<int128_t>`]
+| Hash specialization for `int128_t`
+
 | xref:cstdlib.adoc#div_structs[`u128div_t`]
 | Result type for `div(uint128_t, uint128_t)`
 
@@ -164,6 +170,20 @@ Listed by analogous STL header.
 | Computes quotient and remainder simultaneously
 |===
 
+[#api_functional]
+=== xref:hash.adoc[`<functional>`]
+
+[cols="1,2", options="header"]
+|===
+| Specialization | Description
+
+| xref:hash.adoc[`std::hash<uint128_t>`]
+| Enables `uint128_t` as a key in unordered associative containers
+
+| xref:hash.adoc[`std::hash<int128_t>`]
+| Enables `int128_t` as a key in unordered associative containers
+|===
+
 [#api_formatting]
 === xref:format.adoc[Formatting]
 
@@ -257,6 +277,32 @@ Listed by analogous STL header.
 | `std::string` conversion of base-10 values
 |===
 
+[#api_utilities]
+=== xref:utilities.adoc[Utilities]
+
+[cols="1,2", options="header"]
+|===
+| Function | Description
+
+| xref:utilities.adoc#powm[`powm`]
+| Modular exponentiation `(base ^ exp) mod m`
+
+| xref:utilities.adoc#ipow[`ipow`]
+| Integer power `base ^ exp` (wraps modulo `2^128`)
+
+| xref:utilities.adoc#isqrt[`isqrt`]
+| Integer square root `floor(sqrt(n))`
+
+| xref:utilities.adoc#checked[`ckd_add`]
+| Checked addition (C23 `<stdckdint.h>` contract)
+
+| xref:utilities.adoc#checked[`ckd_sub`]
+| Checked subtraction (C23 `<stdckdint.h>` contract)
+
+| xref:utilities.adoc#checked[`ckd_mul`]
+| Checked multiplication (C23 `<stdckdint.h>` contract)
+|===
+
 [#api_macros]
 == Macros
 
@@ -302,12 +348,6 @@ Listed by analogous STL header.
 | xref:config.adoc#no_int128[`BOOST_INT128_NO_BUILTIN_INT128`]
 | Disables use of compiler built-in `__int128`
 
-| xref:config.adoc#sign_compare[`BOOST_INT128_ALLOW_SIGN_COMPARE`]
-| Allows comparison between signed and unsigned types
-
-| xref:config.adoc#sign_conversion[`BOOST_INT128_ALLOW_SIGN_CONVERSION`]
-| Allows implicit sign conversion
-
 | xref:config.adoc#disable_exceptions[`BOOST_INT128_DISABLE_EXCEPTIONS`]
 | Disables exception throwing
 
@@ -362,6 +402,9 @@ Listed by analogous STL header.
 | xref:format.adoc#std_format[`<boost/int128/format.hpp>`]
 | Formatting integration for pass:[C++20] `<format>`
 
+| xref:hash.adoc[`<boost/int128/hash.hpp>`]
+| `std::hash` specializations for `int128_t` and `uint128_t`
+
 | `<boost/int128/int128.hpp>`
 | The xref:uint128_t.adoc[`uint128_t`] and xref:int128_t.adoc[`int128_t`] types
 
@@ -383,4 +426,7 @@ Listed by analogous STL header.
 | `<boost/int128/random.hpp>`
 | Required for usage of Boost.Random
 
+| xref:utilities.adoc[`<boost/int128/utilities.hpp>`]
+| Modular exponentiation and other library-specific utilities
+
 |===
diff --git a/doc/modules/ROOT/pages/comp_to_multiprecision.adoc b/doc/modules/ROOT/pages/comp_to_multiprecision.adoc
new file mode 100644
index 00000000..540f3bcd
--- /dev/null
+++ b/doc/modules/ROOT/pages/comp_to_multiprecision.adoc
@@ -0,0 +1,19 @@
+////
+Copyright 2026 Matt Borland
+Distributed under the Boost Software License, Version 1.0.
+https://www.boost.org/LICENSE_1_0.txt
+////
+
+[#mp]
+= Comparison to Boost.Multiprecision
+
+An easy question to ask is why this is a separate library from Boost.Multiprecision.
+There are several reasons why:
+
+- The goal is for this library to be extremely lightweight.
+The module weight of this library will be 5, whereas https://pdimov.github.io/boostdep-report/develop/module-weights.html[Boost.Multiprecision is 25].
+- Fundamentally, the designs of the types are different.
+In Boost.Multiprecision, `int128` and `uint128` are incidental to the arbitrary-precision integer `cpp_int`, not specifically designed types.
+In this library both `int128_t` and `uint128_t` are their own classes with individually implemented operators to maximize performance.
+- In Boost.Multiprecision all types are based on a high-level template `number` to allow interoperability with each other.
+Again, both `int128_t` and `uint128_t` are their own classes, and are designed to work with and act like the built-in integer types.
diff --git a/doc/modules/ROOT/pages/config.adoc b/doc/modules/ROOT/pages/config.adoc
index c9f0b9f8..8a5cdb39 100644
--- a/doc/modules/ROOT/pages/config.adoc
+++ b/doc/modules/ROOT/pages/config.adoc
@@ -24,16 +24,6 @@ Allowed functions have `BOOST_INT128_HOST_DEVICE` as part of their function sign
 [#no_int128]
 - `BOOST_INT128_NO_BUILTIN_INT128`: The user may define this when they do not want the internal implementations to rely on builtin `pass:[__int128]` or `pass:[unsigned __int128]` types.
 
-[#sign_compare]
-- `BOOST_INT128_ALLOW_SIGN_COMPARE`: Allows comparisons between this library's types and built-in types of opposite signedness. Analogous to disabling GCC's `-Wsign-compare` warning.
-
-IMPORTANT: NOT DEFINED BY DEFAULT FOR CORRECTNESS
-
-[#sign_conversion]
-- `BOOST_INT128_ALLOW_SIGN_CONVERSION`: Allows arithmetic operations between this library's types and built-in types of opposite signedness. Analogous to disabling GCC's `-Wsign-conversion` warning. Implies `BOOST_INT128_ALLOW_SIGN_COMPARE`.
-
-IMPORTANT: NOT DEFINED BY DEFAULT FOR CORRECTNESS
-
 [#disable_exceptions]
 - `BOOST_INT128_DISABLE_EXCEPTIONS`: Allows exceptions to be disabled.
 This macro will automatically be defined in the presence of `-fno-exceptions` or similar MSVC flags.
diff --git a/doc/modules/ROOT/pages/examples.adoc b/doc/modules/ROOT/pages/examples.adoc
index 7a8a4693..18620bbc 100644
--- a/doc/modules/ROOT/pages/examples.adoc
+++ b/doc/modules/ROOT/pages/examples.adoc
@@ -28,7 +28,7 @@ From builtin (42U): 42
 From parts (1, 0) = 2^64: 18446744073709551616
 From parts (max, max): 340282366920938463463374607431768211455
   Equals numeric_limits max? true
-From literal "36893488147419103232"_U128: 36893488147419103232
+From literal 12345_U128: 12345
 From BOOST_INT128_UINT128_C(max): 340282366920938463463374607431768211455
 From stringstream: 12345678901234567890123456789
 
@@ -36,13 +36,28 @@ From stringstream: 12345678901234567890123456789
 From builtin (-42): -42
 From parts (INT64_MIN, 0): -170141183460469231731687303715884105728
   Equals numeric_limits min? true
-From literal "-99999999999999999999"_i128: -99999999999999999999
-From literal "99999999999999999999"_I128: 99999999999999999999
+From literal -12345_i128: -12345
+From literal 12345_I128: 12345
+From BOOST_INT128_INT128_C(-99999999999999999999): -99999999999999999999
+From string literal: -99999999999999999999
 From BOOST_INT128_INT128_C(min): -170141183460469231731687303715884105728
 
 === Default and Copy Construction ===
 Default constructed: 0
-Copy constructed: 36893488147419103232
+Copy constructed: 340282366920938463463374607431768211455
+
+=== Floating-Point Construction ===
+uint128_t from 12345.9 (truncated): 12345
+int128_t from -12345.9 (truncated toward zero): -12345
+uint128_t from 2^100: 1267650600228229401496703205376
+
+=== Floating-Point Edge Cases ===
+uint128_t from NaN: 0
+int128_t from NaN: 0
+uint128_t from -1.0 (clamped to zero): 0
+uint128_t from +infinity (saturates to UINT128_MAX): 340282366920938463463374607431768211455
+int128_t from 1e40 (saturates to INT128_MAX): 170141183460469231731687303715884105727
+int128_t from -1e40 (saturates to INT128_MIN): -170141183460469231731687303715884105728
 ----
 ====
 
@@ -263,6 +278,38 @@ midpoint(-100, -50) = -75
 ----
 ====
 
+[#examples_checked]
+== Checked Arithmetic
+
+.This https://github.com/cppalliance/int128/blob/develop/examples/checked_arithmetic.cpp[example] demonstrates checked addition, subtraction, and multiplication following the C23 checked-integer contract
+====
+[source, c++]
+----
+include::example$checked_arithmetic.cpp[]
+----
+
+.Expected Output
+[listing]
+----
+=== Results That Fit ===
+ckd_add(20, 22): overflow=false, result=42
+
+=== Addition Overflow ===
+ckd_add(UINT128_MAX, 1): overflow=true, wrapped=0
+
+=== Subtraction Underflow ===
+ckd_sub(0, 1): overflow=true, wrapped=340282366920938463463374607431768211455
+
+=== Multiplication Overflow ===
+ckd_mul(INT128_MAX, 2): overflow=true, wrapped=-2
+ckd_mul(INT128_MIN, -1): overflow=true, wrapped=-170141183460469231731687303715884105728
+
+=== Mixed Types ===
+ckd_add<int64_t>(uint128_t{5}, int128_t{-3}): overflow=false, result=2
+ckd_mul<uint8_t>(20, 20): overflow=true, wrapped=144
+----
+====
+
 [#examples_mixed_sign]
 == Mixed Signedness Arithmetic
 
diff --git a/doc/modules/ROOT/pages/file_structure.adoc b/doc/modules/ROOT/pages/file_structure.adoc
index 84c6765f..1abb1c4b 100644
--- a/doc/modules/ROOT/pages/file_structure.adoc
+++ b/doc/modules/ROOT/pages/file_structure.adoc
@@ -39,6 +39,9 @@ The entire library can be consumed via `<boost/int128.hpp>`, or by independently
 | xref:format.adoc[`<boost/int128/format.hpp>`]
 | C++20 `std::format` support
 
+| xref:hash.adoc[`<boost/int128/hash.hpp>`]
+| `std::hash` specializations for `int128_t` and `uint128_t`
+
 | `<boost/int128/int128.hpp>`
 | Core type definitions (`uint128_t`, `int128_t`)
 
diff --git a/doc/modules/ROOT/pages/hash.adoc b/doc/modules/ROOT/pages/hash.adoc
new file mode 100644
index 00000000..a03ef14c
--- /dev/null
+++ b/doc/modules/ROOT/pages/hash.adoc
@@ -0,0 +1,69 @@
+////
+Copyright 2026 Matt Borland
+Distributed under the Boost Software License, Version 1.0.
+https://www.boost.org/LICENSE_1_0.txt
+////
+
+[#hash]
+= Hashing
+:idprefix: hash_
+
+The `<boost/int128/hash.hpp>` header provides specializations of `std::hash` for `uint128_t` and `int128_t`, allowing the library types to be used as keys in `std::unordered_map`, `std::unordered_set`, and any other container that relies on `std::hash`.
+
+[source, c++]
+----
+#include <boost/int128/hash.hpp>
+----
+
+[#hash_specializations]
+== Specializations
+
+[source, c++]
+----
+namespace std {
+
+template <>
+struct hash<boost::int128::int128_t>
+{
+    std::size_t operator()(boost::int128::int128_t v) const noexcept;
+};
+
+template <>
+struct hash<boost::int128::uint128_t>
+{
+    std::size_t operator()(boost::int128::uint128_t v) const noexcept;
+};
+
+} // namespace std
+----
+
+Each 64-bit half of the value is first run through a SplitMix64 finalizer so that every input bit influences the lower bits of the result.
+This is necessary because `std::hash<std::uint64_t>` is permitted to truncate to `std::size_t`, which would lose the upper 32 bits on 32-bit platforms and cause distinct 128-bit values to collide.
+The two finalized halves are then combined with the `boost::hash_combine` mixing formula.
+
+[#hash_guarantees]
+== Guarantees
+
+* Two values comparing equal under `operator==` produce the same hash.
+* For any non-zero `v` other than `INT128_MIN` (whose negation wraps back to itself), `std::hash<int128_t>{}(v) != std::hash<int128_t>{}(-v)`.
+* The mixing function is asymmetric, so `{high, low}` and `{low, high}` do not collide except by chance.
+* The hash value is implementation-defined and may differ across platforms, compilers, or library versions. Do not persist hash values across runs.
+
+[#hash_example]
+== Example
+
+[source, c++]
+----
+#include <boost/int128/int128.hpp>
+#include <boost/int128/hash.hpp>
+#include <unordered_map>
+
+int main()
+{
+    std::unordered_map<boost::int128::uint128_t, int> counts {};
+    counts[boost::int128::uint128_t{1, 0}] = 1;
+    counts[boost::int128::uint128_t{0, 1}] = 2;
+
+    return 0;
+}
+----
diff --git a/doc/modules/ROOT/pages/i128_benchmarks.adoc b/doc/modules/ROOT/pages/i128_benchmarks.adoc
index 915e1be5..af9b1f3c 100644
--- a/doc/modules/ROOT/pages/i128_benchmarks.adoc
+++ b/doc/modules/ROOT/pages/i128_benchmarks.adoc
@@ -24,12 +24,12 @@ On MSVC platforms we use as reference `std::_Signed128` from the header `<__msvc
 |===
 | Operation | `__int128` | `int128_t` | `boost::mp::int128_t` | `absl::int128`
 
-| Comparisons | 879535 | 748787 | 2210502 | 741269
-| Addition | 92165 | 92441 | 283528 | 92323
-| Subtraction | 92514 | 88390 | 668953 | 90394
-| Multiplication | 115727 | 90897 | 312723 | 89558
-| Division | 1234838 | 1352795 | 1320695 | 1200439
-| Modulo | 1193529 | 1256687 | 1287093 | 1293439
+| Comparisons | 2232997 | 1970941 | 5478483 | 1944089
+| Addition | 244246 | 292081 | 650160 | 227720
+| Subtraction | 220957 | 196953 | 1625774 | 315611
+| Multiplication | 433431 | 321168 | 1595688 | 304069
+| Division | 4462364 | 4983165 | 4992819 | 4986970
+| Modulo | 4803576 | 5257406 | 4988844 | 5081814
 |===
 
 ////
@@ -44,12 +44,12 @@ image::i128_graphs/linux/x64_relative_performance.png[x64 Relative Performance,
 |===
 | Operation | `__int128` | `int128_t` | `boost::mp::int128_t` | `absl::int128`
 
-| Comparisons | 3495621 | 2279914 | 5910287 | 3749448
-| Addition | 191514 | 133319 | 566860 | 164848
-| Subtraction | 131380 | 193984 | 1066509 | 193467
-| Multiplication | 236071 | 234594 | 864526 | 237676
-| Division | 2412757 | 2434752 | 2508755 | 2484139
-| Modulo | 2501357 | 2171828 | 2571959 | 2158203
+| Comparisons | 4115337 | 2169531 | 5914108 | 3725321
+| Addition | 194461 | 196244 | 543680 | 195216
+| Subtraction | 151441 | 97565 | 1161677 | 192729
+| Multiplication | 334847 | 232518 | 904461 | 240980
+| Division | 2403064 | 1848517 | 2493904 | 2431322
+| Modulo | 2235322 | 2159401 | 2535438 | 2321638
 |===
 
 ////
@@ -64,12 +64,12 @@ image::i128_graphs/linux/ARM64_relative_performance.png[ARM64 Relative Performan
 |===
 | Operation | `__int128` | `int128_t` | `boost::mp::int128_t` | `absl::int128`
 
-| Comparisons | 14099505 | 12588237 | 21074294 | 13972778
-| Addition | 1151086 | 1374984 | 3303931 | 1195725
-| Subtraction | 1223119 | 753561 | 4224613 | 1295929
-| Multiplication | 1904542 | 2060986 | 3034387 | 1733150
-| Division | 8768877 | 7080113 | 7306287 | 7968543
-| Modulo | 8661233 | 7180650 | 8801605 | 8175497
+| Comparisons | 5171094 | 5069329 | 7457296 | 5343843
+| Addition | 625328 | 785936 | 1286888 | 670826
+| Subtraction | 667538 | 356865 | 2555881 | 741947
+| Multiplication | 904480 | 729911 | 1562062 | 786829
+| Division | 3758577 | 2211087 | 3095993 | 3940264
+| Modulo | 4218409 | 2330114 | 3684163 | 3849849
 |===
 
 ////
@@ -107,12 +107,12 @@ NOTE: This platform has no hardware type so we compare relative to `boost::mp::i
 |===
 | Operation | `int128_t` | `boost::mp::int128_t`
 
-| Comparisons | 9530060 | 12168353
-| Addition | 785799 | 7777469
-| Subtraction  | 778881 | 8214089
-| Multiplication  | 1148024 | 9477355
-| Division  | 10337258 | 22857709
-| Modulo | 10438037 | 14848256
+| Comparisons | 10310201 | 14160000
+| Addition | 786499 | 7379646
+| Subtraction  | 907051 | 7890190
+| Multiplication  | 855780 | 10826565
+| Division  | 10254664 | 24702433
+| Modulo | 10851123 | 17348307
 |===
 
 ////
@@ -152,12 +152,12 @@ image::i128_graphs/linux/ARM32_relative_performance.png[ARM32 Relative Performan
 |===
 | Operation | `std::_Signed128` | `int128_t` | `boost::mp::int128_t`
 
-| Comparisons | 2186843 | 2142626 | 4854983
-| Addition | 186771 | 184598 | 2645943
-| Subtraction | 193660 | 186335 | 2925784
-| Multiplication | 402806 | 117413 | 3887479
-| Division | 1612873 | 2369701 | 6437280
-| Modulo | 1637135 | 2218627 | 6236026
+| Comparisons | 1879694 | 1894168 | 5198915
+| Addition | 141120 | 143877 | 2846799
+| Subtraction | 157649 | 156965 | 3027203
+| Multiplication | 266740 | 138754 | 4080611
+| Division | 1387560 | 1752869 | 6924406
+| Modulo | 1616895 | 1908345 | 6397442
 |===
 ////
 image::i128_graphs/windows/x64_benchmarks.png[x64 Benchmark Results, width=100%]
@@ -171,12 +171,12 @@ image::i128_graphs/windows/x64_relative_performance.png[x64 Relative Performance
 |===
 | Operation | `std::_Signed128` | `int128_t` | `boost::mp::int128_t`
 
-| Comparisons | 911829 | 368104 | 2376802
-| Addition | 33233 | 34001 | 121700
-| Subtraction | 33411 | 34130 | 1488822
-| Multiplication | 117586 | 56324 | 1564799
-| Division | 1127267 | 1500725 | 2808293
-| Modulo | 1287100 | 1548073 | 2997474
+| Comparisons | 991273 | 391918 | 2551137
+| Addition | 34519 | 48953 | 1243326
+| Subtraction | 34184 | 36278 | 1387708
+| Multiplication | 126490 | 36781 | 1632232
+| Division | 1128432 | 1107571 | 2472959
+| Modulo | 1427629 | 1310481 | 2926904
 |===
 ////
 image::i128_graphs/windows/ARM64_benchmarks.png[ARM64 Benchmark Results, width=100%]
@@ -190,12 +190,12 @@ image::i128_graphs/windows/ARM64_relative_performance.png[ARM64 Relative Perform
 |===
 | Operation | `std::_Signed128` | `int128_t` | `boost::mp::int128_t`
 
-| Comparisons | 3187340 | 3046252 | 4269507
-| Addition | 185960 | 189165 | 2488618
-| Subtraction | 979025 | 192609 | 2783600
-| Multiplication | 1896082 | 3569921 | 4908622
-| Division | 5566403 | 4348306 | 6835035
-| Modulo | 4697289 | 4793845 | 6476032
+| Comparisons | 3832024 | 3823023 | 5568151
+| Addition | 232554 | 197092 | 3488510
+| Subtraction | 1198377 | 145823 | 4011233
+| Multiplication | 2921104 | 428925 | 6219931
+| Division | 7174578 | 7189000 | 9748526
+| Modulo | 5528639 | 7028725 | 9205892
 |===
 ////
 image::i128_graphs/windows/x86_benchmarks.png[x86_32 Benchmark Results, width=100%]
@@ -212,12 +212,12 @@ image::i128_graphs/windows/x86_relative_performance.png[x86_32 Relative Performa
 |===
 | Operation | `__int128` | `int128_t` | `boost::mp::int128_t` | `absl::int128`
 
-| Comparisons | 133275 | 131953 | 340555 | 133509
-| Addition | 20203 | 17797 | 169909 | 20208
-| Subtraction | 20203 | 17832 | 172497 | 22199
-| Multiplication | 21496 | 20202 | 78269 | 20364
-| Division | 662767 | 682891 | 969277 | 663602
-| Modulo | 719179 | 692509 | 1026090 | 717897
+| Comparisons | 135259 | 134127 | 340037 | 136845
+| Addition | 20399 | 18575 | 169575 | 20429
+| Subtraction | 20156 | 18983 | 168041 | 20875
+| Multiplication | 20654 | 20860 | 69443 | 20651
+| Division | 668004 | 659823 | 976248 | 660963
+| Modulo | 664356 | 662282 | 1026487 | 665474
 |===
 
 ////
@@ -225,23 +225,3 @@ image::i128_graphs/macos/ARM64_benchmarks.png[ARM64 Benchmark Results, width=100
 ////
 
 image::i128_graphs/macos/ARM64_relative_performance.png[ARM64 Relative Performance, width=100%]
-
-=== x86_64
-
-[cols="1,1,1,1"]
-|===
-| Operation | `__int128` | `int128_t` | `boost::mp::int128_t`
-
-| Comparisons | 1628142 | 1748005 | 4318109
-| Addition | 224648 | 180393 | 925013
-| Subtraction | 212849 | 131062 | 1876834
-| Multiplication | 432205 | 407829 | 651209
-| Division | 3924951 | 2409106 | 3719183
-| Modulo | 3042060 | 2423738 | 4443402
-|===
-
-////
-image::i128_graphs/macos/x64_benchmarks.png[x64 Benchmark Results, width=100%]
-////
-
-image::i128_graphs/macos/x64_relative_performance.png[x64 Relative Performance, width=100%]
diff --git a/doc/modules/ROOT/pages/int128_t.adoc b/doc/modules/ROOT/pages/int128_t.adoc
index 70090981..70f7aa56 100644
--- a/doc/modules/ROOT/pages/int128_t.adoc
+++ b/doc/modules/ROOT/pages/int128_t.adoc
@@ -62,11 +62,16 @@ Otherwise, it is left up to the compiler to decide.
 [#i128_operator_behavior]
 == Operator Behavior
 
-For all the following operators use of unsigned overloads will error from `static_assert` by default.
-This is the library's way of enforcing the behavior of `-Wsign-conversion` and `-Wsign-comparison` in a library type.
-If you want to compare with unsigned types you must define `BOOST_INT128_ALLOW_SIGN_COMPARE`,
-and similarly you must define `BOOST_INT128_ALLOW_SIGN_CONVERSION` for other operations with mixed signedness.
-These will both cast the unsigned integer to a signed integer and then perform the operation.
+All comparison, arithmetic, bitwise, and shift operators are defined between `int128_t` and any built-in integer type, signed or unsigned.
+Their behavior follows the C++ usual arithmetic conversions and is value-identical to the corresponding built-in `pass:[__int128]` operation.
+Specifically:
+
+* For built-in unsigned types of lesser rank (`uint8_t` through `uint64_t`), `int128_t` is the common type and arithmetic / bitwise operations return `int128_t` with signed semantics.
+* For `pass:[unsigned __int128]` (same rank), the signed `int128_t` is converted to the unsigned counterpart, the operation is performed unsigned, and arithmetic / bitwise operations return `uint128_t` (the library's wrapper for `pass:[unsigned __int128]`).
+* For shift operators, the result type follows the LHS: `int128_t << T` and `int128_t >> T` always return `int128_t`, regardless of `T`.
+* All comparison operators return `bool`, with the comparison performed on the operands after conversion to the common type.
+
+See xref:mixed_type_ops.adoc[Mixed Type Operations] for the full set of cross-type signatures and detailed result-type rules.
 
 [#i128_constructors]
 == Constructors
@@ -87,7 +92,7 @@ struct int128_t
     BOOST_INT128_HOST_DEVICE constexpr int128_t& operator=(const int128_t&) noexcept = default;
     BOOST_INT128_HOST_DEVICE constexpr int128_t& operator=(int128_t&&) noexcept = default;
 
-    BOOST_INT128_HOST_DEVICE explicit constexpr int128_t(const uint128_t& v) noexcept;
+    BOOST_INT128_HOST_DEVICE constexpr int128_t(const uint128_t& v) noexcept;
 
     // Construct from integral types
     BOOST_INT128_HOST_DEVICE constexpr int128_t(const std::int64_t hi, const std::uint64_t lo) noexcept;
@@ -107,14 +112,21 @@ struct int128_t
     BOOST_INT128_HOST_DEVICE constexpr int128_t(const detail::builtin_u128 v) noexcept;
 
     #endif // BOOST_INT128_HAS_INT128
+
+    // Construct from floating-point types
+    template <BOOST_INT128_FLOATING_POINT_CONCEPT Float>
+    BOOST_INT128_HOST_DEVICE constexpr int128_t(Float f) noexcept;
 };
 
 } // namespace int128
 } // namespace boost
 ----
 
-All constructors are only defined for integers and are subject to mixed sign limitations discussed xref:int128_t.adoc#i128_operator_behavior[above].
-None are marked `explicit` in order to match the implicit conversion behavior of the built-in integer types.
+None of the constructors are marked `explicit` in order to match the implicit conversion behavior of the built-in integer types.
+Integer constructors follow the mixed-sign conversion rules described xref:int128_t.adoc#i128_operator_behavior[above].
+
+The floating-point constructor truncates toward zero, matching `pass:[static_cast<__int128>(f)]`.
+Edge cases mirror libgcc's `pass:[__fixXfti]`: NaN yields zero, values `>= 2^127` saturate to `INT128_MAX`, and values `<= -2^127` saturate to `INT128_MIN`.
 
 [#i128_conversions]
 == Conversions
@@ -129,35 +141,36 @@ struct int128_t
     ...
 
     // Integer conversion operators
-    BOOST_INT128_HOST_DEVICE constexpr operator bool() const noexcept;
+    BOOST_INT128_HOST_DEVICE explicit constexpr operator bool() const noexcept;
 
     template <BOOST_INT128_SIGNED_INTEGER_CONCEPT SignedInteger>
-    BOOST_INT128_HOST_DEVICE explicit constexpr operator SignedInteger() const noexcept;
+    BOOST_INT128_HOST_DEVICE constexpr operator SignedInteger() const noexcept;
 
     template <BOOST_INT128_UNSIGNED_INTEGER_CONCEPT UnsignedInteger>
-    BOOST_INT128_HOST_DEVICE explicit constexpr operator UnsignedInteger() const noexcept;
+    BOOST_INT128_HOST_DEVICE constexpr operator UnsignedInteger() const noexcept;
 
     #ifdef BOOST_INT128_HAS_INT128
 
-    BOOST_INT128_HOST_DEVICE explicit constexpr operator detail::builtin_i128() const noexcept;
+    BOOST_INT128_HOST_DEVICE constexpr operator detail::builtin_i128() const noexcept;
 
-    BOOST_INT128_HOST_DEVICE explicit constexpr operator detail::builtin_u128() const noexcept;
+    BOOST_INT128_HOST_DEVICE constexpr operator detail::builtin_u128() const noexcept;
 
     #endif // BOOST_INT128_HAS_INT128
 
-    // Conversion to float
-    BOOST_INT128_HOST_DEVICE explicit constexpr operator float() const noexcept;
-    BOOST_INT128_HOST_DEVICE explicit constexpr operator double() const noexcept;
-    explicit constexpr operator long double() const noexcept; // There are no long doubles on device
+    // Conversion to floating point
+    BOOST_INT128_HOST_DEVICE constexpr operator float() const noexcept;
+    BOOST_INT128_HOST_DEVICE constexpr operator double() const noexcept;
+    constexpr operator long double() const noexcept; // There are no long doubles on device
 };
 
 } // namespace int128
 } // namespace boost
 ----
 
+All conversion operators except `operator bool()` are implicit to match the behavior of built-in integer types.
+`operator bool()` is explicit so that an `int128_t` cannot accidentally bind to a `bool` parameter; contextual conversions (`if (x)`, `!x`, etc.) still work.
 Conversions to unsigned integers are subject to mixed sign limitations discussed xref:int128_t.adoc#i128_operator_behavior[above].
-Conversion to `bool` is not marked explicit to match the behavior of built-in integer types.
-Conversions to floating point types may not be lossless depending on the value of the `int128_t` at time of conversion,
+Conversions to floating-point types may not be lossless depending on the value of the `int128_t` at time of conversion,
 as the number of digits it represents can exceed the precision of the significand in floating point types.
 
 [#i128_comparison_operators]
diff --git a/doc/modules/ROOT/pages/literals.adoc b/doc/modules/ROOT/pages/literals.adoc
index bb4a5a0f..979c5644 100644
--- a/doc/modules/ROOT/pages/literals.adoc
+++ b/doc/modules/ROOT/pages/literals.adoc
@@ -26,10 +26,6 @@ BOOST_INT128_HOST_DEVICE constexpr uint128_t operator ""_u128(const char* str, s
 
 BOOST_INT128_HOST_DEVICE constexpr uint128_t operator ""_U128(const char* str, std::size_t len) noexcept;
 
-BOOST_INT128_HOST_DEVICE constexpr uint128_t operator ""_u128(unsigned long long v) noexcept;
-
-BOOST_INT128_HOST_DEVICE constexpr uint128_t operator ""_U128(unsigned long long v) noexcept;
-
 BOOST_INT128_HOST_DEVICE constexpr int128_t operator ""_i128(const char* str) noexcept;
 
 BOOST_INT128_HOST_DEVICE constexpr int128_t operator ""_I128(const char* str) noexcept;
@@ -38,10 +34,6 @@ BOOST_INT128_HOST_DEVICE constexpr int128_t operator ""_i128(const char* str, st
 
 BOOST_INT128_HOST_DEVICE constexpr int128_t operator ""_I128(const char* str, std::size_t len) noexcept;
 
-BOOST_INT128_HOST_DEVICE constexpr int128_t operator ""_i128(unsigned long long v) noexcept;
-
-BOOST_INT128_HOST_DEVICE constexpr int128_t operator ""_I128(unsigned long long v) noexcept;
-
 } // namespace literals
 } // namespace int128
 } // namespace boost
@@ -54,3 +46,22 @@ BOOST_INT128_HOST_DEVICE constexpr int128_t operator ""_I128(unsigned long long
 The macros at the end allow you to write out a 128-bit number like you would with say `UINT64_C` without having to add quotes.
 
 See the xref:examples.adoc#examples_construction[construction examples] for usage demonstrations of both literals and macros.
+
+== Design Rationale
+
+All of the user-defined literals provided by the library are string-form: the operator receives a `const char*` and parses the digit sequence via `from_chars`.
+This holds even for raw numeric tokens like `12345_U128`, which the compiler forwards to the operator as the string `"12345"`.
+The choice is intentional.
+A 128-bit value does not, in general, fit in `unsigned long long`, so any literal whose magnitude is 2^64 or greater must go through a string-based parse.
+Providing only the string form means there is a single overload that handles every magnitude uniformly, rather than a numeric form for small values and a string form for large ones with a hard cutoff at 2^64.
+The API is the same regardless of how large the value is.
+The trade-off is that every literal pays the cost of parsing, even when the value would fit in a built-in integer.
+For values smaller than 2^64, prefer the constructor:
+
+[source, c++]
+----
+constexpr uint128_t small {42U};            // direct conversion from a builtin
+const auto small_literal {42_U128};         // parses "42" via from_chars
+----
+
+The two produce the same value, but the constructor avoids the parse and should be used in hot paths or in code where many small constants are constructed.
diff --git a/doc/modules/ROOT/pages/mixed_type_ops.adoc b/doc/modules/ROOT/pages/mixed_type_ops.adoc
index 38d2e356..8961ecdb 100644
--- a/doc/modules/ROOT/pages/mixed_type_ops.adoc
+++ b/doc/modules/ROOT/pages/mixed_type_ops.adoc
@@ -12,16 +12,47 @@ https://www.boost.org/LICENSE_1_0.txt
 
 The ability to convert between the two types via `static_cast` is available as documented in the above class descriptions.
 
-== Comparisons and Arithmetics
+== Operator Overloads Across Types
 
-The following operations are *disabled by default*.
-Since we cannot enforce `-Wsign-conversion` and `-Wsign-compare` through the compiler, we instead `static_assert` that the operation is unavailable.
-This removes a common source of error (search "Sign Conversion" on Stack Overflow).
+All comparison, arithmetic, bitwise, and shift operators are provided across:
 
-To enable these operations, define the appropriate configuration macros before including any library headers:
+* `int128_t` and `uint128_t` (cross-type),
+* `int128_t` / `uint128_t` and any built-in integer type (signed or unsigned, including the compiler's 128-bit `pass:[__int128]` / `pass:[unsigned __int128]` where supported).
 
-* xref:config.adoc#sign_compare[`BOOST_INT128_ALLOW_SIGN_COMPARE`] - enables comparison operators between signed and unsigned types
-* xref:config.adoc#sign_conversion[`BOOST_INT128_ALLOW_SIGN_CONVERSION`] - enables arithmetic operators between signed and unsigned types (implies `BOOST_INT128_ALLOW_SIGN_COMPARE`)
+The behavior and return type of every mixed-sign overload follow the C++ usual arithmetic conversions, identical to what the equivalent built-in `pass:[__int128]` / `pass:[unsigned __int128]` operation would produce, including two's-complement wrap-around semantics.
+
+=== Result Type Rules
+
+[cols="3,2,3", options="header"]
+|===
+| Operands | Common type | Result of arithmetic / bitwise
+
+| `int128_t` and `uint128_t` (same rank, mixed sign)
+| `uint128_t`
+| `uint128_t`
+
+| `int128_t` and `pass:[unsigned __int128]` (same rank, mixed sign)
+| `uint128_t`
+| `uint128_t`
+
+| `uint128_t` and `pass:[__int128]` (same rank, mixed sign)
+| `uint128_t`
+| `uint128_t`
+
+| `int128_t` and a small unsigned built-in (`uint8_t` to `uint64_t`)
+| `int128_t`
+| `int128_t`
+
+| `uint128_t` and a small signed built-in (`int8_t` to `int64_t`)
+| `uint128_t`
+| `uint128_t`
+|===
+
+For shift operators (`pass:[<<]`, `pass:[>>]`), the result type follows the LHS type regardless of the RHS, matching the built-in shift rules.
+
+For comparison operators (`pass:[==]`, `pass:[!=]`, `pass:[<]`, `pass:[<=]`, `pass:[>]`, `pass:[>=]`), the return type is always `bool` and the comparison is performed on the operands after they have been converted to the common type above.
+
+=== Cross-type Operator Signatures
 
 [source, c++]
 ----
@@ -33,27 +64,16 @@ namespace int128 {
 //=====================================
 
 BOOST_INT128_HOST_DEVICE constexpr bool operator==(uint128_t lhs, int128_t rhs);
-
 BOOST_INT128_HOST_DEVICE constexpr bool operator==(int128_t lhs, uint128_t rhs);
-
 BOOST_INT128_HOST_DEVICE constexpr bool operator!=(uint128_t lhs, int128_t rhs);
-
 BOOST_INT128_HOST_DEVICE constexpr bool operator!=(int128_t lhs, uint128_t rhs);
-
 BOOST_INT128_HOST_DEVICE constexpr bool operator<(uint128_t lhs, int128_t rhs);
-
 BOOST_INT128_HOST_DEVICE constexpr bool operator<(int128_t lhs, uint128_t rhs);
-
 BOOST_INT128_HOST_DEVICE constexpr bool operator<=(uint128_t lhs, int128_t rhs);
-
 BOOST_INT128_HOST_DEVICE constexpr bool operator<=(int128_t lhs, uint128_t rhs);
-
 BOOST_INT128_HOST_DEVICE constexpr bool operator>(uint128_t lhs, int128_t rhs);
-
 BOOST_INT128_HOST_DEVICE constexpr bool operator>(int128_t lhs, uint128_t rhs);
-
 BOOST_INT128_HOST_DEVICE constexpr bool operator>=(uint128_t lhs, int128_t rhs);
-
 BOOST_INT128_HOST_DEVICE constexpr bool operator>=(int128_t lhs, uint128_t rhs);
 
 //=====================================
@@ -61,165 +81,45 @@ BOOST_INT128_HOST_DEVICE constexpr bool operator>=(int128_t lhs, uint128_t rhs);
 //=====================================
 
 BOOST_INT128_HOST_DEVICE constexpr uint128_t operator+(uint128_t lhs, int128_t rhs);
-
 BOOST_INT128_HOST_DEVICE constexpr uint128_t operator+(int128_t lhs, uint128_t rhs);
-
 BOOST_INT128_HOST_DEVICE constexpr uint128_t operator-(uint128_t lhs, int128_t rhs);
-
 BOOST_INT128_HOST_DEVICE constexpr uint128_t operator-(int128_t lhs, uint128_t rhs);
-
 BOOST_INT128_HOST_DEVICE constexpr uint128_t operator*(uint128_t lhs, int128_t rhs);
-
 BOOST_INT128_HOST_DEVICE constexpr uint128_t operator*(int128_t lhs, uint128_t rhs);
-
 BOOST_INT128_HOST_DEVICE constexpr uint128_t operator/(uint128_t lhs, int128_t rhs);
-
 BOOST_INT128_HOST_DEVICE constexpr uint128_t operator/(int128_t lhs, uint128_t rhs);
-
 BOOST_INT128_HOST_DEVICE constexpr uint128_t operator%(uint128_t lhs, int128_t rhs);
-
 BOOST_INT128_HOST_DEVICE constexpr uint128_t operator%(int128_t lhs, uint128_t rhs);
 
-} // namespace int128
-} // namespace boost
-
-----
-
-== Comparisons
-
-If you define xref:config.adoc#sign_compare[`BOOST_INT128_ALLOW_SIGN_COMPARE`], the operators have the following behavior.
-
-=== Equality
-
-[source, c++]
-----
-BOOST_INT128_HOST_DEVICE constexpr bool operator==(uint128_t lhs, int128_t rhs);
-
-BOOST_INT128_HOST_DEVICE constexpr bool operator==(int128_t lhs, uint128_t rhs);
-----
-
-If the `int128_t` argument is less than 0 returns `false`.
-Otherwise, returns the same as `static_cast<uint128_t>(lhs) == static_cast<uint128_t>(rhs)`.
-
-=== Inequality
-
-[source, c++]
-----
-BOOST_INT128_HOST_DEVICE constexpr bool operator!=(uint128_t lhs, int128_t rhs);
-
-BOOST_INT128_HOST_DEVICE constexpr bool operator!=(int128_t lhs, uint128_t rhs);
-----
-
-If the `int128_t` argument is less than 0 returns `true`.
-Otherwise, returns the same as `static_cast<uint128_t>(lhs) != static_cast<uint128_t>(rhs)`.
-
-=== Less Than
-
-[source, c++]
-----
-BOOST_INT128_HOST_DEVICE constexpr bool operator<(uint128_t lhs, int128_t rhs);
-
-BOOST_INT128_HOST_DEVICE constexpr bool operator<(int128_t lhs, uint128_t rhs);
-----
-
-If `lhs` is type `int128_t` returns `true` if `lhs < 0`
-If `rhs` is type `int128_t` returns `false` if `rhs < 0`
-Otherwise, returns the same as `static_cast<uint128_t>(lhs) < static_cast<uint128_t>(rhs)`.
-
-=== Less Than or Equal To
-
-[source, c++]
-----
-BOOST_INT128_HOST_DEVICE constexpr bool operator<=(uint128_t lhs, int128_t rhs);
-
-BOOST_INT128_HOST_DEVICE constexpr bool operator<=(int128_t lhs, uint128_t rhs);
-----
-
-If `lhs` is type `int128_t` returns `true` if `lhs < 0`
-If `rhs` is type `int128_t` returns `false` if `rhs < 0`
-Otherwise, returns the same as `static_cast<uint128_t>(lhs) pass:[<=] static_cast<uint128_t>(rhs)`.
-
-=== Greater Than
-
-[source, c++]
-----
-BOOST_INT128_HOST_DEVICE constexpr bool operator>(uint128_t lhs, int128_t rhs);
-
-BOOST_INT128_HOST_DEVICE constexpr bool operator>(int128_t lhs, uint128_t rhs);
-----
-
-If `lhs` is type `int128_t` returns `false` if `lhs < 0`
-If `rhs` is type `int128_t` returns `true` if `rhs < 0`
-Otherwise, returns the same as `static_cast<uint128_t>(lhs) > static_cast<uint128_t>(rhs)`.
-
-=== Greater Than or Equal To
-
-[source, c++]
-----
-BOOST_INT128_HOST_DEVICE constexpr bool operator>=(uint128_t lhs, int128_t rhs);
-
-BOOST_INT128_HOST_DEVICE constexpr bool operator>=(int128_t lhs, uint128_t rhs);
-----
-
-If `lhs` is type `int128_t` returns `false` if `lhs < 0`
-If `rhs` is type `int128_t` returns `true` if `rhs < 0`
-Otherwise, returns the same as `static_cast<uint128_t>(lhs) pass:[>=] static_cast<uint128_t>(rhs)`.
-
-== Arithmetic
-
-If you define xref:config.adoc#sign_conversion[`BOOST_INT128_ALLOW_SIGN_CONVERSION`], the operators have the following behavior.
-
-=== Addition
-
-[source, c++]
-----
-BOOST_INT128_HOST_DEVICE constexpr uint128_t operator+(uint128_t lhs, int128_t rhs);
-
-BOOST_INT128_HOST_DEVICE constexpr uint128_t operator+(int128_t lhs, uint128_t rhs);
-----
-
-Returns the same as `static_cast<uint128_t>(lhs) + static_cast<uint128_t>(rhs)`
-
-=== Subtraction
-
-[source, c++]
-----
-BOOST_INT128_HOST_DEVICE constexpr uint128_t operator-(uint128_t lhs, int128_t rhs);
-
-BOOST_INT128_HOST_DEVICE constexpr uint128_t operator-(int128_t lhs, uint128_t rhs);
-----
-
-Returns the same as `static_cast<uint128_t>(lhs) - static_cast<uint128_t>(rhs)`
-
-=== Multiplication
-
-[source, c++]
-----
-BOOST_INT128_HOST_DEVICE constexpr uint128_t operator*(uint128_t lhs, int128_t rhs);
-
-BOOST_INT128_HOST_DEVICE constexpr uint128_t operator*(int128_t lhs, uint128_t rhs);
-----
+//=====================================
+// Bitwise Operators
+//=====================================
 
-Returns the same as `static_cast<uint128_t>(lhs) * static_cast<uint128_t>(rhs)`
+BOOST_INT128_HOST_DEVICE constexpr uint128_t operator|(uint128_t lhs, int128_t rhs);
+BOOST_INT128_HOST_DEVICE constexpr uint128_t operator|(int128_t lhs, uint128_t rhs);
+BOOST_INT128_HOST_DEVICE constexpr uint128_t operator&(uint128_t lhs, int128_t rhs);
+BOOST_INT128_HOST_DEVICE constexpr uint128_t operator&(int128_t lhs, uint128_t rhs);
+BOOST_INT128_HOST_DEVICE constexpr uint128_t operator^(uint128_t lhs, int128_t rhs);
+BOOST_INT128_HOST_DEVICE constexpr uint128_t operator^(int128_t lhs, uint128_t rhs);
 
-=== Division
+//=====================================
+// Shift Operators
+//=====================================
+// Result type follows the LHS.
 
-[source, c++]
-----
-BOOST_INT128_HOST_DEVICE constexpr uint128_t operator/(uint128_t lhs, int128_t rhs);
+BOOST_INT128_HOST_DEVICE constexpr int128_t  operator<<(int128_t  lhs, uint128_t rhs);
+BOOST_INT128_HOST_DEVICE constexpr uint128_t operator<<(uint128_t lhs, int128_t  rhs);
+BOOST_INT128_HOST_DEVICE constexpr int128_t  operator>>(int128_t  lhs, uint128_t rhs);
+BOOST_INT128_HOST_DEVICE constexpr uint128_t operator>>(uint128_t lhs, int128_t  rhs);
 
-BOOST_INT128_HOST_DEVICE constexpr uint128_t operator/(int128_t lhs, uint128_t rhs);
+} // namespace int128
+} // namespace boost
 ----
 
-Returns the same as `static_cast<uint128_t>(lhs) / static_cast<uint128_t>(rhs)`
-
-=== Modulo
-
-[source, c++]
-----
-BOOST_INT128_HOST_DEVICE constexpr uint128_t operator%(uint128_t lhs, int128_t rhs);
+The cross-type arithmetic and bitwise operators return the same value as `static_cast<uint128_t>(lhs) op static_cast<uint128_t>(rhs)`.
+The comparison operators return the same value as the corresponding unsigned comparison, `static_cast<uint128_t>(lhs) op static_cast<uint128_t>(rhs)`.
 
-BOOST_INT128_HOST_DEVICE constexpr uint128_t operator%(int128_t lhs, uint128_t rhs);
-----
+=== Operations with built-in `pass:[__int128]` / `pass:[unsigned __int128]`
 
-Returns the same as `static_cast<uint128_t>(lhs) % static_cast<uint128_t>(rhs)`
+When the compiler provides 128-bit built-in integer types, all of the operators above are also available between a library type and the built-in type of opposite signedness (e.g. `uint128_t op pass:[__int128]`, `pass:[unsigned __int128] op int128_t`).
+The result type follows the same rules as the table above (`uint128_t` for arithmetic and bitwise; the LHS type for shifts; `bool` for comparisons), and the produced value is identical to what an all-built-in computation would yield.
diff --git a/doc/modules/ROOT/pages/overview.adoc b/doc/modules/ROOT/pages/overview.adoc
index bf37e313..eb785908 100644
--- a/doc/modules/ROOT/pages/overview.adoc
+++ b/doc/modules/ROOT/pages/overview.adoc
@@ -29,9 +29,9 @@ The types provided by the library also natively support running on GPUs using CU
 
 == Use Cases
 
-* **Networking** — IPv6 addresses are 128 bits wide; a single integer makes masking, comparison, and arithmetic straightforward.
-* **Unique identifiers** — UUIDs / GUIDs are 128-bit values commonly used as database keys and distributed system identifiers.
-* **Scientific and Financial computing** — Extended-range accumulators, large combinatorial values, and algorithms that need overflow-free 64×64 multiplication.
+* Networking: IPv6 addresses are 128 bits wide; a single integer makes masking, comparison, and arithmetic straightforward.
+* Unique identifiers: UUIDs / GUIDs are 128-bit values commonly used as database keys and distributed system identifiers.
+* Scientific and Financial computing: Extended-range accumulators, large combinatorial values, and algorithms that need overflow-free 64x64 multiplication.
 
 == Supported Compilers
 
diff --git a/doc/modules/ROOT/pages/u128_benchmarks.adoc b/doc/modules/ROOT/pages/u128_benchmarks.adoc
index 88f9a03b..ff2e0089 100644
--- a/doc/modules/ROOT/pages/u128_benchmarks.adoc
+++ b/doc/modules/ROOT/pages/u128_benchmarks.adoc
@@ -24,12 +24,12 @@ On MSVC platforms we use as reference `std::_Unsigned128` from the header `<__ms
 |===
 | Operation | `unsigned __int128` | `uint128_t` | `boost::mp::uint128_t` | `absl::uint128`
 
-| Comparisons | 785130 | 765065 | 1363581 | 766205
-| Addition | 90260 | 85758 | 89958 | 89255
-| Subtraction | 91143 | 91449 | 91224 | 89716
-| Multiplication | 111803 | 90069 | 113559 | 89660
-| Division | 1058435 | 901516 | 1040071 | 1044710
-| Modulo | 1003366 | 830830 | 1001701 | 978533
+| Comparisons | 2555576 | 2404372 | 3576079 | 2099066
+| Addition | 242772 | 241336 | 328546 | 301186
+| Subtraction | 372481 | 260064 | 287267 | 282908
+| Multiplication | 356366 | 312736 | 326328 | 277284
+| Division | 4481403 | 4498211 | 4602586 | 4290212
+| Modulo | 3965562 | 4506879 | 4487023 | 4247367
 |===
 
 ////
@@ -44,12 +44,12 @@ image::u128_graphs/linux/x64_relative_performance.png[x64 Relative Performance,
 |===
 | Operation | `unsigned __int128` | `uint128_t` | `boost::mp::uint128_t` | `absl::uint128`
 
-| Comparisons | 3427201 | 2078586 | 5026689 | 3753922
-| Addition | 194968 | 159662 | 587373 | 194070
-| Subtraction | 193067 | 161903 | 330052 | 140777
-| Multiplication | 263187 | 201333 | 972009 | 244420
-| Division | 2338258 | 2247175 | 2190856 | 2223032
-| Modulo | 2260200 | 2097760 | 2227961 | 2186750
+| Comparisons | 4077924 | 2335044 | 5360167 | 4184235
+| Addition | 137276 | 151553 | 184406 | 151276
+| Subtraction | 155498 | 133470 | 186793 | 149111
+| Multiplication | 218009 | 233811 | 324341 | 293431
+| Division | 2254781 | 1819447 | 2211225 | 2152312
+| Modulo | 2274294 | 1743274 | 2324356 | 2381378
 |===
 
 ////
@@ -64,12 +64,12 @@ image::u128_graphs/linux/ARM64_relative_performance.png[ARM64 Relative Performan
 |===
 | Operation | `unsigned __int128` | `uint128_t` | `boost::mp::uint128_t` | `absl::uint128`
 
-| Comparisons | 6803419 | 6280326 | 7965082 | 10515929
-| Addition | 546801 | 618774 | 621572 | 1744226
-| Subtraction | 590011 | 359100 | 691515 | 1527622
-| Multiplication | 891753 | 1192196 | 944289 | 1839038
-| Division | 3827125 | 3201674 | 3997037 | 4913142
-| Modulo | 4925696 | 3360251 | 5144403 | 5422155
+| Comparisons | 7293935 | 6198402 | 8182815 | 13820009
+| Addition | 636224 | 707436 | 611849 | 1530136
+| Subtraction | 572225 | 350035 | 595266 | 1211168
+| Multiplication | 1040424 | 741789 | 899957 | 1843000
+| Division | 4191637 | 2593472 | 4106663 | 4883553
+| Modulo | 4156643 | 2133029 | 4398856 | 5011442
 |===
 
 ////
@@ -107,12 +107,12 @@ NOTE: This platform has no hardware type so we compare relative to `boost::mp::u
 |===
 | Operation | `uint128_t` | `boost::mp::uint128_t`
 
-| Comparisons | 9000979 | 8722814
-| Addition | 898718 | 9912175
-| Subtraction  | 778881 | 9773677
-| Multiplication  | 1778273 | 8678420
-| Division  | 8496503 | 18133965
-| Modulo | 9081442 | 11257837
+| Comparisons | 9545542 | 8582001
+| Addition | 686648 | 7261481
+| Subtraction  | 618456 | 7968678
+| Multiplication  | 859253 | 6746697
+| Division  | 8271920 | 15931092
+| Modulo | 9932867 | 10242720
 |===
 
 ////
@@ -152,12 +152,12 @@ image::u128_graphs/linux/ARM32_relative_performance.png[ARM32 Relative Performan
 |===
 | Operation | `std::_Unsigned128` | `uint128_t` | `boost::mp::uint128_t`
 
-| Comparisons | 2060556 | 1921174 | 3009890
-| Addition | 261475 | 106545 | 2710279
-| Subtraction | 178724 | 124181 | 3059187
-| Multiplication | 146063 | 136115 | 3495634
-| Division | 1332838 | 1360295 | 4852899
-| Modulo | 1465138 | 1471169 | 3926336
+| Comparisons | 2055229 | 1714007 | 2490543
+| Addition | 152603 | 116444 | 2596037
+| Subtraction | 150576 | 116367 | 2901567
+| Multiplication | 131223 | 123694 | 3300491
+| Division | 1476783 | 1489919 | 4898388
+| Modulo | 1421066 | 1411521 | 3793762
 |===
 ////
 image::u128_graphs/windows/x64_benchmarks.png[x64 Benchmark Results, width=100%]
@@ -171,18 +171,18 @@ image::u128_graphs/windows/x64_relative_performance.png[x64 Relative Performance
 |===
 | Operation | `std::_Unsigned128` | `uint128_t` | `boost::mp::uint128_t`
 
-| Comparisons | 3424403 | 2062167 | 5026689
-| Addition | 123659 | 133084 | 587373
-| Subtraction | 171721 | 99453 | 330052
-| Multiplication | 329287 | 283443 | 972009
-| Division | 2044821 | 1825020 | 2190856
-| Modulo | 2176318 | 1897933 | 2227961
+| Comparisons | 945196 | 405891 | 1306884
+| Addition | 37403 | 40039 | 1351728
+| Subtraction | 33927 | 38887 | 1594845
+| Multiplication | 74384 | 46406 | 1281286
+| Division | 992963 | 790846 | 2035065
+| Modulo | 1087702 | 861121 | 1702396
 |===
 ////
-image::u128_graphs/windows/arm64_benchmarks.png[ARM64 Benchmark Results, width=100%]
+image::u128_graphs/windows/ARM64_benchmarks.png[ARM64 Benchmark Results, width=100%]
 ////
 
-image::u128_graphs/windows/arm64_relative_performance.png[ARM64 Relative Performance, width=100%]
+image::u128_graphs/windows/ARM64_relative_performance.png[ARM64 Relative Performance, width=100%]
 
 === x86_32
 
@@ -190,12 +190,12 @@ image::u128_graphs/windows/arm64_relative_performance.png[ARM64 Relative Perform
 |===
 | Operation | `std::_Unsigned128` | `uint128_t` | `boost::mp::uint128_t`
 
-| Comparisons | 4215438 | 3883846 | 2852442
-| Addition | 199945 | 208436 | 3242910
-| Subtraction | 1206168 | 210874 | 3851129
-| Multiplication | 2282869 | 2680359 | 5378001
-| Division | 5516964 | 4328917 | 6948267
-| Modulo | 4551146 | 4330152 | 6294325
+| Comparisons | 4806287 | 3940703 | 2624013
+| Addition | 254275 | 202421 | 2961566
+| Subtraction | 1322877 | 207351 | 3703369
+| Multiplication | 2327500 | 2312040 | 4375417
+| Division | 5596877 | 5629510 | 6756883
+| Modulo | 4616488 | 5696116 | 6409969
 |===
 ////
 image::u128_graphs/windows/x86_benchmarks.png[x86_32 Benchmark Results, width=100%]
@@ -212,12 +212,12 @@ image::u128_graphs/windows/x86_relative_performance.png[x86_32 Relative Performa
 |===
 | Operation | `unsigned __int128` | `uint128_t` | `boost::mp::uint128_t` | `absl::uint128`
 
-| Comparisons | 131902 | 133564 | 134182 | 132366
-| Addition | 20613 | 17912 | 40176 | 20178
-| Subtraction | 20484 | 18237 | 40311 | 20207
-| Multiplication | 20160 | 20580 | 43285 | 20049
-| Division | 686521 | 699201 | 945928 | 672398
-| Modulo | 777084 | 724648 | 953117 | 734229
+| Comparisons | 134425 | 134742 | 133107 | 135182
+| Addition | 20754 | 18389 | 20653 | 20929
+| Subtraction | 20552 | 18573 | 20590 | 20439
+| Multiplication | 20264 | 20150 | 20181 | 20228
+| Division | 685358 | 740877 | 913877 | 718985
+| Modulo | 733080 | 699666 | 951657 | 719500
 |===
 
 ////
@@ -225,23 +225,3 @@ image::u128_graphs/macos/ARM64_benchmarks.png[ARM64 Benchmark Results, width=100
 ////
 
 image::u128_graphs/macos/ARM64_relative_performance.png[ARM64 Relative Performance, width=100%]
-
-=== x86_64
-
-[cols="1,1,1,1"]
-|===
-| Operation | `unsigned __int128` | `uint128_t` | `boost::mp::uint128_t`
-
-| Comparisons | 688225 | 712352 | 689146
-| Addition | 104921 | 124992 | 137819
-| Subtraction | 129150 | 102302 | 153484
-| Multiplication | 120363 | 119652 | 164100
-| Division | 2333812 | 1981469 | 2784139
-| Modulo | 2621949 | 2219481 | 2736682
-|===
-
-////
-image::u128_graphs/macos/x64_benchmarks.png[x64 Benchmark Results, width=100%]
-////
-
-image::u128_graphs/macos/x64_relative_performance.png[x64 Relative Performance, width=100%]
diff --git a/doc/modules/ROOT/pages/uint128_t.adoc b/doc/modules/ROOT/pages/uint128_t.adoc
index 88802b2c..bd5e68c4 100644
--- a/doc/modules/ROOT/pages/uint128_t.adoc
+++ b/doc/modules/ROOT/pages/uint128_t.adoc
@@ -62,50 +62,17 @@ Otherwise, it is left up to the compiler to decide.
 [#u128_operator_behavior]
 == Operator Behavior
 
-For all the following operators use of signed overloads will error from `static_assert` by default.
-This is the library's way of enforcing the behavior of `-Wsign-conversion` and `-Wsign-comparison` in a library type.
-If you want to compare with signed types you must define `BOOST_INT128_ALLOW_SIGN_COMPARE`,
-and similarly you must define `BOOST_INT128_ALLOW_SIGN_CONVERSION` for other operations with mixed signedness.
-These will both cast the signed integer to an unsigned integer and then perform the operation.
+All comparison, arithmetic, bitwise, and shift operators are defined between `uint128_t` and any built-in integer type, signed or unsigned.
+Their behavior follows the C++ usual arithmetic conversions and is value-identical to the corresponding built-in `unsigned __int128` operation.
+Specifically:
 
-=== Sign Compare Behavior Deviation
+* The signed operand (whether a small `intN_t` or `__int128`) is converted to `uint128_t` (sign-extended for negatives), and the operation is performed unsigned with two's-complement wrap-around semantics. Arithmetic and bitwise operations return `uint128_t`.
+* For shift operators, the result type follows the LHS: `uint128_t << T` and `uint128_t >> T` always return `uint128_t`, regardless of `T`.
+* All comparison operators return `bool`, with the comparison performed on the operands after conversion to the common type.
 
-The behavior of `uint128_t` will deviate from the behavior of builtin unsigned integers with mixed sign comparisons in hopefully a less surprising way.
-A built-in sign compare looks something like
+For example, `uint128_t{5} > -1` evaluates to `false` because the signed `-1` converts to `UINT128_MAX`, which is greater than `5` under unsigned 128-bit arithmetic, exactly as `static_cast<unsigned __int128>(5) > -1` would.
 
-[source, c++]
-----
-template <typename UnsignedInteger, typename SignedInteger>
-constexpr bool operator>(const UnsignedInteger lhs, const SignedInteger rhs)
-{
-    return lhs > static_cast<UnsignedInteger>(rhs)
-}
-----
-
-If you were to call this function with arguments 5U and -5, you would get the surprising answer of `false`.
-Why?
-The two's complement representation of -5 has its most significant bit set (along with many other high bits).
-When cast to unsigned, this bit pattern is reinterpreted as a huge positive number, far greater than 5.
-
-With `uint128_t` we have checks even in this case like so:
-
-[source, c++]
-----
-template <typename SignedInteger>
-constexpr bool operator>(const uint128_t lhs, const SignedInteger rhs) noexcept
-{
-    if (rhs >= 0)
-    {
-        return lhs > static_cast<uint128_t>(rhs);
-    }
-    else
-    {
-        return true;
-    }
-}
-----
-
-This allows the library to return the correct answer even when mixing signs.
+See xref:mixed_type_ops.adoc[Mixed Type Operations] for the full set of cross-type signatures and detailed result-type rules.
 
 [#u128_constructors]
 == Constructors
@@ -126,7 +93,7 @@ struct uint128_t
     BOOST_INT128_HOST_DEVICE constexpr uint128_t& operator=(const uint128_t&) noexcept = default;
     BOOST_INT128_HOST_DEVICE constexpr uint128_t& operator=(uint128_t&&) noexcept = default;
 
-    BOOST_INT128_HOST_DEVICE explicit constexpr uint128_t(const int128_t& v) noexcept;
+    BOOST_INT128_HOST_DEVICE constexpr uint128_t(const int128_t& v) noexcept;
 
     // Construct from integral types
     BOOST_INT128_HOST_DEVICE constexpr uint128_t(const std::uint64_t hi, const std::uint64_t lo) noexcept;
@@ -146,14 +113,21 @@ struct uint128_t
     BOOST_INT128_HOST_DEVICE constexpr uint128_t(const detail::builtin_u128 v) noexcept;
 
     #endif // BOOST_INT128_HAS_INT128
+
+    // Construct from floating-point types
+    template <BOOST_INT128_FLOATING_POINT_CONCEPT Float>
+    BOOST_INT128_HOST_DEVICE constexpr uint128_t(Float f) noexcept;
 };
 
 } // namespace int128
 } // namespace boost
 ----
 
-All constructors are only defined for integers and are subject to mixed sign limitations discussed xref:uint128_t.adoc#u128_operator_behavior[above].
-None are marked `explicit` in order to match the implicit conversion behavior of the built-in integer types.
+None of the constructors are marked `explicit` in order to match the implicit conversion behavior of the built-in integer types.
+Integer constructors follow the mixed-sign conversion rules described xref:uint128_t.adoc#u128_operator_behavior[above].
+
+The floating-point constructor truncates toward zero, matching `static_cast<unsigned __int128>(f)`.
+Edge cases mirror libgcc's `__fixunsXfti`: NaN and negative values yield zero, and values `>= 2^128` (including positive infinity) saturate to `UINT128_MAX`.
 
 [#u128_conversions]
 == Conversions
@@ -168,35 +142,36 @@ struct uint128_t
     ...
 
     // Integer conversion operators
-    BOOST_INT128_HOST_DEVICE constexpr operator bool() const noexcept;
+    BOOST_INT128_HOST_DEVICE explicit constexpr operator bool() const noexcept;
 
     template <BOOST_INT128_SIGNED_INTEGER_CONCEPT SignedInteger>
-    BOOST_INT128_HOST_DEVICE explicit constexpr operator SignedInteger() const noexcept;
+    BOOST_INT128_HOST_DEVICE constexpr operator SignedInteger() const noexcept;
 
     template <BOOST_INT128_UNSIGNED_INTEGER_CONCEPT UnsignedInteger>
-    BOOST_INT128_HOST_DEVICE explicit constexpr operator UnsignedInteger() const noexcept;
+    BOOST_INT128_HOST_DEVICE constexpr operator UnsignedInteger() const noexcept;
 
     #ifdef BOOST_INT128_HAS_INT128
 
-    BOOST_INT128_HOST_DEVICE explicit constexpr operator detail::builtin_i128() const noexcept;
+    BOOST_INT128_HOST_DEVICE constexpr operator detail::builtin_i128() const noexcept;
 
-    BOOST_INT128_HOST_DEVICE explicit constexpr operator detail::builtin_u128() const noexcept;
+    BOOST_INT128_HOST_DEVICE constexpr operator detail::builtin_u128() const noexcept;
 
     #endif // BOOST_INT128_HAS_INT128
 
-    // Conversion to float
-    BOOST_INT128_HOST_DEVICE explicit constexpr operator float() const noexcept;
-    BOOST_INT128_HOST_DEVICE explicit constexpr operator double() const noexcept;
-    explicit constexpr operator long double() const noexcept; // There are no long doubles on device
+    // Conversion to floating point
+    BOOST_INT128_HOST_DEVICE constexpr operator float() const noexcept;
+    BOOST_INT128_HOST_DEVICE constexpr operator double() const noexcept;
+    constexpr operator long double() const noexcept; // There are no long doubles on device
 };
 
 } // namespace int128
 } // namespace boost
 ----
 
+All conversion operators except `operator bool()` are implicit to match the behavior of built-in integer types.
+`operator bool()` is explicit so that a `uint128_t` cannot accidentally bind to a `bool` parameter; contextual conversions (`if (x)`, `!x`, etc.) still work.
 Conversions to signed integers are subject to mixed sign limitations discussed xref:uint128_t.adoc#u128_operator_behavior[above].
-Conversion to `bool` is not marked explicit to match the behavior of built-in integer types.
-Conversions to floating point types may not be lossless depending on the value of the `uint128_t` at time of conversion,
+Conversions to floating-point types may not be lossless depending on the value of the `uint128_t` at time of conversion,
 as the number of digits it represents can exceed the precision of the significand in floating point types.
 
 [#u128_comparison_operators]
diff --git a/doc/modules/ROOT/pages/utilities.adoc b/doc/modules/ROOT/pages/utilities.adoc
new file mode 100644
index 00000000..959c87bb
--- /dev/null
+++ b/doc/modules/ROOT/pages/utilities.adoc
@@ -0,0 +1,201 @@
+////
+Copyright 2026 Matt Borland
+Distributed under the Boost Software License, Version 1.0.
+https://www.boost.org/LICENSE_1_0.txt
+////
+
+[#utilities]
+= Utilities
+:idprefix: utilities_
+
+The `<boost/int128/utilities.hpp>` header collects helpers that operate on the library types directly and would not fit naturally into the analogous STL-style headers.
+The numeric functions (`powm`, `ipow`, and `isqrt`) are overloaded specifically for `uint128_t` and `int128_t` rather than being template generalizations, which allows, for example, `powm` to dispatch to a fast path based on the shape of the modulus.
+
+[source, c++]
+----
+#include <boost/int128/utilities.hpp>
+----
+
+[#powm]
+== Modular Exponentiation
+
+Computes `(base ^ exp) mod m`, where `^` denotes exponentiation rather than the C++ XOR operator.
+The naive expression `pow(base, exp) % m` is unusable for 128-bit inputs because `base ^ exp` overflows almost immediately; `powm` performs the reduction inside the exponentiation loop and selects an algorithm based on the modulus:
+
+* If `has_single_bit(m)` is `true`, modular reduction collapses to a bitmask and no division is performed.
+* If the modulus fits in 64 bits (`m.high == 0`), the loop runs on 64-bit lanes. Each squaring is a single 64x64 -> 128 multiply followed by a 128-by-64 reduction.
+* Otherwise the modulus uses the full 128 bits, and `powm` uses a shift-and-add inner multiply so that no intermediate value ever exceeds 128 bits. This avoids forming the 256-bit product that a naive square-and-multiply implementation would require.
+
+[source, c++]
+----
+namespace boost {
+namespace int128 {
+
+BOOST_INT128_HOST_DEVICE constexpr uint128_t powm(uint128_t base, uint128_t exp, uint128_t m) noexcept;
+
+BOOST_INT128_HOST_DEVICE constexpr int128_t powm(int128_t base, int128_t exp, int128_t m) noexcept;
+
+} // namespace int128
+} // namespace boost
+----
+
+The signed overload returns the non-negative residue in the range `[0, m)`, matching the convention used by `pow(a, b, m)` in Python and most arbitrary-precision libraries.
+Negative bases are reduced before exponentiation; `(std::numeric_limits<int128_t>::min)()` is handled correctly even though its magnitude is not representable in `int128_t`.
+
+=== Special Cases
+
+[cols="1,1", options="header"]
+|===
+| Input | Result
+
+| `m == 0`
+| `0` (consistent with the library's convention for division by zero)
+
+| `m == 1`
+| `0`
+
+| `exp == 0`
+| `1` (including `powm(0, 0, m)`, which follows the conventional definition `0^0 == 1`)
+
+| `base == 0` and `exp > 0`
+| `0`
+
+| Signed overload with non-positive `m` or negative `exp`
+| `0` (modular exponentiation requires a positive modulus; a negative exponent would require a modular inverse, which this interface does not provide)
+|===
+
+[#ipow]
+== Integer Power
+
+Computes `base ^ exp` by exponentiation by squaring, with a non-negative 64-bit exponent.
+Unlike `powm` there is no modulus: the result is the true power reduced modulo `2^128`, which is the same rollover behavior as the library's `operator*`.
+`ipow(base, exp)` is therefore equivalent to starting from `1` and multiplying by `base` `exp` times.
+
+[source, c++]
+----
+namespace boost {
+namespace int128 {
+
+BOOST_INT128_HOST_DEVICE constexpr uint128_t ipow(uint128_t base, std::uint64_t exp) noexcept;
+
+BOOST_INT128_HOST_DEVICE constexpr int128_t ipow(int128_t base, std::uint64_t exp) noexcept;
+
+} // namespace int128
+} // namespace boost
+----
+
+The exponent is unsigned, so negative powers (which are not integers) cannot be requested.
+Because the result wraps on overflow rather than saturating or reporting an error, `ipow` is appropriate when rollover semantics are intended.
+
+=== Special Cases
+
+[cols="1,1", options="header"]
+|===
+| Input | Result
+
+| `exp == 0`
+| `1` (including `ipow(0, 0) == 1`, following the conventional definition `0^0 == 1`)
+
+| `base == 0` and `exp > 0`
+| `0`
+
+| `base ^ exp` exceeds 128 bits
+| The low 128 bits of the true power, matching the rollover of `operator*`
+|===
+
+[#isqrt]
+== Integer Square Root
+
+Computes the integer square root `floor(sqrt(n))`: the largest integer `r` whose square does not exceed `n`.
+The computation runs entirely in integer arithmetic using Newton's method, so it is exact (no floating-point rounding) and usable in a `constexpr` context.
+
+[source, c++]
+----
+namespace boost {
+namespace int128 {
+
+BOOST_INT128_HOST_DEVICE constexpr uint128_t isqrt(uint128_t n) noexcept;
+
+BOOST_INT128_HOST_DEVICE constexpr int128_t isqrt(int128_t n) noexcept;
+
+} // namespace int128
+} // namespace boost
+----
+
+=== Special Cases
+
+[cols="1,1", options="header"]
+|===
+| Input | Result
+
+| `n < 0` (signed overload)
+| `0` (a real square root does not exist)
+
+| `n >= 0`
+| `floor(sqrt(n))`, the largest `r` whose square does not exceed `n` (so `isqrt(0) == 0` and `isqrt(1) == 1`)
+|===
+
+[#checked]
+== Checked Arithmetic
+
+`ckd_add`, `ckd_sub`, and `ckd_mul` implement the checked integer arithmetic interface introduced by C23's `<stdckdint.h>`, but without requiring a C23 toolchain; they are available in C++14 and later.
+
+Each function computes `a + b`, `a - b`, or `a * b` respectively, as if both operands were represented in a signed integer type with infinite range, and then converts that mathematical result to the type pointed to by `result`.
+The function returns `false` when `*result` correctly represents the mathematical result of the operation.
+Otherwise it returns `true`, and `*result` is set to the mathematical result wrapped around (reduced modulo `2^N`) to the width `N` of `*result`.
+`*result` is always written, whether or not the operation overflowed.
+
+[source, c++]
+----
+namespace boost {
+namespace int128 {
+
+template <typename T1, typename T2, typename T3>
+BOOST_INT128_HOST_DEVICE constexpr bool ckd_add(T1* result, T2 a, T3 b) noexcept;
+
+template <typename T1, typename T2, typename T3>
+BOOST_INT128_HOST_DEVICE constexpr bool ckd_sub(T1* result, T2 a, T3 b) noexcept;
+
+template <typename T1, typename T2, typename T3>
+BOOST_INT128_HOST_DEVICE constexpr bool ckd_mul(T1* result, T2 a, T3 b) noexcept;
+
+} // namespace int128
+} // namespace boost
+----
+
+The three type parameters are independent: the result type and the two operand types may differ in width and signedness.
+The operation always uses the exact mathematical value of each operand, so a negative signed value added to an unsigned value, or a product that needs up to 256 bits internally, is evaluated correctly.
+
+Following the C23 rules, `T1`, `T2`, and `T3` may be any integer type other than `bool`, plain `char`, an enumerated type, or a bit-precise (`_BitInt`) type.
+In addition to the standard and extended integer types, the library's `uint128_t` and `int128_t` are accepted.
+
+The following example exercises all three operations, including the wrap-around, the `INT128_MIN * -1` case, and the mixed-type behavior described above.
+
+.This https://github.com/cppalliance/int128/blob/develop/examples/checked_arithmetic.cpp[example] demonstrates checked addition, subtraction, and multiplication following the C23 checked-integer contract
+====
+[source, c++]
+----
+include::example$checked_arithmetic.cpp[]
+----
+
+.Expected Output
+[listing]
+----
+=== Results That Fit ===
+ckd_add(20, 22): overflow=false, result=42
+
+=== Addition Overflow ===
+ckd_add(UINT128_MAX, 1): overflow=true, wrapped=0
+
+=== Subtraction Underflow ===
+ckd_sub(0, 1): overflow=true, wrapped=340282366920938463463374607431768211455
+
+=== Multiplication Overflow ===
+ckd_mul(INT128_MAX, 2): overflow=true, wrapped=-2
+ckd_mul(INT128_MIN, -1): overflow=true, wrapped=-170141183460469231731687303715884105728
+
+=== Mixed Types ===
+ckd_add<int64_t>(uint128_t{5}, int128_t{-3}): overflow=false, result=2
+ckd_mul<uint8_t>(20, 20): overflow=true, wrapped=144
+----
+====
diff --git a/doc/package-lock.json b/doc/package-lock.json
index d693fdc1..bd450228 100644
--- a/doc/package-lock.json
+++ b/doc/package-lock.json
@@ -9,19 +9,19 @@
         "@cppalliance/antora-downloads-extension": "^0.0.2"
       },
       "devDependencies": {
-        "@antora/cli": "3.1.14",
-        "@antora/site-generator": "3.1.14",
-        "antora": "3.1.14"
+        "@antora/cli": "3.1.15",
+        "@antora/site-generator": "3.1.15",
+        "antora": "3.1.15"
       }
     },
     "node_modules/@antora/asciidoc-loader": {
-      "version": "3.1.14",
-      "resolved": "https://registry.npmjs.org/@antora/asciidoc-loader/-/asciidoc-loader-3.1.14.tgz",
-      "integrity": "sha512-4xxisnoBFrlLNY6f3xZtyyfgm+tBLsqesTcEStfc8jtXUMYJ4b2DWIzo1vULmxvZ7yY5+Q7YqEvS5o6kIWAG0A==",
+      "version": "3.1.15",
+      "resolved": "https://registry.npmjs.org/@antora/asciidoc-loader/-/asciidoc-loader-3.1.15.tgz",
+      "integrity": "sha512-MVspbcMPmBgxZms0EjmyC9nlCAWBJfHYSwQCXRZn6T7OujRrLvJFPgz+EROz9XOqh4v76BeqgEuLsUJIZjH3cw==",
       "dev": true,
       "license": "MPL-2.0",
       "dependencies": {
-        "@antora/logger": "3.1.14",
+        "@antora/logger": "3.1.15",
         "@antora/user-require-helper": "~3.0",
         "@asciidoctor/core": "~2.2"
       },
@@ -29,15 +29,31 @@
         "node": ">=16.0.0"
       }
     },
+    "node_modules/@antora/asciidoc-loader/node_modules/@antora/logger": {
+      "version": "3.1.15",
+      "resolved": "https://registry.npmjs.org/@antora/logger/-/logger-3.1.15.tgz",
+      "integrity": "sha512-txA2Nv0QQ+hIt6arc3Rrh1BiUJNlucsyNF7ZA7LgtN+rzxyjcqQPpbQ2F3tU2lOV/LOQCEvMbmz9Dj0tY8oBuA==",
+      "dev": true,
+      "license": "MPL-2.0",
+      "dependencies": {
+        "@antora/expand-path-helper": "~3.0",
+        "pino": "~9.2",
+        "pino-pretty": "~11.2",
+        "sonic-boom": "~4.0"
+      },
+      "engines": {
+        "node": ">=16.0.0"
+      }
+    },
     "node_modules/@antora/cli": {
-      "version": "3.1.14",
-      "resolved": "https://registry.npmjs.org/@antora/cli/-/cli-3.1.14.tgz",
-      "integrity": "sha512-I6WcygMU2bFInjdURJjkYjo7K5M8B3lBB53v9OO0IcY0LhEY8Wa7IlZ7wVinf5qEjHvaYzRGTZVl6RsJtVt7Sw==",
+      "version": "3.1.15",
+      "resolved": "https://registry.npmjs.org/@antora/cli/-/cli-3.1.15.tgz",
+      "integrity": "sha512-76vLhkyzyFd49WJHsC04oPAZlK4qWbJaeZdS/pLUUVBqgaxeSeNqpTZ0pXo7f+5laGRO19fXyk1eDWGus9h8jA==",
       "dev": true,
       "license": "MPL-2.0",
       "dependencies": {
-        "@antora/logger": "3.1.14",
-        "@antora/playbook-builder": "3.1.14",
+        "@antora/logger": "3.1.15",
+        "@antora/playbook-builder": "3.1.15",
         "@antora/user-require-helper": "~3.0",
         "commander": "~11.1"
       },
@@ -48,31 +64,31 @@
         "node": ">=16.0.0"
       }
     },
-    "node_modules/@antora/cli/node_modules/@antora/playbook-builder": {
-      "version": "3.1.14",
-      "resolved": "https://registry.npmjs.org/@antora/playbook-builder/-/playbook-builder-3.1.14.tgz",
-      "integrity": "sha512-Ss2r7In00u/n9Da+JOxEqIE8NeRosf+f+agzH3Te09JV/mpgZKxEOE5V/VuP+TNNq4ww1eu5aOS8DiU2PYwj4Q==",
+    "node_modules/@antora/cli/node_modules/@antora/logger": {
+      "version": "3.1.15",
+      "resolved": "https://registry.npmjs.org/@antora/logger/-/logger-3.1.15.tgz",
+      "integrity": "sha512-txA2Nv0QQ+hIt6arc3Rrh1BiUJNlucsyNF7ZA7LgtN+rzxyjcqQPpbQ2F3tU2lOV/LOQCEvMbmz9Dj0tY8oBuA==",
       "dev": true,
       "license": "MPL-2.0",
       "dependencies": {
-        "@iarna/toml": "~2.2",
-        "convict": "~6.2",
-        "js-yaml": "~4.1",
-        "json5": "~2.2"
+        "@antora/expand-path-helper": "~3.0",
+        "pino": "~9.2",
+        "pino-pretty": "~11.2",
+        "sonic-boom": "~4.0"
       },
       "engines": {
         "node": ">=16.0.0"
       }
     },
     "node_modules/@antora/content-aggregator": {
-      "version": "3.1.14",
-      "resolved": "https://registry.npmjs.org/@antora/content-aggregator/-/content-aggregator-3.1.14.tgz",
-      "integrity": "sha512-FVuBgnrGPiktYqK1WHbGF8O8l4m5KHlkxoJumrbacgFo8SKuiRFEo31zalxrCUsv8QM3UBEgX+LdHrve/9CGLg==",
+      "version": "3.1.15",
+      "resolved": "https://registry.npmjs.org/@antora/content-aggregator/-/content-aggregator-3.1.15.tgz",
+      "integrity": "sha512-w84rJRKx+C4dsSbOHmjg78oM2T6xP9JRDsxpXjTmlh9T4zlNELCB6AD5s6Gztt3S6wlTiCNFLZw0v/HEVtuhzQ==",
       "dev": true,
       "license": "MPL-2.0",
       "dependencies": {
         "@antora/expand-path-helper": "~3.0",
-        "@antora/logger": "3.1.14",
+        "@antora/logger": "3.1.15",
         "@antora/user-require-helper": "~3.0",
         "braces": "~3.0",
         "cache-directory": "~2.0",
@@ -91,15 +107,31 @@
         "node": ">=16.0.0"
       }
     },
+    "node_modules/@antora/content-aggregator/node_modules/@antora/logger": {
+      "version": "3.1.15",
+      "resolved": "https://registry.npmjs.org/@antora/logger/-/logger-3.1.15.tgz",
+      "integrity": "sha512-txA2Nv0QQ+hIt6arc3Rrh1BiUJNlucsyNF7ZA7LgtN+rzxyjcqQPpbQ2F3tU2lOV/LOQCEvMbmz9Dj0tY8oBuA==",
+      "dev": true,
+      "license": "MPL-2.0",
+      "dependencies": {
+        "@antora/expand-path-helper": "~3.0",
+        "pino": "~9.2",
+        "pino-pretty": "~11.2",
+        "sonic-boom": "~4.0"
+      },
+      "engines": {
+        "node": ">=16.0.0"
+      }
+    },
     "node_modules/@antora/content-classifier": {
-      "version": "3.1.14",
-      "resolved": "https://registry.npmjs.org/@antora/content-classifier/-/content-classifier-3.1.14.tgz",
-      "integrity": "sha512-y8Fk+KU1lqD3aawOu3ZFK92YfOZ1k3YBJhLI9QIFM6Ck4STPnf7AwYbhfOtjODlwer5/OhFmfhjUB2hn7onGnA==",
+      "version": "3.1.15",
+      "resolved": "https://registry.npmjs.org/@antora/content-classifier/-/content-classifier-3.1.15.tgz",
+      "integrity": "sha512-m7INbqJcXBZU04HdBMqfL/NvezC3aaJGHHa0KfzeEKICg5FT22cVsEp6mYTgJVT4HqRy7JPCn9UeZvoa9x+MzQ==",
       "dev": true,
       "license": "MPL-2.0",
       "dependencies": {
-        "@antora/asciidoc-loader": "3.1.14",
-        "@antora/logger": "3.1.14",
+        "@antora/asciidoc-loader": "3.1.15",
+        "@antora/logger": "3.1.15",
         "mime-types": "~2.1",
         "vinyl": "~3.0"
       },
@@ -107,14 +139,30 @@
         "node": ">=16.0.0"
       }
     },
+    "node_modules/@antora/content-classifier/node_modules/@antora/logger": {
+      "version": "3.1.15",
+      "resolved": "https://registry.npmjs.org/@antora/logger/-/logger-3.1.15.tgz",
+      "integrity": "sha512-txA2Nv0QQ+hIt6arc3Rrh1BiUJNlucsyNF7ZA7LgtN+rzxyjcqQPpbQ2F3tU2lOV/LOQCEvMbmz9Dj0tY8oBuA==",
+      "dev": true,
+      "license": "MPL-2.0",
+      "dependencies": {
+        "@antora/expand-path-helper": "~3.0",
+        "pino": "~9.2",
+        "pino-pretty": "~11.2",
+        "sonic-boom": "~4.0"
+      },
+      "engines": {
+        "node": ">=16.0.0"
+      }
+    },
     "node_modules/@antora/document-converter": {
-      "version": "3.1.14",
-      "resolved": "https://registry.npmjs.org/@antora/document-converter/-/document-converter-3.1.14.tgz",
-      "integrity": "sha512-f6wFnL+489DI0ZDgoxYWzbxxWqPviRiJ56OHS1NixEfvJ7OpRBDPEbX1xnsIeiyFBgqX4+nY92MsCWKTa+Gf3w==",
+      "version": "3.1.15",
+      "resolved": "https://registry.npmjs.org/@antora/document-converter/-/document-converter-3.1.15.tgz",
+      "integrity": "sha512-7YZsc/iIJVTxvHKy0/eqPTuRIJupBd7Pq49gWvCxiDBR9Zj4esqMZIU3HaIbkBgqJLlQv5TLBeTjiQ1Qpe1hNw==",
       "dev": true,
       "license": "MPL-2.0",
       "dependencies": {
-        "@antora/asciidoc-loader": "3.1.14"
+        "@antora/asciidoc-loader": "3.1.15"
       },
       "engines": {
         "node": ">=16.0.0"
@@ -130,32 +178,16 @@
       }
     },
     "node_modules/@antora/file-publisher": {
-      "version": "3.1.14",
-      "resolved": "https://registry.npmjs.org/@antora/file-publisher/-/file-publisher-3.1.14.tgz",
-      "integrity": "sha512-fTaAnkyKSOlsxQM1TBFCAmiERA6Q67XleDCD2bMPVgfcENmo0Xfx59KwCHaA92IcRSmMftydlXHPaFxNh0UVsg==",
+      "version": "3.1.15",
+      "resolved": "https://registry.npmjs.org/@antora/file-publisher/-/file-publisher-3.1.15.tgz",
+      "integrity": "sha512-UfLYeyD6Na9YXespr3Xjy6OPIAGG6GTbdW3SNn8KxHl3hGeF/AtM3NaR+AJgyOmTb2r9lHzfODXeZevqX+vMww==",
       "dev": true,
       "license": "MPL-2.0",
       "dependencies": {
         "@antora/expand-path-helper": "~3.0",
         "@antora/user-require-helper": "~3.0",
         "vinyl": "~3.0",
-        "yazl": "~2.5"
-      },
-      "engines": {
-        "node": ">=16.0.0"
-      }
-    },
-    "node_modules/@antora/logger": {
-      "version": "3.1.14",
-      "resolved": "https://registry.npmjs.org/@antora/logger/-/logger-3.1.14.tgz",
-      "integrity": "sha512-kVEeGqZbXR903hPIm+BlN97fLdQ3LoUzE/BOPZ6vRp9m9Mmbnm67Kg7fSYkfTMLB0S2UWpAPFg22RdsU5ZoAzA==",
-      "dev": true,
-      "license": "MPL-2.0",
-      "dependencies": {
-        "@antora/expand-path-helper": "~3.0",
-        "pino": "~9.2",
-        "pino-pretty": "~11.2",
-        "sonic-boom": "~4.0"
+        "yazl": "~3.3"
       },
       "engines": {
         "node": ">=16.0.0"
@@ -179,26 +211,26 @@
       }
     },
     "node_modules/@antora/navigation-builder": {
-      "version": "3.1.14",
-      "resolved": "https://registry.npmjs.org/@antora/navigation-builder/-/navigation-builder-3.1.14.tgz",
-      "integrity": "sha512-/637YLGD7oUHGSfEfszXkk4ASfIhDAg5Xs9035J1dV07XYRlGqmtUb15rtapbcECpcQFjCyM5jFQYSNNvLrGcQ==",
+      "version": "3.1.15",
+      "resolved": "https://registry.npmjs.org/@antora/navigation-builder/-/navigation-builder-3.1.15.tgz",
+      "integrity": "sha512-XRs4pfNd88GCG9lDAJ1J+2vwvre7OzNRSgRmZhEhtgv0A13NEZq37X4YuaH46F2kj2BqiZT8UOuxqAqarLaxmg==",
       "dev": true,
       "license": "MPL-2.0",
       "dependencies": {
-        "@antora/asciidoc-loader": "3.1.14"
+        "@antora/asciidoc-loader": "3.1.15"
       },
       "engines": {
         "node": ">=16.0.0"
       }
     },
     "node_modules/@antora/page-composer": {
-      "version": "3.1.14",
-      "resolved": "https://registry.npmjs.org/@antora/page-composer/-/page-composer-3.1.14.tgz",
-      "integrity": "sha512-RfA+67TxCqUPrQbZdrfjgLpHh8MR2z2du7cyF3HGX4N6DpqEBvz81NHHl3rA3fj6BQZPQbGm2OYAMU6wzJ6Pog==",
+      "version": "3.1.15",
+      "resolved": "https://registry.npmjs.org/@antora/page-composer/-/page-composer-3.1.15.tgz",
+      "integrity": "sha512-koKlhWilA0E0QdCCOeLLzCFLNViBVjNe3aIlmJnMCwAK0p85wyhVfyolNqbNejAvdCZ87YhUQJ52Q4ikCgkQQg==",
       "dev": true,
       "license": "MPL-2.0",
       "dependencies": {
-        "@antora/logger": "3.1.14",
+        "@antora/logger": "3.1.15",
         "handlebars": "~4.7",
         "require-from-string": "~2.0"
       },
@@ -206,10 +238,42 @@
         "node": ">=16.0.0"
       }
     },
+    "node_modules/@antora/page-composer/node_modules/@antora/logger": {
+      "version": "3.1.15",
+      "resolved": "https://registry.npmjs.org/@antora/logger/-/logger-3.1.15.tgz",
+      "integrity": "sha512-txA2Nv0QQ+hIt6arc3Rrh1BiUJNlucsyNF7ZA7LgtN+rzxyjcqQPpbQ2F3tU2lOV/LOQCEvMbmz9Dj0tY8oBuA==",
+      "dev": true,
+      "license": "MPL-2.0",
+      "dependencies": {
+        "@antora/expand-path-helper": "~3.0",
+        "pino": "~9.2",
+        "pino-pretty": "~11.2",
+        "sonic-boom": "~4.0"
+      },
+      "engines": {
+        "node": ">=16.0.0"
+      }
+    },
+    "node_modules/@antora/playbook-builder": {
+      "version": "3.1.15",
+      "resolved": "https://registry.npmjs.org/@antora/playbook-builder/-/playbook-builder-3.1.15.tgz",
+      "integrity": "sha512-L2bE9FS0Th/d37DeDjz/dg9YXrkHM1xI0WQB3eiW3K/6d0Mc7eJhbmDMT0K8S+hgdaO0AT4kqDWzvx2866ZobA==",
+      "dev": true,
+      "license": "MPL-2.0",
+      "dependencies": {
+        "@iarna/toml": "~2.2",
+        "convict": "~6.2",
+        "js-yaml": "~4.1",
+        "json5": "~2.2"
+      },
+      "engines": {
+        "node": ">=16.0.0"
+      }
+    },
     "node_modules/@antora/redirect-producer": {
-      "version": "3.1.14",
-      "resolved": "https://registry.npmjs.org/@antora/redirect-producer/-/redirect-producer-3.1.14.tgz",
-      "integrity": "sha512-5koAwRk1cZrvE/qfOWKXqb3jtxrZbWA5EYHYGFEoato5By3cbC42blH4Bre9/48pjyS6znFpbZhYUBpT7PRhZA==",
+      "version": "3.1.15",
+      "resolved": "https://registry.npmjs.org/@antora/redirect-producer/-/redirect-producer-3.1.15.tgz",
+      "integrity": "sha512-mV0KnRiTr9oi0hPm7okT/Bw8kkz+PWYxp9AVSGqzhkoQgr3crxhgyS0NFCoViHwjaj4NfQrf++yxbhr6Igd7Dw==",
       "dev": true,
       "license": "MPL-2.0",
       "dependencies": {
@@ -220,55 +284,55 @@
       }
     },
     "node_modules/@antora/site-generator": {
-      "version": "3.1.14",
-      "resolved": "https://registry.npmjs.org/@antora/site-generator/-/site-generator-3.1.14.tgz",
-      "integrity": "sha512-hQIUVtM9+xwleYWc4fIRZmiKl2p+ItOJuUm2+Hkdh07BZsySxkMOxxCyZsvTn9rc+4R94CYqDQCYElwFwdB2WA==",
+      "version": "3.1.15",
+      "resolved": "https://registry.npmjs.org/@antora/site-generator/-/site-generator-3.1.15.tgz",
+      "integrity": "sha512-Z9YiRTqw3ssnLQxSHI3VZHEPLytQXt8cWC5C/o9vS+Fc560TSNs1UO4quPFIkg+bFUpxXtcAxqbp650aA4/N1g==",
       "dev": true,
       "license": "MPL-2.0",
       "dependencies": {
-        "@antora/asciidoc-loader": "3.1.14",
-        "@antora/content-aggregator": "3.1.14",
-        "@antora/content-classifier": "3.1.14",
-        "@antora/document-converter": "3.1.14",
-        "@antora/file-publisher": "3.1.14",
-        "@antora/logger": "3.1.14",
-        "@antora/navigation-builder": "3.1.14",
-        "@antora/page-composer": "3.1.14",
-        "@antora/playbook-builder": "3.1.14",
-        "@antora/redirect-producer": "3.1.14",
-        "@antora/site-mapper": "3.1.14",
-        "@antora/site-publisher": "3.1.14",
-        "@antora/ui-loader": "3.1.14",
+        "@antora/asciidoc-loader": "3.1.15",
+        "@antora/content-aggregator": "3.1.15",
+        "@antora/content-classifier": "3.1.15",
+        "@antora/document-converter": "3.1.15",
+        "@antora/file-publisher": "3.1.15",
+        "@antora/logger": "3.1.15",
+        "@antora/navigation-builder": "3.1.15",
+        "@antora/page-composer": "3.1.15",
+        "@antora/playbook-builder": "3.1.15",
+        "@antora/redirect-producer": "3.1.15",
+        "@antora/site-mapper": "3.1.15",
+        "@antora/site-publisher": "3.1.15",
+        "@antora/ui-loader": "3.1.15",
         "@antora/user-require-helper": "~3.0"
       },
       "engines": {
         "node": ">=16.0.0"
       }
     },
-    "node_modules/@antora/site-generator/node_modules/@antora/playbook-builder": {
-      "version": "3.1.14",
-      "resolved": "https://registry.npmjs.org/@antora/playbook-builder/-/playbook-builder-3.1.14.tgz",
-      "integrity": "sha512-Ss2r7In00u/n9Da+JOxEqIE8NeRosf+f+agzH3Te09JV/mpgZKxEOE5V/VuP+TNNq4ww1eu5aOS8DiU2PYwj4Q==",
+    "node_modules/@antora/site-generator/node_modules/@antora/logger": {
+      "version": "3.1.15",
+      "resolved": "https://registry.npmjs.org/@antora/logger/-/logger-3.1.15.tgz",
+      "integrity": "sha512-txA2Nv0QQ+hIt6arc3Rrh1BiUJNlucsyNF7ZA7LgtN+rzxyjcqQPpbQ2F3tU2lOV/LOQCEvMbmz9Dj0tY8oBuA==",
       "dev": true,
       "license": "MPL-2.0",
       "dependencies": {
-        "@iarna/toml": "~2.2",
-        "convict": "~6.2",
-        "js-yaml": "~4.1",
-        "json5": "~2.2"
+        "@antora/expand-path-helper": "~3.0",
+        "pino": "~9.2",
+        "pino-pretty": "~11.2",
+        "sonic-boom": "~4.0"
       },
       "engines": {
         "node": ">=16.0.0"
       }
     },
     "node_modules/@antora/site-mapper": {
-      "version": "3.1.14",
-      "resolved": "https://registry.npmjs.org/@antora/site-mapper/-/site-mapper-3.1.14.tgz",
-      "integrity": "sha512-3qbETtwadl+fWREjzrBUxPUorMcMiZ+hdkB1El9z7it9KzKh0Yp7Je0+2uTxGX+Lov9uik48dZJ9e/mr5PeaRQ==",
+      "version": "3.1.15",
+      "resolved": "https://registry.npmjs.org/@antora/site-mapper/-/site-mapper-3.1.15.tgz",
+      "integrity": "sha512-dV5zGeL1uMQ83sfkBWKg8vjaJQXz1Zh3ZSNQZYa64HnT4M7oSuQUzwDZdS0j6ZtTiYcNGulRi1ucz3uIoc9tqw==",
       "dev": true,
       "license": "MPL-2.0",
       "dependencies": {
-        "@antora/content-classifier": "3.1.14",
+        "@antora/content-classifier": "3.1.15",
         "vinyl": "~3.0"
       },
       "engines": {
@@ -276,22 +340,22 @@
       }
     },
     "node_modules/@antora/site-publisher": {
-      "version": "3.1.14",
-      "resolved": "https://registry.npmjs.org/@antora/site-publisher/-/site-publisher-3.1.14.tgz",
-      "integrity": "sha512-8apyEmgepUc7ms9CTEIPwN3tGtWwLqR6fbLMLs7hibqmOSR880Ut/4GRGb97sqcGQXSHdIyWK2oJKzRl1Akb6Q==",
+      "version": "3.1.15",
+      "resolved": "https://registry.npmjs.org/@antora/site-publisher/-/site-publisher-3.1.15.tgz",
+      "integrity": "sha512-pBuNxgA+H+WB5F4gA/gim5wKx/884QwlqOl0CpOY+6Fqn7h2ooHA7Tv6O47Tra1nZzNbIe3CQRoA5pnrX6zyRw==",
       "dev": true,
       "license": "MPL-2.0",
       "dependencies": {
-        "@antora/file-publisher": "3.1.14"
+        "@antora/file-publisher": "3.1.15"
       },
       "engines": {
         "node": ">=16.0.0"
       }
     },
     "node_modules/@antora/ui-loader": {
-      "version": "3.1.14",
-      "resolved": "https://registry.npmjs.org/@antora/ui-loader/-/ui-loader-3.1.14.tgz",
-      "integrity": "sha512-LVvTdKQOB44CmJ1JQDu8sJf6rrLZMxPAWWackdg2JtGyGHHpd80/MBcv4BSFk7//cJQ13Oqm/7JCbhD51KAFjg==",
+      "version": "3.1.15",
+      "resolved": "https://registry.npmjs.org/@antora/ui-loader/-/ui-loader-3.1.15.tgz",
+      "integrity": "sha512-zYjF5ID7t6mUEJuMyeNW/AYu1U0026wZj58H0siGuaT5YhVoxZrfvZYWV5iCMntpKduMqkwZW/l+D4MIqjhFYQ==",
       "dev": true,
       "license": "MPL-2.0",
       "dependencies": {
@@ -305,7 +369,7 @@
         "should-proxy": "~1.0",
         "simple-get": "~4.0",
         "vinyl": "~3.0",
-        "yauzl": "~3.1"
+        "yauzl": "~3.3"
       },
       "engines": {
         "node": ">=16.0.0"
@@ -324,14 +388,14 @@
       }
     },
     "node_modules/@asciidoctor/core": {
-      "version": "2.2.8",
-      "resolved": "https://registry.npmjs.org/@asciidoctor/core/-/core-2.2.8.tgz",
-      "integrity": "sha512-oozXk7ZO1RAd/KLFLkKOhqTcG4GO3CV44WwOFg2gMcCsqCUTarvMT7xERIoWW2WurKbB0/ce+98r01p8xPOlBw==",
+      "version": "2.2.9",
+      "resolved": "https://registry.npmjs.org/@asciidoctor/core/-/core-2.2.9.tgz",
+      "integrity": "sha512-tIPRHo1T2SFmAm+j77cDsj0RuaszP7xJxsaVTTAF5CwKyTbazw9TnIVlpIWM5yWfIWAWcAZy92RcnPgMJwny1w==",
       "dev": true,
       "license": "MIT",
       "dependencies": {
-        "asciidoctor-opal-runtime": "0.3.3",
-        "unxhr": "1.0.1"
+        "asciidoctor-opal-runtime": "0.3.4",
+        "unxhr": "~1.2"
       },
       "engines": {
         "node": ">=8.11",
@@ -402,14 +466,14 @@
       }
     },
     "node_modules/antora": {
-      "version": "3.1.14",
-      "resolved": "https://registry.npmjs.org/antora/-/antora-3.1.14.tgz",
-      "integrity": "sha512-z8HshJsT6pUfdDOUJ15RGtpOM9LmL6JXU5JBshoR/9/xd+1qLmKPkOnUv+HrijAk93r1imxZOdkmIqhLcv8B8A==",
+      "version": "3.1.15",
+      "resolved": "https://registry.npmjs.org/antora/-/antora-3.1.15.tgz",
+      "integrity": "sha512-nxz8n7sbKP58hhK13Mack+r3mELxFVYJm9fUBjefUhHWP3cjU/AX3LVVoFwssDOau3Gh+/id9xDNj8Vp5rbBNA==",
       "dev": true,
       "license": "MPL-2.0",
       "dependencies": {
-        "@antora/cli": "3.1.14",
-        "@antora/site-generator": "3.1.14"
+        "@antora/cli": "3.1.15",
+        "@antora/site-generator": "3.1.15"
       },
       "bin": {
         "antora": "bin/antora"
@@ -425,14 +489,14 @@
       "dev": true
     },
     "node_modules/asciidoctor-opal-runtime": {
-      "version": "0.3.3",
-      "resolved": "https://registry.npmjs.org/asciidoctor-opal-runtime/-/asciidoctor-opal-runtime-0.3.3.tgz",
-      "integrity": "sha512-/CEVNiOia8E5BMO9FLooo+Kv18K4+4JBFRJp8vUy/N5dMRAg+fRNV4HA+o6aoSC79jVU/aT5XvUpxSxSsTS8FQ==",
+      "version": "0.3.4",
+      "resolved": "https://registry.npmjs.org/asciidoctor-opal-runtime/-/asciidoctor-opal-runtime-0.3.4.tgz",
+      "integrity": "sha512-zqd6zn1LV+PZ69AP/kEbB00zuPHMIAJY3IX8+aZV+X1qOwatYvKGjsMmdMc5ApfhtkjZ4mYkqiTPJWnEnBiMJg==",
       "dev": true,
       "license": "MIT",
       "dependencies": {
-        "glob": "7.1.3",
-        "unxhr": "1.0.1"
+        "fast-glob": "~3.3",
+        "unxhr": "~1.2"
       },
       "engines": {
         "node": ">=8.11"
@@ -471,9 +535,9 @@
       }
     },
     "node_modules/b4a": {
-      "version": "1.7.3",
-      "resolved": "https://registry.npmjs.org/b4a/-/b4a-1.7.3.tgz",
-      "integrity": "sha512-5Q2mfq2WfGuFp3uS//0s6baOJLMoVduPYVeNmDYxu5OUA1/cBfvr2RIS7vi62LdNj/urk1hfmj867I3qt6uZ7Q==",
+      "version": "1.8.1",
+      "resolved": "https://registry.npmjs.org/b4a/-/b4a-1.8.1.tgz",
+      "integrity": "sha512-aiqre1Nr0B/6DgE2N5vwTc+2/oQZ4Wh1t4NznYY4E00y8LCt6NqdRv81so00oo27D8MVKTpUa/MwUUtBLXCoDw==",
       "dev": true,
       "license": "Apache-2.0",
       "peerDependencies": {
@@ -485,17 +549,10 @@
         }
       }
     },
-    "node_modules/balanced-match": {
-      "version": "1.0.2",
-      "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz",
-      "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==",
-      "dev": true,
-      "license": "MIT"
-    },
     "node_modules/bare-events": {
-      "version": "2.8.2",
-      "resolved": "https://registry.npmjs.org/bare-events/-/bare-events-2.8.2.tgz",
-      "integrity": "sha512-riJjyv1/mHLIPX4RwiK+oW9/4c3TEUeORHKefKAKnZ5kyslbN+HXowtbaVEqt4IMUB7OXlfixcs6gsFeo/jhiQ==",
+      "version": "2.8.3",
+      "resolved": "https://registry.npmjs.org/bare-events/-/bare-events-2.8.3.tgz",
+      "integrity": "sha512-HdUm8EMQBLaJvGUdidNNbqpA1kYkwNcb+MYxkxCLAPJGQzlv9J0C24h8V65Z4c5GLd/JEALDvpFCQgpLJqc0zw==",
       "dev": true,
       "license": "Apache-2.0",
       "peerDependencies": {
@@ -527,17 +584,6 @@
         }
       ]
     },
-    "node_modules/brace-expansion": {
-      "version": "1.1.12",
-      "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz",
-      "integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==",
-      "dev": true,
-      "license": "MIT",
-      "dependencies": {
-        "balanced-match": "^1.0.0",
-        "concat-map": "0.0.1"
-      }
-    },
     "node_modules/braces": {
       "version": "3.0.3",
       "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.3.tgz",
@@ -599,15 +645,15 @@
       }
     },
     "node_modules/call-bind": {
-      "version": "1.0.8",
-      "resolved": "https://registry.npmjs.org/call-bind/-/call-bind-1.0.8.tgz",
-      "integrity": "sha512-oKlSFMcMwpUg2ednkhQ454wfWiU/ul3CkJe/PEHcTKuiX6RpbehUiFMXu13HalGZxfUwCQzZG747YXBn1im9ww==",
+      "version": "1.0.9",
+      "resolved": "https://registry.npmjs.org/call-bind/-/call-bind-1.0.9.tgz",
+      "integrity": "sha512-a/hy+pNsFUTR+Iz8TCJvXudKVLAnz/DyeSUo10I5yvFDQJBFU2s9uqQpoSrJlroHUKoKqzg+epxyP9lqFdzfBQ==",
       "dev": true,
       "license": "MIT",
       "dependencies": {
-        "call-bind-apply-helpers": "^1.0.0",
-        "es-define-property": "^1.0.0",
-        "get-intrinsic": "^1.2.4",
+        "call-bind-apply-helpers": "^1.0.2",
+        "es-define-property": "^1.0.1",
+        "get-intrinsic": "^1.3.0",
         "set-function-length": "^1.2.2"
       },
       "engines": {
@@ -680,13 +726,6 @@
         "node": ">=16"
       }
     },
-    "node_modules/concat-map": {
-      "version": "0.0.1",
-      "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz",
-      "integrity": "sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==",
-      "dev": true,
-      "license": "MIT"
-    },
     "node_modules/convict": {
       "version": "6.2.5",
       "resolved": "https://registry.npmjs.org/convict/-/convict-6.2.5.tgz",
@@ -876,9 +915,9 @@
       }
     },
     "node_modules/es-object-atoms": {
-      "version": "1.1.1",
-      "resolved": "https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.1.1.tgz",
-      "integrity": "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==",
+      "version": "1.1.2",
+      "resolved": "https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.1.2.tgz",
+      "integrity": "sha512-HWcBoN6NileqtSydK2FqHbS/LoDd2pqrnQHLyJzBj4kOp/ky2MWMN694xOfkK8/SnUsW2DH7EfyVlydKCsm1Zw==",
       "dev": true,
       "license": "MIT",
       "dependencies": {
@@ -962,9 +1001,9 @@
       "dev": true
     },
     "node_modules/fastq": {
-      "version": "1.19.1",
-      "resolved": "https://registry.npmjs.org/fastq/-/fastq-1.19.1.tgz",
-      "integrity": "sha512-GwLTyxkCXjXbxqIhTsMI2Nui8huMPtnxg7krajPJAjnEG/iiOS7i+zCtWGZR9G0NBKbXKh6X9m9UIsYX/N6vvQ==",
+      "version": "1.20.1",
+      "resolved": "https://registry.npmjs.org/fastq/-/fastq-1.20.1.tgz",
+      "integrity": "sha512-GGToxJ/w1x32s/D2EKND7kTil4n8OVk/9mycTc4VDza13lOvpUZTGX3mFSCtV9ksdGBVzvsyAVLM6mHFThxXxw==",
       "dev": true,
       "license": "ISC",
       "dependencies": {
@@ -1000,13 +1039,6 @@
         "url": "https://github.com/sponsors/ljharb"
       }
     },
-    "node_modules/fs.realpath": {
-      "version": "1.0.0",
-      "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz",
-      "integrity": "sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==",
-      "dev": true,
-      "license": "ISC"
-    },
     "node_modules/function-bind": {
       "version": "1.1.2",
       "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz",
@@ -1056,25 +1088,6 @@
         "node": ">= 0.4"
       }
     },
-    "node_modules/glob": {
-      "version": "7.1.3",
-      "resolved": "https://registry.npmjs.org/glob/-/glob-7.1.3.tgz",
-      "integrity": "sha512-vcfuiIxogLV4DlGBHIUOwI0IbrJ8HWPc4MU7HzviGeNho/UJDfi6B5p3sHeWIQ0KGIU0Jpxi5ZHxemQfLkkAwQ==",
-      "deprecated": "Glob versions prior to v9 are no longer supported",
-      "dev": true,
-      "license": "ISC",
-      "dependencies": {
-        "fs.realpath": "^1.0.0",
-        "inflight": "^1.0.4",
-        "inherits": "2",
-        "minimatch": "^3.0.4",
-        "once": "^1.3.0",
-        "path-is-absolute": "^1.0.0"
-      },
-      "engines": {
-        "node": "*"
-      }
-    },
     "node_modules/glob-parent": {
       "version": "5.1.2",
       "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz",
@@ -1166,9 +1179,9 @@
       }
     },
     "node_modules/hasown": {
-      "version": "2.0.2",
-      "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz",
-      "integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==",
+      "version": "2.0.3",
+      "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.3.tgz",
+      "integrity": "sha512-ej4AhfhfL2Q2zpMmLo7U1Uv9+PyhIZpgQLGT1F9miIGmiCJIoCgSmczFdrc97mWT4kVY72KA+WnnhJ5pghSvSg==",
       "dev": true,
       "license": "MIT",
       "dependencies": {
@@ -1243,18 +1256,6 @@
         "node": ">= 4"
       }
     },
-    "node_modules/inflight": {
-      "version": "1.0.6",
-      "resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz",
-      "integrity": "sha512-k92I/b08q4wvFscXCLvqfsHCrjrF7yiXsQuIVvVE7N82W3+aqpzuUdBbfhWcy/FZR3/4IgflMgKLOsvPDrGCJA==",
-      "deprecated": "This module is not supported, and leaks memory. Do not use it. Check out lru-cache if you want a good and tested way to coalesce async requests by a key value, which is much more comprehensive and powerful.",
-      "dev": true,
-      "license": "ISC",
-      "dependencies": {
-        "once": "^1.3.0",
-        "wrappy": "1"
-      }
-    },
     "node_modules/inherits": {
       "version": "2.0.4",
       "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz",
@@ -1492,19 +1493,6 @@
         "url": "https://github.com/sponsors/sindresorhus"
       }
     },
-    "node_modules/minimatch": {
-      "version": "3.1.5",
-      "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.5.tgz",
-      "integrity": "sha512-VgjWUsnnT6n+NUk6eZq77zeFdpW2LWDzP6zFGrCbHXiYNul5Dzqk2HHQ5uFH2DNW5Xbp8+jVzaeNt94ssEEl4w==",
-      "dev": true,
-      "license": "ISC",
-      "dependencies": {
-        "brace-expansion": "^1.1.7"
-      },
-      "engines": {
-        "node": "*"
-      }
-    },
     "node_modules/minimist": {
       "version": "1.2.8",
       "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz",
@@ -1566,16 +1554,6 @@
       "dev": true,
       "license": "(MIT AND Zlib)"
     },
-    "node_modules/path-is-absolute": {
-      "version": "1.0.1",
-      "resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz",
-      "integrity": "sha512-AVbw3UJ2e9bq64vSaS9Am0fje1Pa8pbGqTTsmXfaIiMpnr5DlDhfJOuLj9Sf95ZPVDAUerDfEk88MPmPe7UCQg==",
-      "dev": true,
-      "license": "MIT",
-      "engines": {
-        "node": ">=0.10.0"
-      }
-    },
     "node_modules/pend": {
       "version": "1.2.0",
       "resolved": "https://registry.npmjs.org/pend/-/pend-1.2.0.tgz",
@@ -1731,6 +1709,7 @@
       "resolved": "https://registry.npmjs.org/progress/-/progress-2.0.3.tgz",
       "integrity": "sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA==",
       "dev": true,
+      "license": "MIT",
       "engines": {
         "node": ">=0.4.0"
       }
@@ -2015,9 +1994,9 @@
       }
     },
     "node_modules/streamx": {
-      "version": "2.23.0",
-      "resolved": "https://registry.npmjs.org/streamx/-/streamx-2.23.0.tgz",
-      "integrity": "sha512-kn+e44esVfn2Fa/O0CPFcex27fjIL6MkVae0Mm6q+E6f0hWv578YCERbv+4m02cjxvDsPKLnmxral/rR6lBMAg==",
+      "version": "2.26.0",
+      "resolved": "https://registry.npmjs.org/streamx/-/streamx-2.26.0.tgz",
+      "integrity": "sha512-VvNG1K72Po/xwJzxZFnZ++Tbrv4lwSptsbkFuzXCJAYZvCK5nnxsvXU6ajqkv7chyiI1Y0YXq2Jh8Iy8Y7NF/A==",
       "dev": true,
       "license": "MIT",
       "dependencies": {
@@ -2058,9 +2037,9 @@
       }
     },
     "node_modules/text-decoder": {
-      "version": "1.2.3",
-      "resolved": "https://registry.npmjs.org/text-decoder/-/text-decoder-1.2.3.tgz",
-      "integrity": "sha512-3/o9z3X0X0fTupwsYvR03pJ/DjWuqqrfwBgTQzdWDiQSm9KitAyz/9WqsT2JQW7KV2m+bC2ol/zqpW37NHxLaA==",
+      "version": "1.2.7",
+      "resolved": "https://registry.npmjs.org/text-decoder/-/text-decoder-1.2.7.tgz",
+      "integrity": "sha512-vlLytXkeP4xvEq2otHeJfSQIRyWxo/oZGEbXrtEEF9Hnmrdly59sUbzZ/QgyWuLYHctCHxFF4tRQZNQ9k60ExQ==",
       "dev": true,
       "license": "Apache-2.0",
       "dependencies": {
@@ -2134,9 +2113,9 @@
       }
     },
     "node_modules/unxhr": {
-      "version": "1.0.1",
-      "resolved": "https://registry.npmjs.org/unxhr/-/unxhr-1.0.1.tgz",
-      "integrity": "sha512-MAhukhVHyaLGDjyDYhy8gVjWJyhTECCdNsLwlMoGFoNJ3o79fpQhtQuzmAE4IxCMDwraF4cW8ZjpAV0m9CRQbg==",
+      "version": "1.2.0",
+      "resolved": "https://registry.npmjs.org/unxhr/-/unxhr-1.2.0.tgz",
+      "integrity": "sha512-6cGpm8NFXPD9QbSNx0cD2giy7teZ6xOkCUH3U89WKVkL9N9rBrWjlCwhR94Re18ZlAop4MOc3WU1M3Hv/bgpIw==",
       "dev": true,
       "license": "MIT",
       "engines": {
@@ -2167,14 +2146,14 @@
       }
     },
     "node_modules/which-typed-array": {
-      "version": "1.1.19",
-      "resolved": "https://registry.npmjs.org/which-typed-array/-/which-typed-array-1.1.19.tgz",
-      "integrity": "sha512-rEvr90Bck4WZt9HHFC4DJMsjvu7x+r6bImz0/BrbWb7A2djJ8hnZMrWnHo9F8ssv0OMErasDhftrfROTyqSDrw==",
+      "version": "1.1.21",
+      "resolved": "https://registry.npmjs.org/which-typed-array/-/which-typed-array-1.1.21.tgz",
+      "integrity": "sha512-zbRA8cVm6io/d5W8uIe2hblzN76/Wm3v/yiythQvr+dpBWeqhPSWIDNj4zOyHi4zKbMK6DN34Xsr9jPHJERAEw==",
       "dev": true,
       "license": "MIT",
       "dependencies": {
         "available-typed-arrays": "^1.0.7",
-        "call-bind": "^1.0.8",
+        "call-bind": "^1.0.9",
         "call-bound": "^1.0.4",
         "for-each": "^0.3.5",
         "get-proto": "^1.0.1",
@@ -2221,9 +2200,9 @@
       }
     },
     "node_modules/yauzl": {
-      "version": "3.1.3",
-      "resolved": "https://registry.npmjs.org/yauzl/-/yauzl-3.1.3.tgz",
-      "integrity": "sha512-JCCdmlJJWv7L0q/KylOekyRaUrdEoUxWkWVcgorosTROCFWiS9p2NNPE9Yb91ak7b1N5SxAZEliWpspbZccivw==",
+      "version": "3.3.1",
+      "resolved": "https://registry.npmjs.org/yauzl/-/yauzl-3.3.1.tgz",
+      "integrity": "sha512-RNPCUkiE/ZgO4w8i9U5yDQVHaFDdnzaFANElRvpJteCspvmv2VqrRb9lvS6odVD+jqI/zDsxAHJVsafpcheVQQ==",
       "dev": true,
       "license": "MIT",
       "dependencies": {
@@ -2235,13 +2214,23 @@
       }
     },
     "node_modules/yazl": {
-      "version": "2.5.1",
-      "resolved": "https://registry.npmjs.org/yazl/-/yazl-2.5.1.tgz",
-      "integrity": "sha512-phENi2PLiHnHb6QBVot+dJnaAZ0xosj7p3fWl+znIjBDlnMI2PsZCJZ306BPTFOaHf5qdDEI8x5qFrSOBN5vrw==",
+      "version": "3.3.1",
+      "resolved": "https://registry.npmjs.org/yazl/-/yazl-3.3.1.tgz",
+      "integrity": "sha512-BbETDVWG+VcMUle37k5Fqp//7SDOK2/1+T7X8TD96M3D9G8jK5VLUdQVdVjGi8im7FGkazX7kk5hkU8X4L5Bng==",
       "dev": true,
       "license": "MIT",
       "dependencies": {
-        "buffer-crc32": "~0.2.3"
+        "buffer-crc32": "^1.0.0"
+      }
+    },
+    "node_modules/yazl/node_modules/buffer-crc32": {
+      "version": "1.0.0",
+      "resolved": "https://registry.npmjs.org/buffer-crc32/-/buffer-crc32-1.0.0.tgz",
+      "integrity": "sha512-Db1SbgBS/fg/392AblrMJk97KggmvYhr4pB5ZIMTWtaivCPMWLkmb7m21cJvpvgK+J3nsU2CmmixNBZx4vFj/w==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">=8.0.0"
       }
     }
   }
diff --git a/doc/package.json b/doc/package.json
index 2d83cedc..a11179e1 100644
--- a/doc/package.json
+++ b/doc/package.json
@@ -1,8 +1,8 @@
 {
   "devDependencies": {
-    "@antora/cli": "3.1.14",
-    "@antora/site-generator": "3.1.14",
-    "antora": "3.1.14"
+    "@antora/cli": "3.1.15",
+    "@antora/site-generator": "3.1.15",
+    "antora": "3.1.15"
   },
   "dependencies": {
     "@cppalliance/antora-downloads-extension": "^0.0.2",
diff --git a/doc/plots.py b/doc/plots.py
index f1150102..5196c0ab 100644
--- a/doc/plots.py
+++ b/doc/plots.py
@@ -1,225 +1,356 @@
+#!/usr/bin/env python3
+"""Generate every Boost.Int128 benchmark graph and write it straight into the
+documentation images tree.
+
+Each entry produces two PNGs whose names match the image:: directives in the
+.adoc pages:
+
+    modules/ROOT/images/<sign>_graphs/<os>/<arch>_benchmarks.png
+    modules/ROOT/images/<sign>_graphs/<os>/<arch>_relative_performance.png
+
+To refresh a platform's numbers, edit its 'data' block here and re-run; the
+right file is overwritten automatically.
+"""
+
+import os
+
+import matplotlib
+matplotlib.use('Agg')  # headless backend: write files, never open a window
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
 
-"""
-# ARM64 MSVC
-data = {
-    'Operation': ['Comparisons', 'Addition', 'Subtraction', 'Multiplication', 'Division', 'Modulo'],
-    'std::_Unsigned128': [878929, 32788, 33627, 68120, 925583, 1104772],
-    'uint128_t': [259725, 33723, 36799, 35334, 1020148, 1143344],
-    'boost::mp::uint128_t': [1246502, 1437452, 1648131, 1459418, 2216648, 2089105]
-}
-"""
+# Benchmark operations, in the order each dataset lists its timings; also the x-axis tick labels.
+OPERATIONS = ['Comparisons', 'Addition', 'Subtraction', 'Multiplication', 'Division', 'Modulo']
 
-"""
-# x86 MSVC
-data = {
-    'Operation': ['Comparisons', 'Addition', 'Subtraction', 'Multiplication', 'Division', 'Modulo'],
-    'std::_Unsigned128': [4215438, 199945, 1206168, 2282869, 5516964, 4551146],
-    'uint128_t': [3883846, 208436, 210874, 2680359, 4328917, 4330152],
-    'boost::mp::uint128_t': [2852442, 3242910, 3851129, 5378001, 6948267, 6294325]
-}
-"""
+# One entry per published graph. Fields:
+#   sign  : 'u128' (unsigned) or 'i128' (signed)  -> selects the *_graphs folder
+#   os    : 'linux' | 'macos' | 'windows'         -> selects the sub-folder
+#   arch  : file stem used by the .adoc image:: directives (casing must match the
+#           image:: targets exactly; ARM stems are always upper-case, e.g. 'ARM64')
+#   title : chart heading prefix, e.g. 'GCC 14 - x64'
+#   data  : implementation -> timings in microseconds, in OPERATIONS order
+# The normalization baseline for the relative chart is detected automatically
+# (native type where present, otherwise Boost.Multiprecision).
+DATASETS = [
+    # ----------------------------- unsigned, Linux -----------------------------
+    {
+        'sign': 'u128', 'os': 'linux', 'arch': 'x64', 'title': 'GCC 16 - x64',
+        'data': {
+            'unsigned __int128': [2555576, 242772, 372481, 356366, 4481403, 3965562],
+            'uint128_t': [2404372, 241336, 260064, 312736, 4498211, 4506879],
+            'boost::mp::uint128_t': [3576079, 328546, 287267, 326328, 4602586, 4487023],
+            'absl::uint128': [2099066, 301186, 282908, 277284, 4290212, 4247367],
+        },
+    },
+    {
+        'sign': 'u128', 'os': 'linux', 'arch': 'ARM64', 'title': 'GCC 13 - ARM64',
+        'data': {
+            'unsigned __int128': [4077924, 137276, 155498, 218009, 2254781, 2274294],
+            'uint128_t': [2335044, 151553, 133470, 233811, 1819447, 1743274],
+            'boost::mp::uint128_t': [5360167, 184406, 186793, 324341, 2211225, 2324356],
+            'absl::uint128': [4184235, 151276, 149111, 293431, 2152312, 2381378],
+        },
+    },
+    {
+        'sign': 'u128', 'os': 'linux', 'arch': 's390x', 'title': 'GCC 13 - s390x',
+        'data': {
+            'unsigned __int128': [7293935, 636224, 572225, 1040424, 4191637, 4156643],
+            'uint128_t': [6198402, 707436, 350035, 741789, 2593472, 2133029],
+            'boost::mp::uint128_t': [8182815, 611849, 595266, 899957, 4106663, 4398856],
+            'absl::uint128': [13820009, 1530136, 1211168, 1843000, 4883553, 5011442],
+        },
+    },
+    {
+        'sign': 'u128', 'os': 'linux', 'arch': 'ppc64le', 'title': 'GCC 14 - ppc64le',
+        'data': {
+            'unsigned __int128': [5242604, 221776, 222894, 194494, 4821119, 4955570],
+            'uint128_t': [4450958, 193063, 175259, 192929, 4896360, 4273487],
+            'boost::mp::uint128_t': [5704848, 847504, 786659, 795187, 5344637, 5407877],
+        },
+    },
+    {
+        'sign': 'u128', 'os': 'linux', 'arch': 'x86', 'title': 'GCC 16 - x86_32',
+        'data': {
+            'uint128_t': [9545542, 686648, 618456, 859253, 8271920, 9932867],
+            'boost::mp::uint128_t': [8582001, 7261481, 7968678, 6746697, 15931092, 10242720],
+        },
+    },
+    {
+        'sign': 'u128', 'os': 'linux', 'arch': 'ARM32', 'title': 'GCC 14 - ARM32',
+        'data': {
+            'uint128_t': [5286033, 454715, 487190, 1471479, 19868087, 20332627],
+            'boost::mp::uint128_t': [4538707, 5543856, 6465126, 8246098, 32820805, 27238658],
+        },
+    },
+    # ---------------------------- unsigned, Windows ----------------------------
+    {
+        'sign': 'u128', 'os': 'windows', 'arch': 'x64', 'title': 'MSVC 14.5 - x64',
+        'data': {
+            'std::_Unsigned128': [2055229, 152603, 150576, 131223, 1476783, 1421066],
+            'uint128_t': [1714007, 116444, 116367, 123694, 1489919, 1411521],
+            'boost::mp::uint128_t': [2490543, 2596037, 2901567, 3300491, 4898388, 3793762],
+        },
+    },
+    {
+        'sign': 'u128', 'os': 'windows', 'arch': 'ARM64', 'title': 'MSVC 14.5 - ARM64',
+        'data': {
+            'std::_Unsigned128': [945196, 37403, 33927, 74384, 992963, 1087702],
+            'uint128_t': [405891, 40039, 38887, 46406, 790846, 861121],
+            'boost::mp::uint128_t': [1306884, 1351728, 1594845, 1281286, 2035065, 1702396],
+        },
+    },
+    {
+        'sign': 'u128', 'os': 'windows', 'arch': 'x86', 'title': 'MSVC 14.5 - x86_32',
+        'data': {
+            'std::_Unsigned128': [4806287, 254275, 1322877, 2327500, 5596877, 4616488],
+            'uint128_t': [3940703, 202421, 207351, 2312040, 5629510, 5696116],
+            'boost::mp::uint128_t': [2624013, 2961566, 3703369, 4375417, 6756883, 6409969],
+        },
+    },
+    # ----------------------------- unsigned, macOS -----------------------------
+    {
+        'sign': 'u128', 'os': 'macos', 'arch': 'ARM64', 'title': 'Clang 22 - ARM64',
+        'data': {
+            'unsigned __int128': [134425, 20754, 20552, 20264, 685358, 733080],
+            'uint128_t': [134742, 18389, 18573, 20150, 740877, 699666],
+            'boost::mp::uint128_t': [133107, 20653, 20590, 20181, 913877, 951657],
+            'absl::uint128': [135182, 20929, 20439, 20228, 718985, 719500],
+        },
+    },
+    # ------------------------------ signed, Linux ------------------------------
+    {
+        'sign': 'i128', 'os': 'linux', 'arch': 'x64', 'title': 'GCC 16 - x64',
+        'data': {
+            '`__int128`': [2232997, 244246, 220957, 433431, 4462364, 4803576],
+            'int128_t': [1970941, 292081, 196953, 321168, 4983165, 5257406],
+            'boost::mp::int128_t': [5478483, 650160, 1625774, 1595688, 4992819, 4988844],
+            'absl::int128': [1944089, 227720, 315611, 304069, 4986970, 5081814],
+        },
+    },
+    {
+        'sign': 'i128', 'os': 'linux', 'arch': 'ARM64', 'title': 'GCC 13 - ARM64',
+        'data': {
+            '`__int128`': [4115337, 194461, 151441, 334847, 2403064, 2235322],
+            'int128_t': [2169531, 196244, 97565, 232518, 1848517, 2159401],
+            'boost::mp::int128_t': [5914108, 543680, 1161677, 904461, 2493904, 2535438],
+            'absl::int128': [3725321, 195216, 192729, 240980, 2431322, 2321638],
+        },
+    },
+    {
+        'sign': 'i128', 'os': 'linux', 'arch': 's390x', 'title': 'GCC 13 - s390x',
+        'data': {
+            '`__int128`': [5171094, 625328, 667538, 904480, 3758577, 4218409],
+            'int128_t': [5069329, 785936, 356865, 729911, 2211087, 2330114],
+            'boost::mp::int128_t': [7457296, 1286888, 2555881, 1562062, 3095993, 3684163],
+            'absl::int128': [5343843, 670826, 741947, 786829, 3940264, 3849849],
+        },
+    },
+    {
+        'sign': 'i128', 'os': 'linux', 'arch': 'ppc64le', 'title': 'GCC 14 - ppc64le',
+        'data': {
+            '`__int128`': [4538094, 221708, 222629, 193315, 5607581, 5623562],
+            'int128_t': [5796198, 191841, 174273, 191785, 4669820, 4750314],
+            'boost::mp::int128_t': [13907323, 1177034, 1861166, 878393, 5616217, 5641480],
+        },
+    },
+    {
+        'sign': 'i128', 'os': 'linux', 'arch': 'x86', 'title': 'GCC 16 - x86_32',
+        'data': {
+            'int128_t': [10310201, 786499, 907051, 855780, 10254664, 10851123],
+            'boost::mp::int128_t': [14160000, 7379646, 7890190, 10826565, 24702433, 17348307],
+        },
+    },
+    {
+        'sign': 'i128', 'os': 'linux', 'arch': 'ARM32', 'title': 'GCC 14 - ARM32',
+        'data': {
+            'int128_t': [6149439, 457850, 488321, 1793874, 17738614, 18064819],
+            'boost::mp::int128_t': [6432579, 5669571, 7464427, 11410321, 38956122, 30144743],
+        },
+    },
+    # ----------------------------- signed, Windows -----------------------------
+    {
+        'sign': 'i128', 'os': 'windows', 'arch': 'x64', 'title': 'MSVC 14.5 - x64',
+        'data': {
+            'std::_Signed128': [1879694, 141120, 157649, 266740, 1387560, 1616895],
+            'int128_t': [1894168, 143877, 156965, 138754, 1752869, 1908345],
+            'boost::mp::int128_t': [5198915, 2846799, 3027203, 4080611, 6924406, 6397442],
+        },
+    },
+    {
+        'sign': 'i128', 'os': 'windows', 'arch': 'ARM64', 'title': 'MSVC 14.3 - ARM64',  # NOTE(review): other Windows entries say MSVC 14.5 - confirm
+        'data': {
+            'std::_Signed128': [991273, 34519, 34184, 126490, 1128432, 1427629],
+            'int128_t': [391918, 48953, 36278, 36781, 1107571, 1310481],
+            'boost::mp::int128_t': [2551137, 1243326, 1387708, 1632232, 2472959, 2926904],
+        },
+    },
+    {
+        'sign': 'i128', 'os': 'windows', 'arch': 'x86', 'title': 'MSVC 14.5 - x86_32',
+        'data': {
+            'std::_Signed128': [3832024, 232554, 1198377, 2921104, 7174578, 5528639],
+            'int128_t': [3823023, 197092, 145823, 428925, 7189000, 7028725],
+            'boost::mp::int128_t': [5568151, 3488510, 4011233, 6219931, 9748526, 9205892],
+        },
+    },
+    # ------------------------------ signed, macOS ------------------------------
+    {
+        'sign': 'i128', 'os': 'macos', 'arch': 'ARM64', 'title': 'Clang 22 - ARM64',
+        'data': {
+            '`__int128`': [135259, 20399, 20156, 20654, 668004, 664356],
+            'int128_t': [134127, 18575, 18983, 20860, 659823, 662282],
+            'boost::mp::int128_t': [340037, 169575, 168041, 69443, 976248, 1026487],
+            'absl::int128': [136845, 20429, 20875, 20651, 660963, 665474],
+        },
+    },
+]
 
-"""
-# x64 MSVC
-data = {
-    'Operation': ['Comparisons', 'Addition', 'Subtraction', 'Multiplication', 'Division', 'Modulo'],
-    'std::_Unsigned128': [2060556, 261475, 178724, 146063, 1332838, 1465138],
-    'uint128_t': [1921174, 106545, 124181, 136115, 1360295, 1471169],
-    'boost::mp::uint128_t': [3009890, 2710279, 3059187, 3495634, 4852899, 3926336]
-}
-"""
-"""
-# ARM64 macOS
-data = {
-    'Operation': ['Comparisons', 'Addition', 'Subtraction', 'Multiplication', 'Division', 'Modulo'],
-    'unsigned __int128': [131902, 20613, 20484, 20160, 686521, 777084],
-    'uint128_t': [133564, 17912, 18237, 20580, 699201, 724648],
-    'boost::mp::uint128_t': [134182, 40176, 40311, 43285, 945928, 953117],
-    'absl::uint128': [132366, 20178, 20207, 20049, 672398, 734229]
-}
-"""
-"""
-# x64 macOS
-data = {
-    'Operation': ['Comparisons', 'Addition', 'Subtraction', 'Multiplication', 'Division', 'Modulo'],
-    'unsigned __int128': [688225, 104921, 129150, 120363, 2333812, 2621949],
-    'uint128_t': [712352, 124992, 102302, 119652, 1981469, 2219481],
-    'boost::mp::uint128_t': [689146, 137819, 153484, 164100, 2784139, 2736682]
-}
-"""
+# Bar face colors keyed by speed rank within one operation (rank 1 == fastest).
+RANK_COLORS = {1: '#90EE90', 2: '#FFFFE0'}  # 1: light green, 2: light yellow
+SLOW_COLOR = '#FFB6C1'  # light pink/red, used for every rank not listed in RANK_COLORS
 
-# Linux x64
-data = {
-    'Operation': ['Comparisons', 'Addition', 'Subtraction', 'Multiplication', 'Division', 'Modulo'],
-    'unsigned __int128': [785130, 90260, 91143, 111803, 1058435, 1003366],
-    'uint128_t': [765065, 85758, 91449, 90069, 901516, 830830],
-    'boost::mp::uint128_t': [1363581, 89958, 91224, 113559, 1040071, 1001701],
-    'absl::uint128': [766205, 89255, 89716, 89660, 1044710, 978533]
+# Baseline candidates per 'sign', highest priority first; the first name a dataset contains wins.
+BASELINE_PRIORITY = {
+    'u128': ['unsigned __int128', 'std::_Unsigned128', 'boost::mp::uint128_t'],
+    'i128': ['`__int128`', '__int128', 'std::_Signed128', 'boost::mp::int128_t'],  # backticked and plain spellings both listed
 }
 
-"""
-# Linux ARM64
-data = {
-    'Operation': ['Comparisons', 'Addition', 'Subtraction', 'Multiplication', 'Division', 'Modulo'],
-    'unsigned __int128': [3427201, 194968, 193067, 263187, 2338258, 2260200],
-    'uint128_t': [2078586, 159662, 161903, 201333, 2247175, 2097760],
-    'boost::mp::uint128_t': [5026689, 587373, 330052, 972009, 2190856, 2227961],
-    'absl::uint128': [3753922, 194070, 140777, 244420, 2223032, 2186750]
-}
-"""
-"""
-# Linux S390x
-data = {
-    'Operation': ['Comparisons', 'Addition', 'Subtraction', 'Multiplication', 'Division', 'Modulo'],
-    'unsigned __int128': [6803419, 546801, 590011, 891753, 3827125, 4925696],
-    'uint128_t': [6280326, 618774, 359100, 1192196, 3201674, 3360251],
-    'boost::mp::uint128_t': [7965082, 621572, 691515, 944289, 3997037, 5144403],
-    'absl::uint128': [10515929, 1744226, 1527622, 1839038, 4913142, 5422155]
-}
-"""
-"""
-# Linux ppc64le
-data = {
-    'Operation': ['Comparisons', 'Addition', 'Subtraction', 'Multiplication', 'Division', 'Modulo'],
-    'unsigned __int128': [5242604, 221776, 222894, 194494, 4821119, 4955570],
-    'uint128_t': [4450958, 193063, 175259, 192929, 4896360, 4273487],
-    'boost::mp::uint128_t': [5704848, 847504, 786659, 795187, 5344637, 5407877]
-}
-"""
-df = pd.DataFrame(data)
-
-# Function to determine color based on ranking
-def get_colors_by_rank(row):
-    values = row[1:].values
-    ranks = np.argsort(values) + 1
-    colors = []
-    for rank in ranks:
-        if rank == 1:
-            colors.append('#90EE90')  # Light Green - Best
-        elif rank == 2:
-            colors.append('#FFFFE0')  # Light Yellow - Second
-        else:
-            colors.append('#FFB6C1')  # Light Red - Third
-    return colors
-
-# Create figure with subplots
-fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 10))
-
-# Prepare data
-operations = df['Operation']
-x = np.arange(len(operations))
-width = 0.25
-
-# Get implementation names
-implementations = df.columns[1:]
-
-# Plot 1: Regular scale bar chart with color coding
-for i, (idx, row) in enumerate(df.iterrows()):
-    colors = get_colors_by_rank(row)
-    for j, impl in enumerate(implementations):
-        ax1.bar(x[i] + (j-1)*width, row[impl], width,
-                color=colors[j], edgecolor='black', linewidth=0.5,
-                label=impl if i == 0 else "")
-
-ax1.set_xlabel('Operations', fontsize=12)
-ax1.set_ylabel('Time (nanoseconds)', fontsize=12)
-ax1.set_title('GCC 14 - x64 Benchmark Results', fontsize=14, fontweight='bold')
-ax1.set_xticks(x)
-ax1.set_xticklabels(operations, rotation=45, ha='right')
-ax1.legend(loc='upper left')
-ax1.grid(axis='y', alpha=0.3)
-
-# Add value labels on bars
-for i, (idx, row) in enumerate(df.iterrows()):
-    for j, impl in enumerate(implementations):
-        ax1.text(x[i] + (j-1)*width, row[impl], f'{row[impl]:,}',
-                 ha='center', va='bottom', fontsize=8, rotation=90)
-
-# Plot 2: Log scale for better visualization
-for i, impl in enumerate(implementations):
-    bars = ax2.bar(x + (i-1)*width, df[impl], width, label=impl, edgecolor='black', linewidth=0.5)
-
-    # Color each bar based on its rank within operation
-    for j, bar in enumerate(bars):
-        operation_values = df.iloc[j, 1:].values
-        rank = np.argsort(operation_values).tolist().index(i) + 1
-        if rank == 1:
-            bar.set_facecolor('#90EE90')
-        elif rank == 2:
-            bar.set_facecolor('#FFFFE0')
-        else:
-            bar.set_facecolor('#FFB6C1')
-
-ax2.set_xlabel('Operations', fontsize=12)
-ax2.set_ylabel('Time (nanoseconds) - Log Scale', fontsize=12)
-ax2.set_title('GCC 14 - x64 Benchmark Results (Log Scale)', fontsize=14, fontweight='bold')
-ax2.set_yscale('log')
-ax2.set_xticks(x)
-ax2.set_xticklabels(operations, rotation=45, ha='right')
-ax2.legend(loc='upper left')
-ax2.grid(axis='y', alpha=0.3, which='both')
-
-plt.tight_layout()
-plt.savefig('x64_benchmarks.png', dpi=300, bbox_inches='tight')
-plt.show()
-
-# Create a normalized performance chart
-fig3, ax3 = plt.subplots(figsize=(10, 6))
-
-# Normalize data relative to unsigned __int128
-normalized_df = df.copy()
-for col in implementations:
-    normalized_df[col] = df[col] / df['unsigned __int128']
-
-# Plot normalized bars
-for i, impl in enumerate(implementations):
-    if impl == 'unsigned __int128':
-        continue  # Skip since it's always 1.0
-    bars = ax3.bar(x + (i-1.5)*width, normalized_df[impl], width,
-                   label=impl, edgecolor='black', linewidth=0.5)
-
-    # Add value labels
-    for j, bar in enumerate(bars):
-        height = bar.get_height()
-        ax3.text(bar.get_x() + bar.get_width()/2., height,
-                 f'{height:.2f}x', ha='center', va='bottom', fontsize=9)
-
-# Add reference line at 1.0
-ax3.axhline(y=1.0, color='red', linestyle='--', alpha=0.5, label='unsigned __int128 baseline')
-
-ax3.set_xlabel('Operations', fontsize=12)
-ax3.set_ylabel('Relative Performance (vs unsigned __int128)', fontsize=12)
-ax3.set_title('Relative Performance Comparison - x64', fontsize=14, fontweight='bold')
-ax3.set_xticks(x)
-ax3.set_xticklabels(operations, rotation=45, ha='right')
-ax3.legend()
-ax3.grid(axis='y', alpha=0.3)
-
-# Add interpretation text
-ax3.text(0.02, 0.98, 'Lower is better', transform=ax3.transAxes,
-         fontsize=10, verticalalignment='top', style='italic')
-
-plt.tight_layout()
-plt.savefig('x64_relative_performance.png', dpi=300, bbox_inches='tight')
-plt.show()
-
-# Generate summary statistics
-print("\nPerformance Summary (x64):")
-print("-" * 50)
-for impl in implementations:
-    if impl == 'unsigned __int128':
-        continue
-    avg_ratio = normalized_df[impl].mean()
-    print(f"{impl}: {avg_ratio:.2f}x average vs unsigned __int128")
-
-print("\nBest performer by operation:")
-print("-" * 50)
-for i, op in enumerate(operations):
-    row_data = df.iloc[i, 1:]
-    best_impl = row_data.idxmin()
-    best_time = row_data.min()
-    print(f"{op}: {best_impl} ({best_time:,} ns)")
 
+# Choose the normalization baseline: the highest-priority name from
+# BASELINE_PRIORITY[sign] that this dataset has, else its first implementation.
+def detect_baseline(impls, sign):
+    preferred = [name for name in BASELINE_PRIORITY[sign] if name in impls]
+    # The conditional keeps impls[0] untouched unless no candidate matched.
+    return preferred[0] if preferred else impls[0]
+
+
+# Rank each value of one operation row by speed: 1 marks the smallest (fastest) time.
+def speed_ranks(values):
+    return 1 + np.argsort(values).argsort()
+
+
+def color_for_rank(rank):  # ranks listed in RANK_COLORS get their own color, all others SLOW_COLOR
+    return RANK_COLORS[rank] if rank in RANK_COLORS else SLOW_COLOR
+
+
+# Build the two-panel benchmark figure (linear + log) and save it to `path`.
+def save_benchmark_chart(df, impls, x, width, title, path):
+    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 10))
+
+    # Center each group on its tick for any implementation count, not only three.
+    count = max(len(impls), 1)
+    bar_w = min(width, 0.8 / count)  # `width` is a cap; groups span <= 0.8 (ticks assumed 1 apart)
+    offsets = [(j - (count - 1) / 2) * bar_w for j in range(count)]
+    rank_by_op = [speed_ranks(df.iloc[r][impls].values) for r in range(len(df))]  # 1 == fastest
+
+    # Linear panel: one rank-colored, value-labelled bar per implementation.
+    for op_idx, (_, row) in enumerate(df.iterrows()):
+        for j, impl in enumerate(impls):
+            ax1.bar(x[op_idx] + offsets[j], row[impl], bar_w,
+                    color=color_for_rank(rank_by_op[op_idx][j]), edgecolor='black',
+                    linewidth=0.5, label=impl if op_idx == 0 else "")
+            ax1.text(x[op_idx] + offsets[j], row[impl], f'{row[impl]:,}',
+                     ha='center', va='bottom', fontsize=8, rotation=90)
+
+    ax1.set_xlabel('Operations', fontsize=12)
+    ax1.set_ylabel('Time (microseconds)', fontsize=12)
+    ax1.set_title(f'{title} Benchmark Results', fontsize=14, fontweight='bold')
+    ax1.set_xticks(x)
+    ax1.set_xticklabels(OPERATIONS, rotation=45, ha='right')
+    ax1.legend(loc='upper left')
+    ax1.grid(axis='y', alpha=0.3)
+
+    # Log panel: same bars; drawn per implementation for the legend, then rank-colored.
+    for j, impl in enumerate(impls):
+        bars = ax2.bar(x + offsets[j], df[impl], bar_w, label=impl,
+                       edgecolor='black', linewidth=0.5)
+        for op_idx, bar in enumerate(bars):
+            bar.set_facecolor(color_for_rank(rank_by_op[op_idx][j]))
+
+    ax2.set_xlabel('Operations', fontsize=12)
+    ax2.set_ylabel('Time (microseconds) - Log Scale', fontsize=12)
+    ax2.set_title(f'{title} Benchmark Results (Log Scale)', fontsize=14, fontweight='bold')
+    ax2.set_yscale('log')
+    ax2.set_xticks(x)
+    ax2.set_xticklabels(OPERATIONS, rotation=45, ha='right')
+    ax2.legend(loc='upper left')
+    ax2.grid(axis='y', alpha=0.3, which='both')
+
+    fig.tight_layout()
+    fig.savefig(path, dpi=300, bbox_inches='tight')
+    plt.close(fig)
+
+
+# Build the relative-performance figure (each timing divided by the baseline's,
+# so lower is better) and save it to `path`.
+def save_relative_chart(df, impls, x, width, title, baseline, path):
+    fig, ax = plt.subplots(figsize=(10, 6))
+    normalized = df[impls].div(df[baseline], axis=0)
+    # The baseline is never drawn, so center the group over the bars that are
+    # rather than offsetting by each bar's index within `impls`.
+    plotted = [impl for impl in impls if impl != baseline]
+    for k, impl in enumerate(plotted):
+        bars = ax.bar(x + (k - (len(plotted) - 1) / 2) * width, normalized[impl],
+                      width, label=impl, edgecolor='black', linewidth=0.5)
+        for bar in bars:
+            height = bar.get_height()
+            ax.text(bar.get_x() + bar.get_width() / 2., height,
+                    f'{height:.2f}x', ha='center', va='bottom', fontsize=9)
+
+    # Headroom above the tallest bar so its value label and the "lower is better"
+    # note never collide; with nothing plotted, fall back to the 1.0 baseline line.
+    tallest = float(normalized[plotted].to_numpy().max()) if plotted else 1.0
+    ax.set_ylim(top=max(tallest * 1.20, 1.12))
+
+    ax.axhline(y=1.0, color='red', linestyle='--', alpha=0.5,
+               label=f'{baseline} baseline')
+    ax.set_xlabel('Operations', fontsize=12)
+    ax.set_ylabel(f'Relative Performance (vs {baseline})', fontsize=12)
+    ax.set_title(f'Relative Performance Comparison - {title}', fontsize=14, fontweight='bold')
+    ax.set_xticks(x)
+    ax.set_xticklabels(OPERATIONS, rotation=45, ha='right')
+    ax.legend()
+    ax.grid(axis='y', alpha=0.3)
+    ax.text(0.02, 0.98, 'Lower is better', transform=ax.transAxes,
+            fontsize=10, verticalalignment='top', style='italic')
+
+    fig.tight_layout()
+    fig.savefig(path, dpi=300, bbox_inches='tight')
+    plt.close(fig)
+
+
+# Render both charts for one DATASETS entry; returns (benchmark_path, relative_path).
+def render_dataset(entry, images_dir):
+    timings = entry['data']
+    impls = list(timings)
+    frame = pd.DataFrame({'Operation': OPERATIONS, **timings})
+    reference = detect_baseline(impls, entry['sign'])
+    ticks, bar_width = np.arange(len(OPERATIONS)), 0.25
+
+    # Output folder is <images_dir>/<sign>_graphs/<os>/, created if missing.
+    folder = os.path.join(images_dir, f"{entry['sign']}_graphs", entry['os'])
+    os.makedirs(folder, exist_ok=True)
+    bench_path = os.path.join(folder, f"{entry['arch']}_benchmarks.png")
+    rel_path = os.path.join(folder, f"{entry['arch']}_relative_performance.png")
+
+    save_benchmark_chart(frame, impls, ticks, bar_width, entry['title'], bench_path)
+    save_relative_chart(frame, impls, ticks, bar_width, entry['title'], reference, rel_path)
+    return bench_path, rel_path
+
+
+def main():
+    # Images land under modules/ROOT/images beside this script, regardless of cwd.
+    here = os.path.dirname(os.path.abspath(__file__))
+    images_dir = os.path.join(here, 'modules', 'ROOT', 'images')
+
+    total = 0
+    for entry in DATASETS:
+        for produced in render_dataset(entry, images_dir):
+            print(f"wrote {os.path.relpath(produced, here)}")
+            total += 1
+
+    print(f"\nDone: {total} images across {len(DATASETS)} platforms.")
+
+
+if __name__ == '__main__':
+    main()
diff --git a/doc/plots_32bit.py b/doc/plots_32bit.py
deleted file mode 100644
index 4e98830e..00000000
--- a/doc/plots_32bit.py
+++ /dev/null
@@ -1,153 +0,0 @@
-import matplotlib.pyplot as plt
-import numpy as np
-import pandas as pd
-
-"""
-# Linux x86_32
-data = {
-    'Operation': ['Comparisons', 'Addition', 'Subtraction', 'Multiplication', 'Division', 'Modulo'],
-    'uint128_t': [9000979, 898718, 778881, 1778273, 8496503, 9081442],
-    'boost::mp::uint128_t': [8722814, 9912175, 9773677, 8678420, 18133965, 11257837]
-}
-"""
-# Linux ARM32
-data = {
-    'Operation': ['Comparisons', 'Addition', 'Subtraction', 'Multiplication', 'Division', 'Modulo'],
-    'uint128_t': [5286033, 454715, 487190, 1471479, 19868087, 20332627],
-    'boost::mp::uint128_t': [4538707, 5543856, 6465126, 8246098, 32820805, 27238658]
-}
-
-df = pd.DataFrame(data)
-
-# Function to determine color based on ranking
-def get_colors_by_rank(row):
-    values = row[1:].values
-    ranks = np.argsort(values) + 1
-    colors = []
-    for rank in ranks:
-        if rank == 1:
-            colors.append('#90EE90')  # Light Green - Best
-        elif rank == 2:
-            colors.append('#FFFFE0')  # Light Yellow - Second
-        else:
-            colors.append('#FFB6C1')  # Light Red - Third
-    return colors
-
-# Create figure with subplots
-fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 10))
-
-# Prepare data
-operations = df['Operation']
-x = np.arange(len(operations))
-width = 0.25
-
-# Get implementation names
-implementations = df.columns[1:]
-
-# Plot 1: Regular scale bar chart with color coding
-for i, (idx, row) in enumerate(df.iterrows()):
-    colors = get_colors_by_rank(row)
-    for j, impl in enumerate(implementations):
-        ax1.bar(x[i] + (j-1)*width, row[impl], width,
-                color=colors[j], edgecolor='black', linewidth=0.5,
-                label=impl if i == 0 else "")
-
-ax1.set_xlabel('Operations', fontsize=12)
-ax1.set_ylabel('Time (nanoseconds)', fontsize=12)
-ax1.set_title('GCC 14 - ARM32 Benchmark Results', fontsize=14, fontweight='bold')
-ax1.set_xticks(x)
-ax1.set_xticklabels(operations, rotation=45, ha='right')
-ax1.legend(loc='upper left')
-ax1.grid(axis='y', alpha=0.3)
-
-# Add value labels on bars
-for i, (idx, row) in enumerate(df.iterrows()):
-    for j, impl in enumerate(implementations):
-        ax1.text(x[i] + (j-1)*width, row[impl], f'{row[impl]:,}',
-                 ha='center', va='bottom', fontsize=8, rotation=90)
-
-# Plot 2: Log scale for better visualization
-for i, impl in enumerate(implementations):
-    bars = ax2.bar(x + (i-1)*width, df[impl], width, label=impl, edgecolor='black', linewidth=0.5)
-
-    # Color each bar based on its rank within operation
-    for j, bar in enumerate(bars):
-        operation_values = df.iloc[j, 1:].values
-        rank = np.argsort(operation_values).tolist().index(i) + 1
-        if rank == 1:
-            bar.set_facecolor('#90EE90')
-        elif rank == 2:
-            bar.set_facecolor('#FFFFE0')
-        else:
-            bar.set_facecolor('#FFB6C1')
-
-ax2.set_xlabel('Operations', fontsize=12)
-ax2.set_ylabel('Time (nanoseconds) - Log Scale', fontsize=12)
-ax2.set_title('GCC 14 - ARM32 Benchmark Results (Log Scale)', fontsize=14, fontweight='bold')
-ax2.set_yscale('log')
-ax2.set_xticks(x)
-ax2.set_xticklabels(operations, rotation=45, ha='right')
-ax2.legend(loc='upper left')
-ax2.grid(axis='y', alpha=0.3, which='both')
-
-plt.tight_layout()
-plt.savefig('ARM32_benchmarks.png', dpi=300, bbox_inches='tight')
-plt.show()
-
-# Create a normalized performance chart
-fig3, ax3 = plt.subplots(figsize=(10, 6))
-
-# Normalize data relative to boost::mp::uint128_t
-normalized_df = df.copy()
-for col in implementations:
-    normalized_df[col] = df[col] / df['boost::mp::uint128_t']
-
-# Plot normalized bars
-for i, impl in enumerate(implementations):
-    if impl == 'boost::mp::uint128_t':
-        continue  # Skip since it's always 1.0
-    bars = ax3.bar(x + (i-1.5)*width, normalized_df[impl], width,
-                   label=impl, edgecolor='black', linewidth=0.5)
-
-    # Add value labels
-    for j, bar in enumerate(bars):
-        height = bar.get_height()
-        ax3.text(bar.get_x() + bar.get_width()/2., height,
-                 f'{height:.2f}x', ha='center', va='bottom', fontsize=9)
-
-# Add reference line at 1.0
-ax3.axhline(y=1.0, color='red', linestyle='--', alpha=0.5, label='boost::mp::uint128_t baseline')
-
-ax3.set_xlabel('Operations', fontsize=12)
-ax3.set_ylabel('Relative Performance (vs boost::mp::uint128_t)', fontsize=12)
-ax3.set_title('Relative Performance Comparison - ARM3232', fontsize=14, fontweight='bold')
-ax3.set_xticks(x)
-ax3.set_xticklabels(operations, rotation=45, ha='right')
-ax3.legend()
-ax3.grid(axis='y', alpha=0.3)
-
-# Add interpretation text
-ax3.text(0.02, 0.98, 'Lower is better', transform=ax3.transAxes,
-         fontsize=10, verticalalignment='top', style='italic')
-
-plt.tight_layout()
-plt.savefig('ARM32_relative_performance.png', dpi=300, bbox_inches='tight')
-plt.show()
-
-# Generate summary statistics
-print("\nPerformance Summary (x64):")
-print("-" * 50)
-for impl in implementations:
-    if impl == 'unsigned __int128':
-        continue
-    avg_ratio = normalized_df[impl].mean()
-    print(f"{impl}: {avg_ratio:.2f}x average vs unsigned __int128")
-
-print("\nBest performer by operation:")
-print("-" * 50)
-for i, op in enumerate(operations):
-    row_data = df.iloc[i, 1:]
-    best_impl = row_data.idxmin()
-    best_time = row_data.min()
-    print(f"{op}: {best_impl} ({best_time:,} ns)")
-
diff --git a/doc/signed_plots.py b/doc/signed_plots.py
deleted file mode 100644
index d34e14fb..00000000
--- a/doc/signed_plots.py
+++ /dev/null
@@ -1,223 +0,0 @@
-import matplotlib.pyplot as plt
-import numpy as np
-import pandas as pd
-"""
-# Linux x64
-data = {
-    'Operation': ['Comparisons', 'Addition', 'Subtraction', 'Multiplication', 'Division', 'Modulo'],
-    '`__int128`': [879535, 92165, 92514, 115727, 1234838, 1193529],
-    'int128_t': [748787, 92441, 88390, 90897, 1352795, 1256687],
-    'boost::mp::int128_t': [2210502, 283528, 668953, 312723, 1320695, 1287093],
-    'absl::int128': [741269, 92323, 90394, 89558, 1200439, 1293439],
-}
-"""
-"""
-# Linux ARM64
-data = {
-    'Operation': ['Comparisons', 'Addition', 'Subtraction', 'Multiplication', 'Division', 'Modulo'],
-    '`__int128`': [3495621, 191514, 131380, 236071, 2412757, 2501357],
-    'int128_t': [2279914, 133319, 193984, 234594, 2434752, 2171828],
-    'boost::mp::int128_t': [5910287, 566860, 1066509, 864526, 2508755, 2571959],
-    'absl::int128': [3749448, 164848, 193467, 237676, 2484139, 2158203]
-}
-
-"""
-
-# Linux s390x
-data = {
-    'Operation': ['Comparisons', 'Addition', 'Subtraction', 'Multiplication', 'Division', 'Modulo'],
-    '`__int128`': [14099505, 1151086, 1223119, 1904542, 8768877, 8661233],
-    'int128_t': [12588237, 1374984, 753561, 2060986, 7080113, 7180650],
-    'boost::mp::int128_t': [21074294, 3303931, 4224613, 3034387, 7306287, 8801605],
-    'absl::int128': [13972778, 1195725, 1295929, 1733150, 7968543, 8175497],
-}
-
-"""
-# Linux ppc64le
-data = {
-    'Operation': ['Comparisons', 'Addition', 'Subtraction', 'Multiplication', 'Division', 'Modulo'],
-    '`__int128`': [4538094, 221708, 222629, 193315, 5607581, 5623562],
-    'int128_t': [5796198, 191841, 174273, 191785, 4669820, 4750314],
-    'boost::mp::int128_t': [13907323, 1177034, 1861166, 878393, 5616217, 5641480]
-}
-"""
-"""
-# macos x64
-data = {
-    'Operation': ['Comparisons', 'Addition', 'Subtraction', 'Multiplication', 'Division', 'Modulo'],
-    '`__int128`': [1628142, 224648, 212849, 432205, 3924951, 3042060],
-    'int128_t': [1748005, 180393, 131062, 407829, 2409106, 2423738],
-    'boost::mp::int128_t': [4318109, 925013, 1876834, 651209, 3719183, 4443402]
-}
-"""
-"""
-# macos ARM
-data = {
-    'Operation': ['Comparisons', 'Addition', 'Subtraction', 'Multiplication', 'Division', 'Modulo'],
-    '`__int128`': [133275, 20203, 20203, 21496, 662767, 719179],
-    'int128_t': [131953, 17797, 17832, 20202, 682891, 692509],
-    'boost::mp::int128_t': [340555, 169909, 172497, 78269, 969277, 1026090],
-    'absl::int128': [133509, 20208, 22199, 20364, 663602, 717897]
-}
-"""
-"""
-# MSVC 14.3 - ARM64
-data = {
-    'Operation': ['Comparisons', 'Addition', 'Subtraction', 'Multiplication', 'Division', 'Modulo'],
-    'std::_Signed128': [911829, 33233, 33411, 117586, 1127267, 1287100],
-    'int128_t': [368104, 34001, 34130, 56324, 1500725, 1548073],
-    'boost::mp::int128_t': [2376802, 121700, 1488822, 1564799, 2808293, 2997474]
-}
-"""
-"""
-# MSVC 14.3 - x64
-data = {
-    'Operation': ['Comparisons', 'Addition', 'Subtraction', 'Multiplication', 'Division', 'Modulo'],
-    'std::_Signed128': [2186843, 186771, 193660, 402806, 1612873, 1637135],
-    'int128_t': [2142626, 184598, 186335, 117413, 2369701, 2218627],
-    'boost::mp::int128_t': [4854983, 2645943, 2925784, 3887479, 6437280, 6236026]
-}
-"""
-"""
-# MSVC 14.3 - x86
-data = {
-    'Operation': ['Comparisons', 'Addition', 'Subtraction', 'Multiplication', 'Division', 'Modulo'],
-    'std::_Signed128': [3495288, 199936, 1089785, 2653505, 7267297, 5779771],
-    'int128_t': [3520950, 212116, 210354, 2595285, 5516460, 5842785],
-    'boost::mp::int128_t': [7877534, 3477656, 4108539, 7030276, 10229356, 9069360]
-}
-"""
-df = pd.DataFrame(data)
-
-# Function to determine color based on ranking
-def get_colors_by_rank(row):
-    values = row[1:].values
-    ranks = np.argsort(values) + 1
-    colors = []
-    for rank in ranks:
-        if rank == 1:
-            colors.append('#90EE90')  # Light Green - Best
-        elif rank == 2:
-            colors.append('#FFFFE0')  # Light Yellow - Second
-        else:
-            colors.append('#FFB6C1')  # Light Red - Third
-    return colors
-
-# Create figure with subplots
-fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 10))
-
-# Prepare data
-operations = df['Operation']
-x = np.arange(len(operations))
-width = 0.25
-
-# Get implementation names
-implementations = df.columns[1:]
-
-# Plot 1: Regular scale bar chart with color coding
-for i, (idx, row) in enumerate(df.iterrows()):
-    colors = get_colors_by_rank(row)
-    for j, impl in enumerate(implementations):
-        ax1.bar(x[i] + (j-1)*width, row[impl], width,
-                color=colors[j], edgecolor='black', linewidth=0.5,
-                label=impl if i == 0 else "")
-
-ax1.set_xlabel('Operations', fontsize=12)
-ax1.set_ylabel('Time (nanoseconds)', fontsize=12)
-ax1.set_title('GCC 13 - s390x Benchmark Results', fontsize=14, fontweight='bold')
-ax1.set_xticks(x)
-ax1.set_xticklabels(operations, rotation=45, ha='right')
-ax1.legend(loc='upper left')
-ax1.grid(axis='y', alpha=0.3)
-
-# Add value labels on bars
-for i, (idx, row) in enumerate(df.iterrows()):
-    for j, impl in enumerate(implementations):
-        ax1.text(x[i] + (j-1)*width, row[impl], f'{row[impl]:,}',
-                 ha='center', va='bottom', fontsize=8, rotation=90)
-
-# Plot 2: Log scale for better visualization
-for i, impl in enumerate(implementations):
-    bars = ax2.bar(x + (i-1)*width, df[impl], width, label=impl, edgecolor='black', linewidth=0.5)
-
-    # Color each bar based on its rank within operation
-    for j, bar in enumerate(bars):
-        operation_values = df.iloc[j, 1:].values
-        rank = np.argsort(operation_values).tolist().index(i) + 1
-        if rank == 1:
-            bar.set_facecolor('#90EE90')
-        elif rank == 2:
-            bar.set_facecolor('#FFFFE0')
-        else:
-            bar.set_facecolor('#FFB6C1')
-
-ax2.set_xlabel('Operations', fontsize=12)
-ax2.set_ylabel('Time (nanoseconds) - Log Scale', fontsize=12)
-ax2.set_title('GCC 13 - s390x Benchmark Results (Log Scale)', fontsize=14, fontweight='bold')
-ax2.set_yscale('log')
-ax2.set_xticks(x)
-ax2.set_xticklabels(operations, rotation=45, ha='right')
-ax2.legend(loc='upper left')
-ax2.grid(axis='y', alpha=0.3, which='both')
-
-plt.tight_layout()
-plt.savefig('s390x_benchmarks.png', dpi=300, bbox_inches='tight')
-plt.show()
-
-# Create a normalized performance chart
-fig3, ax3 = plt.subplots(figsize=(10, 6))
-
-# Normalize data relative to __int128
-normalized_df = df.copy()
-for col in implementations:
-    normalized_df[col] = df[col] / df['`__int128`']
-
-# Plot normalized bars
-for i, impl in enumerate(implementations):
-    if impl == '`__int128`':
-        continue  # Skip since it's always 1.0
-    bars = ax3.bar(x + (i-1.5)*width, normalized_df[impl], width,
-                   label=impl, edgecolor='black', linewidth=0.5)
-
-    # Add value labels
-    for j, bar in enumerate(bars):
-        height = bar.get_height()
-        ax3.text(bar.get_x() + bar.get_width()/2., height,
-                 f'{height:.2f}x', ha='center', va='bottom', fontsize=9)
-
-# Add reference line at 1.0
-ax3.axhline(y=1.0, color='red', linestyle='--', alpha=0.5, label='`__int128` baseline')
-
-ax3.set_xlabel('Operations', fontsize=12)
-ax3.set_ylabel('Relative Performance (vs __int128)', fontsize=12)
-ax3.set_title('Relative Performance Comparison - s390x', fontsize=14, fontweight='bold')
-ax3.set_xticks(x)
-ax3.set_xticklabels(operations, rotation=45, ha='right')
-ax3.legend()
-ax3.grid(axis='y', alpha=0.3)
-
-# Add interpretation text
-ax3.text(0.02, 0.98, 'Lower is better', transform=ax3.transAxes,
-         fontsize=10, verticalalignment='top', style='italic')
-
-plt.tight_layout()
-plt.savefig('s390x_relative_performance.png', dpi=300, bbox_inches='tight')
-plt.show()
-
-# Generate summary statistics
-print("\nPerformance Summary (x64):")
-print("-" * 50)
-for impl in implementations:
-    if impl == '__int128':
-        continue
-    avg_ratio = normalized_df[impl].mean()
-    print(f"{impl}: {avg_ratio:.2f}x average vs __int128")
-
-print("\nBest performer by operation:")
-print("-" * 50)
-for i, op in enumerate(operations):
-    row_data = df.iloc[i, 1:]
-    best_impl = row_data.idxmin()
-    best_time = row_data.min()
-    print(f"{op}: {best_impl} ({best_time:,} ns)")
-
diff --git a/doc/signed_plots_32bit.py b/doc/signed_plots_32bit.py
deleted file mode 100644
index 73dbbdd2..00000000
--- a/doc/signed_plots_32bit.py
+++ /dev/null
@@ -1,154 +0,0 @@
-import matplotlib.pyplot as plt
-import numpy as np
-import pandas as pd
-
-"""
-# Linux x86_32
-data = {
-    'Operation': ['Comparisons', 'Addition', 'Subtraction', 'Multiplication', 'Division', 'Modulo'],
-    'int128_t': [9530060, 785799, 778881, 1148024, 10337258, 10438037],
-    'boost::mp::int128_t': [12168353, 7777469, 8214089, 9477355, 22857709, 14848256]
-}
-"""
-
-# Linux ARM32
-data = {
-    'Operation': ['Comparisons', 'Addition', 'Subtraction', 'Multiplication', 'Division', 'Modulo'],
-    'int128_t': [6149439, 457850, 488321, 1793874, 17738614, 18064819],
-    'boost::mp::int128_t': [6432579, 5669571, 7464427, 11410321, 38956122, 30144743]
-}
-
-df = pd.DataFrame(data)
-
-# Function to determine color based on ranking
-def get_colors_by_rank(row):
-    values = row[1:].values
-    ranks = np.argsort(values) + 1
-    colors = []
-    for rank in ranks:
-        if rank == 1:
-            colors.append('#90EE90')  # Light Green - Best
-        elif rank == 2:
-            colors.append('#FFFFE0')  # Light Yellow - Second
-        else:
-            colors.append('#FFB6C1')  # Light Red - Third
-    return colors
-
-# Create figure with subplots
-fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 10))
-
-# Prepare data
-operations = df['Operation']
-x = np.arange(len(operations))
-width = 0.25
-
-# Get implementation names
-implementations = df.columns[1:]
-
-# Plot 1: Regular scale bar chart with color coding
-for i, (idx, row) in enumerate(df.iterrows()):
-    colors = get_colors_by_rank(row)
-    for j, impl in enumerate(implementations):
-        ax1.bar(x[i] + (j-1)*width, row[impl], width,
-                color=colors[j], edgecolor='black', linewidth=0.5,
-                label=impl if i == 0 else "")
-
-ax1.set_xlabel('Operations', fontsize=12)
-ax1.set_ylabel('Time (nanoseconds)', fontsize=12)
-ax1.set_title('GCC 14 - ARM32 Benchmark Results', fontsize=14, fontweight='bold')
-ax1.set_xticks(x)
-ax1.set_xticklabels(operations, rotation=45, ha='right')
-ax1.legend(loc='upper left')
-ax1.grid(axis='y', alpha=0.3)
-
-# Add value labels on bars
-for i, (idx, row) in enumerate(df.iterrows()):
-    for j, impl in enumerate(implementations):
-        ax1.text(x[i] + (j-1)*width, row[impl], f'{row[impl]:,}',
-                 ha='center', va='bottom', fontsize=8, rotation=90)
-
-# Plot 2: Log scale for better visualization
-for i, impl in enumerate(implementations):
-    bars = ax2.bar(x + (i-1)*width, df[impl], width, label=impl, edgecolor='black', linewidth=0.5)
-
-    # Color each bar based on its rank within operation
-    for j, bar in enumerate(bars):
-        operation_values = df.iloc[j, 1:].values
-        rank = np.argsort(operation_values).tolist().index(i) + 1
-        if rank == 1:
-            bar.set_facecolor('#90EE90')
-        elif rank == 2:
-            bar.set_facecolor('#FFFFE0')
-        else:
-            bar.set_facecolor('#FFB6C1')
-
-ax2.set_xlabel('Operations', fontsize=12)
-ax2.set_ylabel('Time (nanoseconds) - Log Scale', fontsize=12)
-ax2.set_title('GCC 14 - ARM32 Benchmark Results (Log Scale)', fontsize=14, fontweight='bold')
-ax2.set_yscale('log')
-ax2.set_xticks(x)
-ax2.set_xticklabels(operations, rotation=45, ha='right')
-ax2.legend(loc='upper left')
-ax2.grid(axis='y', alpha=0.3, which='both')
-
-plt.tight_layout()
-plt.savefig('ARM32_benchmarks.png', dpi=300, bbox_inches='tight')
-plt.show()
-
-# Create a normalized performance chart
-fig3, ax3 = plt.subplots(figsize=(10, 6))
-
-# Normalize data relative to boost::mp::int128_t
-normalized_df = df.copy()
-for col in implementations:
-    normalized_df[col] = df[col] / df['boost::mp::int128_t']
-
-# Plot normalized bars
-for i, impl in enumerate(implementations):
-    if impl == 'boost::mp::int128_t':
-        continue  # Skip since it's always 1.0
-    bars = ax3.bar(x + (i-1.5)*width, normalized_df[impl], width,
-                   label=impl, edgecolor='black', linewidth=0.5)
-
-    # Add value labels
-    for j, bar in enumerate(bars):
-        height = bar.get_height()
-        ax3.text(bar.get_x() + bar.get_width()/2., height,
-                 f'{height:.2f}x', ha='center', va='bottom', fontsize=9)
-
-# Add reference line at 1.0
-ax3.axhline(y=1.0, color='red', linestyle='--', alpha=0.5, label='boost::mp::int128_t baseline')
-
-ax3.set_xlabel('Operations', fontsize=12)
-ax3.set_ylabel('Relative Performance (vs boost::mp::int128_t)', fontsize=12)
-ax3.set_title('Relative Performance Comparison - ARM32', fontsize=14, fontweight='bold')
-ax3.set_xticks(x)
-ax3.set_xticklabels(operations, rotation=45, ha='right')
-ax3.legend()
-ax3.grid(axis='y', alpha=0.3)
-
-# Add interpretation text
-ax3.text(0.02, 0.98, 'Lower is better', transform=ax3.transAxes,
-         fontsize=10, verticalalignment='top', style='italic')
-
-plt.tight_layout()
-plt.savefig('ARM32_relative_performance.png', dpi=300, bbox_inches='tight')
-plt.show()
-
-# Generate summary statistics
-print("\nPerformance Summary (x64):")
-print("-" * 50)
-for impl in implementations:
-    if impl == '__int128':
-        continue
-    avg_ratio = normalized_df[impl].mean()
-    print(f"{impl}: {avg_ratio:.2f}x average vs __int128")
-
-print("\nBest performer by operation:")
-print("-" * 50)
-for i, op in enumerate(operations):
-    row_data = df.iloc[i, 1:]
-    best_impl = row_data.idxmin()
-    best_time = row_data.min()
-    print(f"{op}: {best_impl} ({best_time:,} ns)")
-
diff --git a/examples/checked_arithmetic.cpp b/examples/checked_arithmetic.cpp
new file mode 100644
index 00000000..55db9834
--- /dev/null
+++ b/examples/checked_arithmetic.cpp
@@ -0,0 +1,76 @@
+// Copyright 2026 Matt Borland
+// Distributed under the Boost Software License, Version 1.0.
+// https://www.boost.org/LICENSE_1_0.txt
+
+// Individual headers
+
+#include <boost/int128/utilities.hpp>
+#include <boost/int128/iostream.hpp>
+
+// Or you can do a single header
+
+// #include <boost/int128.hpp>
+
+#include <cstdint>
+#include <limits>
+#include <iostream>
+
+int main()
+{
+    using boost::int128::uint128_t;
+    using boost::int128::int128_t;
+    using boost::int128::ckd_add;
+    using boost::int128::ckd_sub;
+    using boost::int128::ckd_mul;
+
+    std::cout << std::boolalpha;
+
+    // ckd_add, ckd_sub, and ckd_mul implement the C23 <stdckdint.h> contract: the
+    // operation is evaluated as if both operands had infinite range, the value is
+    // stored through the result pointer wrapped to that type's width, and the
+    // function returns true (overflow) when the exact result did not fit.
+    constexpr auto u_max {std::numeric_limits<uint128_t>::max()};
+    constexpr auto i_max {std::numeric_limits<int128_t>::max()};
+    constexpr auto i_min {std::numeric_limits<int128_t>::min()};
+
+    // When the exact result fits, the call returns false and r holds that value.
+    std::cout << "=== Results That Fit ===" << std::endl;
+    int128_t r {};
+    bool overflow {ckd_add(&r, int128_t{20}, int128_t{22})};
+    std::cout << "ckd_add(20, 22): overflow=" << overflow << ", result=" << r << std::endl;
+
+    // Adding 1 to UINT128_MAX wraps modulo 2^128 and reports overflow.
+    std::cout << "\n=== Addition Overflow ===" << std::endl;
+    uint128_t u {};
+    overflow = ckd_add(&u, u_max, uint128_t{1});
+    std::cout << "ckd_add(UINT128_MAX, 1): overflow=" << overflow << ", wrapped=" << u << std::endl;
+
+    // Subtracting below zero in an unsigned type wraps to the top of the range.
+    std::cout << "\n=== Subtraction Underflow ===" << std::endl;
+    overflow = ckd_sub(&u, uint128_t{0}, uint128_t{1});
+    std::cout << "ckd_sub(0, 1): overflow=" << overflow << ", wrapped=" << u << std::endl;
+
+    // Multiplication detects overflow that operator* would silently roll over,
+    // including INT128_MIN * -1, whose true result is not representable.
+    std::cout << "\n=== Multiplication Overflow ===" << std::endl;
+    overflow = ckd_mul(&r, i_max, int128_t{2});
+    std::cout << "ckd_mul(INT128_MAX, 2): overflow=" << overflow << ", wrapped=" << r << std::endl;
+    overflow = ckd_mul(&r, i_min, int128_t{-1});
+    std::cout << "ckd_mul(INT128_MIN, -1): overflow=" << overflow << ", wrapped=" << r << std::endl;
+
+    // The result type and the two operand types are independent: they may differ
+    // in width and signedness, and the exact mathematical value is always used.
+    std::cout << "\n=== Mixed Types ===" << std::endl;
+    std::int64_t narrow {};
+    overflow = ckd_add(&narrow, uint128_t{5}, int128_t{-3});
+    std::cout << "ckd_add<int64_t>(uint128_t{5}, int128_t{-3}): overflow=" << overflow
+              << ", result=" << narrow << std::endl;
+
+    // Narrow targets make the wrap-around easy to see (400 modulo 256 is 144); byte is printed via int.
+    std::uint8_t byte {};
+    overflow = ckd_mul(&byte, std::uint8_t{20}, std::uint8_t{20});
+    std::cout << "ckd_mul<uint8_t>(20, 20): overflow=" << overflow
+              << ", wrapped=" << static_cast<int>(byte) << std::endl;
+
+    return 0;
+}
diff --git a/examples/construction.cpp b/examples/construction.cpp
index d7b234b7..1ddfe4dc 100644
--- a/examples/construction.cpp
+++ b/examples/construction.cpp
@@ -29,12 +29,15 @@ int main()
     std::cout << "  Equals numeric_limits max? " << std::boolalpha
               << (max_value == std::numeric_limits<uint128_t>::max()) << std::endl;
 
-    // 3) From user-defined literals (values > 2^64 without splitting)
+    // 3) From user-defined literals.
+    // The library provides only string-form UDLs.
+    // Even a small value like this one is parsed from its digits rather than constructed directly,
+    // so prefer the constructors for values that fit in (unsigned) long long when performance matters.
     using namespace boost::int128::literals;
-    const auto from_literal {"36893488147419103232"_U128};  // 2 * 2^64
-    std::cout << "From literal \"36893488147419103232\"_U128: " << from_literal << std::endl;
+    const auto small_literal {12345_U128};
+    std::cout << "From literal 12345_U128: " << small_literal << std::endl;
 
-    // 4) From macro (like UINT64_C but for 128-bit)
+    // 4) From macro (like UINT64_C but for 128-bit), good for values that exceed unsigned long long
     const auto from_macro {BOOST_INT128_UINT128_C(340282366920938463463374607431768211455)};
     std::cout << "From BOOST_INT128_UINT128_C(max): " << from_macro << std::endl;
 
@@ -57,12 +60,21 @@ int main()
     std::cout << "  Equals numeric_limits min? "
               << (min_value == std::numeric_limits<int128_t>::min()) << std::endl;
 
-    // Signed literals (lowercase and uppercase both work)
-    const auto negative_literal {"-99999999999999999999"_i128};
-    std::cout << "From literal \"-99999999999999999999\"_i128: " << negative_literal << std::endl;
+    // Signed literals. Values that fit in unsigned long long can be written
+    // directly; the leading minus is parsed as a unary operator on the
+    // literal result (lowercase and uppercase suffixes both work):
+    const auto negative_literal {-12345_i128};
+    std::cout << "From literal -12345_i128: " << negative_literal << std::endl;
 
-    const auto positive_literal {"99999999999999999999"_I128};
-    std::cout << "From literal \"99999999999999999999\"_I128: " << positive_literal << std::endl;
+    const auto positive_literal {12345_I128};
+    std::cout << "From literal 12345_I128: " << positive_literal << std::endl;
+
+    // For magnitudes beyond unsigned long long you can use the macro or a string literal
+    const auto large_signed {BOOST_INT128_INT128_C(-99999999999999999999)};
+    std::cout << "From BOOST_INT128_INT128_C(-99999999999999999999): " << large_signed << std::endl;
+
+    const auto large_signed_string {"-99999999999999999999"_i128};
+    std::cout << "From string literal: " << large_signed_string << std::endl;
 
     // Signed macro
     const auto from_signed_macro {BOOST_INT128_INT128_C(-170141183460469231731687303715884105728)};
@@ -71,12 +83,52 @@ int main()
     std::cout << "\n=== Default and Copy Construction ===" << std::endl;
 
     // Default construction (zero-initialized)
-    uint128_t default_constructed {};
+    constexpr uint128_t default_constructed {};
     std::cout << "Default constructed: " << default_constructed << std::endl;
 
     // Copy construction
-    uint128_t copied {from_literal};
+    const uint128_t copied {from_macro};
     std::cout << "Copy constructed: " << copied << std::endl;
 
+    std::cout << "\n=== Floating-Point Construction ===" << std::endl;
+
+    // Floating-point construction truncates toward zero, matching the behavior of
+    // a static_cast from a floating-point type to a built-in integer.
+    constexpr uint128_t from_double {12345.9};
+    std::cout << "uint128_t from 12345.9 (truncated): " << from_double << std::endl;
+
+    constexpr int128_t from_negative_double {-12345.9};
+    std::cout << "int128_t from -12345.9 (truncated toward zero): " << from_negative_double << std::endl;
+
+    // Values that exceed the 64-bit range are routed through the full 128-bit decomposition.
+    const double two_to_the_100 {1.2676506002282294e30};  // 2^100
+    const uint128_t large_from_double {two_to_the_100};
+    std::cout << "uint128_t from 2^100: " << large_from_double << std::endl;
+
+    std::cout << "\n=== Floating-Point Edge Cases ===" << std::endl;
+
+    // NaN yields zero for both signed and unsigned (mirrors libgcc's __fix(uns)Xfti).
+    const double nan_value {std::numeric_limits<double>::quiet_NaN()};
+    const uint128_t unsigned_from_nan {nan_value};
+    const int128_t signed_from_nan {nan_value};
+    std::cout << "uint128_t from NaN: " << unsigned_from_nan << std::endl;
+    std::cout << "int128_t from NaN: " << signed_from_nan << std::endl;
+
+    // Negative values are clamped to zero when constructing uint128_t.
+    const uint128_t unsigned_from_negative {-1.0};
+    std::cout << "uint128_t from -1.0 (clamped to zero): " << unsigned_from_negative << std::endl;
+
+    // Positive overflow saturates: anything >= 2^128 (including +infinity) becomes UINT128_MAX.
+    const double infinity {std::numeric_limits<double>::infinity()};
+    const uint128_t saturated_unsigned {infinity};
+    std::cout << "uint128_t from +infinity (saturates to UINT128_MAX): " << saturated_unsigned << std::endl;
+
+    // For int128_t, values >= 2^127 saturate to INT128_MAX and values <= -2^127 saturate to INT128_MIN.
+    const double huge {1e40};  // Well beyond 2^127 (~ 1.7e38)
+    const int128_t saturated_positive {huge};
+    const int128_t saturated_negative {-huge};
+    std::cout << "int128_t from 1e40 (saturates to INT128_MAX): " << saturated_positive << std::endl;
+    std::cout << "int128_t from -1e40 (saturates to INT128_MIN): " << saturated_negative << std::endl;
+
     return 0;
 }
\ No newline at end of file
diff --git a/examples/cuda.cu b/examples/cuda.cu
index ec20577c..b8eeb61c 100644
--- a/examples/cuda.cu
+++ b/examples/cuda.cu
@@ -3,8 +3,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <vector>
 #include <random>
diff --git a/examples/math_and_random.cpp b/examples/math_and_random.cpp
index d9897028..7d63d430 100644
--- a/examples/math_and_random.cpp
+++ b/examples/math_and_random.cpp
@@ -2,9 +2,6 @@
 // Distributed under the Boost Software License, Version 1.0.
 // https://www.boost.org/LICENSE_1_0.txt
 
-// Allowing sign conversion is a required pre-requisite for Boost.Random
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <boost/int128.hpp>
 #include <boost/int128/random.hpp> // Not included in the convenience header, but needed for boost.random interop
 
diff --git a/examples/mixed_type_arithmetic.cpp b/examples/mixed_type_arithmetic.cpp
index 1220b227..c06429aa 100644
--- a/examples/mixed_type_arithmetic.cpp
+++ b/examples/mixed_type_arithmetic.cpp
@@ -2,18 +2,15 @@
 // Distributed under the Boost Software License, Version 1.0.
 // https://www.boost.org/LICENSE_1_0.txt
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
 #include <boost/int128.hpp>
 #include <iostream>
 
 int main()
 {
-    // By default, mixed type arithmetic is NOT ALLOWED
-    // In order for this file to compile #define BOOST_INT128_ALLOW_SIGN_CONVERSION
-    // BEFORE the inclusion of any file of this library (uncomment the top line)
-    //
-    // Unlike builtin types, we cannot enforce sign correctness via a compiler flag,
-    // so we made it the default.
+    // Mixed-sign comparisons and arithmetic between int128_t, uint128_t, and
+    // built-in integer types of opposite signedness follow the C++ usual
+    // arithmetic conversions, identical to the built-in __int128 /
+    // unsigned __int128 types.
 
     std::cout << "=== Mixed Type Arithmetic with uint128_t ===" << std::endl;
 
diff --git a/examples/to_string.cpp b/examples/to_string.cpp
index a359d14e..a259f149 100644
--- a/examples/to_string.cpp
+++ b/examples/to_string.cpp
@@ -34,7 +34,7 @@ int main()
     std::cout << "Match: " << (u_max64_str == u_max64_std) << std::endl;
 
     // Values beyond 64-bit range
-    const auto large_unsigned {"340282366920938463463374607431768211455"_U128};
+    const auto large_unsigned {340282366920938463463374607431768211455_U128};
     std::cout << "\nuint128_t max: " << to_string(large_unsigned) << std::endl;
 
     std::cout << "\n=== to_string with int128_t ===" << std::endl;
@@ -55,10 +55,13 @@ int main()
     std::cout << "Match: " << (s_large_str == s_large_std) << std::endl;
 
     // Values beyond 64-bit range
-    const auto large_negative {"-170141183460469231731687303715884105728"_i128};
-    std::cout << "\nint128_t min: " << to_string(large_negative) << std::endl;
+    const auto large_negative {"-170141183460469231731687303715884105728"_i128};
+    std::cout << "\nint128_t min with string literal: " << to_string(large_negative) << std::endl;
 
-    const auto large_positive {"170141183460469231731687303715884105727"_I128};
+    const auto large_negative_c {BOOST_INT128_INT128_C(-170141183460469231731687303715884105728)};
+    std::cout << "\nint128_t min with INT128_C macro: " << to_string(large_negative_c) << std::endl;
+
+    const auto large_positive {std::numeric_limits<int128_t>::max()};
     std::cout << "int128_t max: " << to_string(large_positive) << std::endl;
 
     return 0;
diff --git a/include/boost/int128.hpp b/include/boost/int128.hpp
index a75ff5c2..7d996bd7 100644
--- a/include/boost/int128.hpp
+++ b/include/boost/int128.hpp
@@ -15,5 +15,7 @@
 #include <boost/int128/climits.hpp>
 #include <boost/int128/cstdlib.hpp>
 #include <boost/int128/string.hpp>
+#include <boost/int128/utilities.hpp>
+#include <boost/int128/hash.hpp>
 
 #endif // BOOST_INT128_HPP
diff --git a/include/boost/int128/cstdlib.hpp b/include/boost/int128/cstdlib.hpp
index 2839ac39..b90eda39 100644
--- a/include/boost/int128/cstdlib.hpp
+++ b/include/boost/int128/cstdlib.hpp
@@ -61,15 +61,6 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr i128div_t div(const int12
         return i128div_t{0, 0};
     }
 
-    #if defined(BOOST_INT128_HAS_INT128)
-
-    const auto builtin_x {static_cast<detail::builtin_i128>(x)};
-    const auto builtin_y {static_cast<detail::builtin_i128>(y)};
-    return i128div_t{static_cast<int128_t>(builtin_x / builtin_y),
-                     static_cast<int128_t>(builtin_x % builtin_y)};
-
-    #else
-
     const auto abs_lhs {static_cast<uint128_t>(abs(x))};
     const auto abs_rhs {static_cast<uint128_t>(abs(y))};
 
@@ -78,19 +69,29 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr i128div_t div(const int12
         return {0, x};
     }
 
-    const auto unsigned_res {div(abs_lhs, abs_rhs)};
-
     const auto negative_quot {(x.high < 0) != (y.high < 0)};
     const auto negative_rem {x.high < 0};
 
+    #if defined(BOOST_INT128_HAS_INT128)
+
+    if (abs_rhs.high != 0)
+    {
+        const auto builtin_x {static_cast<detail::builtin_i128>(x)};
+        const auto builtin_y {static_cast<detail::builtin_i128>(y)};
+        return i128div_t{static_cast<int128_t>(builtin_x / builtin_y),
+                         static_cast<int128_t>(builtin_x % builtin_y)};
+    }
+
+    #endif
+
+    const auto unsigned_res {div(abs_lhs, abs_rhs)};
+
     i128div_t res {static_cast<int128_t>(unsigned_res.quot), static_cast<int128_t>(unsigned_res.rem)};
 
     res.quot = negative_quot ? -res.quot : res.quot;
     res.rem = negative_rem ? -res.rem : res.rem;
 
     return res;
-
-    #endif
 }
 
 } // namespace int128
diff --git a/include/boost/int128/detail/common_div.hpp b/include/boost/int128/detail/common_div.hpp
index 3ad30332..0237ead5 100644
--- a/include/boost/int128/detail/common_div.hpp
+++ b/include/boost/int128/detail/common_div.hpp
@@ -7,6 +7,7 @@
 
 #include <boost/int128/detail/config.hpp>
 #include <boost/int128/detail/clz.hpp>
+#include <boost/int128/detail/common_mul.hpp>
 
 #ifndef BOOST_INT128_BUILD_MODULE
 
@@ -67,6 +68,242 @@ BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr void half_word_div(
     quotient.low |= (remainder / rhs) & UINT32_MAX;
 }
 
+// Portable unsigned division of the 128-bit value (u1:u0), u1 being the high word, by the
+// 64-bit d. Returns the 64-bit quotient and stores the remainder in r. This is the classic
+// Hacker's Delight divlu (two 32-bit "digit" steps over 64-bit words). Precondition: u1 < d so
+// the quotient fits in 64 bits. constexpr-safe; udiv_2by1's fallback without a hardware divide.
+BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr std::uint64_t divlu(std::uint64_t u1, std::uint64_t u0, std::uint64_t d, std::uint64_t& r) noexcept
+{
+    constexpr std::uint64_t b {UINT64_C(1) << 32U}; // Number base (2^32)
+
+    BOOST_INT128_ASSUME(u1 < d); // LCOV_EXCL_LINE
+
+    // D.1: normalize so that the divisor's most significant bit is set
+    const auto s {countl_zero(d)};
+    d <<= s;
+
+    const auto vn1 {d >> 32U};       // high 32-bit digit of the normalized divisor
+    const auto vn0 {d & UINT32_MAX}; // low 32-bit digit of the normalized divisor
+
+    // Shift the dividend left by s. The (64 - s) shift is undefined when s == 0, so guard it.
+    const auto un32 {s == 0 ? u1 : ((u1 << s) | (u0 >> (64 - s)))};
+    const auto un10 {u0 << s};
+
+    const auto un1 {un10 >> 32U};       // high 32-bit digit of the shifted low dividend word
+    const auto un0 {un10 & UINT32_MAX}; // low 32-bit digit of the shifted low dividend word
+
+    // First quotient digit: estimate from the high divisor digit, then correct it downwards
+    auto q1 {un32 / vn1};
+    auto rhat {un32 - (q1 * vn1)};
+
+    while (q1 >= b || (q1 * vn0) > ((b * rhat) + un1))
+    {
+        --q1;
+        rhat += vn1;
+        if (rhat >= b) // the correction test is only repeated while rhat still fits in one digit
+        {
+            break;
+        }
+    }
+
+    const auto un21 {(un32 * b) + un1 - (q1 * d)}; // partial remainder after the first digit (unsigned, modulo 2^64)
+
+    // Second quotient digit: same estimate-and-correct step applied to the partial remainder
+    auto q0 {un21 / vn1};
+    rhat = un21 - (q0 * vn1);
+
+    while (q0 >= b || (q0 * vn0) > ((b * rhat) + un0))
+    {
+        --q0;
+        rhat += vn1;
+        if (rhat >= b)
+        {
+            break;
+        }
+    }
+
+    // The remainder is shifted back down by the normalization amount
+    r = ((un21 * b) + un0 - (q0 * d)) >> s;
+    return (q1 * b) + q0;
+}
+
+#if defined(BOOST_INT128_HAS_X86_64_DIVQ)
+
+// Inline asm cannot appear in a constexpr function body before C++20, so the x86-64 DIV
+// instruction is wrapped in a non-constexpr helper that udiv_2by1 only calls at runtime.
+BOOST_INT128_FORCE_INLINE std::uint64_t udiv_2by1_divq(const std::uint64_t u1, const std::uint64_t u0, const std::uint64_t d, std::uint64_t& r) noexcept
+{
+    std::uint64_t q {}; // receives the quotient from RAX
+    __asm__("divq %[d]" : "=a"(q), "=d"(r) : [d] "r"(d), "a"(u0), "d"(u1) : "cc"); // in: RDX:RAX = u1:u0; out: RAX = quotient, RDX = remainder
+    return q;
+}
+
+#endif // BOOST_INT128_HAS_X86_64_DIVQ
+
+// Divides the 128-bit value (u1:u0) by d, returning the 64-bit quotient; the remainder goes to r.
+// Precondition: u1 < d. Mirrors common_mul.hpp::umul: a hardware instruction at runtime where
+// one exists (never in CUDA device code), and the portable divlu in constexpr evaluation and elsewhere.
+BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr std::uint64_t udiv_2by1(const std::uint64_t u1, const std::uint64_t u0, const std::uint64_t d, std::uint64_t& r) noexcept
+{
+    BOOST_INT128_ASSUME(u1 < d); // LCOV_EXCL_LINE
+
+    #if (defined(BOOST_INT128_HAS_X86_64_DIVQ) || (defined(_M_AMD64) && !defined(__GNUC__) && !defined(__clang__) && !defined(__CUDA_ARCH__) && _MSC_VER >= 1920)) && !defined(BOOST_INT128_NO_CONSTEVAL_DETECTION)
+
+    if (!BOOST_INT128_IS_CONSTANT_EVALUATED(u1))
+    {
+        #if defined(BOOST_INT128_HAS_X86_64_DIVQ)
+
+        return udiv_2by1_divq(u1, u0, d, r); // GCC/Clang on x86-64: inline-asm divq
+
+        #else
+
+        return _udiv128(u1, u0, d, &r); // MSVC x64 host code: compiler intrinsic
+
+        #endif
+    }
+
+    #endif
+
+    return divlu(u1, u0, d, r);
+}
+
+#if defined(_MSC_VER)
+#  pragma warning(push)
+#  pragma warning(disable : 4127) // Pre c++17 the if constexpr remainder part will hit this
+#endif
+
+// Divides the 128-bit value (uh:ul) by the 128-bit divisor (vh:vl) where vh != 0. Because the
+// divisor is >= 2^64 the quotient is guaranteed to fit in a single 64-bit word, which is
+// returned. When need_remainder is true the 128-bit remainder is written to (rem_hi:rem_lo);
+// otherwise rem_hi and rem_lo are left unmodified.
+// This is one normalized quotient digit (Knuth Algorithm D specialized to a 2-word divisor).
+// The top-limb estimate qhat (reusing udiv_2by1, a hardware divq on x86-64) is bounded by
+// Knuth Theorem B to q <= qhat <= q + 2; the D3 refinement against d0 tightens it to q <= qhat
+// <= q + 1, and the conditional add-back then corrects the remaining off-by-one.
+template <bool need_remainder>
+BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr std::uint64_t div3by2(const std::uint64_t uh, const std::uint64_t ul,
+    const std::uint64_t vh, const std::uint64_t vl, std::uint64_t& rem_hi, std::uint64_t& rem_lo) noexcept
+{
+    BOOST_INT128_ASSUME(vh != 0); // LCOV_EXCL_LINE
+
+    // D.1: normalize so the divisor's most significant bit is set
+    const auto s {countl_zero(vh)};
+    const auto cs {64 - s}; // complementary shift; only used on the s != 0 paths below
+
+    std::uint64_t d1 {};
+    std::uint64_t d0 {};
+    std::uint64_t u2 {};
+    std::uint64_t u1 {};
+    std::uint64_t u0 {};
+
+    if (s == 0)
+    {
+        d1 = vh;
+        d0 = vl;
+        u2 = 0;
+        u1 = uh;
+        u0 = ul;
+    }
+    else
+    {
+        d1 = (vh << s) | (vl >> cs);
+        d0 = vl << s;
+        u2 = uh >> cs;
+        u1 = (uh << s) | (ul >> cs);
+        u0 = ul << s;
+    }
+
+    BOOST_INT128_ASSUME(u2 <= d1); // LCOV_EXCL_LINE
+
+    // D.3: estimate the single quotient digit qhat = floor((u2:u1) / d1), clamped to 2^64 - 1.
+    // rhat is the remainder of that estimate.
+    std::uint64_t qhat {};
+    std::uint64_t rhat {};
+    bool rhat_overflow {false};
+    if (u2 < d1)
+    {
+        qhat = udiv_2by1(u2, u1, d1, rhat);
+    }
+    else // NOTE(review): u2 = uh >> cs < 2^s <= 2^63 <= d1 (u2 == 0 when s == 0), so this looks unreachable - confirm
+    {
+        // u2 == d1: floor((u2:u1)/d1) clamps to 2^64 - 1, leaving rhat == u1 + d1 (may carry).
+        qhat = UINT64_MAX;
+        rhat = u1 + d1;
+        rhat_overflow = rhat < u1;
+    }
+
+    std::uint64_t qd0_hi {};
+    auto qd0_lo {umul(qhat, d0, qd0_hi)}; // (qd0_hi:qd0_lo) = qhat * d0
+
+    // Refine qhat against d0 (Knuth D3). The top-limb estimate alone can exceed the true quotient
+    // by up to 2; this brings it down to at most one too large, which the add-back below corrects.
+    // At most two iterations run, and only while the running remainder rhat stays below 2^64.
+    if (!rhat_overflow)
+    {
+        while (qd0_hi > rhat || (qd0_hi == rhat && qd0_lo > u0))
+        {
+            --qhat;
+            rhat += d1;
+            const auto rhat_carry {rhat < d1}; // rhat wrapped past 2^64
+            qd0_lo = umul(qhat, d0, qd0_hi);   // keep qhat * d0 in sync with the decremented qhat
+            if (rhat_carry)
+            {
+                break;
+            }
+        }
+    }
+
+    // D.4: multiply and subtract (u2:u1:u0) - qhat * (d1:d0). qd0 already holds qhat * d0.
+    std::uint64_t qd1_hi {};
+    const auto qd1_lo {umul(qhat, d1, qd1_hi)};
+
+    const auto p0 {qd0_lo}; // (p2:p1:p0) = qhat * (d1:d0)
+    const auto p1 {qd0_hi + qd1_lo};
+    const auto p2 {qd1_hi + static_cast<std::uint64_t>(p1 < qd0_hi)};
+
+    const auto r0 {u0 - p0}; // (r1:r0) = low two limbs of the difference
+    const auto borrow0 {static_cast<std::uint64_t>(u0 < p0)};
+    const auto t1 {u1 - p1};
+    auto r1 {t1 - borrow0};
+    const auto borrow1 {static_cast<std::uint64_t>(u1 < p1) + static_cast<std::uint64_t>(t1 < borrow0)}; // borrow out of the middle limb
+
+    // D.5/D.6: if the top limb borrowed, qhat was one too large. Correct it and add the divisor
+    // back into the remainder. The probability of this branch is small.
+    auto r0_final {r0};
+    if (BOOST_INT128_UNLIKELY((u2 < p2) || ((u2 - p2) < borrow1)))
+    {
+        --qhat;                                                  // LCOV_EXCL_LINE
+        const auto sum0 {r0 + d0};                               // LCOV_EXCL_LINE
+        r0_final = sum0;                                         // LCOV_EXCL_LINE
+        r1 = r1 + d1 + static_cast<std::uint64_t>(sum0 < r0);    // LCOV_EXCL_LINE
+    }
+
+    BOOST_INT128_IF_CONSTEXPR (need_remainder)
+    {
+        if (s == 0)
+        {
+            rem_hi = r1;
+            rem_lo = r0_final;
+        }
+        else
+        {
+            rem_lo = (r0_final >> s) | (r1 << cs); // undo the normalization shift
+            rem_hi = r1 >> s;
+        }
+    }
+    else
+    {
+        static_cast<void>(rem_hi);
+        static_cast<void>(rem_lo);
+    }
+
+    return qhat;
+}
+
+#if defined(_MSC_VER)
+#  pragma warning(pop)
+#endif
+
 namespace impl {
 
 #if defined(_MSC_VER)
@@ -267,220 +504,57 @@ BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr T from_words(const
     return {static_cast<high_word_type>(high), low};
 }
 
-#if defined(_M_AMD64) && !defined(__GNUC__) && !defined(__clang__) && _MSC_VER >= 1920
-
-template <bool needs_mod, typename T>
-BOOST_INT128_HOST_DEVICE constexpr T div_mod_msvc(T dividend, T divisor, T& remainder)
-{
-    using high_word_type = decltype(T{}.high);
-
-    // Skip normalization if divisor is already large enough
-    // use direct division and intrinsic
-    // This is only possible in the unsigned case
-    BOOST_INT128_IF_CONSTEXPR (!std::numeric_limits<T>::is_signed)
-    {
-        constexpr auto divisor_lower_bound{UINT64_MAX >> 1};
-        if (divisor.high >= divisor_lower_bound)
-        {
-            T quotient{};
-
-            quotient.low = static_cast<std::uint64_t>(dividend.high / divisor.high);
-
-            std::uint64_t product0_high{};
-            auto product0_low{_umul128(quotient.low, divisor.low, &product0_high)};
-
-            std::uint64_t product1_high{};
-            auto product1_low{_umul128(quotient.low, static_cast<std::uint64_t>(divisor.high), &product1_high)};
-
-            T product{};
-            product.low = product0_low;
-            auto carry{BOOST_INT128_ADD_CARRY(0, product0_high, product1_low, reinterpret_cast<std::uint64_t*>(&product.high))};
-            product1_high += static_cast<std::uint64_t>(carry);
-
-            if (product1_high > 0 || product > dividend)
-            {
-                --quotient.low;
-
-                // Recalculate with adjusted quotient
-                product0_low = _umul128(quotient.low, divisor.low, &product0_high);
-                product1_low = _umul128(quotient.low, divisor.high, &product1_high);
-
-                product.low = product0_low;
-                carry = BOOST_INT128_ADD_CARRY(0, product0_high, product1_low, reinterpret_cast<std::uint64_t*>(&product.high));
-                product1_high += static_cast<std::uint64_t>(carry);
-            }
-
-            BOOST_INT128_IF_CONSTEXPR(needs_mod)
-            {
-                auto borrow{BOOST_INT128_SUB_BORROW(0, dividend.low, product.low, &remainder.low)};
-                BOOST_INT128_SUB_BORROW(borrow, dividend.high, product.high, reinterpret_cast<std::uint64_t*>(&remainder.high));
-            }
-
-            return quotient;
-        }
-    }
-
-    const auto shift_amount {countl_zero(static_cast<std::uint64_t>(divisor.high))};
-    divisor <<= shift_amount;
-
-    auto high_digit {static_cast<std::uint64_t>(shift_amount == 0 ? 0 : dividend.high >> (64 - shift_amount))};
-    dividend <<= shift_amount;
-
-    // Initial quotient estimate
-    T quotient {};
-    const bool high_digit_gte_divisor {high_digit >= static_cast<std::uint64_t>(divisor.high)};
-    quotient.high = high_digit_gte_divisor ? 1 : 0;
-    std::uint64_t remainder_estimate {};
-
-    quotient.low = _udiv128(high_digit_gte_divisor ? high_digit - divisor.high : high_digit,
-                            dividend.high, divisor.high, &remainder_estimate);
-
-    // Bounded correction loop with early exit
-    // Typically 2 is the most number of corrections we need since this is only for 2x2 division
-    // Other cases have been filtered out well before we've made it this far
-    int correction_steps {};
-    constexpr int max_corrections {2};
-
-    while (correction_steps < max_corrections)
-    {
-        T product{};
-        product.low = _umul128(quotient.low, divisor.low, reinterpret_cast<std::uint64_t*>(&product.high));
-        if (product <= T{static_cast<high_word_type>(remainder_estimate), dividend.low})
-        {
-            break;
-        }
-
-        --quotient.low;
-        const auto sum {remainder_estimate + divisor.high};
-        if (remainder_estimate > sum)
-        {
-            break;
-        }
-        remainder_estimate = sum;
-
-        correction_steps++;
-    }
-
-    // Final verification and adjustment
-    std::uint64_t product0_high{};
-    auto product_low {_umul128(quotient.low, divisor.low, &product0_high)};
-    auto borrow {BOOST_INT128_SUB_BORROW(0, dividend.low, product_low, &dividend.low)};
-
-    std::uint64_t product1_high{};
-    product_low = _umul128(quotient.low, divisor.high, &product1_high);
-    product1_high += static_cast<std::uint64_t>(BOOST_INT128_ADD_CARRY(0, product_low, product0_high, &product_low));
-
-    borrow = BOOST_INT128_SUB_BORROW(borrow, static_cast<std::uint64_t>(dividend.high), product_low, reinterpret_cast<std::uint64_t*>(&dividend.high));
-    borrow = BOOST_INT128_SUB_BORROW(borrow, high_digit, product1_high, &high_digit);
-    quotient.low -= static_cast<std::uint64_t>(borrow);
-
-    BOOST_INT128_IF_CONSTEXPR (needs_mod)
-    {
-        if (borrow)
-        {
-            auto carry { BOOST_INT128_ADD_CARRY(0, dividend.low, divisor.low, &dividend.low) };
-            BOOST_INT128_ADD_CARRY(carry, static_cast<std::uint64_t>(dividend.high), static_cast<std::uint64_t>(divisor.high), reinterpret_cast<std::uint64_t*>(&dividend.high));
-        }
-
-        dividend >>= shift_amount;
-        remainder = dividend;
-    }
-
-    return quotient;
-}
-
-#endif
-
 } // namespace impl
 
 // We only need to take the time to process the remainder in the modulo case
 // In the division case it is a waste of cycles
+//
+// 128/64 -> 128-bit quotient (and optional 64-bit remainder) by two-step long division.
+// The leading 64/64 yields the high quotient word and a remainder r < rhs, which satisfies
+// the udiv_2by1 precondition for the low quotient word. This covers every rhs (including
+// rhs <= UINT32_MAX) through the single hardware-or-portable udiv_2by1 primitive.
 
 template <typename T>
 BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr void one_word_div(const T& lhs, const std::uint64_t rhs, T& quotient) noexcept
 {
-    #if defined(_M_AMD64) && !defined(__GNUC__) && !defined(__clang__) && _MSC_VER >= 1920 && !defined(BOOST_INT128_NO_CONSTEVAL_DETECTION)
-
-    if (!BOOST_INT128_IS_CONSTANT_EVALUATED(lhs))
-    {
-        using high_word_type = decltype(T{}.high);
-
-        quotient.high = static_cast<high_word_type>(static_cast<std::uint64_t>(lhs.high) / rhs);
-        auto remainder {static_cast<std::uint64_t>(lhs.high) % rhs};
-        quotient.low = _udiv128(remainder, lhs.low, rhs, &remainder);
-        return;
-    }
-
-    #endif
-
-    if (rhs <= UINT32_MAX)
-    {
-        half_word_div(lhs, static_cast<std::uint32_t>(rhs), quotient);
-    }
-    else
-    {
-        std::uint32_t u[4] {};
-        std::uint32_t v[2] {};
-        std::uint32_t q[4] {};
+    using high_word_type = decltype(T{}.high);
 
-        const auto m {impl::to_words(lhs, u)};
-        const auto n {impl::to_words(rhs, v)};
+    BOOST_INT128_ASSUME(rhs != 0); // LCOV_EXCL_LINE
 
-        impl::knuth_divide<false>(u, m, v, n, q);
+    const auto u_high {static_cast<std::uint64_t>(lhs.high)};
 
-        quotient = impl::from_words<T>(q);
-    }
+    quotient.high = static_cast<high_word_type>(u_high / rhs);
+    auto r {u_high % rhs};
+    quotient.low = udiv_2by1(r, lhs.low, rhs, r);
 }
 
 template <typename T>
 BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr void one_word_div(const T& lhs, const std::uint64_t rhs, T& quotient, T& remainder) noexcept
 {
-    #if defined(_M_AMD64) && !defined(__GNUC__) && !defined(__clang__) && _MSC_VER >= 1920 && !defined(BOOST_INT128_NO_CONSTEVAL_DETECTION)
-
-    if (!BOOST_INT128_IS_CONSTANT_EVALUATED(lhs))
-    {
-        using high_word_type = decltype(T{}.high);
-
-        quotient.high = static_cast<high_word_type>(static_cast<std::uint64_t>(lhs.high) / rhs);
-        remainder.low = static_cast<std::uint64_t>(lhs.high) % rhs;
-        quotient.low = _udiv128(remainder.low, lhs.low, rhs, &remainder.low);
-        return;
-    }
-
-    #else
-
-    if (rhs <= UINT32_MAX)
-    {
-        half_word_div(lhs, static_cast<std::uint32_t>(rhs), quotient, remainder);
-    }
-    else
-    {
-        std::uint32_t u[4] {};
-        std::uint32_t v[2] {};
-        std::uint32_t q[4] {};
+    using high_word_type = decltype(T{}.high);
 
-        const auto m {impl::to_words(lhs, u)};
-        const auto n {impl::to_words(rhs, v)};
+    BOOST_INT128_ASSUME(rhs != 0); // LCOV_EXCL_LINE
 
-        impl::knuth_divide<true>(u, m, v, n, q);
+    const auto u_high {static_cast<std::uint64_t>(lhs.high)};
 
-        quotient = impl::from_words<T>(q);
-        remainder = impl::from_words<T>(u);
-    }
+    quotient.high = static_cast<high_word_type>(u_high / rhs);
+    auto r {u_high % rhs};
+    quotient.low = udiv_2by1(r, lhs.low, rhs, r);
 
-    #endif
+    remainder.high = static_cast<high_word_type>(0);
+    remainder.low = r;
 }
 
 template <typename T>
 BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr void one_word_div(const T& lhs, const std::uint32_t rhs, T& quotient, T& remainder) noexcept
 {
-    half_word_div(lhs, rhs, quotient, remainder);
+    one_word_div(lhs, static_cast<std::uint64_t>(rhs), quotient, remainder);
 }
 
 template <typename T>
 BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr void one_word_div(const T& lhs, const std::uint32_t rhs, T& quotient) noexcept
 {
-    half_word_div(lhs, rhs, quotient);
+    one_word_div(lhs, static_cast<std::uint64_t>(rhs), quotient);
 }
 
 #ifdef _MSC_VER
@@ -494,62 +568,33 @@ BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr T knuth_div(const T
 {
     BOOST_INT128_ASSUME(divisor != static_cast<T>(0));
 
-    #if defined(_M_AMD64) && !defined(__GNUC__) && !defined(__clang__) && _MSC_VER >= 1920
-
-    BOOST_INT128_IF_CONSTEXPR(!std::numeric_limits<T>::is_signed)
-    {
-        if (!BOOST_INT128_IS_CONSTANT_EVALUATED(dividend))
-        {
-            T remainder{};
-            return impl::div_mod_msvc<false>(dividend, divisor, remainder);
-        }
-    }
-
-    #endif
-
-    std::uint32_t u[4]{};
-    std::uint32_t v[4]{};
-    std::uint32_t q[4]{};
-
-    const auto m{ impl::to_words(dividend, u) };
-    const auto n{ impl::to_words(divisor, v) };
+    using high_word_type = decltype(T{}.high);
 
-    impl::knuth_divide<false>(u, m, v, n, q);
+    std::uint64_t rem_hi {};
+    std::uint64_t rem_lo {};
 
-    return impl::from_words<T>(q);
+    const auto q {div3by2<false>(static_cast<std::uint64_t>(dividend.high), dividend.low,
+                                 static_cast<std::uint64_t>(divisor.high), divisor.low, rem_hi, rem_lo)};
 
+    return T{static_cast<high_word_type>(0), q};
 }
 
 template <typename T>
 BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr T knuth_div(const T& dividend, const T& divisor, T& remainder) noexcept
 {
     BOOST_INT128_ASSUME(divisor != static_cast<T>(0));
-    
-    #if defined(_M_AMD64) && !defined(__GNUC__) && !defined(__clang__) && _MSC_VER >= 1920
-
-    BOOST_INT128_IF_CONSTEXPR(!std::numeric_limits<T>::is_signed)
-    {
-        if (!BOOST_INT128_IS_CONSTANT_EVALUATED(dividend))
-        {
-            return impl::div_mod_msvc<true>(dividend, divisor, remainder);
-        }
-    }
-
 
-    #endif
-
-    std::uint32_t u[4]{};
-    std::uint32_t v[4]{};
-    std::uint32_t q[4]{};
+    using high_word_type = decltype(T{}.high);
 
-    const auto m{ impl::to_words(dividend, u) };
-    const auto n{ impl::to_words(divisor, v) };
+    std::uint64_t rem_hi {};
+    std::uint64_t rem_lo {};
 
-    impl::knuth_divide<true>(u, m, v, n, q);
+    const auto q {div3by2<true>(static_cast<std::uint64_t>(dividend.high), dividend.low,
+                                static_cast<std::uint64_t>(divisor.high), divisor.low, rem_hi, rem_lo)};
 
-    remainder = impl::from_words<T>(u);
+    remainder = T{static_cast<high_word_type>(rem_hi), rem_lo};
 
-    return impl::from_words<T>(q);
+    return T{static_cast<high_word_type>(0), q};
 }
 
 #ifdef _MSC_VER
diff --git a/include/boost/int128/detail/common_mul.hpp b/include/boost/int128/detail/common_mul.hpp
index be26c763..e0c1a8e1 100644
--- a/include/boost/int128/detail/common_mul.hpp
+++ b/include/boost/int128/detail/common_mul.hpp
@@ -10,7 +10,6 @@
 #ifndef BOOST_INT128_BUILD_MODULE
 
 #include <cstdint>
-#include <cstring>
 
 #endif
 
@@ -18,85 +17,89 @@ namespace boost {
 namespace int128 {
 namespace detail {
 
-// See: The Art of Computer Programming Volume 2 (Semi-numerical algorithms) section 4.3.1
-// Algorithm M: Multiplication of Non-negative integers
-template <typename ReturnType, std::size_t u_size, std::size_t v_size>
-BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr ReturnType knuth_multiply(const std::uint32_t (&u)[u_size],
-                                                              const std::uint32_t (&v)[v_size]) noexcept
+// High 64 bits of the 64x64 -> 128 product, computed with four 32-bit partial products
+BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr std::uint64_t umulh_generic(const std::uint64_t a, const std::uint64_t b) noexcept
 {
-    using high_word_type = decltype(ReturnType{}.high);
-
-    std::uint32_t w[u_size + v_size] {};
+    const std::uint64_t a_lo {a & UINT32_MAX};
+    const std::uint64_t a_hi {a >> 32U};
+    const std::uint64_t b_lo {b & UINT32_MAX};
+    const std::uint64_t b_hi {b >> 32U};
 
-    // M.1
-    for (std::size_t j {}; j < v_size; ++j)
-    {
-        // M.2
-        if (v[j] == 0)
-        {
-            w[j + u_size] = 0;
-            continue;
-        }
-
-        // M.3
-        std::uint64_t t {};
-        for (std::size_t i {}; i < u_size; ++i)
-        {
-            // M.4
-            t += static_cast<std::uint64_t>(u[i]) * v[j] + w[i + j];
-            w[i + j] = static_cast<std::uint32_t>(t);
-            t >>= 32u;
-        }
-
-        // M.5
-        w[j + u_size] = static_cast<std::uint32_t>(t);
-    }
+    const std::uint64_t lo_lo {a_lo * b_lo};
+    const std::uint64_t hi_lo {a_hi * b_lo};
+    const std::uint64_t lo_hi {a_lo * b_hi};
+    const std::uint64_t hi_hi {a_hi * b_hi};
 
-    const auto low {static_cast<std::uint64_t>(w[0]) | (static_cast<std::uint64_t>(w[1]) << 32)};
-    const auto high {static_cast<std::uint64_t>(w[2]) | (static_cast<std::uint64_t>(w[3]) << 32)};
+    const std::uint64_t cross {(lo_lo >> 32U) + (hi_lo & UINT32_MAX) + (lo_hi & UINT32_MAX)};
 
-    return {static_cast<high_word_type>(high), low};
+    return hi_hi + (hi_lo >> 32U) + (lo_hi >> 32U) + (cross >> 32U);
 }
 
-template <typename T>
-BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr void to_words(const T& x, std::uint32_t (&words)[4]) noexcept
+// Full 64x64 -> 128 product
+BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr std::uint64_t umul(const std::uint64_t a, const std::uint64_t b, std::uint64_t& hi) noexcept
 {
     #ifndef BOOST_INT128_NO_CONSTEVAL_DETECTION
 
-    if (!BOOST_INT128_IS_CONSTANT_EVALUATED(x))
+    if (!BOOST_INT128_IS_CONSTANT_EVALUATED(a))
     {
-        std::memcpy(&words, &x, sizeof(T));
-        return;
+        #if defined(BOOST_INT128_HAS_INT128)
+
+        const detail::builtin_u128 product {static_cast<detail::builtin_u128>(a) * static_cast<detail::builtin_u128>(b)};
+        hi = static_cast<std::uint64_t>(product >> 64U);
+        return static_cast<std::uint64_t>(product);
+
+        #elif defined(_M_AMD64) && !defined(__GNUC__) && !defined(__CUDA_ARCH__)
+
+        return _umul128(a, b, &hi);
+
+        #elif defined(_M_ARM64) && !defined(__CUDA_ARCH__)
+
+        hi = __umulh(a, b);
+        return a * b;
+
+        #endif
     }
 
     #endif
 
-    words[0] = static_cast<std::uint32_t>(x.low & UINT32_MAX);                                  // LCOV_EXCL_LINE
-    words[1] = static_cast<std::uint32_t>(x.low >> 32);                                         // LCOV_EXCL_LINE
-    words[2] = static_cast<std::uint32_t>(static_cast<std::uint64_t>(x.high) & UINT32_MAX);     // LCOV_EXCL_LINE
-    words[3] = static_cast<std::uint32_t>(static_cast<std::uint64_t>(x.high) >> 32);            // LCOV_EXCL_LINE
+    hi = umulh_generic(a, b);
+    return a * b;
 }
 
+// Low 128 bits of a 128x128 product
+template <typename ReturnType, typename T>
+BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr ReturnType low_word_mul(const T& lhs, const T& rhs) noexcept
+{
+    using high_word_type = decltype(ReturnType{}.high);
+
+    std::uint64_t result_high {};
+    const std::uint64_t result_low {umul(lhs.low, rhs.low, result_high)};
+
+    result_high += lhs.low * static_cast<std::uint64_t>(rhs.high);
+    result_high += static_cast<std::uint64_t>(lhs.high) * rhs.low;
+
+    return ReturnType{static_cast<high_word_type>(result_high), result_low};
+}
 
-BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr void to_words(const std::uint64_t x, std::uint32_t (&words)[2]) noexcept
+// Low 128 bits of a 128x64 product
+template <typename ReturnType, typename T>
+BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr ReturnType low_word_mul(const T& lhs, const std::uint64_t rhs) noexcept
 {
-    #ifndef BOOST_INT128_NO_CONSTEVAL_DETECTION
+    using high_word_type = decltype(ReturnType{}.high);
 
-    if (!BOOST_INT128_IS_CONSTANT_EVALUATED(x))
-    {
-        std::memcpy(&words, &x, sizeof(std::uint64_t));
-        return;
-    }
+    std::uint64_t result_high {};
+    const std::uint64_t result_low {umul(lhs.low, rhs, result_high)};
 
-    #endif
+    result_high += static_cast<std::uint64_t>(lhs.high) * rhs;
 
-    words[0] = static_cast<std::uint32_t>(x & UINT32_MAX);  // LCOV_EXCL_LINE
-    words[1] = static_cast<std::uint32_t>(x >> 32);         // LCOV_EXCL_LINE
+    return ReturnType{static_cast<high_word_type>(result_high), result_low};
 }
 
-BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr void to_words(const std::uint32_t x, std::uint32_t (&words)[1]) noexcept
+// Low 128 bits of a 128x32 product
+template <typename ReturnType, typename T>
+BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr ReturnType low_word_mul(const T& lhs, const std::uint32_t rhs) noexcept
 {
-    words[0] = x;
+    return low_word_mul<ReturnType>(lhs, static_cast<std::uint64_t>(rhs));
 }
 
 } // namespace detail
diff --git a/include/boost/int128/detail/config.hpp b/include/boost/int128/detail/config.hpp
index 40f95ee8..4a93c2c3 100644
--- a/include/boost/int128/detail/config.hpp
+++ b/include/boost/int128/detail/config.hpp
@@ -5,10 +5,6 @@
 #ifndef BOOST_INT128_DETAIL_CONFIG_HPP
 #define BOOST_INT128_DETAIL_CONFIG_HPP
 
-#if defined(BOOST_INT128_ALLOW_SIGN_CONVERSION) && !defined(BOOST_INT128_ALLOW_SIGN_COMPARE)
-#  define BOOST_INT128_ALLOW_SIGN_COMPARE
-#endif
-
 // Use 128-bit integers
 #if defined(BOOST_HAS_INT128) || (defined(__SIZEOF_INT128__) && !defined(_MSC_VER)) && !defined(BOOST_INT128_NO_BUILTIN_INT128)
 
@@ -45,7 +41,11 @@ using builtin_u128 = unsigned __int128;
 
 #define BOOST_INT128_HAS_MSVC_INT128
 
+#if _MSC_VER >= 1945
+#define BOOST_INT128_BUILTIN_CONSTEXPR constexpr
+#else
 #define BOOST_INT128_BUILTIN_CONSTEXPR inline
+#endif
 
 namespace boost {
 namespace int128 {
@@ -169,6 +169,11 @@ using builtin_u128 = std::_Unsigned128;
 
 #endif // Platform macros
 
+// Hardware 128-bit by 64-bit unsigned division via the x86-64 DIV instruction
+#if defined(__x86_64__) && (defined(__GNUC__) || defined(__clang__)) && !defined(_MSC_VER) && !defined(__CUDA_ARCH__)
+#  define BOOST_INT128_HAS_X86_64_DIVQ
+#endif
+
 // The builtin is only constexpr from clang-7 or GCC-10
 #ifdef __has_builtin
 #  if __has_builtin(__builtin_sub_overflow) && ((defined(__clang__) && __clang_major__ >= 7) || (defined(__GNUC__) && __GNUC__ >= 10))
diff --git a/include/boost/int128/detail/conversions.hpp b/include/boost/int128/detail/conversions.hpp
index f471d570..aca90214 100644
--- a/include/boost/int128/detail/conversions.hpp
+++ b/include/boost/int128/detail/conversions.hpp
@@ -38,20 +38,6 @@ BOOST_INT128_HOST_DEVICE constexpr uint128_t::uint128_t(const int128_t& v) noexc
 
 #endif // BOOST_INT128_ENDIAN_LITTLE_BYTE
 
-//=====================================
-// Conversion Operators
-//=====================================
-
-BOOST_INT128_HOST_DEVICE constexpr int128_t::operator uint128_t() const noexcept
-{
-    return uint128_t{static_cast<std::uint64_t>(this->high), static_cast<std::uint64_t>(this->low)};
-}
-
-BOOST_INT128_HOST_DEVICE constexpr uint128_t::operator int128_t() const noexcept
-{
-    return int128_t{static_cast<std::int64_t>(this->high), static_cast<std::uint64_t>(this->low)};
-}
-
 //=====================================
 // Comparison Operators
 //=====================================
@@ -64,291 +50,278 @@ BOOST_INT128_HOST_DEVICE constexpr uint128_t::operator int128_t() const noexcept
 template <typename T, typename U, std::enable_if_t<detail::is_valid_overload_v<T> && detail::is_valid_overload_v<U> && !std::is_same<T, U>::value, bool> = true>
 BOOST_INT128_HOST_DEVICE constexpr bool operator==(const T lhs, const U rhs) noexcept
 {
-    #ifndef BOOST_INT128_ALLOW_SIGN_COMPARE
-
-    static_assert(std::is_same<T, U>::value, "Sign Compare Error, cast one type to the other for this operation");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return true;
-
-    #else
-
-    BOOST_INT128_IF_CONSTEXPR (std::is_same<T, int128_t>::value)
-    {
-        if (lhs < T{0})
-        {
-            return false;
-        }
-
-        return static_cast<uint128_t>(lhs) == rhs;
-    }
-    else
-    {
-        if (rhs < T{0})
-        {
-            return false;
-        }
-
-        return lhs == static_cast<uint128_t>(rhs);
-    }
-
-    #endif
+    return static_cast<uint128_t>(lhs) == static_cast<uint128_t>(rhs);
 }
 
 template <typename T, typename U, std::enable_if_t<detail::is_valid_overload_v<T> && detail::is_valid_overload_v<U> && !std::is_same<T, U>::value, bool> = true>
 BOOST_INT128_HOST_DEVICE constexpr bool operator!=(const T lhs, const U rhs) noexcept
 {
-    #ifndef BOOST_INT128_ALLOW_SIGN_COMPARE
-
-    static_assert(std::is_same<T, U>::value, "Sign Compare Error, cast one type to the other for this operation");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return true;
-
-    #else
-
-    BOOST_INT128_IF_CONSTEXPR (std::is_same<T, int128_t>::value)
-    {
-        if (lhs < T{0})
-        {
-            return true;
-        }
-
-        return static_cast<uint128_t>(lhs) != rhs;
-    }
-    else
-    {
-        if (rhs < T{0})
-        {
-            return true;
-        }
-
-        return lhs != static_cast<uint128_t>(rhs);
-    }
-
-    #endif
+    return static_cast<uint128_t>(lhs) != static_cast<uint128_t>(rhs);
 }
 
 template <typename T, typename U, std::enable_if_t<detail::is_valid_overload_v<T> && detail::is_valid_overload_v<U> && !std::is_same<T, U>::value, bool> = true>
 BOOST_INT128_HOST_DEVICE constexpr bool operator<(const T lhs, const U rhs) noexcept
 {
-    #ifndef BOOST_INT128_ALLOW_SIGN_COMPARE
-
-    static_assert(std::is_same<T, U>::value, "Sign Compare Error, cast one type to the other for this operation");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return true;
+    return static_cast<uint128_t>(lhs) < static_cast<uint128_t>(rhs);
+}
 
-    #else
+template <typename T, typename U, std::enable_if_t<detail::is_valid_overload_v<T> && detail::is_valid_overload_v<U> && !std::is_same<T, U>::value, bool> = true>
+BOOST_INT128_HOST_DEVICE constexpr bool operator<=(const T lhs, const U rhs) noexcept
+{
+    return static_cast<uint128_t>(lhs) <= static_cast<uint128_t>(rhs);
+}
 
-    BOOST_INT128_IF_CONSTEXPR (std::is_same<T, int128_t>::value)
-    {
-        if (lhs < T{0})
-        {
-            return true;
-        }
+template <typename T, typename U, std::enable_if_t<detail::is_valid_overload_v<T> && detail::is_valid_overload_v<U> && !std::is_same<T, U>::value, bool> = true>
+BOOST_INT128_HOST_DEVICE constexpr bool operator>(const T lhs, const U rhs) noexcept
+{
+    return static_cast<uint128_t>(lhs) > static_cast<uint128_t>(rhs);
+}
 
-        return static_cast<uint128_t>(lhs) < rhs;
-    }
-    else
-    {
-        if (rhs < T{0})
-        {
-            return false;
-        }
+template <typename T, typename U, std::enable_if_t<detail::is_valid_overload_v<T> && detail::is_valid_overload_v<U> && !std::is_same<T, U>::value, bool> = true>
+BOOST_INT128_HOST_DEVICE constexpr bool operator>=(const T lhs, const U rhs) noexcept
+{
+    return static_cast<uint128_t>(lhs) >= static_cast<uint128_t>(rhs);
+}
 
-        return lhs < static_cast<uint128_t>(rhs);
-    }
+//=====================================
+// Arithmetic Operators
+//=====================================
 
-    #endif
+template <typename T, typename U, std::enable_if_t<detail::is_valid_overload_v<T> && detail::is_valid_overload_v<U> && !std::is_same<T, U>::value, bool> = true>
+BOOST_INT128_HOST_DEVICE constexpr uint128_t operator+(const T lhs, const U rhs) noexcept
+{
+    return static_cast<uint128_t>(lhs) + static_cast<uint128_t>(rhs);
 }
 
 template <typename T, typename U, std::enable_if_t<detail::is_valid_overload_v<T> && detail::is_valid_overload_v<U> && !std::is_same<T, U>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE constexpr bool operator<=(const T lhs, const U rhs) noexcept
+BOOST_INT128_HOST_DEVICE constexpr uint128_t operator-(const T lhs, const U rhs) noexcept
 {
-    #ifndef BOOST_INT128_ALLOW_SIGN_COMPARE
-
-    static_assert(std::is_same<T, U>::value, "Sign Compare Error, cast one type to the other for this operation");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return true;
+    return static_cast<uint128_t>(lhs) - static_cast<uint128_t>(rhs);
+}
 
-    #else
+template <typename T, typename U, std::enable_if_t<detail::is_valid_overload_v<T> && detail::is_valid_overload_v<U> && !std::is_same<T, U>::value, bool> = true>
+BOOST_INT128_HOST_DEVICE constexpr uint128_t operator*(const T lhs, const U rhs) noexcept
+{
+    return static_cast<uint128_t>(lhs) * static_cast<uint128_t>(rhs);
+}
 
-    BOOST_INT128_IF_CONSTEXPR (std::is_same<T, int128_t>::value)
-    {
-        if (lhs < T{0})
-        {
-            return true;
-        }
+template <typename T, typename U, std::enable_if_t<detail::is_valid_overload_v<T> && detail::is_valid_overload_v<U> && !std::is_same<T, U>::value, bool> = true>
+BOOST_INT128_HOST_DEVICE constexpr uint128_t operator/(const T lhs, const U rhs) noexcept
+{
+    return static_cast<uint128_t>(lhs) / static_cast<uint128_t>(rhs);
+}
 
-        return static_cast<uint128_t>(lhs) <= rhs;
-    }
-    else
-    {
-        if (rhs < T{0})
-        {
-            return false;
-        }
+template <typename T, typename U, std::enable_if_t<detail::is_valid_overload_v<T> && detail::is_valid_overload_v<U> && !std::is_same<T, U>::value, bool> = true>
+BOOST_INT128_HOST_DEVICE constexpr uint128_t operator%(const T lhs, const U rhs) noexcept
+{
+    return static_cast<uint128_t>(lhs) % static_cast<uint128_t>(rhs);
+}
 
-        return lhs <= static_cast<uint128_t>(rhs);
-    }
+//=====================================
+// Cross-type Bitwise Operators
+//=====================================
 
-    #endif
+template <typename T, typename U, std::enable_if_t<detail::is_valid_overload_v<T> && detail::is_valid_overload_v<U> && !std::is_same<T, U>::value, bool> = true>
+BOOST_INT128_HOST_DEVICE constexpr uint128_t operator|(const T lhs, const U rhs) noexcept
+{
+    return static_cast<uint128_t>(lhs) | static_cast<uint128_t>(rhs);
 }
 
 template <typename T, typename U, std::enable_if_t<detail::is_valid_overload_v<T> && detail::is_valid_overload_v<U> && !std::is_same<T, U>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE constexpr bool operator>(const T lhs, const U rhs) noexcept
+BOOST_INT128_HOST_DEVICE constexpr uint128_t operator&(const T lhs, const U rhs) noexcept
 {
-    #ifndef BOOST_INT128_ALLOW_SIGN_COMPARE
-
-    static_assert(std::is_same<T, U>::value, "Sign Compare Error, cast one type to the other for this operation");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return true;
+    return static_cast<uint128_t>(lhs) & static_cast<uint128_t>(rhs);
+}
 
-    #else
+template <typename T, typename U, std::enable_if_t<detail::is_valid_overload_v<T> && detail::is_valid_overload_v<U> && !std::is_same<T, U>::value, bool> = true>
+BOOST_INT128_HOST_DEVICE constexpr uint128_t operator^(const T lhs, const U rhs) noexcept
+{
+    return static_cast<uint128_t>(lhs) ^ static_cast<uint128_t>(rhs);
+}
 
-    BOOST_INT128_IF_CONSTEXPR (std::is_same<T, int128_t>::value)
-    {
-        if (lhs < T{0})
-        {
-            return false;
-        }
+//=====================================
+// Cross-type Shift Operators
+//=====================================
 
-        return static_cast<uint128_t>(lhs) > rhs;
-    }
-    else
-    {
-        if (rhs < T{0})
-        {
-            return true;
-        }
+BOOST_INT128_HOST_DEVICE constexpr int128_t operator<<(const int128_t lhs, const uint128_t rhs) noexcept
+{
+    return lhs << static_cast<int128_t>(rhs);
+}
 
-        return lhs > static_cast<uint128_t>(rhs);
-    }
+BOOST_INT128_HOST_DEVICE constexpr uint128_t operator<<(const uint128_t lhs, const int128_t rhs) noexcept
+{
+    return lhs << static_cast<uint128_t>(rhs);
+}
 
-    #endif
+BOOST_INT128_HOST_DEVICE constexpr int128_t operator>>(const int128_t lhs, const uint128_t rhs) noexcept
+{
+    return lhs >> static_cast<int128_t>(rhs);
 }
 
-template <typename T, typename U, std::enable_if_t<detail::is_valid_overload_v<T> && detail::is_valid_overload_v<U> && !std::is_same<T, U>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE constexpr bool operator>=(const T lhs, const U rhs) noexcept
+BOOST_INT128_HOST_DEVICE constexpr uint128_t operator>>(const uint128_t lhs, const int128_t rhs) noexcept
 {
-    #ifndef BOOST_INT128_ALLOW_SIGN_COMPARE
+    return lhs >> static_cast<uint128_t>(rhs);
+}
 
-    static_assert(std::is_same<T, U>::value, "Sign Compare Error, cast one type to the other for this operation");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return true;
+//=====================================
+// int128_t with builtin unsigned __int128 comparison operators
+//
+// These live here (not in int128_imp.hpp)
+// to avoid C++20 rewritten-candidate ambiguity on MSVC.
+//=====================================
 
-    #else
+#if defined(BOOST_INT128_HAS_INT128) || defined(BOOST_INT128_HAS_MSVC_INT128)
 
-    BOOST_INT128_IF_CONSTEXPR (std::is_same<T, int128_t>::value)
-    {
-        if (lhs < T{0})
-        {
-            return false;
-        }
+BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator==(const int128_t lhs, const detail::builtin_u128 rhs) noexcept
+{
+    return static_cast<uint128_t>(lhs) == rhs;
+}
 
-        return static_cast<uint128_t>(lhs) >= rhs;
-    }
-    else
-    {
-        if (rhs < T{0})
-        {
-            return true;
-        }
+BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator==(const detail::builtin_u128 lhs, const int128_t rhs) noexcept
+{
+    return lhs == static_cast<uint128_t>(rhs);
+}
 
-        return lhs >= static_cast<uint128_t>(rhs);
-    }
+BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator!=(const int128_t lhs, const detail::builtin_u128 rhs) noexcept
+{
+    return static_cast<uint128_t>(lhs) != rhs;
+}
 
-    #endif
+BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator!=(const detail::builtin_u128 lhs, const int128_t rhs) noexcept
+{
+    return lhs != static_cast<uint128_t>(rhs);
 }
 
-//=====================================
-// Arithmetic Operators
-//=====================================
+BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator<(const int128_t lhs, const detail::builtin_u128 rhs) noexcept
+{
+    return static_cast<uint128_t>(lhs) < rhs;
+}
 
-template <typename T, typename U, std::enable_if_t<detail::is_valid_overload_v<T> && detail::is_valid_overload_v<U> && !std::is_same<T, U>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE constexpr uint128_t operator+(const T lhs, const U rhs) noexcept
+BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator<(const detail::builtin_u128 lhs, const int128_t rhs) noexcept
 {
-    #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION
+    return lhs < static_cast<uint128_t>(rhs);
+}
 
-    static_assert(std::is_same<T, U>::value, "Sign Conversion Error, cast one type to the other for this operation");
-    static_cast<void>(rhs);
-    return static_cast<uint128_t>(lhs);
+BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator<=(const int128_t lhs, const detail::builtin_u128 rhs) noexcept
+{
+    return static_cast<uint128_t>(lhs) <= rhs;
+}
 
-    #else
+BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator<=(const detail::builtin_u128 lhs, const int128_t rhs) noexcept
+{
+    return lhs <= static_cast<uint128_t>(rhs);
+}
 
-    return static_cast<uint128_t>(lhs) + static_cast<uint128_t>(rhs);
+BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator>(const int128_t lhs, const detail::builtin_u128 rhs) noexcept
+{
+    return static_cast<uint128_t>(lhs) > rhs;
+}
 
-    #endif
+BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator>(const detail::builtin_u128 lhs, const int128_t rhs) noexcept
+{
+    return lhs > static_cast<uint128_t>(rhs);
 }
 
-template <typename T, typename U, std::enable_if_t<detail::is_valid_overload_v<T> && detail::is_valid_overload_v<U> && !std::is_same<T, U>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE constexpr uint128_t operator-(const T lhs, const U rhs) noexcept
+BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator>=(const int128_t lhs, const detail::builtin_u128 rhs) noexcept
 {
-    #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION
+    return static_cast<uint128_t>(lhs) >= rhs;
+}
 
-    static_assert(std::is_same<T, U>::value, "Sign Conversion Error, cast one type to the other for this operation");
-    static_cast<void>(rhs);
-    return static_cast<uint128_t>(lhs);
+BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator>=(const detail::builtin_u128 lhs, const int128_t rhs) noexcept
+{
+    return lhs >= static_cast<uint128_t>(rhs);
+}
 
-    #else
+#endif // BOOST_INT128_HAS_INT128
 
-    return static_cast<uint128_t>(lhs) - static_cast<uint128_t>(rhs);
+//=====================================
+// int128_t with builtin unsigned __int128 binary operators
+//=====================================
+
+#if defined(BOOST_INT128_HAS_INT128) || defined(BOOST_INT128_HAS_MSVC_INT128)
 
-    #endif
+BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator|(const int128_t lhs, const detail::builtin_u128 rhs) noexcept
+{
+    return static_cast<uint128_t>(lhs) | rhs;
 }
 
-template <typename T, typename U, std::enable_if_t<detail::is_valid_overload_v<T> && detail::is_valid_overload_v<U> && !std::is_same<T, U>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE constexpr uint128_t operator*(const T lhs, const U rhs) noexcept
+BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator|(const detail::builtin_u128 lhs, const int128_t rhs) noexcept
 {
-    #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION
+    return lhs | static_cast<uint128_t>(rhs);
+}
 
-    static_assert(std::is_same<T, U>::value, "Sign Conversion Error, cast one type to the other for this operation");
-    static_cast<void>(rhs);
-    return static_cast<uint128_t>(lhs);
+BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator&(const int128_t lhs, const detail::builtin_u128 rhs) noexcept
+{
+    return static_cast<uint128_t>(lhs) & rhs;
+}
 
-    #else
+BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator&(const detail::builtin_u128 lhs, const int128_t rhs) noexcept
+{
+    return lhs & static_cast<uint128_t>(rhs);
+}
 
-    return static_cast<uint128_t>(lhs) * static_cast<uint128_t>(rhs);
+BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator^(const int128_t lhs, const detail::builtin_u128 rhs) noexcept
+{
+    return static_cast<uint128_t>(lhs) ^ rhs;
+}
 
-    #endif
+BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator^(const detail::builtin_u128 lhs, const int128_t rhs) noexcept
+{
+    return lhs ^ static_cast<uint128_t>(rhs);
 }
 
-template <typename T, typename U, std::enable_if_t<detail::is_valid_overload_v<T> && detail::is_valid_overload_v<U> && !std::is_same<T, U>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE constexpr uint128_t operator/(const T lhs, const U rhs) noexcept
+BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator+(const int128_t lhs, const detail::builtin_u128 rhs) noexcept
 {
-    #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION
+    return static_cast<uint128_t>(lhs) + rhs;
+}
 
-    static_assert(std::is_same<T, U>::value, "Sign Conversion Error, cast one type to the other for this operation");
-    static_cast<void>(rhs);
-    return static_cast<uint128_t>(lhs);
+BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator+(const detail::builtin_u128 lhs, const int128_t rhs) noexcept
+{
+    return lhs + static_cast<uint128_t>(rhs);
+}
 
-    #else
+BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator-(const int128_t lhs, const detail::builtin_u128 rhs) noexcept
+{
+    return static_cast<uint128_t>(lhs) - rhs;
+}
 
-    return static_cast<uint128_t>(lhs) / static_cast<uint128_t>(rhs);
+BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator-(const detail::builtin_u128 lhs, const int128_t rhs) noexcept
+{
+    return lhs - static_cast<uint128_t>(rhs);
+}
 
-    #endif
+BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator*(const int128_t lhs, const detail::builtin_u128 rhs) noexcept
+{
+    return static_cast<uint128_t>(lhs) * rhs;
 }
 
-template <typename T, typename U, std::enable_if_t<detail::is_valid_overload_v<T> && detail::is_valid_overload_v<U> && !std::is_same<T, U>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE constexpr uint128_t operator%(const T lhs, const U rhs) noexcept
+BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator*(const detail::builtin_u128 lhs, const int128_t rhs) noexcept
 {
-    #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION
+    return lhs * static_cast<uint128_t>(rhs);
+}
 
-    static_assert(std::is_same<T, U>::value, "Sign Conversion Error, cast one type to the other for this operation");
-    static_cast<void>(rhs);
-    return static_cast<uint128_t>(lhs);
+BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator/(const int128_t lhs, const detail::builtin_u128 rhs) noexcept
+{
+    return static_cast<uint128_t>(lhs) / rhs;
+}
 
-    #else
+BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator/(const detail::builtin_u128 lhs, const int128_t rhs) noexcept
+{
+    return lhs / static_cast<uint128_t>(rhs);
+}
 
-    return static_cast<uint128_t>(lhs) % static_cast<uint128_t>(rhs);
+BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator%(const int128_t lhs, const detail::builtin_u128 rhs) noexcept
+{
+    return static_cast<uint128_t>(lhs) % rhs;
+}
 
-    #endif
+BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator%(const detail::builtin_u128 lhs, const int128_t rhs) noexcept
+{
+    return lhs % static_cast<uint128_t>(rhs);
 }
 
+#endif // BOOST_INT128_HAS_INT128
+
 #ifdef _MSC_VER
 #pragma warning(pop)
 #endif
diff --git a/include/boost/int128/detail/int128_imp.hpp b/include/boost/int128/detail/int128_imp.hpp
index a9bdd9ca..898ae962 100644
--- a/include/boost/int128/detail/int128_imp.hpp
+++ b/include/boost/int128/detail/int128_imp.hpp
@@ -56,8 +56,7 @@ int128_t
     constexpr int128_t& operator=(int128_t&&) noexcept = default;
 
     // Requires a conversion file to be implemented
-    BOOST_INT128_HOST_DEVICE explicit constexpr int128_t(const uint128_t& v) noexcept;
-    BOOST_INT128_HOST_DEVICE explicit constexpr operator uint128_t() const noexcept;
+    BOOST_INT128_HOST_DEVICE constexpr int128_t(const uint128_t& v) noexcept;
 
     // Construct from integral types
     #if BOOST_INT128_ENDIAN_LITTLE_BYTE
@@ -87,41 +86,45 @@ int128_t
     template <BOOST_INT128_DEFAULTED_UNSIGNED_INTEGER_CONCEPT>
     BOOST_INT128_HOST_DEVICE constexpr int128_t(const UnsignedInteger v) noexcept : high {}, low {static_cast<std::uint64_t>(v)} {}
 
-    #ifdef BOOST_INT128_HAS_INT128
+    #if defined(BOOST_INT128_HAS_INT128) || defined(BOOST_INT128_HAS_MSVC_INT128)
 
-    BOOST_INT128_HOST_DEVICE constexpr int128_t(const detail::builtin_i128 v) noexcept : high {static_cast<std::int64_t>(v >> 64U)}, low {static_cast<std::uint64_t>(v & detail::low_word_mask)} {}
-    BOOST_INT128_HOST_DEVICE constexpr int128_t(const detail::builtin_u128 v) noexcept : high {static_cast<std::int64_t>(v >> 64U)}, low {static_cast<std::uint64_t>(v & detail::low_word_mask)} {}
+    BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR int128_t(const detail::builtin_i128 v) noexcept : high {static_cast<std::int64_t>(v >> 64U)}, low {static_cast<std::uint64_t>(v & detail::low_word_mask)} {}
+    BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR int128_t(const detail::builtin_u128 v) noexcept : high {static_cast<std::int64_t>(v >> 64U)}, low {static_cast<std::uint64_t>(v & detail::low_word_mask)} {}
 
     #endif // BOOST_INT128_HAS_INT128
 
     #endif // BOOST_INT128_ENDIAN_LITTLE_BYTE
 
+    // Construct from floating-point types
+    template <BOOST_INT128_DEFAULTED_FLOATING_POINT_CONCEPT>
+    BOOST_INT128_HOST_DEVICE constexpr int128_t(Float f) noexcept;
+
     // Integer Conversion operators
     BOOST_INT128_HOST_DEVICE explicit constexpr operator bool() const noexcept { return low || high; }
 
     template <BOOST_INT128_DEFAULTED_SIGNED_INTEGER_CONCEPT>
-    BOOST_INT128_HOST_DEVICE explicit constexpr operator SignedInteger() const noexcept { return static_cast<SignedInteger>(low); }
+    BOOST_INT128_HOST_DEVICE constexpr operator SignedInteger() const noexcept { return static_cast<SignedInteger>(low); }
 
     template <BOOST_INT128_DEFAULTED_UNSIGNED_INTEGER_CONCEPT>
-    BOOST_INT128_HOST_DEVICE explicit constexpr operator UnsignedInteger() const noexcept { return static_cast<UnsignedInteger>(low); }
+    BOOST_INT128_HOST_DEVICE constexpr operator UnsignedInteger() const noexcept { return static_cast<UnsignedInteger>(low); }
 
     #if defined(BOOST_INT128_HAS_INT128) || defined(BOOST_INT128_HAS_MSVC_INT128)
 
-    BOOST_INT128_HOST_DEVICE explicit BOOST_INT128_BUILTIN_CONSTEXPR operator detail::builtin_i128() const noexcept { return static_cast<detail::builtin_i128>(static_cast<detail::builtin_u128>(high) << static_cast<detail::builtin_u128>(64)) | static_cast<detail::builtin_i128>(low); }
+    BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR operator detail::builtin_i128() const noexcept { return static_cast<detail::builtin_i128>(static_cast<detail::builtin_u128>(high) << static_cast<detail::builtin_u128>(64)) | static_cast<detail::builtin_i128>(low); }
 
-    BOOST_INT128_HOST_DEVICE explicit BOOST_INT128_BUILTIN_CONSTEXPR operator detail::builtin_u128() const noexcept { return (static_cast<detail::builtin_u128>(high) << static_cast<detail::builtin_u128>(64)) | static_cast<detail::builtin_u128>(low); }
+    BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR operator detail::builtin_u128() const noexcept { return (static_cast<detail::builtin_u128>(high) << static_cast<detail::builtin_u128>(64)) | static_cast<detail::builtin_u128>(low); }
 
     #endif // BOOST_INT128_HAS_INT128
 
     // Conversion to float
     // This is basically the same as ldexp(static_cast<T>(high), 64) + static_cast<T>(low),
     // but can be constexpr at C++11 instead of C++26
-    BOOST_INT128_HOST_DEVICE explicit constexpr operator float() const noexcept;
-    BOOST_INT128_HOST_DEVICE explicit constexpr operator double() const noexcept;
+    BOOST_INT128_HOST_DEVICE constexpr operator float() const noexcept;
+    BOOST_INT128_HOST_DEVICE constexpr operator double() const noexcept;
 
     // Long double does not exist on device
     #if !(defined(__CUDACC__) && defined(BOOST_INT128_ENABLE_CUDA))
-    explicit constexpr operator long double() const noexcept;
+    constexpr operator long double() const noexcept;
     #endif
 
     // Compound Or
@@ -306,6 +309,62 @@ constexpr int128_t::operator long double() const noexcept
 
 #endif
 
+//=====================================
+// Float Construction
+//=====================================
+
+// Inverse of operator Float(): any fractional part of f is truncated toward zero.
+// NaN -> 0;
+// f >= 2^127 -> INT128_MAX;
+// f <= -2^127 -> INT128_MIN.
+template <BOOST_INT128_FLOATING_POINT_CONCEPT>
+BOOST_INT128_HOST_DEVICE constexpr int128_t::int128_t(Float f) noexcept
+{
+    constexpr Float two_32 {static_cast<Float>(UINT64_C(1) << 32)}; // 2^32
+    constexpr Float two_64 {two_32 * two_32}; // 2^64
+    constexpr Float two_127 {two_64 * static_cast<Float>(UINT64_C(1) << 63)}; // 2^127
+
+    // NaN: return without assigning high/low (assumes their defaults are zero - TODO confirm).
+    // NaN compares false to everything, so neither >= 0 nor <= 0 holds.
+    if (!(f >= Float{0}) && !(f <= Float{0}))
+    {
+        return;
+    }
+
+    if (f >= two_127) // too large: saturate to the largest representable value
+    {
+        high = (std::numeric_limits<std::int64_t>::max)();
+        low = UINT64_MAX;
+        return;
+    }
+
+    if (f <= -two_127) // too small (or exactly -2^127): use the smallest representable value
+    {
+        high = (std::numeric_limits<std::int64_t>::min)();
+        low = UINT64_C(0);
+        return;
+    }
+
+    const bool negative {f < Float{0}};
+    const Float abs_f {negative ? -f : f};
+
+    std::uint64_t h {static_cast<std::uint64_t>(abs_f / two_64)}; // guards above give abs_f < 2^127, so this is below 2^63
+    const Float remainder {abs_f - static_cast<Float>(h) * two_64}; // what is left of abs_f after removing h * 2^64
+    std::uint64_t l {static_cast<std::uint64_t>(remainder)}; // the cast drops any fractional part
+
+    if (negative)
+    {
+        // Two's complement negation of (h, l): new_l = -l (with wraparound),
+        // new_h = ~h if a borrow occurred (l != 0), else ~h + 1.
+        const bool low_was_zero {l == UINT64_C(0)};
+        l = UINT64_C(0) - l;
+        h = ~h + (low_was_zero ? UINT64_C(1) : UINT64_C(0));
+    }
+
+    high = static_cast<std::int64_t>(h);
+    low = l;
+}
+
 //=====================================
 // Unary Operators
 //=====================================
@@ -375,35 +434,13 @@ BOOST_INT128_HOST_DEVICE constexpr bool operator==(const SignedInteger lhs, cons
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_UNSIGNED_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr bool operator==(const int128_t lhs, const UnsignedInteger rhs) noexcept
 {
-    #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE
-
     return lhs.high == 0 && lhs.low == static_cast<std::uint64_t>(rhs);
-
-    #else
-
-    static_assert(detail::is_signed_integer_v<UnsignedInteger>, "Sign Compare Error");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return true;
-
-    #endif
 }
 
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_UNSIGNED_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr bool operator==(const UnsignedInteger lhs, const int128_t rhs) noexcept
 {
-    #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE
-
     return rhs.high == 0 && rhs.low == static_cast<std::uint64_t>(lhs);
-
-    #else
-
-    static_assert(detail::is_signed_integer_v<UnsignedInteger>, "Sign Compare Error");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return true;
-
-    #endif
 }
 
 #if defined(BOOST_INT128_HAS_INT128) || defined(BOOST_INT128_HAS_MSVC_INT128)
@@ -418,36 +455,6 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool
     return static_cast<int128_t>(lhs) == rhs;
 }
 
-#ifdef BOOST_INT128_ALLOW_SIGN_COMPARE
-
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator==(const int128_t lhs, const detail::builtin_u128 rhs) noexcept
-{
-    return lhs.high < 0 ? false : lhs == static_cast<int128_t>(rhs);
-}
-
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator==(const detail::builtin_u128 lhs, const int128_t rhs) noexcept
-{
-    return rhs.high < 0 ? false : static_cast<int128_t>(lhs) == rhs;
-}
-
-#else
-
-BOOST_INT128_EXPORT template <typename T, std::enable_if_t<std::is_same<T, detail::builtin_u128>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator==(const int128_t, const T) noexcept
-{
-    static_assert(detail::is_signed_integer_v<T>, "Sign Compare Error");
-    return true;
-}
-
-BOOST_INT128_EXPORT template <typename T, std::enable_if_t<std::is_same<T, detail::builtin_u128>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator==(const T, const int128_t) noexcept
-{
-    static_assert(detail::is_signed_integer_v<T>, "Sign Compare Error");
-    return true;
-}
-
-#endif // BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #endif // BOOST_INT128_HAS_INT128
 
 //=====================================
@@ -511,35 +518,13 @@ BOOST_INT128_HOST_DEVICE constexpr bool operator!=(const SignedInteger lhs, cons
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_UNSIGNED_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr bool operator!=(const int128_t lhs, const UnsignedInteger rhs) noexcept
 {
-    #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE
-
     return lhs.high != 0 || lhs.low != static_cast<std::uint64_t>(rhs);
-
-    #else
-
-    static_assert(detail::is_signed_integer_v<UnsignedInteger>, "Sign Compare Error");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return true;
-
-    #endif
 }
 
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_UNSIGNED_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr bool operator!=(const UnsignedInteger lhs, const int128_t rhs) noexcept
 {
-    #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE
-
     return rhs.high != 0 || rhs.low != static_cast<std::uint64_t>(lhs);
-
-    #else
-
-    static_assert(detail::is_signed_integer_v<UnsignedInteger>, "Sign Compare Error");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return true;
-
-    #endif
 }
 
 #if defined(BOOST_INT128_HAS_INT128) || defined(BOOST_INT128_HAS_MSVC_INT128)
@@ -554,36 +539,6 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool
     return static_cast<int128_t>(lhs) != rhs;
 }
 
-#ifdef BOOST_INT128_ALLOW_SIGN_COMPARE
-
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator!=(const int128_t lhs, const detail::builtin_u128 rhs) noexcept
-{
-    return lhs.high < 0 ? true : lhs != static_cast<int128_t>(rhs);
-}
-
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator!=(const detail::builtin_u128 lhs, const int128_t rhs) noexcept
-{
-    return rhs.high < 0 ? true : static_cast<int128_t>(lhs) != rhs;
-}
-
-#else
-
-BOOST_INT128_EXPORT template <typename T, std::enable_if_t<std::is_same<T, detail::builtin_u128>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator!=(const int128_t, const T) noexcept
-{
-    static_assert(detail::is_signed_integer_v<T>, "Sign Compare Error");
-    return true;
-}
-
-BOOST_INT128_EXPORT template <typename T, std::enable_if_t<std::is_same<T, detail::builtin_u128>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator!=(const T, const int128_t) noexcept
-{
-    static_assert(detail::is_signed_integer_v<T>, "Sign Compare Error");
-    return true;
-}
-
-#endif // BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #endif // BOOST_INT128_HAS_INT128
 
 //=====================================
@@ -624,35 +579,13 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr bool operator<(const int1
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_UNSIGNED_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr bool operator<(const int128_t lhs, const UnsignedInteger rhs) noexcept
 {
-    #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE
-
-    return lhs.high < 0 ? true : lhs.low < static_cast<std::uint64_t>(rhs);
-
-    #else
-
-    static_assert(detail::is_signed_integer_v<UnsignedInteger>, "Sign Compare Error");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return true;
-
-    #endif
+    return lhs.high < 0 || (lhs.high == 0 && lhs.low < static_cast<std::uint64_t>(rhs));
 }
 
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_UNSIGNED_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr bool operator<(const UnsignedInteger lhs, const int128_t rhs) noexcept
 {
-    #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE
-
-    return rhs.high < 0 ? false : static_cast<std::uint64_t>(lhs) < rhs.low;
-
-    #else
-
-    static_assert(detail::is_signed_integer_v<UnsignedInteger>, "Sign Compare Error");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return true;
-
-    #endif
+    return rhs.high > 0 || (rhs.high == 0 && static_cast<std::uint64_t>(lhs) < rhs.low);
 }
 
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_SIGNED_INTEGER_CONCEPT>
@@ -700,36 +633,6 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool
     return static_cast<int128_t>(lhs) < rhs;
 }
 
-#ifdef BOOST_INT128_ALLOW_SIGN_COMPARE
-
-BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator<(const int128_t lhs, const detail::builtin_u128 rhs) noexcept
-{
-    return lhs.high < 0 ? false : lhs < static_cast<int128_t>(rhs);
-}
-
-BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator<(const detail::builtin_u128 lhs, const int128_t rhs) noexcept
-{
-    return rhs.high < 0 ? true : static_cast<int128_t>(lhs) < rhs;
-}
-
-#else // BOOST_INT128_ALLOW_SIGN_CONVERSION
-
-BOOST_INT128_EXPORT template <typename T, std::enable_if_t<std::is_same<T, detail::builtin_u128>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator<(const int128_t, const T) noexcept
-{
-    static_assert(detail::is_signed_integer_v<T>, "Sign Compare Error");
-    return true;
-}
-
-BOOST_INT128_EXPORT template <typename T, std::enable_if_t<std::is_same<T, detail::builtin_u128>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator<(const T, const int128_t) noexcept
-{
-    static_assert(detail::is_signed_integer_v<T>, "Sign Compare Error");
-    return true;
-}
-
-#endif // BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #endif // BOOST_INT128_HAS_INT128
 
 //=====================================
@@ -782,35 +685,13 @@ BOOST_INT128_HOST_DEVICE constexpr bool operator>(const SignedInteger lhs, const
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_UNSIGNED_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr bool operator>(const int128_t lhs, const UnsignedInteger rhs) noexcept
 {
-    #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE
-
-    return lhs.high > 0 ? true : lhs.low > static_cast<std::uint64_t>(rhs);
-
-    #else
-
-    static_assert(detail::is_signed_integer_v<UnsignedInteger>, "Sign Compare Error");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return true;
-
-    #endif
+    return lhs.high > 0 || (lhs.high == 0 && lhs.low > static_cast<std::uint64_t>(rhs));
 }
 
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_UNSIGNED_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr bool operator>(const UnsignedInteger lhs, const int128_t rhs) noexcept
 {
-    #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE
-
-    return rhs.high < 0 ? true : static_cast<std::uint64_t>(lhs) > rhs.low;
-
-    #else
-
-    static_assert(detail::is_signed_integer_v<UnsignedInteger>, "Sign Compare Error");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return true;
-
-    #endif
+    return rhs.high < 0 || (rhs.high == 0 && static_cast<std::uint64_t>(lhs) > rhs.low);
 }
 
 #if defined(BOOST_INT128_HAS_INT128) || defined(BOOST_INT128_HAS_MSVC_INT128)
@@ -825,36 +706,6 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool
     return static_cast<int128_t>(lhs) > rhs;
 }
 
-#ifdef BOOST_INT128_ALLOW_SIGN_COMPARE
-
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator>(const int128_t lhs, const detail::builtin_u128 rhs) noexcept
-{
-    return lhs.high < 0 ? false : lhs > static_cast<int128_t>(rhs);
-}
-
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator>(const detail::builtin_u128 lhs, const int128_t rhs) noexcept
-{
-    return rhs.high < 0 ? true : static_cast<int128_t>(lhs) > rhs;
-}
-
-#else // BOOST_INT128_ALLOW_SIGN_CONVERSION
-
-BOOST_INT128_EXPORT template <typename T, std::enable_if_t<std::is_same<T, detail::builtin_u128>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator>(const int128_t, const T) noexcept
-{
-    static_assert(detail::is_signed_integer_v<T>, "Sign Compare Error");
-    return true;
-}
-
-BOOST_INT128_EXPORT template <typename T, std::enable_if_t<std::is_same<T, detail::builtin_u128>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator>(const T, const int128_t) noexcept
-{
-    static_assert(detail::is_signed_integer_v<T>, "Sign Compare Error");
-    return true;
-}
-
-#endif // BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #endif // BOOST_INT128_HAS_INT128
 
 //=====================================
@@ -907,35 +758,13 @@ BOOST_INT128_HOST_DEVICE constexpr bool operator<=(const SignedInteger lhs, cons
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_UNSIGNED_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr bool operator<=(const int128_t lhs, const UnsignedInteger rhs) noexcept
 {
-    #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE
-
-    return lhs.high < 0 ? true : lhs.low <= static_cast<std::uint64_t>(rhs);
-
-    #else
-
-    static_assert(detail::is_signed_integer_v<UnsignedInteger>, "Sign Compare Error");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return true;
-
-    #endif
+    return lhs.high < 0 || (lhs.high == 0 && lhs.low <= static_cast<std::uint64_t>(rhs));
 }
 
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_UNSIGNED_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr bool operator<=(const UnsignedInteger lhs, const int128_t rhs) noexcept
 {
-    #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE
-
-    return rhs.high < 0 ? false : static_cast<std::uint64_t>(lhs) <= rhs.low;
-
-    #else
-
-    static_assert(detail::is_signed_integer_v<UnsignedInteger>, "Sign Compare Error");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return true;
-
-    #endif
+    return rhs.high > 0 || (rhs.high == 0 && static_cast<std::uint64_t>(lhs) <= rhs.low);
 }
 
 #if defined(BOOST_INT128_HAS_INT128) || defined(BOOST_INT128_HAS_MSVC_INT128)
@@ -950,36 +779,6 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool
     return static_cast<int128_t>(lhs) <= rhs;
 }
 
-#ifdef BOOST_INT128_ALLOW_SIGN_COMPARE
-
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator<=(const int128_t lhs, const detail::builtin_u128 rhs) noexcept
-{
-    return lhs.high < 0 ? true : lhs <= static_cast<int128_t>(rhs);
-}
-
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator<=(const detail::builtin_u128 lhs, const int128_t rhs) noexcept
-{
-    return rhs.high < 0 ? false : static_cast<int128_t>(lhs) <= rhs;
-}
-
-#else // BOOST_INT128_ALLOW_SIGN_CONVERSION
-
-BOOST_INT128_EXPORT template <typename T, std::enable_if_t<std::is_same<T, detail::builtin_u128>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator<=(const int128_t, const T) noexcept
-{
-    static_assert(detail::is_signed_integer_v<T>, "Sign Compare Error");
-    return true;
-}
-
-BOOST_INT128_EXPORT template <typename T, std::enable_if_t<std::is_same<T, detail::builtin_u128>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator<=(const T, const int128_t) noexcept
-{
-    static_assert(detail::is_signed_integer_v<T>, "Sign Compare Error");
-    return true;
-}
-
-#endif // BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #endif // BOOST_INT128_HAS_INT128
 
 //=====================================
@@ -1032,35 +831,13 @@ BOOST_INT128_HOST_DEVICE constexpr bool operator>=(const SignedInteger lhs, cons
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_UNSIGNED_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr bool operator>=(const int128_t lhs, const UnsignedInteger rhs) noexcept
 {
-    #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE
-
-    return lhs.high < 0 ? false : lhs.low >= static_cast<std::uint64_t>(rhs);
-
-    #else
-
-    static_assert(detail::is_signed_integer_v<UnsignedInteger>, "Sign Compare Error");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return true;
-
-    #endif
+    return lhs.high > 0 || (lhs.high == 0 && lhs.low >= static_cast<std::uint64_t>(rhs));
 }
 
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_UNSIGNED_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr bool operator>=(const UnsignedInteger lhs, const int128_t rhs) noexcept
 {
-    #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE
-
-    return rhs.high < 0 ? true : static_cast<std::uint64_t>(lhs) >= rhs.low;
-
-    #else
-
-    static_assert(detail::is_signed_integer_v<UnsignedInteger>, "Sign Compare Error");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return true;
-
-    #endif
+    return rhs.high < 0 || (rhs.high == 0 && static_cast<std::uint64_t>(lhs) >= rhs.low);
 }
 
 #if defined(BOOST_INT128_HAS_INT128) || defined(BOOST_INT128_HAS_MSVC_INT128)
@@ -1075,36 +852,6 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool
     return static_cast<int128_t>(lhs) >= rhs;
 }
 
-#ifdef BOOST_INT128_ALLOW_SIGN_COMPARE
-
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator>=(const int128_t lhs, const detail::builtin_u128 rhs) noexcept
-{
-    return lhs.high < 0 ? false : lhs >= static_cast<int128_t>(rhs);
-}
-
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator>=(const detail::builtin_u128 lhs, const int128_t rhs) noexcept
-{
-    return rhs.high < 0 ? true : static_cast<int128_t>(lhs) >= rhs;
-}
-
-#else // BOOST_INT128_ALLOW_SIGN_CONVERSION
-
-BOOST_INT128_EXPORT template <typename T, std::enable_if_t<std::is_same<T, detail::builtin_u128>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator>=(const int128_t, const T) noexcept
-{
-    static_assert(detail::is_signed_integer_v<T>, "Sign Compare Error");
-    return true;
-}
-
-BOOST_INT128_EXPORT template <typename T, std::enable_if_t<std::is_same<T, detail::builtin_u128>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator>=(const T, const int128_t) noexcept
-{
-    static_assert(detail::is_signed_integer_v<T>, "Sign Compare Error");
-    return true;
-}
-
-#endif // BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #endif // BOOST_INT128_HAS_INT128
 
 //=====================================
@@ -1166,8 +913,6 @@ BOOST_INT128_HOST_DEVICE constexpr std::strong_ordering operator<=>(const Signed
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_UNSIGNED_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr std::strong_ordering operator<=>(const int128_t lhs, const UnsignedInteger rhs) noexcept
 {
-    #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE
-
     if (lhs < rhs)
     {
         return std::strong_ordering::less;
@@ -1180,22 +925,11 @@ BOOST_INT128_HOST_DEVICE constexpr std::strong_ordering operator<=>(const int128
     {
         return std::strong_ordering::greater;
     }
-
-    #else
-
-    static_assert(detail::is_signed_integer_v<UnsignedInteger>, "Sign Compare Error");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return std::strong_ordering::less;
-
-    #endif
 }
 
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_UNSIGNED_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr std::strong_ordering operator<=>(const UnsignedInteger lhs, const int128_t rhs) noexcept
 {
-    #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE
-
     if (lhs < rhs)
     {
         return std::strong_ordering::less;
@@ -1208,15 +942,6 @@ BOOST_INT128_HOST_DEVICE constexpr std::strong_ordering operator<=>(const Unsign
     {
         return std::strong_ordering::greater;
     }
-
-    #else
-
-    static_assert(detail::is_signed_integer_v<UnsignedInteger>, "Sign Compare Error");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return std::strong_ordering::less;
-
-    #endif
 }
 
 #endif
@@ -1254,78 +979,27 @@ BOOST_INT128_HOST_DEVICE constexpr int128_t operator|(const SignedInteger lhs, c
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_UNSIGNED_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr int128_t operator|(const int128_t lhs, const UnsignedInteger rhs) noexcept
 {
-    #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION
-
     return {lhs.high, lhs.low | static_cast<std::uint64_t>(rhs)};
-
-    #else
-
-    static_assert(detail::is_signed_integer_v<UnsignedInteger>, "Sign Conversion Error");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return {0, 0};
-
-    #endif
 }
 
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_UNSIGNED_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr int128_t operator|(const UnsignedInteger lhs, const int128_t rhs) noexcept
 {
-    #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION
-
     return {rhs.high, static_cast<std::uint64_t>(lhs) | rhs.low};
-
-    #else
-
-    static_assert(detail::is_signed_integer_v<UnsignedInteger>, "Sign Conversion Error");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return {0, 0};
-
-    #endif
-}
-
-#ifdef BOOST_INT128_HAS_INT128
-
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator|(const int128_t lhs, const detail::builtin_i128 rhs) noexcept
-{
-    return lhs | static_cast<int128_t>(rhs);
-}
-
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator|(const detail::builtin_i128 lhs, const int128_t rhs) noexcept
-{
-    return static_cast<int128_t>(lhs) | rhs;
 }
 
-#ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION
+#if defined(BOOST_INT128_HAS_INT128) || defined(BOOST_INT128_HAS_MSVC_INT128)
 
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator|(const int128_t lhs, const detail::builtin_u128 rhs) noexcept
+BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR int128_t operator|(const int128_t lhs, const detail::builtin_i128 rhs) noexcept
 {
     return lhs | static_cast<int128_t>(rhs);
 }
 
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator|(const detail::builtin_u128 lhs, const int128_t rhs) noexcept
+BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR int128_t operator|(const detail::builtin_i128 lhs, const int128_t rhs) noexcept
 {
     return static_cast<int128_t>(lhs) | rhs;
 }
 
-#else // BOOST_INT128_ALLOW_SIGN_CONVERSION
-
-BOOST_INT128_EXPORT template <typename T, std::enable_if_t<std::is_same<T, detail::builtin_u128>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE constexpr int128_t operator|(const int128_t, const T) noexcept
-{
-    static_assert(detail::is_signed_integer_v<T>, "Sign Compare Error");
-    return {0, 0};
-}
-
-BOOST_INT128_EXPORT template <typename T, std::enable_if_t<std::is_same<T, detail::builtin_u128>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE constexpr int128_t operator|(const T, const int128_t) noexcept
-{
-    static_assert(detail::is_signed_integer_v<T>, "Sign Compare Error");
-    return {0, 0};
-}
-
-#endif // BOOST_INT128_ALLOW_SIGN_CONVERSION
 
 #endif // BOOST_INT128_HAS_INT128
 
@@ -1336,11 +1010,7 @@ BOOST_INT128_HOST_DEVICE constexpr int128_t operator|(const T, const int128_t) n
 template <BOOST_INT128_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr int128_t& int128_t::operator|=(const Integer rhs) noexcept
 {
-    #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION
-    static_assert(detail::is_signed_integer_v<Integer>, "Sign Conversion Error");
-    #endif
-
-    *this = *this | rhs;
+    *this = static_cast<int128_t>(*this | rhs);
     return *this;
 }
 
@@ -1355,11 +1025,7 @@ BOOST_INT128_HOST_DEVICE constexpr int128_t& int128_t::operator|=(const int128_t
 template <BOOST_INT128_128BIT_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE inline int128_t& int128_t::operator|=(const Integer rhs) noexcept
 {
-    #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION
-    static_assert(std::numeric_limits<Integer>::is_signed, "Sign Conversion Error");
-    #endif
-
-    *this = *this | rhs;
+    *this = static_cast<int128_t>(*this | rhs);
     return *this;
 }
 
@@ -1389,92 +1055,37 @@ BOOST_INT128_HOST_DEVICE constexpr int128_t operator&(const SignedInteger lhs, c
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_UNSIGNED_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr int128_t operator&(const int128_t lhs, const UnsignedInteger rhs) noexcept
 {
-    #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION
-
-    return {lhs.high, lhs.low & static_cast<std::uint64_t>(rhs)};
-
-    #else
-
-    static_assert(detail::is_signed_integer_v<UnsignedInteger>, "Sign Conversion Error");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return {0, 0};
-
-    #endif
+    return {0, lhs.low & static_cast<std::uint64_t>(rhs)};
 }
 
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_UNSIGNED_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr int128_t operator&(const UnsignedInteger lhs, const int128_t rhs) noexcept
 {
-    #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION
-
-    return {rhs.high, static_cast<std::uint64_t>(lhs) & rhs.low};
-
-    #else
-
-    static_assert(detail::is_signed_integer_v<UnsignedInteger>, "Sign Conversion Error");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return {0, 0};
-
-    #endif
+    return {0, static_cast<std::uint64_t>(lhs) & rhs.low};
 }
 
-#ifdef BOOST_INT128_HAS_INT128
+#if defined(BOOST_INT128_HAS_INT128) || defined(BOOST_INT128_HAS_MSVC_INT128)
 
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator&(const int128_t lhs, const detail::builtin_i128 rhs) noexcept
+BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR int128_t operator&(const int128_t lhs, const detail::builtin_i128 rhs) noexcept
 {
     return lhs & static_cast<int128_t>(rhs);
 }
 
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator&(const detail::builtin_i128 lhs, const int128_t rhs) noexcept
+BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR int128_t operator&(const detail::builtin_i128 lhs, const int128_t rhs) noexcept
 {
     return static_cast<int128_t>(lhs) & rhs;
 }
 
-#ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION
 
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator&(const int128_t lhs, const detail::builtin_u128 rhs) noexcept
+#endif // BOOST_INT128_HAS_INT128
+
+#ifdef BOOST_INT128_HAS_MSVC_INT128
+
+template <BOOST_INT128_128BIT_INTEGER_CONCEPT>
+BOOST_INT128_HOST_DEVICE inline int128_t& int128_t::operator&=(const Integer rhs) noexcept
 {
-    return lhs & static_cast<int128_t>(rhs);
-}
-
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator&(const detail::builtin_u128 lhs, const int128_t rhs) noexcept
-{
-    return static_cast<int128_t>(lhs) & rhs;
-}
-
-#else // BOOST_INT128_ALLOW_SIGN_CONVERSION
-
-BOOST_INT128_EXPORT template <typename T, std::enable_if_t<std::is_same<T, detail::builtin_u128>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE constexpr int128_t operator&(const int128_t, const T) noexcept
-{
-    static_assert(detail::is_signed_integer_v<T>, "Sign Compare Error");
-    return {0, 0};
-}
-
-BOOST_INT128_EXPORT template <typename T, std::enable_if_t<std::is_same<T, detail::builtin_u128>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE constexpr int128_t operator&(const T, const int128_t) noexcept
-{
-    static_assert(detail::is_signed_integer_v<T>, "Sign Compare Error");
-    return {0, 0};
-}
-
-#endif // BOOST_INT128_ALLOW_SIGN_CONVERSION
-
-#endif // BOOST_INT128_HAS_INT128
-
-#ifdef BOOST_INT128_HAS_MSVC_INT128
-
-template <BOOST_INT128_128BIT_INTEGER_CONCEPT>
-BOOST_INT128_HOST_DEVICE inline int128_t& int128_t::operator&=(const Integer rhs) noexcept
-{
-    #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION
-    static_assert(std::numeric_limits<Integer>::is_signed, "Sign Conversion Error");
-    #endif
-
-    *this = *this & rhs;
-    return *this;
+    *this = static_cast<int128_t>(*this & rhs);
+    return *this;
 }
 
 #endif // BOOST_INT128_HAS_MSVC_INT128
@@ -1486,11 +1097,7 @@ BOOST_INT128_HOST_DEVICE inline int128_t& int128_t::operator&=(const Integer rhs
 template <BOOST_INT128_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr int128_t& int128_t::operator&=(const Integer rhs) noexcept
 {
-    #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION
-    static_assert(detail::is_signed_integer_v<Integer>, "Sign Conversion Error");
-    #endif
-
-    *this = *this & rhs;
+    *this = static_cast<int128_t>(*this & rhs);
     return *this;
 }
 
@@ -1524,78 +1131,27 @@ BOOST_INT128_HOST_DEVICE constexpr int128_t operator^(const SignedInteger lhs, c
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_UNSIGNED_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr int128_t operator^(const int128_t lhs, const UnsignedInteger rhs) noexcept
 {
-    #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION
-
     return {lhs.high, lhs.low ^ static_cast<std::uint64_t>(rhs)};
-
-    #else
-
-    static_assert(detail::is_signed_integer_v<UnsignedInteger>, "Sign Conversion Error");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return true;
-
-    #endif
 }
 
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_UNSIGNED_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr int128_t operator^(const UnsignedInteger lhs, const int128_t rhs) noexcept
 {
-    #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION
-
     return {rhs.high, static_cast<std::uint64_t>(lhs) ^ rhs.low};
-
-    #else
-
-    static_assert(detail::is_signed_integer_v<UnsignedInteger>, "Sign Conversion Error");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return int128_t{};
-
-    #endif
 }
 
-#ifdef BOOST_INT128_HAS_INT128
-
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator^(const int128_t lhs, const detail::builtin_i128 rhs) noexcept
-{
-    return lhs ^ static_cast<int128_t>(rhs);
-}
-
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator^(const detail::builtin_i128 lhs, const int128_t rhs) noexcept
-{
-    return static_cast<int128_t>(lhs) ^ rhs;
-}
-
-#ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION
+#if defined(BOOST_INT128_HAS_INT128) || defined(BOOST_INT128_HAS_MSVC_INT128)
 
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator^(const int128_t lhs, const detail::builtin_u128 rhs) noexcept
+BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR int128_t operator^(const int128_t lhs, const detail::builtin_i128 rhs) noexcept
 {
     return lhs ^ static_cast<int128_t>(rhs);
 }
 
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator^(const detail::builtin_u128 lhs, const int128_t rhs) noexcept
+BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR int128_t operator^(const detail::builtin_i128 lhs, const int128_t rhs) noexcept
 {
     return static_cast<int128_t>(lhs) ^ rhs;
 }
 
-#else // BOOST_INT128_ALLOW_SIGN_CONVERSION
-
-BOOST_INT128_EXPORT template <typename T, std::enable_if_t<std::is_same<T, detail::builtin_u128>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE constexpr int128_t operator^(const int128_t, const T) noexcept
-{
-    static_assert(detail::is_signed_integer_v<T>, "Sign Compare Error");
-    return {0, 0};
-}
-
-BOOST_INT128_EXPORT template <typename T, std::enable_if_t<std::is_same<T, detail::builtin_u128>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE constexpr int128_t operator^(const T, const int128_t) noexcept
-{
-    static_assert(detail::is_signed_integer_v<T>, "Sign Compare Error");
-    return {0, 0};
-}
-
-#endif // BOOST_INT128_ALLOW_SIGN_CONVERSION
 
 #endif // BOOST_INT128_HAS_INT128
 
@@ -1606,11 +1162,7 @@ BOOST_INT128_HOST_DEVICE constexpr int128_t operator^(const T, const int128_t) n
 template <BOOST_INT128_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr int128_t& int128_t::operator^=(Integer rhs) noexcept
 {
-    #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION
-    static_assert(detail::is_signed_integer_v<Integer>, "Sign Conversion Error");
-    #endif
-
-    *this = *this ^ rhs;
+    *this = static_cast<int128_t>(*this ^ rhs);
     return *this;
 }
 
@@ -1625,11 +1177,7 @@ BOOST_INT128_HOST_DEVICE constexpr int128_t& int128_t::operator^=(int128_t rhs)
 template <BOOST_INT128_128BIT_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE inline int128_t& int128_t::operator^=(const Integer rhs) noexcept
 {
-    #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION
-    static_assert(std::numeric_limits<Integer>::is_signed, "Sign Conversion Error");
-    #endif
-
-    *this = *this ^ rhs;
+    *this = static_cast<int128_t>(*this ^ rhs);
     return *this;
 }
 
@@ -1706,6 +1254,7 @@ BOOST_INT128_HOST_DEVICE int128_t intrinsic_ls_impl(const int128_t lhs, const In
 
     #ifdef BOOST_INT128_HAS_INT128
 
+    // Left-shifting a negative builtin_i128 is UB pre-C++20
     #  if defined(__aarch64__)
 
     #if defined(__GNUC__) && __GNUC__ >= 8
@@ -1713,8 +1262,8 @@ BOOST_INT128_HOST_DEVICE int128_t intrinsic_ls_impl(const int128_t lhs, const In
     #  pragma GCC diagnostic ignored "-Wclass-memaccess"
     #endif
 
-    builtin_i128 value;
-    std::memcpy(&value, &lhs, sizeof(builtin_i128));
+    builtin_u128 value;
+    std::memcpy(&value, &lhs, sizeof(builtin_u128));
     const auto res {value << rhs};
 
     int128_t return_value;
@@ -1727,7 +1276,7 @@ BOOST_INT128_HOST_DEVICE int128_t intrinsic_ls_impl(const int128_t lhs, const In
 
     #  else
 
-    return static_cast<builtin_i128>(lhs) << rhs;
+    return int128_t{static_cast<builtin_u128>(lhs) << rhs};
 
     #  endif
 
@@ -1807,9 +1356,9 @@ BOOST_INT128_HOST_DEVICE constexpr int128_t operator<<(const int128_t lhs, const
     return lhs << rhs.low;
 }
 
-#ifdef BOOST_INT128_HAS_INT128
+#if defined(BOOST_INT128_HAS_INT128) || defined(BOOST_INT128_HAS_MSVC_INT128)
 
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr detail::builtin_u128 operator<<(const detail::builtin_u128 lhs, const int128_t rhs) noexcept
+BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR detail::builtin_u128 operator<<(const detail::builtin_u128 lhs, const int128_t rhs) noexcept
 {
     constexpr auto bit_width {sizeof(detail::builtin_u128) * 8};
 
@@ -1817,11 +1366,11 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr detail::builtin_u128 oper
     {
         return 0;
     }
-
-    return lhs << rhs.low;
+    
+    return lhs << static_cast<detail::builtin_u128>(rhs.low);
 }
 
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr detail::builtin_i128 operator<<(const detail::builtin_i128 lhs, const int128_t rhs) noexcept
+BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR detail::builtin_i128 operator<<(const detail::builtin_i128 lhs, const int128_t rhs) noexcept
 {
     constexpr auto bit_width {sizeof(detail::builtin_i128) * 8};
 
@@ -1830,7 +1379,7 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr detail::builtin_i128 oper
         return 0;
     }
 
-    return lhs << rhs.low;
+    return lhs << static_cast<detail::builtin_u128>(rhs.low);
 }
 
 #endif
@@ -1869,7 +1418,7 @@ BOOST_INT128_HOST_DEVICE constexpr unsigned operator<<(const UnsignedInteger lhs
 template <BOOST_INT128_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr int128_t& int128_t::operator<<=(const Integer rhs) noexcept
 {
-    *this = *this << rhs;
+    *this = static_cast<int128_t>(*this << rhs);
     return *this;
 }
 
@@ -1884,11 +1433,7 @@ BOOST_INT128_HOST_DEVICE constexpr int128_t& int128_t::operator<<=(const int128_
 template <BOOST_INT128_128BIT_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE inline int128_t& int128_t::operator<<=(const Integer rhs) noexcept
 {
-    #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION
-    static_assert(std::numeric_limits<Integer>::is_signed, "Sign Conversion Error");
-    #endif
-
-    *this = *this << rhs;
+    *this = static_cast<int128_t>(*this << rhs);
     return *this;
 }
 
@@ -2061,9 +1606,9 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator>>(const
     return lhs >> rhs.low;
 }
 
-#ifdef BOOST_INT128_HAS_INT128
+#if defined(BOOST_INT128_HAS_INT128) || defined(BOOST_INT128_HAS_MSVC_INT128)
 
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr detail::builtin_u128 operator>>(const detail::builtin_u128 lhs, const int128_t rhs) noexcept
+BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR detail::builtin_u128 operator>>(const detail::builtin_u128 lhs, const int128_t rhs) noexcept
 {
     constexpr auto bit_width {sizeof(detail::builtin_u128) * 8};
 
@@ -2072,10 +1617,10 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr detail::builtin_u128 oper
         return 0;
     }
 
-    return lhs >> rhs.low;
+    return lhs >> static_cast<detail::builtin_u128>(rhs.low);
 }
 
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr detail::builtin_i128 operator>>(const detail::builtin_i128 lhs, const int128_t rhs) noexcept
+BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR detail::builtin_i128 operator>>(const detail::builtin_i128 lhs, const int128_t rhs) noexcept
 {
     constexpr auto bit_width {sizeof(detail::builtin_i128) * 8};
 
@@ -2084,7 +1629,7 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr detail::builtin_i128 oper
         return 0;
     }
 
-    return lhs >> rhs.low;
+    return lhs >> static_cast<detail::builtin_u128>(rhs.low);
 }
 
 #endif
@@ -2123,7 +1668,7 @@ BOOST_INT128_HOST_DEVICE constexpr unsigned operator>>(const UnsignedInteger lhs
 template <BOOST_INT128_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr int128_t& int128_t::operator>>=(const Integer rhs) noexcept
 {
-    *this = *this >> rhs;
+    *this = static_cast<int128_t>(*this >> rhs);
     return *this;
 }
 
@@ -2138,11 +1683,7 @@ BOOST_INT128_HOST_DEVICE constexpr int128_t& int128_t::operator>>=(const int128_
 template <BOOST_INT128_128BIT_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE inline int128_t& int128_t::operator>>=(const Integer rhs) noexcept
 {
-    #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION
-    static_assert(std::numeric_limits<Integer>::is_signed, "Sign Conversion Error");
-    #endif
-
-    *this = *this >> rhs;
+    *this = static_cast<int128_t>(*this >> rhs);
     return *this;
 }
 
@@ -2214,7 +1755,8 @@ BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr int128_t default_ad
 {
     #if (defined(__x86_64__) || (defined(__aarch64__) && !defined(__APPLE__))) && !defined(_WIN32) && defined(BOOST_INT128_HAS_INT128)
 
-    return static_cast<int128_t>(static_cast<detail::builtin_i128>(lhs) + static_cast<detail::builtin_i128>(rhs));
+    // Compute in the unsigned domain so that overflow wraps modulo 2^128
+    return int128_t{static_cast<detail::builtin_u128>(lhs) + static_cast<detail::builtin_u128>(rhs)};
 
     #elif defined(BOOST_INT128_HAS_BUILTIN_ADD_OVERFLOW)
 
@@ -2276,7 +1818,8 @@ BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr int128_t default_su
 
     #elif defined(__aarch64__) && !defined(__APPLE__)
 
-    return static_cast<int128_t>(static_cast<detail::builtin_i128>(lhs) - static_cast<detail::builtin_i128>(rhs));
+    // Unsigned wrap for consistent two's-complement semantics
+    return int128_t{static_cast<detail::builtin_u128>(lhs) - static_cast<detail::builtin_u128>(rhs)};
 
     #elif defined(_M_AMD64) && !defined(BOOST_INT128_NO_CONSTEVAL_DETECTION)
 
@@ -2331,35 +1874,13 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator+(const
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_UNSIGNED_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr int128_t operator+(const int128_t lhs, const UnsignedInteger rhs) noexcept
 {
-    #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION
-
     return detail::default_add(lhs, rhs);
-
-    #else
-
-    static_assert(detail::is_signed_integer_v<UnsignedInteger>, "Sign Conversion Error");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return {0, 0};
-
-    #endif
 }
 
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_UNSIGNED_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr int128_t operator+(const UnsignedInteger lhs, const int128_t rhs) noexcept
 {
-    #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION
-
     return detail::default_add(rhs, lhs);
-
-    #else
-
-    static_assert(detail::is_signed_integer_v<UnsignedInteger>, "Sign Conversion Error");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return {0, 0};
-
-    #endif
 }
 
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_SIGNED_INTEGER_CONCEPT>
@@ -2376,35 +1897,6 @@ BOOST_INT128_HOST_DEVICE constexpr int128_t operator+(const SignedInteger lhs, c
 
 #if defined(BOOST_INT128_HAS_INT128) || defined(BOOST_INT128_HAS_MSVC_INT128)
 
-#ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION
-
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR int128_t operator+(const int128_t lhs, const detail::builtin_u128 rhs) noexcept
-{
-    return detail::default_add(lhs, static_cast<int128_t>(rhs));
-}
-
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR int128_t operator+(const detail::builtin_u128 lhs, const int128_t rhs) noexcept
-{
-    return detail::default_add(rhs, static_cast<int128_t>(lhs));
-}
-
-#else // BOOST_INT128_ALLOW_SIGN_CONVERSION
-
-BOOST_INT128_EXPORT template <typename T, std::enable_if_t<std::is_same<T, detail::builtin_u128>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR int128_t operator+(const int128_t, const T) noexcept
-{
-    static_assert(detail::is_signed_integer_v<T>, "Sign Compare Error");
-    return {0, 0};
-}
-
-BOOST_INT128_EXPORT template <typename T, std::enable_if_t<std::is_same<T, detail::builtin_u128>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR int128_t operator+(const T, const int128_t) noexcept
-{
-    static_assert(detail::is_signed_integer_v<T>, "Sign Compare Error");
-    return {0, 0};
-}
-
-#endif // BOOST_INT128_ALLOW_SIGN_CONVERSION
 
 BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR int128_t operator+(const int128_t lhs, const detail::builtin_i128 rhs) noexcept
 {
@@ -2421,11 +1913,7 @@ BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR int128_t operator+(const
 template <BOOST_INT128_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr int128_t& int128_t::operator+=(const Integer rhs) noexcept
 {
-    #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION
-    static_assert(detail::is_signed_integer_v<Integer>, "Sign Conversion Error");
-    #endif
-
-    *this = *this + rhs;
+    *this = static_cast<int128_t>(*this + rhs);
     return *this;
 }
 
@@ -2440,7 +1928,7 @@ BOOST_INT128_HOST_DEVICE constexpr int128_t& int128_t::operator+=(const int128_t
 template <BOOST_INT128_128BIT_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE inline int128_t& int128_t::operator+=(const Integer rhs) noexcept
 {
-    *this = *this + rhs;
+    *this = static_cast<int128_t>(*this + rhs);
     return *this;
 }
 
@@ -2458,35 +1946,13 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator-(const
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_UNSIGNED_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr int128_t operator-(const int128_t lhs, const UnsignedInteger rhs) noexcept
 {
-    #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION
-
     return detail::default_sub(lhs, rhs);
-
-    #else
-
-    static_assert(detail::is_signed_integer_v<UnsignedInteger>, "Sign Conversion Error");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return {0, 0};
-
-    #endif
 }
 
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_UNSIGNED_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr int128_t operator-(const UnsignedInteger lhs, const int128_t rhs) noexcept
 {
-    #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION
-
     return detail::default_add(-rhs, lhs);
-
-    #else
-
-    static_assert(detail::is_signed_integer_v<UnsignedInteger>, "Sign Conversion Error");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return {0, 0};
-
-    #endif
 }
 
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_SIGNED_INTEGER_CONCEPT>
@@ -2503,35 +1969,6 @@ BOOST_INT128_HOST_DEVICE constexpr int128_t operator-(const SignedInteger lhs, c
 
 #if defined(BOOST_INT128_HAS_INT128) || defined(BOOST_INT128_HAS_MSVC_INT128)
 
-#ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION
-
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR int128_t operator-(const int128_t lhs, const detail::builtin_u128 rhs) noexcept
-{
-    return lhs - static_cast<int128_t>(rhs);
-}
-
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR int128_t operator-(const detail::builtin_u128 lhs, const int128_t rhs) noexcept
-{
-    return static_cast<int128_t>(lhs) - rhs;
-}
-
-#else // BOOST_INT128_ALLOW_SIGN_CONVERSION
-
-BOOST_INT128_EXPORT template <typename T, std::enable_if_t<std::is_same<T, detail::builtin_u128>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR int128_t operator-(const int128_t, const T) noexcept
-{
-    static_assert(detail::is_signed_integer_v<T>, "Sign Compare Error");
-    return {0, 0};
-}
-
-BOOST_INT128_EXPORT template <typename T, std::enable_if_t<std::is_same<T, detail::builtin_u128>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR int128_t operator-(const T, const int128_t) noexcept
-{
-    static_assert(detail::is_signed_integer_v<T>, "Sign Compare Error");
-    return {0, 0};
-}
-
-#endif // BOOST_INT128_ALLOW_SIGN_CONVERSION
 
 BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR int128_t operator-(const int128_t lhs, const detail::builtin_i128 rhs) noexcept
 {
@@ -2548,11 +1985,7 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR int1
 template <BOOST_INT128_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr int128_t& int128_t::operator-=(const Integer rhs) noexcept
 {
-    #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION
-    static_assert(detail::is_signed_integer_v<Integer>, "Sign Conversion Error");
-    #endif
-
-    *this = *this - rhs;
+    *this = static_cast<int128_t>(*this - rhs);
     return *this;
 }
 
@@ -2567,7 +2000,7 @@ BOOST_INT128_HOST_DEVICE constexpr int128_t& int128_t::operator-=(const int128_t
 template <BOOST_INT128_128BIT_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE inline int128_t& int128_t::operator-=(const Integer rhs) noexcept
 {
-    *this = *this - rhs;
+    *this = static_cast<int128_t>(*this - rhs);
     return *this;
 }
 
@@ -2579,57 +2012,14 @@ BOOST_INT128_HOST_DEVICE inline int128_t& int128_t::operator-=(const Integer rhs
 
 namespace detail {
 
-BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr int128_t signed_shift_left_32(const std::uint64_t low) noexcept
-{
-    return {static_cast<std::int64_t>(low >> 32), low << 32};
-}
-
-BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr int128_t library_mul(const int128_t lhs, const int128_t rhs) noexcept
-{
-    const auto a {lhs.low >> 32U};
-    const auto b {lhs.low & UINT32_MAX};
-    const auto c {rhs.low >> 32U};
-    const auto d {rhs.low & UINT32_MAX};
-
-    int128_t result { static_cast<std::int64_t>(static_cast<std::uint64_t>(lhs.high) * rhs.low + static_cast<std::uint64_t>(lhs.low) * rhs.high + a * c), b * d };
-    result += signed_shift_left_32(a * d) + signed_shift_left_32(b * c);
-
-    return result;
-}
-
 BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr int128_t default_mul(const int128_t lhs, const std::uint64_t rhs) noexcept
 {
-    const auto low_res{lhs.low * rhs};
-
-    const auto a_lo{lhs.low & UINT32_MAX};
-    const auto a_high{lhs.low >> 32U};
-    const auto b_lo{rhs & UINT32_MAX};
-    const auto b_high{rhs >> 32U};
-
-    const auto lo_lo{a_lo * b_lo};
-    const auto lo_hi{a_lo * b_high};
-    const auto hi_lo{a_high * b_lo};
-    const auto hi_hi{a_high * b_high};
-
-    const auto mid{(lo_lo >> 32U) + (lo_hi & UINT32_MAX) + (hi_lo & UINT32_MAX)};
-
-    const auto carry{hi_hi + (lo_hi >> 32) + (hi_lo >> 32) + (mid >> 32)};
-
-    const auto high_res{lhs.high * static_cast<std::int64_t>(rhs) + static_cast<std::int64_t>(carry)};
-
-    return {high_res, low_res};
+    return low_word_mul<int128_t>(lhs, rhs);
 }
 
 BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr int128_t default_mul(const int128_t lhs, const std::uint32_t rhs) noexcept
 {
-    const auto low_res{lhs.low * rhs};
-
-    const auto a_hi{lhs.low >> 32U};
-    const auto hi_lo{a_hi * rhs};
-
-    const auto high_res{lhs.high * static_cast<std::int64_t>(rhs) + static_cast<std::int64_t>(hi_lo)};
-
-    return {high_res, low_res};
+    return low_word_mul<int128_t>(lhs, rhs);
 }
 
 #if defined(_M_AMD64) && !defined(__GNUC__)
@@ -2654,7 +2044,7 @@ BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr int128_t default_mu
 
     if (BOOST_INT128_IS_CONSTANT_EVALUATED(lhs))
     {
-        return library_mul(lhs, rhs);
+        return low_word_mul<int128_t>(lhs, rhs);
     }
     else
     {
@@ -2679,50 +2069,33 @@ BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr int128_t default_mu
 
     #  elif defined(BOOST_INT128_HAS_INT128)
 
-    return static_cast<int128_t>(static_cast<detail::builtin_i128>(lhs) * static_cast<detail::builtin_i128>(rhs));
+    // Unsigned wrap for consistent two's-complement semantics
+    return int128_t{static_cast<detail::builtin_u128>(lhs) * static_cast<detail::builtin_u128>(rhs)};
 
     #  else
 
-    return library_mul(lhs, rhs);
+    return low_word_mul<int128_t>(lhs, rhs);
 
     #  endif
 
     #elif defined(__aarch64__) && defined(BOOST_INT128_HAS_INT128)
 
-    return static_cast<int128_t>(static_cast<detail::builtin_i128>(lhs) * static_cast<detail::builtin_i128>(rhs));
+    return int128_t{static_cast<detail::builtin_u128>(lhs) * static_cast<detail::builtin_u128>(rhs)};
 
     #elif defined(_M_AMD64) && !defined(__GNUC__) && !defined(BOOST_INT128_NO_CONSTEVAL_DETECTION)
 
     if (BOOST_INT128_IS_CONSTANT_EVALUATED(rhs))
     {
-        return library_mul(lhs, rhs); // LCOV_EXCL_LINE
+        return low_word_mul<int128_t>(lhs, rhs); // LCOV_EXCL_LINE
     }
     else
     {
         return msvc_amd64_mul(lhs, rhs);
     }
 
-    #elif (defined(_M_IX86) || defined(_M_ARM) || defined(__arm__)) && !defined(BOOST_INT128_NO_CONSTEVAL_DETECTION)
-
-    if (BOOST_INT128_IS_CONSTANT_EVALUATED(rhs))
-    {
-        return library_mul(lhs, rhs); // LCOV_EXCL_LINE
-    }
-    else
-    {
-        std::uint32_t lhs_words[4] {};
-        std::uint32_t rhs_words[4] {};
-
-        // Since in all likelihood this equates to memcpy we don't need to convert to non-negative integers and back
-        to_words(lhs, lhs_words);
-        to_words(rhs, rhs_words);
-
-        return knuth_multiply<int128_t>(lhs_words, rhs_words);
-    }
-
     #else
 
-    return library_mul(lhs, rhs);
+    return low_word_mul<int128_t>(lhs, rhs);
 
     #endif
 }
@@ -2737,37 +2110,15 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator*(const
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_UNSIGNED_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr int128_t operator*(const int128_t lhs, const UnsignedInteger rhs) noexcept
 {
-    #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION
-
     using local_eval_type = detail::evaluation_type_t<UnsignedInteger>;
     return detail::default_mul(lhs, static_cast<local_eval_type>(rhs));
-
-    #else
-
-    static_assert(detail::is_signed_integer_v<UnsignedInteger>, "Sign Conversion Error");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return {0, 0};
-
-    #endif
 }
 
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_UNSIGNED_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr int128_t operator*(const UnsignedInteger lhs, const int128_t rhs) noexcept
 {
-    #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION
-
     using local_eval_type = detail::evaluation_type_t<UnsignedInteger>;
     return detail::default_mul(rhs, static_cast<local_eval_type>(lhs));
-
-    #else
-
-    static_assert(detail::is_signed_integer_v<UnsignedInteger>, "Sign Conversion Error");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return {0, 0};
-
-    #endif
 }
 
 #ifdef _MSC_VER
@@ -2793,44 +2144,14 @@ BOOST_INT128_HOST_DEVICE constexpr int128_t operator*(const SignedInteger lhs, c
 #  pragma warning(pop)
 #endif
 
-#ifdef BOOST_INT128_HAS_INT128
-
-#ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION
-
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator*(const int128_t lhs, const detail::builtin_u128 rhs) noexcept
-{
-    return static_cast<int128_t>(static_cast<detail::builtin_i128>(lhs) * rhs);
-}
-
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator*(const detail::builtin_u128 lhs, const int128_t rhs) noexcept
-{
-    return static_cast<int128_t>(static_cast<detail::builtin_i128>(rhs) * lhs);
-}
-
-#else // BOOST_INT128_ALLOW_SIGN_CONVERSION
-
-BOOST_INT128_EXPORT template <typename T, std::enable_if_t<std::is_same<T, detail::builtin_u128>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE constexpr int128_t operator*(const int128_t, const T) noexcept
-{
-    static_assert(detail::is_signed_integer_v<T>, "Sign Compare Error");
-    return {0, 0};
-}
-
-BOOST_INT128_EXPORT template <typename T, std::enable_if_t<std::is_same<T, detail::builtin_u128>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE constexpr int128_t operator*(const T, const int128_t) noexcept
-{
-    static_assert(detail::is_signed_integer_v<T>, "Sign Compare Error");
-    return {0, 0};
-}
-
-#endif // BOOST_INT128_ALLOW_SIGN_CONVERSION
+#if defined(BOOST_INT128_HAS_INT128) || defined(BOOST_INT128_HAS_MSVC_INT128)
 
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator*(const int128_t lhs, const detail::builtin_i128 rhs) noexcept
+BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR int128_t operator*(const int128_t lhs, const detail::builtin_i128 rhs) noexcept
 {
     return detail::default_mul(lhs, static_cast<int128_t>(rhs));
 }
 
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator*(const detail::builtin_i128 lhs, const int128_t rhs) noexcept
+BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR int128_t operator*(const detail::builtin_i128 lhs, const int128_t rhs) noexcept
 {
     return detail::default_mul(rhs, static_cast<int128_t>(lhs));
 }
@@ -2840,11 +2161,7 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator*(const
 template <BOOST_INT128_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr int128_t& int128_t::operator*=(const Integer rhs) noexcept
 {
-    #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION
-    static_assert(detail::is_signed_integer_v<Integer>, "Sign Conversion Error");
-    #endif
-
-    *this = *this * rhs;
+    *this = static_cast<int128_t>(*this * rhs);
     return *this;
 }
 
@@ -2859,7 +2176,7 @@ BOOST_INT128_HOST_DEVICE constexpr int128_t& int128_t::operator*=(const int128_t
 template <BOOST_INT128_128BIT_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE inline int128_t& int128_t::operator*=(const Integer rhs) noexcept
 {
-    *this = *this * rhs;
+    *this = static_cast<int128_t>(*this * rhs);
     return *this;
 }
 
@@ -2889,21 +2206,16 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator/(const
     {
         return {0,0};
     }
-    #if defined(BOOST_INT128_HAS_INT128)
-
-    return static_cast<int128_t>(static_cast<detail::builtin_i128>(lhs) / static_cast<detail::builtin_i128>(rhs));
-
-    #else
 
-    int128_t quotient {};
     const auto negative_res {(lhs.high < 0) != (rhs.high < 0)};
 
-    if (abs_rhs.high != 0)
-    {
-        quotient = detail::knuth_div(abs_lhs, abs_rhs);
-    }
-    else
+    // Narrow fast path: when the divisor magnitude fits in 64 bits, divide the magnitudes with
+    // the hardware-accelerated one_word_div and reapply the sign. This reuses the abs values
+    // computed above and beats native signed division (the out-of-line __divti3) for this case.
+    if (abs_rhs.high == 0)
     {
+        int128_t quotient {};
+
         if (abs_lhs.high == 0)
         {
             quotient = {0, abs_lhs.low / abs_rhs.low};
@@ -2912,17 +2224,25 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator/(const
         {
             detail::one_word_div(abs_lhs, abs_rhs.low, quotient);
         }
+
+        return negative_res ? -quotient : quotient;
     }
 
+    #if defined(BOOST_INT128_HAS_INT128)
+
+    return static_cast<int128_t>(static_cast<detail::builtin_i128>(lhs) / static_cast<detail::builtin_i128>(rhs));
+
+    #else
+
+    const auto quotient {detail::knuth_div(abs_lhs, abs_rhs)};
     return negative_res ? -quotient : quotient;
+
     #endif
 }
 
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_UNSIGNED_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr int128_t operator/(const int128_t lhs, const UnsignedInteger rhs) noexcept
 {
-    #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION
-
     using eval_type = detail::evaluation_type_t<UnsignedInteger>;
 
     if (BOOST_INT128_UNLIKELY(rhs == 0))
@@ -2935,22 +2255,11 @@ BOOST_INT128_HOST_DEVICE constexpr int128_t operator/(const int128_t lhs, const
     int128_t quotient {};
     detail::one_word_div(abs_lhs, static_cast<eval_type>(rhs), quotient);
     return lhs < 0 ? -quotient : quotient;
-
-    #else
-
-    static_assert(detail::is_signed_integer_v<UnsignedInteger>, "Sign Conversion Error");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return {0, 0};
-
-    #endif
 }
 
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_UNSIGNED_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr int128_t operator/(const UnsignedInteger lhs, const int128_t rhs) noexcept
 {
-    #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION
-
     if (BOOST_INT128_UNLIKELY(rhs == 0))
     {
         return {0, 0};
@@ -2967,15 +2276,6 @@ BOOST_INT128_HOST_DEVICE constexpr int128_t operator/(const UnsignedInteger lhs,
         const int128_t result {0, res};
         return rhs < 0 ? -result : result;
     }
-
-    #else
-
-    static_assert(detail::is_signed_integer_v<UnsignedInteger>, "Sign Conversion Error");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return {0, 0};
-
-    #endif
 }
 
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_SIGNED_INTEGER_CONCEPT>
@@ -3028,79 +2328,21 @@ BOOST_INT128_HOST_DEVICE constexpr int128_t operator/(const SignedInteger lhs, c
     }
 }
 
-#ifdef BOOST_INT128_HAS_INT128
-
-#ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION
-
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator/(const int128_t lhs, const detail::builtin_u128 rhs) noexcept
-{
-    return static_cast<int128_t>(static_cast<detail::builtin_i128>(lhs) / rhs);
-}
-
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator/(const detail::builtin_u128 lhs, const int128_t rhs) noexcept
-{
-    return static_cast<int128_t>(lhs / static_cast<detail::builtin_i128>(rhs));
-}
-
-#else // BOOST_INT128_ALLOW_SIGN_CONVERSION
-
-BOOST_INT128_EXPORT template <typename T, std::enable_if_t<std::is_same<T, detail::builtin_u128>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE constexpr int128_t operator/(const int128_t, const T) noexcept
-{
-    static_assert(detail::is_signed_integer_v<T>, "Sign Compare Error");
-    return {0, 0};
-}
-
-BOOST_INT128_EXPORT template <typename T, std::enable_if_t<std::is_same<T, detail::builtin_u128>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE constexpr int128_t operator/(const T, const int128_t) noexcept
-{
-    static_assert(detail::is_signed_integer_v<T>, "Sign Compare Error");
-    return {0, 0};
-}
+#if defined(BOOST_INT128_HAS_INT128) || defined(BOOST_INT128_HAS_MSVC_INT128)
 
-#endif // BOOST_INT128_ALLOW_SIGN_CONVERSION
 
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator/(const int128_t lhs, const detail::builtin_i128 rhs) noexcept
+BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR int128_t operator/(const int128_t lhs, const detail::builtin_i128 rhs) noexcept
 {
     return static_cast<int128_t>(static_cast<detail::builtin_i128>(lhs) / rhs);
 }
 
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator/(const detail::builtin_i128 lhs, const int128_t rhs) noexcept
+BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR int128_t operator/(const detail::builtin_i128 lhs, const int128_t rhs) noexcept
 {
     return static_cast<int128_t>(lhs / static_cast<detail::builtin_i128>(rhs));
 }
 
 #elif defined(BOOST_INT128_HAS_MSVC_INT128)
 
-#ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION
-
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE inline int128_t operator/(const int128_t lhs, const detail::builtin_u128 rhs) noexcept
-{
-    return lhs / static_cast<int128_t>(rhs);
-}
-
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE inline int128_t operator/(const detail::builtin_u128 lhs, const int128_t rhs) noexcept
-{
-    return static_cast<int128_t>(lhs) / rhs;
-}
-
-#else // BOOST_INT128_ALLOW_SIGN_CONVERSION
-
-BOOST_INT128_EXPORT template <typename T, std::enable_if_t<std::is_same<T, detail::builtin_u128>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE inline int128_t operator/(const int128_t, const T) noexcept
-{
-    static_assert(detail::is_signed_integer_v<T>, "Sign Compare Error");
-    return {0, 0};
-}
-
-BOOST_INT128_EXPORT template <typename T, std::enable_if_t<std::is_same<T, detail::builtin_u128>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE inline int128_t operator/(const T, const int128_t) noexcept
-{
-    static_assert(detail::is_signed_integer_v<T>, "Sign Compare Error");
-    return {0, 0};
-}
-
-#endif // BOOST_INT128_ALLOW_SIGN_CONVERSION
 
 BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE inline int128_t operator/(const int128_t lhs, const detail::builtin_i128 rhs) noexcept
 {
@@ -3117,11 +2359,7 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE inline int128_t operator/(const det
 template <BOOST_INT128_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr int128_t& int128_t::operator/=(const Integer rhs) noexcept
 {
-    #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION
-    static_assert(detail::is_signed_integer_v<Integer>, "Sign Conversion Error");
-    #endif
-
-    *this = *this / rhs;
+    *this = static_cast<int128_t>(*this / rhs);
     return *this;
 }
 
@@ -3136,7 +2374,7 @@ BOOST_INT128_HOST_DEVICE constexpr int128_t& int128_t::operator/=(const int128_t
 template <BOOST_INT128_128BIT_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE inline int128_t& int128_t::operator/=(const Integer rhs) noexcept
 {
-    *this = *this / rhs;
+    *this = static_cast<int128_t>(*this / rhs);
     return *this;
 }
 
@@ -3169,8 +2407,6 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator%(int128
 template <BOOST_INT128_UNSIGNED_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr int128_t operator%(const int128_t lhs, const UnsignedInteger rhs) noexcept
 {
-    #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION
-
     using eval_type = detail::evaluation_type_t<UnsignedInteger>;
 
     if (BOOST_INT128_UNLIKELY(rhs == 0))
@@ -3186,22 +2422,11 @@ BOOST_INT128_HOST_DEVICE constexpr int128_t operator%(const int128_t lhs, const
     detail::one_word_div(abs_lhs, static_cast<eval_type>(rhs), quotient, remainder);
 
     return lhs < 0 ? -remainder : remainder;
-
-    #else
-
-    static_assert(detail::is_signed_integer_v<UnsignedInteger>, "Sign Conversion Error");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return {0, 0};
-
-    #endif
 }
 
 template <BOOST_INT128_UNSIGNED_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr int128_t operator%(const UnsignedInteger lhs, const int128_t rhs) noexcept
 {
-    #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION
-
     using eval_type = detail::evaluation_type_t<UnsignedInteger>;
 
     if (BOOST_INT128_UNLIKELY(rhs == 0))
@@ -3219,15 +2444,6 @@ BOOST_INT128_HOST_DEVICE constexpr int128_t operator%(const UnsignedInteger lhs,
     const int128_t remainder {0, static_cast<eval_type>(lhs) % abs_rhs.low};
 
     return remainder;
-
-    #else
-
-    static_assert(detail::is_signed_integer_v<UnsignedInteger>, "Sign Conversion Error");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return {0, 0};
-
-    #endif
 }
 
 template <BOOST_INT128_SIGNED_INTEGER_CONCEPT>
@@ -3257,23 +2473,15 @@ BOOST_INT128_HOST_DEVICE constexpr int128_t operator%(const int128_t lhs, const
     {
         return lhs;
     }
-    #if defined(BOOST_INT128_HAS_INT128)
-    else
-    {
-        return static_cast<int128_t>(static_cast<detail::builtin_i128>(lhs) % static_cast<detail::builtin_i128>(rhs));
-    }
-    #else
 
-    const auto is_neg{lhs < 0};
-    
-    int128_t remainder {};
+    const auto is_neg {lhs < 0};
 
-    if (abs_rhs.high != 0)
-    {
-        detail::knuth_div(abs_lhs, abs_rhs, remainder);
-    }
-    else
+    // Narrow fast path: when the divisor magnitude fits in 64 bits, take the remainder of the
+    // magnitudes with the hardware-accelerated one_word_div and reapply the dividend's sign.
+    if (abs_rhs.high == 0)
     {
+        int128_t remainder {};
+
         if (abs_lhs.high == 0)
         {
             remainder = int128_t{0, abs_lhs.low % abs_rhs.low};
@@ -3281,57 +2489,37 @@ BOOST_INT128_HOST_DEVICE constexpr int128_t operator%(const int128_t lhs, const
         else
         {
             int128_t quotient {};
-
             detail::one_word_div(abs_lhs, abs_rhs.low, quotient, remainder);
         }
+
+        return is_neg ? -remainder : remainder;
     }
 
+    #if defined(BOOST_INT128_HAS_INT128)
+
+    return static_cast<int128_t>(static_cast<detail::builtin_i128>(lhs) % static_cast<detail::builtin_i128>(rhs));
+
+    #else
+
+    int128_t remainder {};
+    detail::knuth_div(abs_lhs, abs_rhs, remainder);
     return is_neg ? -remainder : remainder;
 
     #endif
 }
 
-#ifdef BOOST_INT128_HAS_INT128
+#if defined(BOOST_INT128_HAS_INT128) || defined(BOOST_INT128_HAS_MSVC_INT128)
 
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator%(const int128_t lhs, const detail::builtin_i128 rhs) noexcept
+BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR int128_t operator%(const int128_t lhs, const detail::builtin_i128 rhs) noexcept
 {
     return static_cast<detail::builtin_i128>(lhs) % rhs;
 }
 
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator%(const detail::builtin_i128 lhs, const int128_t rhs) noexcept
+BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR int128_t operator%(const detail::builtin_i128 lhs, const int128_t rhs) noexcept
 {
     return lhs % static_cast<detail::builtin_i128>(rhs);
 }
 
-#ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION
-
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator%(const int128_t lhs, const detail::builtin_u128 rhs) noexcept
-{
-    return static_cast<int128_t>(static_cast<detail::builtin_u128>(lhs) % rhs);
-}
-
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator%(const detail::builtin_u128 lhs, const int128_t rhs) noexcept
-{
-    return static_cast<int128_t>(lhs % static_cast<detail::builtin_u128>(rhs));
-}
-
-#else // BOOST_INT128_ALLOW_SIGN_CONVERSION
-
-BOOST_INT128_EXPORT template <typename T, std::enable_if_t<std::is_same<T, detail::builtin_u128>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE constexpr int128_t operator%(const int128_t, const T) noexcept
-{
-    static_assert(detail::is_signed_integer_v<T>, "Sign Compare Error");
-    return {0, 0};
-}
-
-BOOST_INT128_EXPORT template <typename T, std::enable_if_t<std::is_same<T, detail::builtin_u128>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE constexpr int128_t operator%(const T, const int128_t) noexcept
-{
-    static_assert(detail::is_signed_integer_v<T>, "Sign Compare Error");
-    return {0, 0};
-}
-
-#endif // BOOST_INT128_ALLOW_SIGN_CONVERSION
 
 #elif defined(BOOST_INT128_HAS_MSVC_INT128)
 
@@ -3345,46 +2533,13 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE inline int128_t operator%(const det
     return static_cast<int128_t>(lhs) % rhs;
 }
 
-#ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION
-
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE inline int128_t operator%(const int128_t lhs, const detail::builtin_u128 rhs) noexcept
-{
-    return lhs % static_cast<int128_t>(rhs);
-}
-
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE inline int128_t operator%(const detail::builtin_u128 lhs, const int128_t rhs) noexcept
-{
-    return static_cast<int128_t>(lhs) % rhs;
-}
-
-#else // BOOST_INT128_ALLOW_SIGN_CONVERSION
-
-BOOST_INT128_EXPORT template <typename T, std::enable_if_t<std::is_same<T, detail::builtin_u128>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE inline int128_t operator%(const int128_t, const T) noexcept
-{
-    static_assert(detail::is_signed_integer_v<T>, "Sign Compare Error");
-    return {0, 0};
-}
-
-BOOST_INT128_EXPORT template <typename T, std::enable_if_t<std::is_same<T, detail::builtin_u128>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE inline int128_t operator%(const T, const int128_t) noexcept
-{
-    static_assert(detail::is_signed_integer_v<T>, "Sign Compare Error");
-    return {0, 0};
-}
-
-#endif // BOOST_INT128_ALLOW_SIGN_CONVERSION
 
 #endif // BOOST_INT128_HAS_INT128
 
 template <BOOST_INT128_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr int128_t& int128_t::operator%=(const Integer rhs) noexcept
 {
-    #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION
-    static_assert(detail::is_signed_integer_v<Integer>, "Sign Conversion Error");
-    #endif
-
-    *this = *this % rhs;
+    *this = static_cast<int128_t>(*this % rhs);
     return *this;
 }
 
@@ -3399,7 +2554,7 @@ BOOST_INT128_HOST_DEVICE constexpr int128_t& int128_t::operator%=(const int128_t
 template <BOOST_INT128_128BIT_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE inline int128_t& int128_t::operator%=(const Integer rhs) noexcept
 {
-    *this = *this % rhs;
+    *this = static_cast<int128_t>(*this % rhs);
     return *this;
 }
 
diff --git a/include/boost/int128/detail/mini_from_chars.hpp b/include/boost/int128/detail/mini_from_chars.hpp
index a399e1d8..92c2f27c 100644
--- a/include/boost/int128/detail/mini_from_chars.hpp
+++ b/include/boost/int128/detail/mini_from_chars.hpp
@@ -127,7 +127,6 @@ BOOST_INT128_HOST_DEVICE constexpr int from_chars_integer_impl(const char* first
 
 
     overflow_value /= unsigned_base;
-    overflow_value <<= 1;
     max_digit %= unsigned_base;
 
     // If the only character was a sign abort now
@@ -138,48 +137,52 @@ BOOST_INT128_HOST_DEVICE constexpr int from_chars_integer_impl(const char* first
 
     bool overflowed = false;
 
-    std::ptrdiff_t nc = last - next;
-    constexpr std::ptrdiff_t nd = std::numeric_limits<Integer>::digits10;
+    const std::ptrdiff_t nc = last - next;
 
+    // For bases 2..10 the first digits10 characters always fit in Unsigned_Integer, so they need no overflow check.
+    // For bases above 10 the safe window is shorter, so we must check on every iteration.
+    const std::ptrdiff_t nd {
+        base <= 10
+            ? static_cast<std::ptrdiff_t>(std::numeric_limits<Integer>::digits10)
+            : std::ptrdiff_t{0}
+    };
+
+    const std::ptrdiff_t fast_limit {nd < nc ? nd : nc};
+    std::ptrdiff_t i = 0;
+
+    for (; i < fast_limit; ++i)
     {
-        std::ptrdiff_t i = 0;
+        const auto current_digit = static_cast<Unsigned_Integer>(digit_from_char(*next));
 
-        for( ; i < nd && i < nc; ++i )
+        if (current_digit >= unsigned_base)
         {
-            // overflow is not possible in the first nd characters
+            break;
+        }
 
-            const auto current_digit = static_cast<Unsigned_Integer>(digit_from_char(*next));
+        result = static_cast<Unsigned_Integer>(result * unsigned_base + current_digit);
+        ++next;
+    }
 
-            if (current_digit >= unsigned_base)
-            {
-                break;
-            }
+    for (; i < nc; ++i)
+    {
+        const auto current_digit = static_cast<Unsigned_Integer>(digit_from_char(*next));
 
-            result = static_cast<Unsigned_Integer>(result * unsigned_base + current_digit);
-            ++next;
+        if (current_digit >= unsigned_base)
+        {
+            break;
         }
 
-        for( ; i < nc; ++i )
+        if (result < overflow_value || (result == overflow_value && current_digit <= max_digit))
         {
-            const auto current_digit = static_cast<Unsigned_Integer>(digit_from_char(*next));
-
-            if (current_digit >= unsigned_base)
-            {
-                break;
-            }
-
-            if (result < overflow_value || (result == overflow_value && current_digit <= max_digit))
-            {
-                result = static_cast<Unsigned_Integer>(result * unsigned_base + current_digit);
-            }
-            else
-            {
-                overflowed = true;
-                break;
-            }
-
-            ++next;
+            result = static_cast<Unsigned_Integer>(result * unsigned_base + current_digit);
         }
+        else
+        {
+            overflowed = true;
+            break;
+        }
+
+        ++next;
     }
 
     // Return the parsed value, adding the sign back if applicable
diff --git a/include/boost/int128/detail/traits.hpp b/include/boost/int128/detail/traits.hpp
index e6b6efd1..576b6ae0 100644
--- a/include/boost/int128/detail/traits.hpp
+++ b/include/boost/int128/detail/traits.hpp
@@ -71,10 +71,12 @@ using evaluation_type_t = std::conditional_t<sizeof(T) <= sizeof(std::uint32_t),
 #define BOOST_INT128_DEFAULTED_SIGNED_INTEGER_CONCEPT typename SignedInteger, std::enable_if_t<detail::is_signed_integer_v<SignedInteger>, bool> = true
 #define BOOST_INT128_DEFAULTED_UNSIGNED_INTEGER_CONCEPT typename UnsignedInteger, std::enable_if_t<detail::is_unsigned_integer_v<UnsignedInteger>, bool> = true
 #define BOOST_INT128_DEFAULTED_INTEGER_CONCEPT typename Integer, std::enable_if_t<detail::is_any_integer_v<Integer>, bool> = true
+#define BOOST_INT128_DEFAULTED_FLOATING_POINT_CONCEPT typename Float, std::enable_if_t<std::is_floating_point<Float>::value, bool> = true
 
 #define BOOST_INT128_SIGNED_INTEGER_CONCEPT typename SignedInteger, std::enable_if_t<detail::is_signed_integer_v<SignedInteger>, bool>
 #define BOOST_INT128_UNSIGNED_INTEGER_CONCEPT typename UnsignedInteger, std::enable_if_t<detail::is_unsigned_integer_v<UnsignedInteger>, bool>
 #define BOOST_INT128_INTEGER_CONCEPT typename Integer, std::enable_if_t<detail::is_any_integer_v<Integer>, bool>
+#define BOOST_INT128_FLOATING_POINT_CONCEPT typename Float, std::enable_if_t<std::is_floating_point<Float>::value, bool>
 
 #if defined(BOOST_INT128_HAS_INT128) || defined(BOOST_INT128_HAS_MSVC_INT128)
 
diff --git a/include/boost/int128/detail/uint128_imp.hpp b/include/boost/int128/detail/uint128_imp.hpp
index 3fd7c9b7..e1b8d73d 100644
--- a/include/boost/int128/detail/uint128_imp.hpp
+++ b/include/boost/int128/detail/uint128_imp.hpp
@@ -57,8 +57,7 @@ uint128_t
     constexpr uint128_t& operator=(uint128_t&&) noexcept = default;
 
     // Requires a conversion file to be implemented
-    BOOST_INT128_HOST_DEVICE explicit constexpr uint128_t(const int128_t& v) noexcept;
-    BOOST_INT128_HOST_DEVICE explicit constexpr operator int128_t() const noexcept;
+    BOOST_INT128_HOST_DEVICE constexpr uint128_t(const int128_t& v) noexcept;
 
     // Construct from integral types
     #if BOOST_INT128_ENDIAN_LITTLE_BYTE
@@ -93,13 +92,13 @@ uint128_t
     template <BOOST_INT128_DEFAULTED_UNSIGNED_INTEGER_CONCEPT>
     BOOST_INT128_HOST_DEVICE constexpr uint128_t(const UnsignedInteger v) noexcept : high {}, low {static_cast<std::uint64_t>(v)} {}
 
-    #ifdef BOOST_INT128_HAS_INT128
+    #if defined(BOOST_INT128_HAS_INT128) || defined(BOOST_INT128_HAS_MSVC_INT128)
 
-    BOOST_INT128_HOST_DEVICE constexpr uint128_t(const detail::builtin_i128 v) noexcept :
+    BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t(const detail::builtin_i128 v) noexcept :
         high {static_cast<std::uint64_t>(static_cast<detail::builtin_u128>(v) >> 64U)},
         low {static_cast<std::uint64_t>(v)} {}
 
-    BOOST_INT128_HOST_DEVICE constexpr uint128_t(const detail::builtin_u128 v) noexcept :
+    BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t(const detail::builtin_u128 v) noexcept :
         high {static_cast<std::uint64_t>(v >> 64U)},
         low {static_cast<std::uint64_t>(v)} {}
 
@@ -107,32 +106,36 @@ uint128_t
 
     #endif // BOOST_INT128_ENDIAN_LITTLE_BYTE
 
+    // Construct from floating-point types (not marked explicit, so the conversion is implicit); defined out of line
+    template <BOOST_INT128_DEFAULTED_FLOATING_POINT_CONCEPT>
+    BOOST_INT128_HOST_DEVICE constexpr uint128_t(Float f) noexcept;
+
     // Integer conversion operators
     BOOST_INT128_HOST_DEVICE explicit constexpr operator bool() const noexcept {return low || high; }
 
     template <BOOST_INT128_DEFAULTED_SIGNED_INTEGER_CONCEPT>
-    BOOST_INT128_HOST_DEVICE explicit constexpr operator SignedInteger() const noexcept { return static_cast<SignedInteger>(low); }
+    BOOST_INT128_HOST_DEVICE constexpr operator SignedInteger() const noexcept { return static_cast<SignedInteger>(low); }
 
     template <BOOST_INT128_DEFAULTED_UNSIGNED_INTEGER_CONCEPT>
-    BOOST_INT128_HOST_DEVICE explicit constexpr operator UnsignedInteger() const noexcept { return static_cast<UnsignedInteger>(low); }
+    BOOST_INT128_HOST_DEVICE constexpr operator UnsignedInteger() const noexcept { return static_cast<UnsignedInteger>(low); }
 
     #if defined(BOOST_INT128_HAS_INT128) || defined(BOOST_INT128_HAS_MSVC_INT128)
 
-    BOOST_INT128_HOST_DEVICE explicit BOOST_INT128_BUILTIN_CONSTEXPR operator detail::builtin_i128() const noexcept { return static_cast<detail::builtin_i128>(static_cast<detail::builtin_u128>(high) << static_cast<detail::builtin_u128>(64)) | static_cast<detail::builtin_i128>(low); }
+    BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR operator detail::builtin_i128() const noexcept { return static_cast<detail::builtin_i128>(static_cast<detail::builtin_u128>(high) << static_cast<detail::builtin_u128>(64)) | static_cast<detail::builtin_i128>(low); }
 
-    BOOST_INT128_HOST_DEVICE explicit BOOST_INT128_BUILTIN_CONSTEXPR operator detail::builtin_u128() const noexcept { return (static_cast<detail::builtin_u128>(high) << static_cast<detail::builtin_u128>(64)) | static_cast<detail::builtin_u128>(low); }
+    BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR operator detail::builtin_u128() const noexcept { return (static_cast<detail::builtin_u128>(high) << static_cast<detail::builtin_u128>(64)) | static_cast<detail::builtin_u128>(low); }
 
     #endif // BOOST_INT128_HAS_INT128
 
     // Conversion to float
     // This is basically the same as ldexp(static_cast<T>(high), 64) + static_cast<T>(low),
     // but can be constexpr at C++11 instead of C++26
-    BOOST_INT128_HOST_DEVICE explicit constexpr operator float() const noexcept;
-    BOOST_INT128_HOST_DEVICE explicit constexpr operator double() const noexcept;
+    BOOST_INT128_HOST_DEVICE constexpr operator float() const noexcept;
+    BOOST_INT128_HOST_DEVICE constexpr operator double() const noexcept;
 
     // long doubles do not exist on device
     #if !(defined(__CUDACC__) && defined(BOOST_INT128_ENABLE_CUDA))
-    explicit constexpr operator long double() const noexcept;
+    constexpr operator long double() const noexcept;
     #endif
 
     // Compound OR
@@ -308,6 +311,41 @@ constexpr uint128_t::operator long double() const noexcept
 
 #endif // __NVCC__
 
+//=====================================
+// Float Construction
+//=====================================
+
+// Floating-point -> uint128_t: split f into 64-bit words (high, low) by dividing by 2^64.
+// NaN or negative f: returns before assigning high/low (result is 0 only if the members default to zero - TODO confirm).
+// f >= 2^128 (including +infinity): both words are set to UINT64_MAX, i.e. the value saturates.
+template <BOOST_INT128_FLOATING_POINT_CONCEPT>
+BOOST_INT128_HOST_DEVICE constexpr uint128_t::uint128_t(Float f) noexcept
+{
+    constexpr Float two_32 {static_cast<Float>(UINT64_C(1) << 32)}; // 2^32
+    constexpr Float two_64 {two_32 * two_32}; // 2^64, formed as 2^32 * 2^32
+
+    // !(f >= 0) catches both NaN and negative values without using <cmath>
+    if (!(f >= Float{0}))
+    {
+        return; // high/low are not assigned on this path
+    }
+
+    // Overflow test: f >= 2^128 iff f / 2^64 >= 2^64. Comparing scaled values
+    // avoids materializing 2^128 as a Float, which overflows to +infinity for
+    // `float` and is therefore not constant-evaluable on older compilers.
+    const Float scaled {f / two_64};
+    if (scaled >= two_64)
+    {
+        high = UINT64_MAX;
+        low = UINT64_MAX;
+        return;
+    }
+
+    high = static_cast<std::uint64_t>(scaled); // integer part of f / 2^64
+    const Float remainder {f - static_cast<Float>(high) * two_64}; // what is left of f after removing high * 2^64
+    low = static_cast<std::uint64_t>(remainder); // the cast discards any fractional part
+}
+
 //=====================================
 // Unary Operators
 //=====================================
@@ -347,35 +385,15 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr bool operator==(const boo
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_SIGNED_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr bool operator==(const uint128_t lhs, const SignedInteger rhs) noexcept
 {
-    #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE
-
-    return rhs >= 0 && lhs.high == UINT64_C(0) && lhs.low == static_cast<std::uint64_t>(rhs);
-
-    #else
-
-    static_assert(detail::is_unsigned_integer_v<SignedInteger>, "Sign Compare Error");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return true;
-
-    #endif
+    const uint128_t rhs_u {rhs}; // rhs is converted to uint128_t first (the removed code special-cased its sign) - NOTE(review): confirm how negatives convert
+    return lhs.high == rhs_u.high && lhs.low == rhs_u.low; // equal only if both 64-bit words match
 }
 
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_SIGNED_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr bool operator==(const SignedInteger lhs, const uint128_t rhs) noexcept
 {
-    #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE
-
-    return lhs >= 0 && rhs.high == UINT64_C(0) && rhs.low == static_cast<std::uint64_t>(lhs);
-
-    #else
-
-    static_assert(detail::is_unsigned_integer_v<SignedInteger>, "Sign Compare Error");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return true;
-
-    #endif
+    const uint128_t lhs_u {lhs}; // lhs is converted to uint128_t first (the removed code special-cased its sign) - NOTE(review): confirm how negatives convert
+    return lhs_u.high == rhs.high && lhs_u.low == rhs.low; // equal only if both 64-bit words match
 }
 
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_UNSIGNED_INTEGER_CONCEPT>
@@ -424,8 +442,6 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr bool operator==(const uin
 
 #if defined(BOOST_INT128_HAS_INT128) || defined(BOOST_INT128_HAS_MSVC_INT128)
 
-#ifdef BOOST_INT128_ALLOW_SIGN_COMPARE
-
 BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator==(const uint128_t lhs, const detail::builtin_i128 rhs) noexcept
 {
     return lhs == static_cast<uint128_t>(rhs);
@@ -436,24 +452,6 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool
     return static_cast<uint128_t>(lhs) == rhs;
 }
 
-#else
-
-BOOST_INT128_EXPORT template <typename T, std::enable_if_t<std::is_same<T, detail::builtin_i128>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator==(const uint128_t, const T) noexcept
-{
-    static_assert(detail::is_unsigned_integer_v<T>, "Sign Compare Error");
-    return true;
-}
-
-BOOST_INT128_EXPORT template <typename T, std::enable_if_t<std::is_same<T, detail::builtin_i128>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator==(const T, const uint128_t) noexcept
-{
-    static_assert(detail::is_unsigned_integer_v<T>, "Sign Compare Error");
-    return true;
-}
-
-#endif // BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator==(const uint128_t lhs, const detail::builtin_u128 rhs) noexcept
 {
     return lhs == static_cast<uint128_t>(rhs);
@@ -483,35 +481,15 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr bool operator!=(const boo
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_SIGNED_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr bool operator!=(const uint128_t lhs, const SignedInteger rhs) noexcept
 {
-    #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE
-
-    return rhs < 0 || lhs.high != UINT64_C(0) || lhs.low != static_cast<std::uint64_t>(rhs);
-
-    #else
-
-    static_assert(detail::is_unsigned_integer_v<SignedInteger>, "Sign Compare Error");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return true;
-
-    #endif
+    const uint128_t rhs_u {rhs}; // rhs is converted to uint128_t first (the removed code special-cased its sign) - NOTE(review): confirm how negatives convert
+    return lhs.high != rhs_u.high || lhs.low != rhs_u.low; // unequal if either 64-bit word differs
 }
 
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_SIGNED_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr bool operator!=(const SignedInteger lhs, const uint128_t rhs) noexcept
 {
-    #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE
-
-    return lhs < 0 || rhs.high != UINT64_C(0) || rhs.low != static_cast<std::uint64_t>(lhs);
-
-    #else
-
-    static_assert(detail::is_unsigned_integer_v<SignedInteger>, "Sign Compare Error");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return true;
-
-    #endif
+    const uint128_t lhs_u {lhs}; // lhs is converted to uint128_t first (the removed code special-cased its sign) - NOTE(review): confirm how negatives convert
+    return lhs_u.high != rhs.high || lhs_u.low != rhs.low; // unequal if either 64-bit word differs
 }
 
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_UNSIGNED_INTEGER_CONCEPT>
@@ -560,7 +538,6 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr bool operator!=(const uin
 
 #if defined(BOOST_INT128_HAS_INT128) || defined(BOOST_INT128_BUILTIN_CONSTEXPR)
 
-#ifdef BOOST_INT128_ALLOW_SIGN_COMPARE
 
 BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator!=(const uint128_t lhs, const detail::builtin_i128 rhs) noexcept
 {
@@ -572,23 +549,6 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool
     return static_cast<uint128_t>(lhs) != rhs;
 }
 
-#else
-
-BOOST_INT128_EXPORT template <typename T, std::enable_if_t<std::is_same<T, detail::builtin_i128>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator!=(const uint128_t, const T) noexcept
-{
-    static_assert(detail::is_unsigned_integer_v<T>, "Sign Compare Error");
-    return true;
-}
-
-BOOST_INT128_EXPORT template <typename T, std::enable_if_t<std::is_same<T, detail::builtin_i128>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator!=(const T, const uint128_t) noexcept
-{
-    static_assert(detail::is_unsigned_integer_v<T>, "Sign Compare Error");
-    return true;
-}
-
-#endif // BOOST_INT128_ALLOW_SIGN_CONVERSION
 
 BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator!=(const uint128_t lhs, const detail::builtin_u128 rhs) noexcept
 {
@@ -609,35 +569,15 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_SIGNED_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr bool operator<(const uint128_t lhs, const SignedInteger rhs) noexcept
 {
-    #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE
-
-    return rhs > 0 && lhs.high == UINT64_C(0) && lhs.low < static_cast<std::uint64_t>(rhs);
-
-    #else
-
-    static_assert(detail::is_unsigned_integer_v<SignedInteger>, "Sign Compare Error");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return true;
-
-    #endif
+    const uint128_t rhs_u {rhs}; // rhs is converted to uint128_t first (the removed code special-cased its sign) - NOTE(review): confirm how negatives convert
+    return lhs.high == rhs_u.high ? lhs.low < rhs_u.low : lhs.high < rhs_u.high; // equal high words: low words decide; otherwise high words decide
 }
 
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_SIGNED_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr bool operator<(const SignedInteger lhs, const uint128_t rhs) noexcept
 {
-    #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE
-
-    return lhs < 0 || rhs.high > UINT64_C(0) || static_cast<std::uint64_t>(lhs) < rhs.low;
-
-    #else
-
-    static_assert(detail::is_unsigned_integer_v<SignedInteger>, "Sign Compare Error");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return true;
-
-    #endif
+    const uint128_t lhs_u {lhs}; // lhs is converted to uint128_t first (the removed code special-cased its sign) - NOTE(review): confirm how negatives convert
+    return lhs_u.high == rhs.high ? lhs_u.low < rhs.low : lhs_u.high < rhs.high; // equal high words: low words decide; otherwise high words decide
 }
 
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_UNSIGNED_INTEGER_CONCEPT>
@@ -716,7 +656,6 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr bool operator<(const uint
 
 #if defined(BOOST_INT128_HAS_INT128) || defined(BOOST_INT128_HAS_MSVC_INT128)
 
-#ifdef BOOST_INT128_ALLOW_SIGN_COMPARE
 
 BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator<(const uint128_t lhs, const detail::builtin_i128 rhs) noexcept
 {
@@ -728,23 +667,6 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool
     return static_cast<uint128_t>(lhs) < rhs;
 }
 
-#else
-
-BOOST_INT128_EXPORT template <typename T, std::enable_if_t<std::is_same<T, detail::builtin_i128>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator<(const uint128_t, const T) noexcept
-{
-    static_assert(detail::is_unsigned_integer_v<T>, "Sign Compare Error");
-    return true;
-}
-
-BOOST_INT128_EXPORT template <typename T, std::enable_if_t<std::is_same<T, detail::builtin_i128>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator<(const T, const uint128_t) noexcept
-{
-    static_assert(detail::is_unsigned_integer_v<T>, "Sign Compare Error");
-    return true;
-}
-
-#endif // BOOST_INT128_ALLOW_SIGN_CONVERSION
 
 BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator<(const uint128_t lhs, const detail::builtin_u128 rhs) noexcept
 {
@@ -765,35 +687,15 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_SIGNED_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr bool operator<=(const uint128_t lhs, const SignedInteger rhs) noexcept
 {
-    #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE
-
-    return rhs >= 0 && lhs.high == UINT64_C(0) && lhs.low <= static_cast<std::uint64_t>(rhs);
-
-    #else
-
-    static_assert(detail::is_unsigned_integer_v<SignedInteger>, "Sign Compare Error");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return true;
-
-    #endif
+    const uint128_t rhs_u {rhs}; // rhs is converted to uint128_t first (the removed code special-cased its sign) - NOTE(review): confirm how negatives convert
+    return lhs.high == rhs_u.high ? lhs.low <= rhs_u.low : lhs.high < rhs_u.high; // equal high words: low words decide; otherwise high words decide
 }
 
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_SIGNED_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr bool operator<=(const SignedInteger lhs, const uint128_t rhs) noexcept
 {
-    #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE
-
-    return lhs < 0 || rhs.high > UINT64_C(0) || static_cast<std::uint64_t>(lhs) <= rhs.low;
-
-    #else
-
-    static_assert(detail::is_unsigned_integer_v<SignedInteger>, "Sign Compare Error");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return true;
-
-    #endif
+    const uint128_t lhs_u {lhs}; // lhs is converted to uint128_t first (the removed code special-cased its sign) - NOTE(review): confirm how negatives convert
+    return lhs_u.high == rhs.high ? lhs_u.low <= rhs.low : lhs_u.high < rhs.high; // equal high words: low words decide; otherwise high words decide
 }
 
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_UNSIGNED_INTEGER_CONCEPT>
@@ -871,7 +773,6 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr bool operator<=(const uin
 
 #if defined(BOOST_INT128_HAS_INT128) || defined(BOOST_INT128_HAS_MSVC_INT128)
 
-#ifdef BOOST_INT128_ALLOW_SIGN_COMPARE
 
 BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator<=(const uint128_t lhs, const detail::builtin_i128 rhs) noexcept
 {
@@ -893,23 +794,6 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool
     return static_cast<uint128_t>(lhs) <= rhs;
 }
 
-#else
-
-BOOST_INT128_EXPORT template <typename T, std::enable_if_t<std::is_same<T, detail::builtin_i128>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator<=(const uint128_t, const T) noexcept
-{
-    static_assert(detail::is_unsigned_integer_v<T>, "Sign Compare Error");
-    return true;
-}
-
-BOOST_INT128_EXPORT template <typename T, std::enable_if_t<std::is_same<T, detail::builtin_i128>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator<=(const T, const uint128_t) noexcept
-{
-    static_assert(detail::is_unsigned_integer_v<T>, "Sign Compare Error");
-    return true;
-}
-
-#endif // BOOST_INT128_ALLOW_SIGN_CONVERSION
 
 #endif // BOOST_INT128_HAS_INT128
 
@@ -920,35 +804,15 @@ BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator<=(const T,
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_SIGNED_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr bool operator>(const uint128_t lhs, const SignedInteger rhs) noexcept
 {
-    #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE
-
-    return rhs < 0 || lhs.high > UINT64_C(0) || lhs.low > static_cast<std::uint64_t>(rhs);
-
-    #else
-
-    static_assert(detail::is_unsigned_integer_v<SignedInteger>, "Sign Compare Error");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return true;
-
-    #endif
+    const uint128_t rhs_u {rhs}; // rhs is converted to uint128_t first (the removed code special-cased its sign) - NOTE(review): confirm how negatives convert
+    return lhs.high == rhs_u.high ? lhs.low > rhs_u.low : lhs.high > rhs_u.high; // equal high words: low words decide; otherwise high words decide
 }
 
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_SIGNED_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr bool operator>(const SignedInteger lhs, const uint128_t rhs) noexcept
 {
-    #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE
-
-    return lhs > 0 && rhs.high == UINT64_C(0) && static_cast<std::uint64_t>(lhs) > rhs.low;
-
-    #else
-
-    static_assert(detail::is_unsigned_integer_v<SignedInteger>, "Sign Compare Error");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return true;
-
-    #endif
+    const uint128_t lhs_u {lhs}; // lhs is converted to uint128_t first (the removed code special-cased its sign) - NOTE(review): confirm how negatives convert
+    return lhs_u.high == rhs.high ? lhs_u.low > rhs.low : lhs_u.high > rhs.high; // equal high words: low words decide; otherwise high words decide
 }
 
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_UNSIGNED_INTEGER_CONCEPT>
@@ -1026,7 +890,6 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr bool operator>(const uint
 
 #if defined(BOOST_INT128_HAS_INT128) || defined(BOOST_INT128_HAS_MSVC_INT128)
 
-#ifdef BOOST_INT128_ALLOW_SIGN_COMPARE
 
 BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator>(const uint128_t lhs, const detail::builtin_i128 rhs) noexcept
 {
@@ -1048,23 +911,6 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool
     return static_cast<uint128_t>(lhs) > rhs;
 }
 
-#else
-
-BOOST_INT128_EXPORT template <typename T, std::enable_if_t<std::is_same<T, detail::builtin_i128>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator>(const uint128_t, const T) noexcept
-{
-    static_assert(detail::is_unsigned_integer_v<T>, "Sign Compare Error");
-    return true;
-}
-
-BOOST_INT128_EXPORT template <typename T, std::enable_if_t<std::is_same<T, detail::builtin_i128>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator>(const T, const uint128_t) noexcept
-{
-    static_assert(detail::is_unsigned_integer_v<T>, "Sign Compare Error");
-    return true;
-}
-
-#endif // BOOST_INT128_ALLOW_SIGN_CONVERSION
 
 #endif // BOOST_INT128_HAS_INT128
 
@@ -1075,35 +921,15 @@ BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator>(const T,
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_SIGNED_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr bool operator>=(const uint128_t lhs, const SignedInteger rhs) noexcept
 {
-    #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE
-
-    return rhs < 0 || lhs.high > UINT64_C(0) || lhs.low >= static_cast<std::uint64_t>(rhs);
-
-    #else
-
-    static_assert(detail::is_unsigned_integer_v<SignedInteger>, "Sign Compare Error");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return true;
-
-    #endif
+    const uint128_t rhs_u {rhs}; // rhs is converted to uint128_t first (the removed code special-cased its sign) - NOTE(review): confirm how negatives convert
+    return lhs.high == rhs_u.high ? lhs.low >= rhs_u.low : lhs.high > rhs_u.high; // equal high words: low words decide; otherwise high words decide
 }
 
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_SIGNED_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr bool operator>=(const SignedInteger lhs, const uint128_t rhs) noexcept
 {
-    #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE
-
-    return lhs >= 0 && rhs.high == UINT64_C(0) && static_cast<std::uint64_t>(lhs) >= rhs.low;
-
-    #else
-
-    static_assert(detail::is_unsigned_integer_v<SignedInteger>, "Sign Compare Error");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return true;
-
-    #endif
+    const uint128_t lhs_u {lhs}; // lhs is converted to uint128_t first (the removed code special-cased its sign) - NOTE(review): confirm how negatives convert
+    return lhs_u.high == rhs.high ? lhs_u.low >= rhs.low : lhs_u.high > rhs.high; // equal high words: low words decide; otherwise high words decide
 }
 
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_UNSIGNED_INTEGER_CONCEPT>
@@ -1191,7 +1017,6 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool
     return static_cast<uint128_t>(lhs) >= rhs;
 }
 
-#ifdef BOOST_INT128_ALLOW_SIGN_COMPARE
 
 BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator>=(const uint128_t lhs, const detail::builtin_u128 rhs) noexcept
 {
@@ -1203,23 +1028,6 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool
     return static_cast<uint128_t>(lhs) >= rhs;
 }
 
-#else
-
-BOOST_INT128_EXPORT template <typename T, std::enable_if_t<std::is_same<T, detail::builtin_i128>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator>=(const uint128_t, const T) noexcept
-{
-    static_assert(detail::is_unsigned_integer_v<T>, "Sign Compare Error");
-    return true;
-}
-
-BOOST_INT128_EXPORT template <typename T, std::enable_if_t<std::is_same<T, detail::builtin_i128>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR bool operator>=(const T, const uint128_t) noexcept
-{
-    static_assert(detail::is_unsigned_integer_v<T>, "Sign Compare Error");
-    return true;
-}
-
-#endif // BOOST_INT128_ALLOW_SIGN_CONVERSION
 
 #endif // BOOST_INT128_HAS_INT128
 
@@ -1282,8 +1090,6 @@ BOOST_INT128_HOST_DEVICE constexpr std::strong_ordering operator<=>(const Unsign
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_SIGNED_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr std::strong_ordering operator<=>(const SignedInteger lhs, const uint128_t rhs) noexcept
 {
-    #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE
-
     if (lhs < rhs)
     {
         return std::strong_ordering::less;
@@ -1296,22 +1102,11 @@ BOOST_INT128_HOST_DEVICE constexpr std::strong_ordering operator<=>(const Signed
     {
         return std::strong_ordering::greater;
     }
-
-    #else
-
-    static_assert(detail::is_unsigned_integer_v<SignedInteger>, "Sign Compare Error");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return std::strong_ordering::less;
-
-    #endif
 }
 
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_SIGNED_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr std::strong_ordering operator<=>(const uint128_t lhs, const SignedInteger rhs) noexcept
 {
-    #ifdef BOOST_INT128_ALLOW_SIGN_COMPARE
-
     if (lhs < rhs)
     {
         return std::strong_ordering::less;
@@ -1324,15 +1119,6 @@ BOOST_INT128_HOST_DEVICE constexpr std::strong_ordering operator<=>(const uint12
     {
         return std::strong_ordering::greater;
     }
-
-    #else
-
-    static_assert(detail::is_unsigned_integer_v<SignedInteger>, "Sign Compare Error");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return std::strong_ordering::less;
-
-    #endif
 }
 
 #endif
@@ -1353,35 +1139,13 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator~(const
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_SIGNED_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr uint128_t operator|(const uint128_t lhs, const SignedInteger rhs) noexcept
 {
-    #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION
-
     return {lhs.high | (rhs < 0 ? ~UINT64_C(0) : UINT64_C(0)), lhs.low | static_cast<std::uint64_t>(rhs)};
-
-    #else
-
-    static_assert(detail::is_unsigned_integer_v<SignedInteger>, "Sign Conversion Error");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return true;
-
-    #endif
 }
 
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_SIGNED_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr uint128_t operator|(const SignedInteger lhs, const uint128_t rhs) noexcept
 {
-    #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION
-
     return {rhs.high | (lhs < 0 ? ~UINT64_C(0) : UINT64_C(0)), rhs.low | static_cast<std::uint64_t>(lhs)};
-
-    #else
-
-    static_assert(detail::is_unsigned_integer_v<SignedInteger>, "Sign Conversion Error");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return true;
-
-    #endif
 }
 
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_UNSIGNED_INTEGER_CONCEPT>
@@ -1401,44 +1165,26 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator|(const
     return {lhs.high | rhs.high, lhs.low | rhs.low};
 }
 
-#ifdef BOOST_INT128_HAS_INT128
+#if defined(BOOST_INT128_HAS_INT128) || defined(BOOST_INT128_HAS_MSVC_INT128)
 
-#ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION
 
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator|(const uint128_t lhs, const detail::builtin_i128 rhs) noexcept
+BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator|(const uint128_t lhs, const detail::builtin_i128 rhs) noexcept
 {
     return lhs | static_cast<uint128_t>(rhs);
 }
 
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator|(const detail::builtin_i128 lhs, const uint128_t rhs) noexcept
+BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator|(const detail::builtin_i128 lhs, const uint128_t rhs) noexcept
 {
     return static_cast<uint128_t>(lhs) | rhs;
 }
 
-#else
-
-BOOST_INT128_EXPORT template <typename T, std::enable_if_t<std::is_same<T, detail::builtin_i128>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE constexpr uint128_t operator|(const uint128_t, const T) noexcept
-{
-    static_assert(detail::is_unsigned_integer_v<T>, "Sign Conversion Error");
-    return {0, 0};
-}
 
-BOOST_INT128_EXPORT template <typename T, std::enable_if_t<std::is_same<T, detail::builtin_i128>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE constexpr uint128_t operator|(const T, const uint128_t) noexcept
-{
-    static_assert(detail::is_unsigned_integer_v<T>, "Sign Conversion Error");
-    return {0, 0};
-}
-
-#endif // BOOST_INT128_ALLOW_SIGN_CONVERSION
-
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator|(const uint128_t lhs, const detail::builtin_u128 rhs) noexcept
+BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator|(const uint128_t lhs, const detail::builtin_u128 rhs) noexcept
 {
     return lhs | static_cast<uint128_t>(rhs);
 }
 
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator|(const detail::builtin_u128 lhs, const uint128_t rhs) noexcept
+BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator|(const detail::builtin_u128 lhs, const uint128_t rhs) noexcept
 {
     return static_cast<uint128_t>(lhs) | rhs;
 }
@@ -1448,10 +1194,6 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator|(const
 template <BOOST_INT128_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr uint128_t& uint128_t::operator|=(const Integer rhs) noexcept
 {
-    #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION
-    static_assert(detail::is_unsigned_integer_v<Integer>, "Sign Conversion Error");
-    #endif
-
     *this = *this | rhs;
     return *this;
 }
@@ -1466,10 +1208,6 @@ BOOST_INT128_HOST_DEVICE constexpr uint128_t& uint128_t::operator|=(const uint12
 template <BOOST_INT128_128BIT_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE inline uint128_t& uint128_t::operator|=(const Integer rhs) noexcept
 {
-    #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION
-    static_assert(!std::numeric_limits<Integer>::is_signed, "Sign Conversion Error");
-    #endif
-
     *this = *this | rhs;
     return *this;
 }
@@ -1483,47 +1221,25 @@ BOOST_INT128_HOST_DEVICE inline uint128_t& uint128_t::operator|=(const Integer r
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_SIGNED_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr uint128_t operator&(const uint128_t lhs, const SignedInteger rhs) noexcept
 {
-    #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION
-
     return {lhs.high & (rhs < 0 ? ~UINT64_C(0) : UINT64_C(0)), lhs.low & static_cast<std::uint64_t>(rhs)};
-
-    #else
-
-    static_assert(detail::is_unsigned_integer_v<SignedInteger>, "Sign Conversion Error");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return true;
-
-    #endif
 }
 
 template <BOOST_INT128_DEFAULTED_SIGNED_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr uint128_t operator&(const SignedInteger lhs, const uint128_t rhs) noexcept
 {
-    #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION
-
     return {rhs.high & (lhs < 0 ? ~UINT64_C(0) : UINT64_C(0)), rhs.low & static_cast<std::uint64_t>(lhs)};
-
-    #else
-
-    static_assert(detail::is_unsigned_integer_v<SignedInteger>, "Sign Conversion Error");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return true;
-
-    #endif
 }
 
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_UNSIGNED_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr uint128_t operator&(const uint128_t lhs, const UnsignedInteger rhs) noexcept
 {
-    return {lhs.high, lhs.low & static_cast<std::uint64_t>(rhs)};
+    return {UINT64_C(0), lhs.low & static_cast<std::uint64_t>(rhs)}; // rhs supplies only 64 bits (cast to std::uint64_t), so its high word is zero and the AND clears lhs.high
 }
 
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_UNSIGNED_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr uint128_t operator&(const UnsignedInteger lhs, const uint128_t rhs) noexcept
 {
-    return {rhs.high, rhs.low & static_cast<std::uint64_t>(lhs)};
+    return {UINT64_C(0), rhs.low & static_cast<std::uint64_t>(lhs)}; // lhs supplies only 64 bits (cast to std::uint64_t), so its high word is zero and the AND clears rhs.high
 }
 
 BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator&(const uint128_t lhs, const uint128_t rhs) noexcept
@@ -1531,44 +1247,26 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator&(const
     return {lhs.high & rhs.high, lhs.low & rhs.low};
 }
 
-#ifdef BOOST_INT128_HAS_INT128
+#if defined(BOOST_INT128_HAS_INT128) || defined(BOOST_INT128_HAS_MSVC_INT128)
 
-#ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION
 
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator&(const uint128_t lhs, const detail::builtin_i128 rhs) noexcept
+BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator&(const uint128_t lhs, const detail::builtin_i128 rhs) noexcept
 {
     return lhs & static_cast<uint128_t>(rhs);
 }
 
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator&(const detail::builtin_i128 lhs, const uint128_t rhs) noexcept
+BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator&(const detail::builtin_i128 lhs, const uint128_t rhs) noexcept
 {
     return static_cast<uint128_t>(lhs) & rhs;
 }
 
-#else
-
-BOOST_INT128_EXPORT template <typename T, std::enable_if_t<std::is_same<T, detail::builtin_i128>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE constexpr uint128_t operator&(const uint128_t, const T) noexcept
-{
-    static_assert(detail::is_unsigned_integer_v<T>, "Sign Conversion Error");
-    return {0, 0};
-}
-
-BOOST_INT128_EXPORT template <typename T, std::enable_if_t<std::is_same<T, detail::builtin_i128>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE constexpr uint128_t operator&(const T, const uint128_t) noexcept
-{
-    static_assert(detail::is_unsigned_integer_v<T>, "Sign Conversion Error");
-    return {0, 0};
-}
-
-#endif // BOOST_INT128_ALLOW_SIGN_CONVERSION
 
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator&(const uint128_t lhs, const detail::builtin_u128 rhs) noexcept
+BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator&(const uint128_t lhs, const detail::builtin_u128 rhs) noexcept
 {
     return lhs & static_cast<uint128_t>(rhs);
 }
 
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator&(const detail::builtin_u128 lhs, const uint128_t rhs) noexcept
+BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator&(const detail::builtin_u128 lhs, const uint128_t rhs) noexcept
 {
     return static_cast<uint128_t>(lhs) & rhs;
 }
@@ -1578,10 +1276,6 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator&(const
 template <BOOST_INT128_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr uint128_t& uint128_t::operator&=(const Integer rhs) noexcept
 {
-    #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION
-    static_assert(detail::is_unsigned_integer_v<Integer>, "Sign Conversion Error");
-    #endif
-
     *this = *this & rhs;
     return *this;
 }
@@ -1597,10 +1291,6 @@ BOOST_INT128_HOST_DEVICE constexpr uint128_t& uint128_t::operator&=(const uint12
 template <BOOST_INT128_128BIT_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE inline uint128_t& uint128_t::operator&=(Integer rhs) noexcept
 {
-    #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION
-    static_assert(!std::numeric_limits<Integer>::is_signed, "Sign Conversion Error");
-    #endif
-
     *this = *this & rhs;
     return *this;
 }
@@ -1615,35 +1305,13 @@ BOOST_INT128_HOST_DEVICE inline uint128_t& uint128_t::operator&=(Integer rhs) no
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_SIGNED_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr uint128_t operator^(const uint128_t lhs, const SignedInteger rhs) noexcept
 {
-    #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION
-
     return {lhs.high ^ (rhs < 0 ? ~UINT64_C(0) : UINT64_C(0)), lhs.low ^ static_cast<std::uint64_t>(rhs)};
-
-    #else
-
-    static_assert(detail::is_unsigned_integer_v<SignedInteger>, "Sign Conversion Error");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return true;
-
-    #endif
 }
 
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_SIGNED_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr uint128_t operator^(const SignedInteger lhs, const uint128_t rhs) noexcept
 {
-    #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION
-
     return {rhs.high ^ (lhs < 0 ? ~UINT64_C(0) : UINT64_C(0)), rhs.low ^ static_cast<std::uint64_t>(lhs)};
-
-    #else
-
-    static_assert(detail::is_unsigned_integer_v<SignedInteger>, "Sign Conversion Error");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return true;
-
-    #endif
 }
 
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_UNSIGNED_INTEGER_CONCEPT>
@@ -1663,44 +1331,26 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator^(const
     return {lhs.high ^ rhs.high, lhs.low ^ rhs.low};
 }
 
-#ifdef BOOST_INT128_HAS_INT128
+#if defined(BOOST_INT128_HAS_INT128) || defined(BOOST_INT128_HAS_MSVC_INT128)
 
-#ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION
 
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator^(const uint128_t lhs, const detail::builtin_i128 rhs) noexcept
+BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator^(const uint128_t lhs, const detail::builtin_i128 rhs) noexcept
 {
     return lhs ^ static_cast<uint128_t>(rhs);
 }
 
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator^(const detail::builtin_i128 lhs, const uint128_t rhs) noexcept
+BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator^(const detail::builtin_i128 lhs, const uint128_t rhs) noexcept
 {
     return static_cast<uint128_t>(lhs) ^ rhs;
 }
 
-#else
-
-BOOST_INT128_EXPORT template <typename T, std::enable_if_t<std::is_same<T, detail::builtin_i128>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE constexpr uint128_t operator^(const uint128_t, const T) noexcept
-{
-    static_assert(detail::is_unsigned_integer_v<T>, "Sign Conversion Error");
-    return {0, 0};
-}
 
-BOOST_INT128_EXPORT template <typename T, std::enable_if_t<std::is_same<T, detail::builtin_i128>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE constexpr uint128_t operator^(const T, const uint128_t) noexcept
-{
-    static_assert(detail::is_unsigned_integer_v<T>, "Sign Conversion Error");
-    return {0, 0};
-}
-
-#endif // BOOST_INT128_ALLOW_SIGN_CONVERSION
-
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator^(const uint128_t lhs, const detail::builtin_u128 rhs) noexcept
+BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator^(const uint128_t lhs, const detail::builtin_u128 rhs) noexcept
 {
     return lhs ^ static_cast<uint128_t>(rhs);
 }
 
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator^(const detail::builtin_u128 lhs, const uint128_t rhs) noexcept
+BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator^(const detail::builtin_u128 lhs, const uint128_t rhs) noexcept
 {
     return static_cast<uint128_t>(lhs) ^ rhs;
 }
@@ -1710,10 +1360,6 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator^(const
 template <BOOST_INT128_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr uint128_t& uint128_t::operator^=(const Integer rhs) noexcept
 {
-    #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION
-    static_assert(detail::is_unsigned_integer_v<Integer>, "Sign Conversion Error");
-    #endif
-
     *this = *this ^ rhs;
     return *this;
 }
@@ -1729,10 +1375,6 @@ BOOST_INT128_HOST_DEVICE constexpr uint128_t& uint128_t::operator^=(const uint12
 template <BOOST_INT128_128BIT_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE inline uint128_t& uint128_t::operator^=(Integer rhs) noexcept
 {
-    #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION
-    static_assert(!std::numeric_limits<Integer>::is_signed, "Sign Conversion Error");
-    #endif
-
     *this = *this ^ rhs;
     return *this;
 }
@@ -1891,9 +1533,9 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator<<(cons
     return lhs << rhs.low;
 }
 
-#ifdef BOOST_INT128_HAS_INT128
+#if defined(BOOST_INT128_HAS_INT128) || defined(BOOST_INT128_HAS_MSVC_INT128)
 
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr detail::builtin_u128 operator<<(const detail::builtin_u128 lhs, const uint128_t rhs) noexcept
+BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR detail::builtin_u128 operator<<(const detail::builtin_u128 lhs, const uint128_t rhs) noexcept
 {
     constexpr auto bit_width {sizeof(detail::builtin_u128 ) * 8};
 
@@ -1901,11 +1543,10 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr detail::builtin_u128 oper
     {
         return 0;
     }
-
-    return lhs << rhs.low;
+    return lhs << static_cast<detail::builtin_u128>(rhs.low);
 }
 
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr detail::builtin_i128 operator<<(const detail::builtin_i128 lhs, const uint128_t rhs) noexcept
+BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR detail::builtin_i128 operator<<(const detail::builtin_i128 lhs, const uint128_t rhs) noexcept
 {
     constexpr auto bit_width {sizeof(detail::builtin_u128) * 8};
 
@@ -1914,7 +1555,7 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr detail::builtin_i128 oper
         return 0;
     }
 
-    return lhs << rhs.low;
+    return lhs << static_cast<detail::builtin_u128>(rhs.low);
 }
 
 #endif
@@ -2114,9 +1755,9 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator>>(cons
     return lhs >> rhs.low;
 }
 
-#ifdef BOOST_INT128_HAS_INT128
+#if defined(BOOST_INT128_HAS_INT128) || defined(BOOST_INT128_HAS_MSVC_INT128)
 
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr detail::builtin_u128 operator>>(const detail::builtin_u128 lhs, const uint128_t rhs) noexcept
+BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR detail::builtin_u128 operator>>(const detail::builtin_u128 lhs, const uint128_t rhs) noexcept
 {
     constexpr auto bit_width = sizeof(detail::builtin_u128) * 8;
 
@@ -2125,10 +1766,10 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr detail::builtin_u128 oper
         return 0;
     }
 
-    return lhs >> rhs.low;
+    return lhs >> static_cast<detail::builtin_u128>(rhs.low);
 }
 
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr detail::builtin_i128 operator>>(const detail::builtin_i128 lhs, const uint128_t rhs) noexcept
+BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR detail::builtin_i128 operator>>(const detail::builtin_i128 lhs, const uint128_t rhs) noexcept
 {
     constexpr auto bit_width = sizeof(detail::builtin_i128) * 8;
 
@@ -2137,7 +1778,7 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr detail::builtin_i128 oper
         return 0;
     }
 
-    return lhs >> rhs.low;
+    return lhs >> static_cast<detail::builtin_u128>(rhs.low);
 }
 
 #endif
@@ -2348,37 +1989,15 @@ BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr uint128_t default_s
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_SIGNED_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr uint128_t operator+(const uint128_t lhs, const SignedInteger rhs) noexcept
 {
-    #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION
-
     return rhs < 0 ? impl::default_sub(lhs, -static_cast<std::uint64_t>(rhs)) :
                      impl::default_add(lhs, static_cast<std::uint64_t>(rhs));
-
-    #else
-
-    static_assert(detail::is_unsigned_integer_v<SignedInteger>, "Sign Conversion Error");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return true;
-
-    #endif
 }
 
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_SIGNED_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr uint128_t operator+(const SignedInteger lhs, const uint128_t rhs) noexcept
 {
-    #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION
-
     return lhs < 0 ? impl::default_sub(rhs, -static_cast<std::uint64_t>(lhs)) :
                      impl::default_add(rhs, static_cast<std::uint64_t>(lhs));
-
-    #else
-
-    static_assert(detail::is_unsigned_integer_v<SignedInteger>, "Sign Conversion Error");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return true;
-
-    #endif
 }
 
 #ifdef _MSC_VER
@@ -2404,7 +2023,6 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator+(const
 
 #if defined(BOOST_INT128_HAS_INT128) || defined(BOOST_INT128_HAS_MSVC_INT128)
 
-#ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION
 
 BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator+(const uint128_t lhs, const detail::builtin_i128 rhs) noexcept
 {
@@ -2416,23 +2034,6 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint
     return impl::default_add(static_cast<uint128_t>(lhs), rhs);
 }
 
-#else
-
-BOOST_INT128_EXPORT template <typename T, std::enable_if_t<std::is_same<T, detail::builtin_i128>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator+(const uint128_t, const T) noexcept
-{
-    static_assert(detail::is_unsigned_integer_v<T>, "Sign Conversion Error");
-    return {0, 0};
-}
-
-BOOST_INT128_EXPORT template <typename T, std::enable_if_t<std::is_same<T, detail::builtin_i128>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator+(const T, const uint128_t) noexcept
-{
-    static_assert(detail::is_unsigned_integer_v<T>, "Sign Conversion Error");
-    return {0, 0};
-}
-
-#endif // BOOST_INT128_ALLOW_SIGN_CONVERSION
 
 BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator+(const uint128_t lhs, const detail::builtin_u128 rhs) noexcept
 {
@@ -2449,10 +2050,6 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint
 template <BOOST_INT128_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr uint128_t& uint128_t::operator+=(const Integer rhs) noexcept
 {
-    #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION
-    static_assert(detail::is_unsigned_integer_v<Integer>, "Sign Conversion Error");
-    #endif
-
     *this = *this + rhs;
     return *this;
 }
@@ -2468,10 +2065,6 @@ BOOST_INT128_HOST_DEVICE constexpr uint128_t& uint128_t::operator+=(const uint12
 template <BOOST_INT128_128BIT_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE inline uint128_t& uint128_t::operator+=(const Integer rhs) noexcept
 {
-    #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION
-    static_assert(!std::numeric_limits<Integer>::is_signed, "Sign Conversion Error");
-    #endif
-
     *this = *this + rhs;
     return *this;
 }
@@ -2491,37 +2084,15 @@ BOOST_INT128_HOST_DEVICE inline uint128_t& uint128_t::operator+=(const Integer r
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_SIGNED_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr uint128_t operator-(const uint128_t lhs, const SignedInteger rhs) noexcept
 {
-    #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION
-
     return rhs < 0 ? impl::default_add(lhs, -static_cast<std::uint64_t>(rhs)) :
                      impl::default_sub(lhs, static_cast<std::uint64_t>(rhs));
-
-    #else
-
-    static_assert(detail::is_unsigned_integer_v<SignedInteger>, "Sign Conversion Error");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return true;
-
-    #endif
 }
 
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_SIGNED_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr uint128_t operator-(const SignedInteger lhs, const uint128_t rhs) noexcept
 {
-    #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION
-
     return lhs < 0 ? impl::default_sub(-rhs, -static_cast<std::uint64_t>(lhs)) :
                      impl::default_add(-rhs, static_cast<std::uint64_t>(lhs));
-
-    #else
-
-    static_assert(detail::is_unsigned_integer_v<SignedInteger>, "Sign Conversion Error");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return true;
-
-    #endif
 }
 
 #ifdef _MSC_VER
@@ -2547,7 +2118,6 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator-(const
 
 #if defined(BOOST_INT128_HAS_INT128) || defined(BOOST_INT128_HAS_MSVC_INT128)
 
-#ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION
 
 BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator-(const uint128_t lhs, const detail::builtin_i128 rhs) noexcept
 {
@@ -2559,23 +2129,6 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint
     return static_cast<uint128_t>(lhs) - rhs;
 }
 
-#else
-
-BOOST_INT128_EXPORT template <typename T, std::enable_if_t<std::is_same<T, detail::builtin_i128>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator-(const uint128_t, const T) noexcept
-{
-    static_assert(detail::is_unsigned_integer_v<T>, "Sign Conversion Error");
-    return {0, 0};
-}
-
-BOOST_INT128_EXPORT template <typename T, std::enable_if_t<std::is_same<T, detail::builtin_i128>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator-(const T, const uint128_t) noexcept
-{
-    static_assert(detail::is_unsigned_integer_v<T>, "Sign Conversion Error");
-    return {0, 0};
-}
-
-#endif // BOOST_INT128_ALLOW_SIGN_CONVERSION
 
 BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator-(const uint128_t lhs, const detail::builtin_u128 rhs) noexcept
 {
@@ -2592,10 +2145,6 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint
 template <BOOST_INT128_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr uint128_t& uint128_t::operator-=(const Integer rhs) noexcept
 {
-    #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION
-    static_assert(detail::is_unsigned_integer_v<Integer>, "Sign Conversion Error");
-    #endif
-
     *this = *this - rhs;
     return *this;
 }
@@ -2611,10 +2160,6 @@ BOOST_INT128_HOST_DEVICE constexpr uint128_t& uint128_t::operator-=(const uint12
 template <BOOST_INT128_128BIT_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE inline uint128_t& uint128_t::operator-=(const Integer rhs) noexcept
 {
-    #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION
-    static_assert(!std::numeric_limits<Integer>::is_signed, "Sign Conversion Error");
-    #endif
-
     *this = *this - rhs;
     return *this;
 }
@@ -2726,11 +2271,9 @@ BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr uint128_t default_m
 
     #  endif
 
-    #elif (defined(__s390x__) || defined(__s390x__)) && defined(__GNUC__)
-    #  define BOOST_INT128_HIDE_MUL
-
-        return static_cast<uint128_t>(static_cast<builtin_u128>(lhs) * static_cast<builtin_u128>(rhs));
-
+    // s390x intentionally falls through to the synthetic low_word_mul below. Casting to builtin_u128
+    // makes GCC reconstruct the value through a vector-unit stack round-trip that is several times
+    // slower, and the memcpy path is unsafe for the narrow (scalar rhs) overloads on big-endian.
     #elif ((defined(_M_AMD64) && !defined(__GNUC__)) || defined(_M_ARM64)) && !defined(BOOST_INT128_NO_CONSTEVAL_DETECTION)
 
     if (!BOOST_INT128_IS_CONSTANT_EVALUATED(lhs))
@@ -2743,18 +2286,7 @@ BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr uint128_t default_m
     // We need to hide this if we use a non-const eval method above to avoid a litany of cross-platform warnings
     #ifndef BOOST_INT128_HIDE_MUL
 
-    constexpr std::size_t rhs_words_needed {std::is_same<UnsignedInteger, std::uint32_t>::value ? 1 :
-                                            std::is_same<UnsignedInteger, std::uint64_t>::value ? 2 :
-                                            std::is_same<UnsignedInteger, uint128_t>::value ? 4 : 0};
-
-    static_assert(rhs_words_needed != 0, "Must be 32, 64 or 128 bit unsigned integer");
-
-    std::uint32_t lhs_words[4] {};
-    std::uint32_t rhs_words[rhs_words_needed] {};
-    to_words(lhs, lhs_words);
-    to_words(rhs, rhs_words);
-
-    return knuth_multiply<uint128_t>(lhs_words, rhs_words);
+    return low_word_mul<uint128_t>(lhs, rhs);
 
     #else
     #undef BOOST_INT128_HIDE_MUL
@@ -2775,45 +2307,23 @@ BOOST_INT128_HOST_DEVICE BOOST_INT128_FORCE_INLINE constexpr uint128_t default_m
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_SIGNED_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr uint128_t operator*(const uint128_t lhs, const SignedInteger rhs) noexcept
 {
-    #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION
-
     using eval_type = detail::evaluation_type_t<SignedInteger>;
 
     const auto abs_rhs {rhs < 0 ? -static_cast<eval_type>(rhs) : static_cast<eval_type>(rhs)};
     const auto res {detail::default_mul(lhs, abs_rhs)};
 
     return rhs < 0 ? -res : res;
-
-    #else
-
-    static_assert(detail::is_unsigned_integer_v<SignedInteger>, "Sign Conversion Error");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return true;
-
-    #endif
 }
 
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_SIGNED_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr uint128_t operator*(const SignedInteger lhs, const uint128_t rhs) noexcept
 {
-    #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION
-
     using eval_type = detail::evaluation_type_t<SignedInteger>;
 
     const auto abs_lhs {lhs < 0 ? -static_cast<eval_type>(lhs) : static_cast<eval_type>(lhs)};
     const auto res {detail::default_mul(rhs, abs_lhs)};
 
     return lhs < 0 ? -res : res;
-
-    #else
-
-    static_assert(detail::is_unsigned_integer_v<SignedInteger>, "Sign Conversion Error");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return true;
-
-    #endif
 }
 
 BOOST_INT128_EXPORT template <BOOST_INT128_DEFAULTED_UNSIGNED_INTEGER_CONCEPT>
@@ -2837,50 +2347,38 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator*(const
     return detail::default_mul(lhs, rhs);
 }
 
-#ifdef BOOST_INT128_HAS_INT128
+#if defined(BOOST_INT128_HAS_INT128) || defined(BOOST_INT128_HAS_MSVC_INT128)
 
-#ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION
 
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator*(const uint128_t lhs, const detail::builtin_i128 rhs) noexcept
+BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator*(const uint128_t lhs, const detail::builtin_i128 rhs) noexcept
 {
-    const auto abs_rhs {rhs < 0 ? -static_cast<uint128_t>(rhs) : static_cast<uint128_t>(rhs)};
-    const auto res {lhs * abs_rhs};
+    const detail::builtin_u128 rhs_bits {static_cast<detail::builtin_u128>(rhs)};
+    const bool rhs_negative {static_cast<std::int64_t>(static_cast<std::uint64_t>(rhs_bits >> static_cast<detail::builtin_u128>(64U))) < 0};
+    const uint128_t rhs_u {rhs_bits};
+    const uint128_t abs_rhs {rhs_negative ? -rhs_u : rhs_u};
+    const uint128_t res {lhs * abs_rhs};
 
-    return rhs < 0 ? -res : res;
+    return rhs_negative ? -res : res;
 }
 
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator*(const detail::builtin_i128 lhs, const uint128_t rhs) noexcept
+BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator*(const detail::builtin_i128 lhs, const uint128_t rhs) noexcept
 {
-    const auto abs_lhs {lhs < 0 ? -static_cast<uint128_t>(lhs) : static_cast<uint128_t>(lhs)};
-    const auto res {abs_lhs * rhs};
+    const detail::builtin_u128 lhs_bits {static_cast<detail::builtin_u128>(lhs)};
+    const bool lhs_negative {static_cast<std::int64_t>(static_cast<std::uint64_t>(lhs_bits >> static_cast<detail::builtin_u128>(64U))) < 0};
+    const uint128_t lhs_u {lhs_bits};
+    const uint128_t abs_lhs {lhs_negative ? -lhs_u : lhs_u};
+    const uint128_t res {abs_lhs * rhs};
 
-    return lhs < 0 ? -res : res;
+    return lhs_negative ? -res : res;
 }
 
-#else
 
-BOOST_INT128_EXPORT template <typename T, std::enable_if_t<std::is_same<T, detail::builtin_i128>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE constexpr uint128_t operator*(const uint128_t, const T) noexcept
-{
-    static_assert(detail::is_unsigned_integer_v<T>, "Sign Conversion Error");
-    return {0, 0};
-}
-
-BOOST_INT128_EXPORT template <typename T, std::enable_if_t<std::is_same<T, detail::builtin_i128>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE constexpr uint128_t operator*(const T, const uint128_t) noexcept
-{
-    static_assert(detail::is_unsigned_integer_v<T>, "Sign Conversion Error");
-    return {0, 0};
-}
-
-#endif // BOOST_INT128_ALLOW_SIGN_CONVERSION
-
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator*(const uint128_t lhs, const detail::builtin_u128 rhs) noexcept
+BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator*(const uint128_t lhs, const detail::builtin_u128 rhs) noexcept
 {
     return lhs * static_cast<uint128_t>(rhs);
 }
 
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator*(const detail::builtin_u128 lhs, const uint128_t rhs) noexcept
+BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator*(const detail::builtin_u128 lhs, const uint128_t rhs) noexcept
 {
     return static_cast<uint128_t>(lhs) * rhs;
 }
@@ -2890,10 +2388,6 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator*(const
 template <BOOST_INT128_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr uint128_t& uint128_t::operator*=(const Integer rhs) noexcept
 {
-    #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION
-    static_assert(detail::is_unsigned_integer_v<Integer>, "Sign Conversion Error");
-    #endif
-
     *this = *this * rhs;
     return *this;
 }
@@ -2909,10 +2403,6 @@ BOOST_INT128_HOST_DEVICE constexpr uint128_t& uint128_t::operator*=(const uint12
 template <BOOST_INT128_128BIT_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE inline uint128_t& uint128_t::operator*=(const Integer rhs) noexcept
 {
-    #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION
-    static_assert(!std::numeric_limits<Integer>::is_signed, "Sign Conversion Error");
-    #endif
-
     *this = *this * rhs;
     return *this;
 }
@@ -2941,37 +2431,15 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator/(uint1
 template <BOOST_INT128_SIGNED_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr uint128_t operator/(const uint128_t lhs, const SignedInteger rhs) noexcept
 {
-    #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION
-
     using eval_type = detail::evaluation_type_t<SignedInteger>;
     return rhs < 0 ? lhs / static_cast<uint128_t>(rhs) : lhs / static_cast<eval_type>(rhs);
-
-    #else
-
-    static_assert(detail::is_unsigned_integer_v<SignedInteger>, "Sign Conversion Error");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return true;
-
-    #endif
 }
 
 template <BOOST_INT128_SIGNED_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr uint128_t operator/(const SignedInteger lhs, const uint128_t rhs) noexcept
 {
-    #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION
-
     using eval_type = detail::evaluation_type_t<SignedInteger>;
     return lhs < 0 ? static_cast<uint128_t>(lhs) / rhs : static_cast<eval_type>(lhs) / rhs;
-
-    #else
-
-    static_assert(detail::is_unsigned_integer_v<SignedInteger>, "Sign Conversion Error");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return true;
-
-    #endif
 }
 
 template <BOOST_INT128_UNSIGNED_INTEGER_CONCEPT>
@@ -3025,31 +2493,30 @@ BOOST_INT128_HOST_DEVICE constexpr uint128_t operator/(const uint128_t lhs, cons
     {
         return {0, 0};
     }
-    #if defined(BOOST_INT128_HAS_INT128) && !defined(__s390__) && !defined(__s390x__)
-    else
-    {
-        return static_cast<uint128_t>(static_cast<detail::builtin_u128>(lhs) / static_cast<detail::builtin_u128>(rhs));
-    }
-    #else
-    else if (rhs.high != 0U)
-    {
-        return detail::knuth_div(lhs, rhs);
-    }
-    else
+
+    // A divisor that fits in 64 bits is handled by the hardware-accelerated narrow path. This
+    // beats the native 128/128 divide for this common case on every platform (it avoids the
+    // out-of-line __udivti3 call on GCC/Clang and uses divq / _udiv128 directly where present).
+    if (rhs.high == 0U)
     {
         if (lhs.high == 0U)
         {
             return {0, lhs.low / rhs.low};
         }
-        else
-        {
-            uint128_t quotient {};
 
-            detail::one_word_div(lhs, rhs.low, quotient);
-
-            return quotient;
-        }
+        uint128_t quotient {};
+        detail::one_word_div(lhs, rhs.low, quotient);
+        return quotient;
     }
+
+    #if defined(BOOST_INT128_HAS_INT128) && !defined(__s390__) && !defined(__s390x__)
+
+    return static_cast<uint128_t>(static_cast<detail::builtin_u128>(lhs) / static_cast<detail::builtin_u128>(rhs));
+
+    #else
+
+    return detail::knuth_div(lhs, rhs);
+
     #endif
 }
 
@@ -3065,7 +2532,6 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint
     return static_cast<uint128_t>(lhs) / rhs;
 }
 
-#ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION
 
 BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator/(const uint128_t lhs, const detail::builtin_i128 rhs) noexcept
 {
@@ -3077,33 +2543,12 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint
     return static_cast<uint128_t>(lhs) / rhs;
 }
 
-#else
-
-BOOST_INT128_EXPORT template <typename T, std::enable_if_t<std::is_same<T, detail::builtin_i128>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator/(const uint128_t, const T) noexcept
-{
-    static_assert(detail::is_unsigned_integer_v<T>, "Sign Conversion Error");
-    return {0, 0};
-}
-
-BOOST_INT128_EXPORT template <typename T, std::enable_if_t<std::is_same<T, detail::builtin_i128>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator/(const T, const uint128_t) noexcept
-{
-    static_assert(detail::is_unsigned_integer_v<T>, "Sign Conversion Error");
-    return {0, 0};
-}
-
-#endif // BOOST_INT128_ALLOW_SIGN_CONVERSION
 
 #endif // BOOST_INT128_HAS_INT128
 
 template <BOOST_INT128_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr uint128_t& uint128_t::operator/=(const Integer rhs) noexcept
 {
-    #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION
-    static_assert(detail::is_unsigned_integer_v<Integer>, "Sign Conversion Error");
-    #endif
-
     *this = *this / rhs;
     return *this;
 }
@@ -3119,10 +2564,6 @@ BOOST_INT128_HOST_DEVICE constexpr uint128_t& uint128_t::operator/=(const uint12
 template <BOOST_INT128_128BIT_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE inline uint128_t& uint128_t::operator/=(const Integer rhs) noexcept
 {
-    #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION
-    static_assert(!std::numeric_limits<Integer>::is_signed, "Sign Conversion Error");
-    #endif
-
     *this = *this / rhs;
     return *this;
 }
@@ -3151,37 +2592,15 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator%(uint1
 template <BOOST_INT128_SIGNED_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr uint128_t operator%(const uint128_t lhs, const SignedInteger rhs) noexcept
 {
-    #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION
-
     using eval_type = detail::evaluation_type_t<SignedInteger>;
     return rhs < 0 ? lhs % static_cast<uint128_t>(rhs) : lhs % static_cast<eval_type>(rhs);
-
-    #else
-
-    static_assert(detail::is_unsigned_integer_v<SignedInteger>, "Sign Conversion Error");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return true;
-
-    #endif
 }
 
 template <BOOST_INT128_SIGNED_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr uint128_t operator%(const SignedInteger lhs, const uint128_t rhs) noexcept
 {
-    #ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION
-
     using eval_type = detail::evaluation_type_t<SignedInteger>;
     return lhs < 0 ? static_cast<uint128_t>(lhs) % rhs : static_cast<eval_type>(lhs) % rhs;
-
-    #else
-
-    static_assert(detail::is_unsigned_integer_v<SignedInteger>, "Sign Conversion Error");
-    static_cast<void>(lhs);
-    static_cast<void>(rhs);
-    return true;
-
-    #endif
 }
 
 template <BOOST_INT128_UNSIGNED_INTEGER_CONCEPT>
@@ -3232,38 +2651,36 @@ BOOST_INT128_HOST_DEVICE constexpr uint128_t operator%(const uint128_t lhs, cons
     {
         return {0, 0};
     }
-    else if (rhs > lhs)
+    if (rhs > lhs)
     {
         return lhs;
     }
-    #if defined(BOOST_INT128_HAS_INT128) && !defined(__s390__) && !defined(__s390x__)
-    else
-    {
-        return static_cast<uint128_t>(static_cast<detail::builtin_u128>(lhs) % static_cast<detail::builtin_u128>(rhs));
-    }
-    #else
-    else if (rhs.high != 0U)
-    {
-        uint128_t remainder {};
-        detail::knuth_div(lhs, rhs, remainder);
-        return remainder;
-    }
-    else
+
+    // A divisor that fits in 64 bits is handled by the hardware-accelerated narrow path, which
+    // beats the native 128/128 divide for this common case on every platform.
+    if (rhs.high == 0U)
     {
         if (lhs.high == 0U)
         {
             return {0, lhs.low % rhs.low};
         }
-        else
-        {
-            uint128_t quotient {};
-            uint128_t remainder {};
-
-            detail::one_word_div(lhs, rhs.low, quotient, remainder);
 
-            return remainder;
-        }
+        uint128_t quotient {};
+        uint128_t remainder {};
+        detail::one_word_div(lhs, rhs.low, quotient, remainder);
+        return remainder;
     }
+
+    #if defined(BOOST_INT128_HAS_INT128) && !defined(__s390__) && !defined(__s390x__)
+
+    return static_cast<uint128_t>(static_cast<detail::builtin_u128>(lhs) % static_cast<detail::builtin_u128>(rhs));
+
+    #else
+
+    uint128_t remainder {};
+    detail::knuth_div(lhs, rhs, remainder);
+    return remainder;
+
     #endif
 }
 
@@ -3279,7 +2696,6 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint
     return static_cast<uint128_t>(lhs) % rhs;
 }
 
-#ifdef BOOST_INT128_ALLOW_SIGN_CONVERSION
 
 BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator%(const uint128_t lhs, const detail::builtin_i128 rhs) noexcept
 {
@@ -3291,33 +2707,12 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint
     return static_cast<uint128_t>(lhs) % rhs;
 }
 
-#else
-
-BOOST_INT128_EXPORT template <typename T, std::enable_if_t<std::is_same<T, detail::builtin_i128>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator%(const uint128_t, const T) noexcept
-{
-    static_assert(detail::is_unsigned_integer_v<T>, "Sign Conversion Error");
-    return {0, 0};
-}
-
-BOOST_INT128_EXPORT template <typename T, std::enable_if_t<std::is_same<T, detail::builtin_i128>::value, bool> = true>
-BOOST_INT128_HOST_DEVICE BOOST_INT128_BUILTIN_CONSTEXPR uint128_t operator%(const T, const uint128_t) noexcept
-{
-    static_assert(detail::is_unsigned_integer_v<T>, "Sign Conversion Error");
-    return {0, 0};
-}
-
-#endif // BOOST_INT128_ALLOW_SIGN_CONVERSION
 
 #endif // BOOST_INT128_HAS_INT128
 
 template <BOOST_INT128_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE constexpr uint128_t& uint128_t::operator%=(const Integer rhs) noexcept
 {
-    #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION
-    static_assert(detail::is_unsigned_integer_v<Integer>, "Sign Conversion Error");
-    #endif
-
     *this = *this % rhs;
     return *this;
 }
@@ -3333,10 +2728,6 @@ BOOST_INT128_HOST_DEVICE constexpr uint128_t& uint128_t::operator%=(const uint12
 template <BOOST_INT128_128BIT_INTEGER_CONCEPT>
 BOOST_INT128_HOST_DEVICE inline uint128_t& uint128_t::operator%=(const Integer rhs) noexcept
 {
-    #ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION
-    static_assert(!std::numeric_limits<Integer>::is_signed, "Sign Conversion Error");
-    #endif
-
     * this = *this % rhs;
     return *this;
 }
diff --git a/include/boost/int128/hash.hpp b/include/boost/int128/hash.hpp
new file mode 100644
index 00000000..9286c0fd
--- /dev/null
+++ b/include/boost/int128/hash.hpp
@@ -0,0 +1,79 @@
+// Copyright 2026 Matt Borland
+// Distributed under the Boost Software License, Version 1.0.
+// https://www.boost.org/LICENSE_1_0.txt
+
+#ifndef BOOST_INT128_HASH_HPP
+#define BOOST_INT128_HASH_HPP
+
+#include <boost/int128/int128.hpp>
+
+#ifndef BOOST_INT128_BUILD_MODULE
+
+#include <cstddef>
+#include <cstdint>
+#include <functional>
+
+#endif
+
+namespace boost {
+namespace int128 {
+namespace detail {
+
+// The cast to std::size_t is only a useless cast on 64-bit platforms.
+// Without it we get an implicit conversion warning, which is arguably worse.
+#if defined(__GNUC__) && !defined(__clang__)
+#  pragma GCC diagnostic push
+#  pragma GCC diagnostic ignored "-Wuseless-cast"
+#endif
+
+// splitmix64 finalizer: mixes all 64 input bits into the result before any narrowing to size_t.
+// This is required for correctness on platforms where size_t is 32 bits
+inline std::size_t hash_finalize_64(std::uint64_t v) noexcept
+{
+    v ^= v >> 30;
+    v *= UINT64_C(0xbf58476d1ce4e5b9);
+    v ^= v >> 27;
+    v *= UINT64_C(0x94d049bb133111eb);
+    v ^= v >> 31;
+    return static_cast<std::size_t>(v);
+}
+
+#if defined(__GNUC__) && !defined(__clang__)
+#  pragma GCC diagnostic pop
+#endif
+
+} // namespace detail
+} // namespace int128
+} // namespace boost
+
+namespace std {
+
+template <>
+struct hash<boost::int128::int128_t>
+{
+    auto operator()(const boost::int128::int128_t v) const noexcept -> std::size_t
+    {
+        const std::size_t low_hash {boost::int128::detail::hash_finalize_64(v.low)};
+        const std::size_t high_hash {boost::int128::detail::hash_finalize_64(static_cast<std::uint64_t>(v.high))};
+
+        // boost::hash_combine style mixing of the two finalized halves
+        return low_hash ^ (high_hash + static_cast<std::size_t>(0x9e3779b9) + (low_hash << 6) + (low_hash >> 2));
+    }
+};
+
+template <>
+struct hash<boost::int128::uint128_t>
+{
+    auto operator()(const boost::int128::uint128_t v) const noexcept -> std::size_t
+    {
+        const std::size_t low_hash {boost::int128::detail::hash_finalize_64(v.low)};
+        const std::size_t high_hash {boost::int128::detail::hash_finalize_64(v.high)};
+
+        // boost::hash_combine style mixing of the two finalized halves
+        return low_hash ^ (high_hash + static_cast<std::size_t>(0x9e3779b9) + (low_hash << 6) + (low_hash >> 2));
+    }
+};
+
+} // namespace std
+
+#endif // BOOST_INT128_HASH_HPP
diff --git a/include/boost/int128/literals.hpp b/include/boost/int128/literals.hpp
index 9497f20b..ecb85bf8 100644
--- a/include/boost/int128/literals.hpp
+++ b/include/boost/int128/literals.hpp
@@ -43,16 +43,6 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator ""_U12
     return result;
 }
 
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator ""_u128(unsigned long long v) noexcept
-{
-    return uint128_t{v};
-}
-
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t operator ""_U128(unsigned long long v) noexcept
-{
-    return uint128_t{v};
-}
-
 BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator ""_i128(const char* str) noexcept
 {
     int128_t result {};
@@ -67,16 +57,6 @@ BOOST_INT128_HOST_DEVICE constexpr int128_t operator ""_I128(const char* str) no
     return result;
 }
 
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator ""_i128(unsigned long long v) noexcept
-{
-    return int128_t{v};
-}
-
-BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator ""_I128(unsigned long long v) noexcept
-{
-    return int128_t{v};
-}
-
 BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t operator ""_i128(const char* str, std::size_t len) noexcept
 {
     int128_t result {};
diff --git a/include/boost/int128/numeric.hpp b/include/boost/int128/numeric.hpp
index 0699cd8b..c38491f7 100644
--- a/include/boost/int128/numeric.hpp
+++ b/include/boost/int128/numeric.hpp
@@ -11,6 +11,7 @@
 #ifndef BOOST_INT128_BUILD_MODULE
 
 #include <limits>
+#include <utility>
 
 #endif
 
diff --git a/include/boost/int128/utilities.hpp b/include/boost/int128/utilities.hpp
new file mode 100644
index 00000000..ffb29b66
--- /dev/null
+++ b/include/boost/int128/utilities.hpp
@@ -0,0 +1,438 @@
+// Copyright 2026 Matt Borland
+// Distributed under the Boost Software License, Version 1.0.
+// https://www.boost.org/LICENSE_1_0.txt
+
+#ifndef BOOST_INT128_UTILITIES_HPP
+#define BOOST_INT128_UTILITIES_HPP
+
+#include <boost/int128/int128.hpp>
+#include <boost/int128/bit.hpp>
+#include <boost/int128/detail/config.hpp>
+
+#ifndef BOOST_INT128_BUILD_MODULE
+
+#include <cstdint>
+#include <limits>
+#include <type_traits>
+
+#endif
+
+namespace boost {
+namespace int128 {
+
+namespace detail {
+
+// Modular addition for 128-bit operands assuming 0 <= a, b < m
+BOOST_INT128_HOST_DEVICE constexpr uint128_t addmod(const uint128_t a, const uint128_t b, const uint128_t m) noexcept
+{
+    const uint128_t s {a + b};
+
+    if (s < a || s >= m)
+    {
+        return s - m;
+    }
+
+    return s;
+}
+
+// Modular multiplication via shift-and-add for the full 128-bit modulus case
+BOOST_INT128_HOST_DEVICE constexpr uint128_t mulmod_shift(uint128_t a, uint128_t b, const uint128_t m) noexcept
+{
+    uint128_t result {0};
+
+    while (b != 0U)
+    {
+        if (static_cast<bool>(b.low & 1U))
+        {
+            result = addmod(result, a, m);
+        }
+
+        a = addmod(a, a, m);
+        b >>= 1;
+    }
+
+    return result;
+}
+
+// Modular multiplication when the modulus fits in 64 bits
+BOOST_INT128_HOST_DEVICE constexpr std::uint64_t mulmod_word(const std::uint64_t a, const std::uint64_t b, const std::uint64_t m) noexcept
+{
+    return ((uint128_t{a} * uint128_t{b}) % uint128_t{m}).low;
+}
+
+} // namespace detail
+
+// Computes (base raised to the power exp) mod m using fast modular exponentiation with
+// optimizations specific to the boost::int128 library types. Returns 0 when m is 0.
+BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t powm(uint128_t base, uint128_t exp, const uint128_t m) noexcept
+{
+    if (BOOST_INT128_UNLIKELY(m == 0U))
+    {
+        return uint128_t{0};
+    }
+
+    if (m == 1U)
+    {
+        return uint128_t{0};
+    }
+
+    if (exp == 0U)
+    {
+        return uint128_t{1};
+    }
+
+    base %= m;
+
+    if (base == 0U)
+    {
+        return uint128_t{0};
+    }
+
+    // Power-of-two modulus: reduction is just a bitmask.
+    if (has_single_bit(m))
+    {
+        const uint128_t mask {m - 1U};
+        uint128_t result {1};
+
+        while (exp != 0U)
+        {
+            if (static_cast<bool>(exp.low & 1U))
+            {
+                result = (result * base) & mask;
+            }
+
+            base = (base * base) & mask;
+            exp >>= 1;
+        }
+
+        return result;
+    }
+
+    // Modulus fits in 64 bits: stay in 64-bit lanes.
+    if (m.high == 0U)
+    {
+        const auto mm {m.low};
+        std::uint64_t result {1};
+        auto b {base.low};
+
+        while (exp != 0U)
+        {
+            if (static_cast<bool>(exp.low & 1U))
+            {
+                result = detail::mulmod_word(result, b, mm);
+            }
+
+            b = detail::mulmod_word(b, b, mm);
+            exp >>= 1;
+        }
+
+        return uint128_t{result};
+    }
+
+    // General 128-bit modulus: shift-and-add for each multiply and squaring keeps
+    // every intermediate strictly below m without needing a 256-bit product.
+    uint128_t result {1};
+
+    while (exp != 0U)
+    {
+        if (static_cast<bool>(exp.low & 1U))
+        {
+            result = detail::mulmod_shift(result, base, m);
+        }
+
+        base = detail::mulmod_shift(base, base, m);
+        exp >>= 1;
+    }
+
+    return result;
+}
+
+// Signed overload. Returns the non-negative residue in [0, m), or 0 when m <= 0 or exp < 0.
+BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t powm(const int128_t base, const int128_t exp, const int128_t m) noexcept
+{
+    if (BOOST_INT128_UNLIKELY(m <= 0 || exp < 0))
+    {
+        return int128_t{0};
+    }
+
+    const uint128_t um {static_cast<uint128_t>(m)};
+
+    uint128_t ub {};
+
+    if (base.high < 0)
+    {
+        const uint128_t magnitude {static_cast<uint128_t>(abs(base))};
+        const uint128_t r {magnitude % um};
+        ub = r == 0U ? uint128_t{0} : static_cast<uint128_t>(um - r);
+    }
+    else
+    {
+        ub = static_cast<uint128_t>(base) % um;
+    }
+
+    return static_cast<int128_t>(powm(ub, static_cast<uint128_t>(exp), um));
+}
+
+// Computes base^exp using exponentiation by squaring. The result is reduced
+// modulo 2^128, mirroring the wrap-around behavior of operator*.
+BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t ipow(uint128_t base, std::uint64_t exp) noexcept
+{
+    uint128_t result {1};
+
+    while (exp != 0U)
+    {
+        if (static_cast<bool>(exp & 1U))
+        {
+            result *= base;
+        }
+
+        exp >>= 1;
+
+        if (exp != 0U)
+        {
+            base *= base;
+        }
+    }
+
+    return result;
+}
+
+// Signed overload. Wraps modulo 2^128 on overflow, matching operator*.
+BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t ipow(int128_t base, std::uint64_t exp) noexcept
+{
+    int128_t result {1};
+
+    while (exp != 0U)
+    {
+        if (static_cast<bool>(exp & 1U))
+        {
+            result *= base;
+        }
+
+        exp >>= 1;
+
+        if (exp != 0U)
+        {
+            base *= base;
+        }
+    }
+
+    return result;
+}
+
+// Integer square root: returns floor(sqrt(n)).
+BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t isqrt(const uint128_t n) noexcept
+{
+    if (n < 2U)
+    {
+        return n;
+    }
+
+    // 2^ceil(bit_width(n)/2) is the smallest power of two whose square exceeds n.
+    uint128_t x {uint128_t{1} << ((bit_width(n) + 1) / 2)};
+
+    while (true)
+    {
+        const uint128_t y {(x + n / x) >> 1};
+
+        if (y >= x)
+        {
+            return x;
+        }
+
+        x = y;
+    }
+}
+
+// Signed overload. Negative inputs are documented to return 0.
+BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t isqrt(const int128_t n) noexcept
+{
+    if (BOOST_INT128_UNLIKELY(n < 0))
+    {
+        return int128_t{0};
+    }
+
+    return static_cast<int128_t>(isqrt(static_cast<uint128_t>(n)));
+}
+
+namespace detail {
+
+// The C23 checked integer macros accept any integer type for their operands
+// except bool, plain char, enumerated types, and bit-precise (_BitInt) types.
+template <typename T>
+struct valid_checked_type : std::integral_constant<bool, std::is_integral<T>::value &&
+                                                         !std::is_same<T, bool>::value &&
+                                                         !std::is_same<T, char>::value> {};
+
+template <>
+struct valid_checked_type<int128_t> : std::true_type {};
+
+template <>
+struct valid_checked_type<uint128_t> : std::true_type {};
+
+// Widen an integer operand to its 128-bit two's complement bit pattern, returned as a uint128_t
+template <typename T>
+BOOST_INT128_HOST_DEVICE constexpr uint128_t ckd_widen(const T value) noexcept
+{
+    BOOST_INT128_IF_CONSTEXPR (std::numeric_limits<T>::is_signed)
+    {
+        return static_cast<uint128_t>(static_cast<int128_t>(value));
+    }
+    else
+    {
+        return static_cast<uint128_t>(value);
+    }
+}
+
+// Sign and magnitude of an operand together with its 128-bit two's complement
+// image. magnitude is the absolute value; negative records the sign.
+struct ckd_operand
+{
+    uint128_t raw;
+    uint128_t magnitude;
+    bool negative;
+};
+
+template <typename T>
+BOOST_INT128_HOST_DEVICE constexpr ckd_operand ckd_decompose(const T value) noexcept
+{
+    const uint128_t raw {ckd_widen(value)};
+    const bool negative {std::numeric_limits<T>::is_signed && ((raw >> 127) != 0U)};
+    return ckd_operand{raw, negative ? uint128_t{0} - raw : raw, negative};
+}
+
+// Exact signed sum of two operands given as (magnitude, sign). carry marks a
+// 129th bit, which no 128-bit or narrower target can represent.
+struct ckd_sum_result
+{
+    uint128_t magnitude;
+    bool negative;
+    bool carry;
+};
+
+BOOST_INT128_HOST_DEVICE constexpr ckd_sum_result ckd_signed_sum(const uint128_t a_magnitude, const bool a_negative,
+                                                                 const uint128_t b_magnitude, const bool b_negative) noexcept
+{
+    if (a_negative == b_negative)
+    {
+        // Equal signs: magnitudes add and may overflow into a 129th bit.
+        const uint128_t magnitude {a_magnitude + b_magnitude};
+        return ckd_sum_result{magnitude, a_negative, magnitude < a_magnitude};
+    }
+
+    // Opposite signs: the smaller magnitude is subtracted and never carries.
+    if (a_magnitude >= b_magnitude)
+    {
+        return ckd_sum_result{a_magnitude - b_magnitude, a_negative, false};
+    }
+
+    return ckd_sum_result{b_magnitude - a_magnitude, b_negative, false};
+}
+
+// Whether a result of the given sign and magnitude fails to fit in T1. exceeds_width
+// forces overflow when the true magnitude does not even fit in 128 bits.
+template <typename T1>
+BOOST_INT128_HOST_DEVICE constexpr bool ckd_overflows(const uint128_t magnitude, const bool negative, const bool exceeds_width) noexcept
+{
+    if (exceeds_width)
+    {
+        return true;
+    }
+
+    const uint128_t max_magnitude {static_cast<uint128_t>((std::numeric_limits<T1>::max)())};
+
+    if (negative)
+    {
+        const uint128_t min_magnitude {std::numeric_limits<T1>::is_signed ? max_magnitude + uint128_t{1} : uint128_t{0}};
+        return magnitude > min_magnitude;
+    }
+
+    return magnitude > max_magnitude;
+}
+
+} // namespace detail
+
+// Checked addition following the C23 <stdckdint.h> ckd_add contract.
+//
+// Computes a + b as if both operands were represented in a signed integer
+// type of infinite range and then converts that exact result to the type
+// pointed to by result. *result always receives the exact result wrapped
+// around to the width of *result. Returns false when *result represents the
+// exact mathematical sum, and true when the sum did not fit and wrap-around
+// occurred.
+BOOST_INT128_EXPORT template <typename T1, typename T2, typename T3>
+BOOST_INT128_HOST_DEVICE constexpr bool ckd_add(T1* result, const T2 a, const T3 b) noexcept
+{
+    static_assert(detail::valid_checked_type<T1>::value &&
+                  detail::valid_checked_type<T2>::value &&
+                  detail::valid_checked_type<T3>::value,
+                  "ckd_add operands must be integer types other than bool and plain char.");
+
+    const auto op_a {detail::ckd_decompose(a)};
+    const auto op_b {detail::ckd_decompose(b)};
+
+    // The modular sum of the widened images is the exact sum mod 2^128, which
+    // is all the wrapped result needs for any target no wider than 128 bits.
+    *result = static_cast<T1>(op_a.raw + op_b.raw);
+
+    const auto sum {detail::ckd_signed_sum(op_a.magnitude, op_a.negative, op_b.magnitude, op_b.negative)};
+    return detail::ckd_overflows<T1>(sum.magnitude, sum.negative, sum.carry);
+}
+
+// Checked subtraction following the C23 <stdckdint.h> ckd_sub contract.
+//
+// Behaves as ckd_add for a - b: *result receives the exact difference wrapped
+// to its width, and the return value reports whether that difference did not
+// fit.
+BOOST_INT128_EXPORT template <typename T1, typename T2, typename T3>
+BOOST_INT128_HOST_DEVICE constexpr bool ckd_sub(T1* result, const T2 a, const T3 b) noexcept
+{
+    static_assert(detail::valid_checked_type<T1>::value &&
+                  detail::valid_checked_type<T2>::value &&
+                  detail::valid_checked_type<T3>::value,
+                  "ckd_sub operands must be integer types other than bool and plain char.");
+
+    const auto op_a {detail::ckd_decompose(a)};
+    const auto op_b {detail::ckd_decompose(b)};
+
+    *result = static_cast<T1>(op_a.raw - op_b.raw);
+
+    // a - b is a + (-b): negating b flips its sign while keeping its magnitude.
+    const auto difference {detail::ckd_signed_sum(op_a.magnitude, op_a.negative, op_b.magnitude, !op_b.negative)};
+    return detail::ckd_overflows<T1>(difference.magnitude, difference.negative, difference.carry);
+}
+
+// Checked multiplication following the C23 <stdckdint.h> ckd_mul contract.
+//
+// Computes a * b as if both operands had infinite range, stores the result
+// wrapped to the width of *result, and returns true when the exact product did
+// not fit.
+BOOST_INT128_EXPORT template <typename T1, typename T2, typename T3>
+BOOST_INT128_HOST_DEVICE constexpr bool ckd_mul(T1* result, const T2 a, const T3 b) noexcept
+{
+    static_assert(detail::valid_checked_type<T1>::value &&
+                  detail::valid_checked_type<T2>::value &&
+                  detail::valid_checked_type<T3>::value,
+                  "ckd_mul operands must be integer types other than bool and plain char.");
+
+    const auto op_a {detail::ckd_decompose(a)};
+    const auto op_b {detail::ckd_decompose(b)};
+
+    *result = static_cast<T1>(op_a.raw * op_b.raw);
+
+    // The product magnitude needs more than 128 bits exactly when it exceeds
+    // UINT128_MAX. Dividing the maximum by one magnitude tests that without
+    // forming a 256-bit product.
+    const bool exceeds_width {op_a.magnitude != 0U &&
+                              op_b.magnitude > ((std::numeric_limits<uint128_t>::max)() / op_a.magnitude)};
+
+    const uint128_t product_magnitude {op_a.magnitude * op_b.magnitude};
+    const bool product_negative {op_a.negative != op_b.negative};
+
+    return detail::ckd_overflows<T1>(product_magnitude, product_negative, exceeds_width);
+}
+
+} // namespace int128
+} // namespace boost
+
+#endif // BOOST_INT128_UTILITIES_HPP
diff --git a/test/Jamfile b/test/Jamfile
index 4c18efb3..2be537df 100644
--- a/test/Jamfile
+++ b/test/Jamfile
@@ -44,6 +44,13 @@ project : requirements
     <toolset>clang:<warnings-as-errors>on
     <toolset>gcc:<warnings-as-errors>on
 
+    # The b2 sanitizer features only add -fsanitize flags; define the macros the
+    # tests use to skip checks that intentionally exercise UB (e.g. shift tests).
+    <undefined-sanitizer>on:<define>UBSAN=1
+    <undefined-sanitizer>norecover:<define>UBSAN=1
+    <address-sanitizer>on:<define>ASAN=1
+    <address-sanitizer>norecover:<define>ASAN=1
+
   [ requires cxx14_decltype_auto cxx14_generic_lambdas cxx14_return_type_deduction cxx14_variable_templates cxx14_constexpr ]
   ;
 
@@ -65,34 +72,43 @@ run test_climits.cpp ;
 
 run test_bit.cpp ;
 run test_literals.cpp ;
+run test_from_chars_bases.cpp ;
 run test_stream.cpp ;
 
-compile-fail test_mixed_type_ops.cpp ;
-compile-fail test_mixed_arithmetic.cpp ;
 run test_mixed_type_sign_compare.cpp ;
 run test_mixed_type_sign_conversion.cpp ;
+run test_cross_type_assign.cpp ;
+run test_builtin_parity.cpp ;
 
 run test_consteval_funcs.cpp ;
 run test_sign_compare.cpp ;
-compile-fail test_fail_sign_compare.cpp ;
 run test_x64_msvc_div.cpp ;
 
 run test_gcd_lcm.cpp ;
 run test_midpoint.cpp ;
+run test_powm.cpp ;
+run test_ipow.cpp ;
+run test_isqrt.cpp ;
+run test_ckd.cpp ;
 
 run test_format.cpp ;
 run test_fmt_format.cpp ;
 
 run test_div.cpp ;
+run test_div_primitives.cpp ;
 
 run test_num_digits.cpp ;
 run test_spaceship_operator.cpp ;
 run test_to_string.cpp ;
 
+# Warnings about padding propagate out of <utility>
+run test_hash.cpp : : : <toolset>msvc:<cxxflags>/wd4324 ;
+
 # Make sure we run the examples as well
 run ../examples/construction.cpp ;
 run ../examples/bit.cpp ;
 run ../examples/saturating_arithmetic.cpp ;
+run ../examples/checked_arithmetic.cpp ;
 run ../examples/mixed_type_arithmetic.cpp ;
 run ../examples/stream.cpp ;
 run ../examples/basic_arithmetic.cpp ;
@@ -122,6 +138,7 @@ compile compile_tests/charconv_compile.cpp ;
 compile compile_tests/climits_compile.cpp ;
 compile compile_tests/cstdlib_compile.cpp ;
 compile compile_tests/format_compile.cpp ;
+compile compile_tests/hash_compile.cpp ;
 compile compile_tests/int128_compile.cpp ;
 compile compile_tests/iostream_compile.cpp ;
 compile compile_tests/limits_compile.cpp ;
@@ -129,3 +146,4 @@ compile compile_tests/literals_compile.cpp ;
 compile compile_tests/numeric_compile.cpp ;
 compile compile_tests/string_compile.cpp ;
 compile compile_tests/random_compile.cpp ;
+compile compile_tests/utilities_compile.cpp ;
diff --git a/test/benchmark_i128.cpp b/test/benchmark_i128.cpp
index ba0a4f0d..4433a217 100644
--- a/test/benchmark_i128.cpp
+++ b/test/benchmark_i128.cpp
@@ -319,6 +319,36 @@ BOOST_INT128_NO_INLINE void test_two_element_operation(const std::vector<T>& dat
     std::cerr << operation << "<" << std::left << std::setw(11) << type << ">: " << std::setw( 10 ) << ( t2 - t1 ) / 1us << " us (s=" << s << ")\n";
 }
 
+// Benchmarks the narrow division overloads (128-bit divided by a 64-bit or 32-bit value),
+// which exercise the hardware-accelerated one_word_div path rather than the full 128/128 divide.
+template <bool HalfWord, typename T>
+BOOST_INT128_NO_INLINE void test_narrow_division(const std::vector<T>& data_vec, const char* operation, const char* type)
+{
+    const auto t1 = std::chrono::steady_clock::now();
+    std::int64_t s = 0; // discard variable
+
+    for (std::size_t k {}; k < K; ++k)
+    {
+        for (std::size_t i {}; i < data_vec.size() - 1U; ++i)
+        {
+            if (HalfWord)
+            {
+                const auto divisor = static_cast<std::uint32_t>(data_vec[i + 1]) | 1U;
+                s += static_cast<std::int64_t>(data_vec[i] / divisor);
+            }
+            else
+            {
+                const auto divisor = static_cast<std::uint64_t>(data_vec[i + 1]) | UINT64_C(1);
+                s += static_cast<std::int64_t>(data_vec[i] / divisor);
+            }
+        }
+    }
+
+    const auto t2 = std::chrono::steady_clock::now();
+
+    std::cerr << operation << "<" << std::left << std::setw(11) << type << ">: " << std::setw( 10 ) << ( t2 - t1 ) / 1us << " us (s=" << s << ")\n";
+}
+
 std::vector<int> generate_shift_vector()
 {
     std::random_device rd;
@@ -473,6 +503,32 @@ int main()
         #endif
 
         std::cerr << std::endl;
+
+        #if defined(BOOST_INT128_HAS_INT128) || defined(BOOST_INT128_HAS_MSVC_INTERNAL_I128)
+        test_narrow_division<false>(builtin_vector, "div64", "Builtin");
+        #endif
+
+        test_narrow_division<false>(library_vector, "div64", "Library");
+        test_narrow_division<false>(mp_vector, "div64", "mp::i128");
+
+        #ifdef BOOST_INT128_BENCHMARK_ABSL
+        test_narrow_division<false>(absl_vector, "div64", "absl::i128");
+        #endif
+
+        std::cerr << std::endl;
+
+        #if defined(BOOST_INT128_HAS_INT128) || defined(BOOST_INT128_HAS_MSVC_INTERNAL_I128)
+        test_narrow_division<true>(builtin_vector, "div32", "Builtin");
+        #endif
+
+        test_narrow_division<true>(library_vector, "div32", "Library");
+        test_narrow_division<true>(mp_vector, "div32", "mp::i128");
+
+        #ifdef BOOST_INT128_BENCHMARK_ABSL
+        test_narrow_division<true>(absl_vector, "div32", "absl::i128");
+        #endif
+
+        std::cerr << std::endl;
     }
     // Single word operations
     {
diff --git a/test/benchmark_u128.cpp b/test/benchmark_u128.cpp
index a8a88996..767a1bb5 100644
--- a/test/benchmark_u128.cpp
+++ b/test/benchmark_u128.cpp
@@ -337,6 +337,36 @@ BOOST_INT128_NO_INLINE void test_two_element_operation(const std::vector<T>& dat
     std::cerr << operation << "<" << std::left << std::setw(11) << type << ">: " << std::setw( 10 ) << ( t2 - t1 ) / 1us << " us (s=" << s << ")\n";
 }
 
+// Benchmarks the narrow division overloads (128-bit divided by a 64-bit or 32-bit value),
+// which exercise the hardware-accelerated one_word_div path rather than the full 128/128 divide.
+template <bool HalfWord, typename T>
+BOOST_INT128_NO_INLINE void test_narrow_division(const std::vector<T>& data_vec, const char* operation, const char* type)
+{
+    const auto t1 = std::chrono::steady_clock::now();
+    std::uint64_t s = 0; // discard variable
+
+    for (std::size_t k {}; k < K; ++k)
+    {
+        for (std::size_t i {}; i < data_vec.size() - 1U; ++i)
+        {
+            if (HalfWord)
+            {
+                const auto divisor = static_cast<std::uint32_t>(data_vec[i + 1]) | 1U;
+                s += static_cast<std::uint64_t>(data_vec[i] / divisor);
+            }
+            else
+            {
+                const auto divisor = static_cast<std::uint64_t>(data_vec[i + 1]) | UINT64_C(1);
+                s += static_cast<std::uint64_t>(data_vec[i] / divisor);
+            }
+        }
+    }
+
+    const auto t2 = std::chrono::steady_clock::now();
+
+    std::cerr << operation << "<" << std::left << std::setw(11) << type << ">: " << std::setw( 10 ) << ( t2 - t1 ) / 1us << " us (s=" << s << ")\n";
+}
+
 template <typename T>
 BOOST_INT128_NO_INLINE void test_gcd(const std::vector<T>& data_vec, const char* type)
 {
@@ -586,6 +616,32 @@ int main()
 
         std::cerr << std::endl;
 
+        #if defined(BOOST_INT128_HAS_INT128) || defined(BOOST_INT128_HAS_MSVC_INTERNAL_I128)
+        test_narrow_division<false>(builtin_vector, "div64", "Builtin");
+        #endif
+
+        test_narrow_division<false>(library_vector, "div64", "Library");
+        test_narrow_division<false>(mp_vector, "div64", "mp::u128");
+
+        #ifdef BOOST_INT128_BENCHMARK_ABSL
+        test_narrow_division<false>(absl_vector, "div64", "absl::u128");
+        #endif
+
+        std::cerr << std::endl;
+
+        #if defined(BOOST_INT128_HAS_INT128) || defined(BOOST_INT128_HAS_MSVC_INTERNAL_I128)
+        test_narrow_division<true>(builtin_vector, "div32", "Builtin");
+        #endif
+
+        test_narrow_division<true>(library_vector, "div32", "Library");
+        test_narrow_division<true>(mp_vector, "div32", "mp::u128");
+
+        #ifdef BOOST_INT128_BENCHMARK_ABSL
+        test_narrow_division<true>(absl_vector, "div32", "absl::u128");
+        #endif
+
+        std::cerr << std::endl;
+
         #if (defined(BOOST_INT128_HAS_INT128) || defined(BOOST_INT128_HAS_MSVC_INTERNAL_I128)) && defined(BOOST_INT128_BENCHMARK_BUILTIN_GCD)
         //test_gcd(builtin_vector, "Builtin");
         #endif
diff --git a/test/compile_tests/charconv_compile.cpp b/test/compile_tests/charconv_compile.cpp
index 464ffd97..f38923d3 100644
--- a/test/compile_tests/charconv_compile.cpp
+++ b/test/compile_tests/charconv_compile.cpp
@@ -2,7 +2,6 @@
 // Distributed under the Boost Software License, Version 1.0.
 // https://www.boost.org/LICENSE_1_0.txt
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
 #include <boost/int128/charconv.hpp>
 
 int main()
diff --git a/test/compile_tests/hash_compile.cpp b/test/compile_tests/hash_compile.cpp
new file mode 100644
index 00000000..bf248891
--- /dev/null
+++ b/test/compile_tests/hash_compile.cpp
@@ -0,0 +1,10 @@
+// Copyright 2026 Matt Borland
+// Distributed under the Boost Software License, Version 1.0.
+// https://www.boost.org/LICENSE_1_0.txt
+
+#include <boost/int128/hash.hpp>
+
+int main()
+{
+    return 0;
+}
diff --git a/test/compile_tests/utilities_compile.cpp b/test/compile_tests/utilities_compile.cpp
new file mode 100644
index 00000000..96e31e21
--- /dev/null
+++ b/test/compile_tests/utilities_compile.cpp
@@ -0,0 +1,10 @@
+// Copyright 2026 Matt Borland
+// Distributed under the Boost Software License, Version 1.0.
+// https://www.boost.org/LICENSE_1_0.txt
+
+#include <boost/int128/utilities.hpp>
+
+int main()
+{
+    return 0;
+}
diff --git a/test/github_issue_377.cpp b/test/github_issue_377.cpp
index 44f21817..11ba3eb4 100644
--- a/test/github_issue_377.cpp
+++ b/test/github_issue_377.cpp
@@ -4,7 +4,6 @@
 //
 // See: https://github.com/cppalliance/int128/issues/377
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
 #include <boost/int128.hpp>
 #include <boost/core/lightweight_test.hpp>
 #include <limits>
diff --git a/test/test_bit_ceil.cu b/test/test_bit_ceil.cu
index a660eec8..ea0a1d25 100644
--- a/test/test_bit_ceil.cu
+++ b/test/test_bit_ceil.cu
@@ -3,8 +3,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <vector>
 #include <random>
diff --git a/test/test_bit_floor.cu b/test/test_bit_floor.cu
index 171108d4..1031b471 100644
--- a/test/test_bit_floor.cu
+++ b/test/test_bit_floor.cu
@@ -3,8 +3,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <vector>
 #include <random>
diff --git a/test/test_bit_width.cu b/test/test_bit_width.cu
index d6c13c7d..46917d89 100644
--- a/test/test_bit_width.cu
+++ b/test/test_bit_width.cu
@@ -3,8 +3,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <vector>
 #include <random>
diff --git a/test/test_builtin_parity.cpp b/test/test_builtin_parity.cpp
new file mode 100644
index 00000000..5ad84e3a
--- /dev/null
+++ b/test/test_builtin_parity.cpp
@@ -0,0 +1,349 @@
+// Copyright 2026 Matt Borland
+// Distributed under the Boost Software License, Version 1.0.
+// https://www.boost.org/LICENSE_1_0.txt
+//
+// Verifies that boost::int128::int128_t and boost::int128::uint128_t produce
+// results identical to the built-in __int128 / unsigned __int128 types under
+// the C++ usual arithmetic conversions, for every operator x type-pair.
+
+#include <boost/int128.hpp>
+#include <boost/core/lightweight_test.hpp>
+#include <cstdint>
+#include <random>
+
+#ifdef BOOST_INT128_HAS_INT128
+
+#ifdef __GNUC__
+#  pragma GCC diagnostic push
+#  pragma GCC diagnostic ignored "-Wsign-compare"
+#  pragma GCC diagnostic ignored "-Wsign-conversion"
+#  pragma GCC diagnostic ignored "-Wconversion"
+#endif
+
+using boost::int128::int128_t;
+using boost::int128::uint128_t;
+using boost::int128::detail::builtin_i128;
+using boost::int128::detail::builtin_u128;
+
+static std::mt19937_64 rng{42};
+static constexpr std::size_t N {256U};
+
+template <typename T>
+static T random_value()
+{
+    return static_cast<T>(rng()); // One draw from the shared generator, converted to T by static_cast.
+}
+
+template <>
+builtin_u128 random_value<builtin_u128>()
+{   // Fills both 64-bit halves from two draws; the operands of | may be evaluated in either order.
+    return (static_cast<builtin_u128>(rng()) << 64) | static_cast<builtin_u128>(rng());
+}
+
+template <>
+builtin_i128 random_value<builtin_i128>()
+{
+    return static_cast<builtin_i128>(random_value<builtin_u128>()); // Unsigned 128-bit draw converted to the signed builtin.
+}
+
+// =========================================================================
+// uint128_t vs small signed built-in integers
+// =========================================================================
+
+template <typename SignedT>
+void test_uint128_vs_signed_small()
+{
+    for (std::size_t i {0}; i < N; ++i) // N random (uint128_t, SignedT) operand pairs
+    {
+        const auto raw_lhs {random_value<builtin_u128>()};
+        const auto raw_rhs {random_value<SignedT>()};
+        const uint128_t lib_lhs {raw_lhs};
+        const SignedT s_rhs {raw_rhs};
+
+        // Oracle: the usual arithmetic conversions turn SignedT into unsigned __int128 (sign-extend, then convert)
+        const builtin_u128 oracle_lhs {raw_lhs};
+        const builtin_u128 oracle_rhs = static_cast<builtin_u128>(static_cast<builtin_i128>(s_rhs));
+
+        // Comparisons, with the library type on the left and then on the right
+        BOOST_TEST_EQ(lib_lhs == s_rhs, oracle_lhs == oracle_rhs);
+        BOOST_TEST_EQ(lib_lhs != s_rhs, oracle_lhs != oracle_rhs);
+        BOOST_TEST_EQ(lib_lhs <  s_rhs, oracle_lhs <  oracle_rhs);
+        BOOST_TEST_EQ(lib_lhs <= s_rhs, oracle_lhs <= oracle_rhs);
+        BOOST_TEST_EQ(lib_lhs >  s_rhs, oracle_lhs >  oracle_rhs);
+        BOOST_TEST_EQ(lib_lhs >= s_rhs, oracle_lhs >= oracle_rhs);
+
+        BOOST_TEST_EQ(s_rhs == lib_lhs, oracle_rhs == oracle_lhs);
+        BOOST_TEST_EQ(s_rhs != lib_lhs, oracle_rhs != oracle_lhs);
+        BOOST_TEST_EQ(s_rhs <  lib_lhs, oracle_rhs <  oracle_lhs);
+        BOOST_TEST_EQ(s_rhs <= lib_lhs, oracle_rhs <= oracle_lhs);
+        BOOST_TEST_EQ(s_rhs >  lib_lhs, oracle_rhs >  oracle_lhs);
+        BOOST_TEST_EQ(s_rhs >= lib_lhs, oracle_rhs >= oracle_lhs);
+
+        // Arithmetic: +, -, * are always checked; / and % only when the divisor is non-zero
+        BOOST_TEST_EQ(lib_lhs + s_rhs, uint128_t{oracle_lhs + oracle_rhs});
+        BOOST_TEST_EQ(lib_lhs - s_rhs, uint128_t{oracle_lhs - oracle_rhs});
+        BOOST_TEST_EQ(lib_lhs * s_rhs, uint128_t{oracle_lhs * oracle_rhs});
+        if (s_rhs != 0) // s_rhs is the divisor here
+        {
+            BOOST_TEST_EQ(lib_lhs / s_rhs, uint128_t{oracle_lhs / oracle_rhs});
+            BOOST_TEST_EQ(lib_lhs % s_rhs, uint128_t{oracle_lhs % oracle_rhs});
+        }
+        BOOST_TEST_EQ(s_rhs + lib_lhs, uint128_t{oracle_rhs + oracle_lhs});
+        BOOST_TEST_EQ(s_rhs - lib_lhs, uint128_t{oracle_rhs - oracle_lhs});
+        BOOST_TEST_EQ(s_rhs * lib_lhs, uint128_t{oracle_rhs * oracle_lhs});
+        if (raw_lhs != 0) // lib_lhs is the divisor here
+        {
+            BOOST_TEST_EQ(s_rhs / lib_lhs, uint128_t{oracle_rhs / oracle_lhs});
+            BOOST_TEST_EQ(s_rhs % lib_lhs, uint128_t{oracle_rhs % oracle_lhs});
+        }
+
+        // Bitwise
+        BOOST_TEST_EQ(lib_lhs | s_rhs, uint128_t{oracle_lhs | oracle_rhs});
+        BOOST_TEST_EQ(lib_lhs & s_rhs, uint128_t{oracle_lhs & oracle_rhs});
+        BOOST_TEST_EQ(lib_lhs ^ s_rhs, uint128_t{oracle_lhs ^ oracle_rhs});
+    }
+}
+
+// =========================================================================
+// int128_t vs small unsigned built-in integers
+// =========================================================================
+
+template <typename UnsignedT>
+void test_int128_vs_unsigned_small()
+{
+    for (std::size_t i {0}; i < N; ++i) // N random (int128_t, UnsignedT) operand pairs
+    {
+        const auto raw_lhs {random_value<builtin_i128>()};
+        const auto raw_rhs {random_value<UnsignedT>()};
+        const int128_t lib_lhs {raw_lhs};
+        const UnsignedT u_rhs {raw_rhs};
+
+        // Builtin path: int128_t has higher rank and can represent UnsignedT,
+        // so both promote to __int128 (signed) and the result type is signed.
+        // Comparisons use the signed oracle directly. Arithmetic and bitwise
+        // are performed in the unsigned domain (well-defined wrap-around) and
+        // reinterpreted as signed for the result type, matching the library's
+        // wrap-around semantics and avoiding UB-on-overflow that UBSan flags.
+        const builtin_i128 oracle_lhs {raw_lhs};
+        const builtin_i128 oracle_rhs = static_cast<builtin_i128>(u_rhs);
+        const builtin_u128 oracle_lhs_u = static_cast<builtin_u128>(oracle_lhs);
+        const builtin_u128 oracle_rhs_u = static_cast<builtin_u128>(oracle_rhs);
+
+        BOOST_TEST_EQ(lib_lhs == u_rhs, oracle_lhs == oracle_rhs);
+        BOOST_TEST_EQ(lib_lhs != u_rhs, oracle_lhs != oracle_rhs);
+        BOOST_TEST_EQ(lib_lhs <  u_rhs, oracle_lhs <  oracle_rhs);
+        BOOST_TEST_EQ(lib_lhs <= u_rhs, oracle_lhs <= oracle_rhs);
+        BOOST_TEST_EQ(lib_lhs >  u_rhs, oracle_lhs >  oracle_rhs);
+        BOOST_TEST_EQ(lib_lhs >= u_rhs, oracle_lhs >= oracle_rhs);
+
+        BOOST_TEST_EQ(u_rhs == lib_lhs, oracle_rhs == oracle_lhs);
+        BOOST_TEST_EQ(u_rhs != lib_lhs, oracle_rhs != oracle_lhs);
+        BOOST_TEST_EQ(u_rhs <  lib_lhs, oracle_rhs <  oracle_lhs);
+        BOOST_TEST_EQ(u_rhs <= lib_lhs, oracle_rhs <= oracle_lhs);
+        BOOST_TEST_EQ(u_rhs >  lib_lhs, oracle_rhs >  oracle_lhs);
+        BOOST_TEST_EQ(u_rhs >= lib_lhs, oracle_rhs >= oracle_lhs);
+
+        BOOST_TEST_EQ(lib_lhs + u_rhs, int128_t{static_cast<builtin_i128>(oracle_lhs_u + oracle_rhs_u)});
+        BOOST_TEST_EQ(lib_lhs - u_rhs, int128_t{static_cast<builtin_i128>(oracle_lhs_u - oracle_rhs_u)});
+        BOOST_TEST_EQ(lib_lhs * u_rhs, int128_t{static_cast<builtin_i128>(oracle_lhs_u * oracle_rhs_u)});
+        if (u_rhs != 0) // u_rhs is the divisor here; only / and % need it to be non-zero
+        {
+            BOOST_TEST_EQ(lib_lhs / u_rhs, int128_t{oracle_lhs / oracle_rhs});
+            BOOST_TEST_EQ(lib_lhs % u_rhs, int128_t{oracle_lhs % oracle_rhs});
+        }
+        BOOST_TEST_EQ(u_rhs + lib_lhs, int128_t{static_cast<builtin_i128>(oracle_rhs_u + oracle_lhs_u)});
+        BOOST_TEST_EQ(u_rhs - lib_lhs, int128_t{static_cast<builtin_i128>(oracle_rhs_u - oracle_lhs_u)});
+        BOOST_TEST_EQ(u_rhs * lib_lhs, int128_t{static_cast<builtin_i128>(oracle_rhs_u * oracle_lhs_u)});
+        if (raw_lhs != 0) // lib_lhs is the divisor here; +, -, * above are checked unconditionally
+        {
+            BOOST_TEST_EQ(u_rhs / lib_lhs, int128_t{oracle_rhs / oracle_lhs});
+            BOOST_TEST_EQ(u_rhs % lib_lhs, int128_t{oracle_rhs % oracle_lhs});
+        }
+
+        BOOST_TEST_EQ(lib_lhs | u_rhs, int128_t{static_cast<builtin_i128>(oracle_lhs_u | oracle_rhs_u)});
+        BOOST_TEST_EQ(lib_lhs & u_rhs, int128_t{static_cast<builtin_i128>(oracle_lhs_u & oracle_rhs_u)});
+        BOOST_TEST_EQ(lib_lhs ^ u_rhs, int128_t{static_cast<builtin_i128>(oracle_lhs_u ^ oracle_rhs_u)});
+    }
+}
+
+// =========================================================================
+// uint128_t vs int128_t (cross-type)
+// =========================================================================
+
+void test_cross_type()
+{
+    for (std::size_t i {0}; i < N; ++i) // N random (uint128_t, int128_t) operand pairs
+    {
+        const auto raw_u {random_value<builtin_u128>()};
+        const auto raw_i {random_value<builtin_i128>()};
+        const uint128_t lib_u {raw_u};
+        const int128_t lib_i {raw_i};
+
+        // Oracle: both operands convert to unsigned __int128 (same rank, signed -> unsigned)
+        const builtin_u128 oracle_u {raw_u};
+        const builtin_u128 oracle_i = static_cast<builtin_u128>(raw_i);
+
+        BOOST_TEST_EQ(lib_u == lib_i, oracle_u == oracle_i);
+        BOOST_TEST_EQ(lib_u != lib_i, oracle_u != oracle_i);
+        BOOST_TEST_EQ(lib_u <  lib_i, oracle_u <  oracle_i);
+        BOOST_TEST_EQ(lib_u <= lib_i, oracle_u <= oracle_i);
+        BOOST_TEST_EQ(lib_u >  lib_i, oracle_u >  oracle_i);
+        BOOST_TEST_EQ(lib_u >= lib_i, oracle_u >= oracle_i);
+
+        BOOST_TEST_EQ(lib_i == lib_u, oracle_i == oracle_u);
+        BOOST_TEST_EQ(lib_i != lib_u, oracle_i != oracle_u);
+        BOOST_TEST_EQ(lib_i <  lib_u, oracle_i <  oracle_u);
+        BOOST_TEST_EQ(lib_i <= lib_u, oracle_i <= oracle_u);
+        BOOST_TEST_EQ(lib_i >  lib_u, oracle_i >  oracle_u);
+        BOOST_TEST_EQ(lib_i >= lib_u, oracle_i >= oracle_u);
+
+        BOOST_TEST_EQ(lib_u + lib_i, uint128_t{oracle_u + oracle_i});
+        BOOST_TEST_EQ(lib_u - lib_i, uint128_t{oracle_u - oracle_i});
+        BOOST_TEST_EQ(lib_u * lib_i, uint128_t{oracle_u * oracle_i});
+        if (oracle_i != 0) // lib_i is the divisor here
+        {
+            BOOST_TEST_EQ(lib_u / lib_i, uint128_t{oracle_u / oracle_i});
+            BOOST_TEST_EQ(lib_u % lib_i, uint128_t{oracle_u % oracle_i});
+        }
+        if (oracle_u != 0) // NOTE(review): the reversed operand order is checked for / and % but not for +, -, *
+        {
+            BOOST_TEST_EQ(lib_i / lib_u, uint128_t{oracle_i / oracle_u});
+            BOOST_TEST_EQ(lib_i % lib_u, uint128_t{oracle_i % oracle_u});
+        }
+
+        // Bitwise: same-rank, signed -> unsigned, result uint128_t
+        BOOST_TEST_EQ(lib_u | lib_i, uint128_t{oracle_u | oracle_i});
+        BOOST_TEST_EQ(lib_u & lib_i, uint128_t{oracle_u & oracle_i});
+        BOOST_TEST_EQ(lib_u ^ lib_i, uint128_t{oracle_u ^ oracle_i});
+        BOOST_TEST_EQ(lib_i | lib_u, uint128_t{oracle_i | oracle_u});
+        BOOST_TEST_EQ(lib_i & lib_u, uint128_t{oracle_i & oracle_u});
+        BOOST_TEST_EQ(lib_i ^ lib_u, uint128_t{oracle_i ^ oracle_u});
+
+        // Shifts: result type follows LHS. Compute the int128 left-shift via
+        // the unsigned domain (well-defined wrap-around) and reinterpret as
+        // signed to avoid UB when `raw_i` is negative or the result overflows.
+        const std::uint64_t shift_amount {static_cast<std::uint64_t>(rng()) % 128}; // count in [0, 127]
+        const uint128_t lib_u_shift {shift_amount};
+        const int128_t lib_i_shift {static_cast<std::int64_t>(shift_amount)};
+        const builtin_u128 raw_i_u = static_cast<builtin_u128>(raw_i);
+        BOOST_TEST_EQ(lib_i << lib_u_shift, int128_t{static_cast<builtin_i128>(raw_i_u << shift_amount)});
+        BOOST_TEST_EQ(lib_u << lib_i_shift, uint128_t{raw_u << shift_amount});
+        BOOST_TEST_EQ(lib_i >> lib_u_shift, int128_t{raw_i >> shift_amount});
+        BOOST_TEST_EQ(lib_u >> lib_i_shift, uint128_t{raw_u >> shift_amount});
+    }
+}
+
+// =========================================================================
+// uint128_t vs builtin_i128
+// =========================================================================
+
+void test_uint128_vs_builtin_i128()
+{
+    for (std::size_t i {0}; i < N; ++i) // N random (uint128_t, __int128) operand pairs
+    {
+        const auto raw_u {random_value<builtin_u128>()};
+        const auto raw_i {random_value<builtin_i128>()};
+        const uint128_t lib_u {raw_u};
+
+        const builtin_u128 oracle_u {raw_u};
+        const builtin_u128 oracle_i = static_cast<builtin_u128>(raw_i); // oracle works in unsigned __int128
+        // NOTE(review): every check below has the library type on the left; the reversed order is not exercised here.
+        BOOST_TEST_EQ(lib_u == raw_i, oracle_u == oracle_i);
+        BOOST_TEST_EQ(lib_u != raw_i, oracle_u != oracle_i);
+        BOOST_TEST_EQ(lib_u <  raw_i, oracle_u <  oracle_i);
+        BOOST_TEST_EQ(lib_u <= raw_i, oracle_u <= oracle_i);
+        BOOST_TEST_EQ(lib_u >  raw_i, oracle_u >  oracle_i);
+        BOOST_TEST_EQ(lib_u >= raw_i, oracle_u >= oracle_i);
+
+        BOOST_TEST_EQ(lib_u + raw_i, uint128_t{oracle_u + oracle_i});
+        BOOST_TEST_EQ(lib_u - raw_i, uint128_t{oracle_u - oracle_i});
+        BOOST_TEST_EQ(lib_u * raw_i, uint128_t{oracle_u * oracle_i});
+        if (oracle_i != 0) // raw_i is the divisor here
+        {
+            BOOST_TEST_EQ(lib_u / raw_i, uint128_t{oracle_u / oracle_i});
+            BOOST_TEST_EQ(lib_u % raw_i, uint128_t{oracle_u % oracle_i});
+        }
+
+        BOOST_TEST_EQ(lib_u | raw_i, uint128_t{oracle_u | oracle_i});
+        BOOST_TEST_EQ(lib_u & raw_i, uint128_t{oracle_u & oracle_i});
+        BOOST_TEST_EQ(lib_u ^ raw_i, uint128_t{oracle_u ^ oracle_i});
+
+        const unsigned shift_amount {static_cast<unsigned>(rng() % 128)}; // count in [0, 127]; raw_i is not involved in the shifts
+        BOOST_TEST_EQ(lib_u << shift_amount, uint128_t{oracle_u << shift_amount});
+        BOOST_TEST_EQ(lib_u >> shift_amount, uint128_t{oracle_u >> shift_amount});
+    }
+}
+
+// =========================================================================
+// int128_t vs builtin_u128
+// =========================================================================
+
+void test_int128_vs_builtin_u128()
+{
+    for (std::size_t i {0}; i < N; ++i) // N random (int128_t, unsigned __int128) operand pairs
+    {
+        const auto raw_i {random_value<builtin_i128>()};
+        const auto raw_u {random_value<builtin_u128>()};
+        const int128_t lib_i {raw_i};
+
+        // Both operands convert to unsigned __int128 (same rank, signed -> unsigned).
+        // Result of arithmetic is unsigned __int128 (uint128_t in library form).
+        const builtin_u128 oracle_i = static_cast<builtin_u128>(raw_i);
+        const builtin_u128 oracle_u {raw_u};
+        // NOTE(review): every check below has the library type on the left; the reversed order is not exercised here.
+        BOOST_TEST_EQ(lib_i == raw_u, oracle_i == oracle_u);
+        BOOST_TEST_EQ(lib_i != raw_u, oracle_i != oracle_u);
+        BOOST_TEST_EQ(lib_i <  raw_u, oracle_i <  oracle_u);
+        BOOST_TEST_EQ(lib_i <= raw_u, oracle_i <= oracle_u);
+        BOOST_TEST_EQ(lib_i >  raw_u, oracle_i >  oracle_u);
+        BOOST_TEST_EQ(lib_i >= raw_u, oracle_i >= oracle_u);
+
+        BOOST_TEST_EQ(lib_i + raw_u, uint128_t{oracle_i + oracle_u});
+        BOOST_TEST_EQ(lib_i - raw_u, uint128_t{oracle_i - oracle_u});
+        BOOST_TEST_EQ(lib_i * raw_u, uint128_t{oracle_i * oracle_u});
+        if (oracle_u != 0) // raw_u is the divisor here
+        {
+            BOOST_TEST_EQ(lib_i / raw_u, uint128_t{oracle_i / oracle_u});
+            BOOST_TEST_EQ(lib_i % raw_u, uint128_t{oracle_i % oracle_u});
+        }
+
+        BOOST_TEST_EQ(lib_i | raw_u, uint128_t{oracle_i | oracle_u});
+        BOOST_TEST_EQ(lib_i & raw_u, uint128_t{oracle_i & oracle_u});
+        BOOST_TEST_EQ(lib_i ^ raw_u, uint128_t{oracle_i ^ oracle_u});
+
+        // Shifts: result type follows LHS (int128_t for `lib_i << count`).
+        // Left shift: compute via unsigned (well-defined wrap-around) and
+        // reinterpret as signed, since `signed << count` overflowing or
+        // shifting a negative value is UB pre-C++20 (UBSan flags it).
+        // Right shift: keep signed for arithmetic-shift semantics.
+        const unsigned shift_amount {static_cast<unsigned>(rng() % 128)}; // count in [0, 127]
+        const builtin_u128 raw_i_u = static_cast<builtin_u128>(raw_i);
+        BOOST_TEST_EQ(lib_i << shift_amount, int128_t{static_cast<builtin_i128>(raw_i_u << shift_amount)});
+        BOOST_TEST_EQ(lib_i >> shift_amount, int128_t{raw_i >> shift_amount});
+    }
+}
+
+#endif // BOOST_INT128_HAS_INT128
+
+int main()
+{
+    #ifdef BOOST_INT128_HAS_INT128
+    // The parity tests are only defined when the builtin 128-bit types are available as the oracle.
+    test_uint128_vs_signed_small<std::int8_t>();
+    test_uint128_vs_signed_small<std::int16_t>();
+    test_uint128_vs_signed_small<std::int32_t>();
+    test_uint128_vs_signed_small<std::int64_t>();
+
+    test_int128_vs_unsigned_small<std::uint8_t>();
+    test_int128_vs_unsigned_small<std::uint16_t>();
+    test_int128_vs_unsigned_small<std::uint32_t>();
+    test_int128_vs_unsigned_small<std::uint64_t>();
+
+    test_cross_type();
+    test_uint128_vs_builtin_i128();
+    test_int128_vs_builtin_u128();
+
+    #endif
+
+    return boost::report_errors(); // Without the builtin types no checks run and this reports zero failures.
+}
diff --git a/test/test_byteswap.cu b/test/test_byteswap.cu
index 9cb83f91..5fd7094f 100644
--- a/test/test_byteswap.cu
+++ b/test/test_byteswap.cu
@@ -3,8 +3,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <vector>
 #include <random>
diff --git a/test/test_ckd.cpp b/test/test_ckd.cpp
new file mode 100644
index 00000000..e79343cc
--- /dev/null
+++ b/test/test_ckd.cpp
@@ -0,0 +1,563 @@
+// Copyright 2026 Matt Borland
+// Distributed under the Boost Software License, Version 1.0.
+// https://www.boost.org/LICENSE_1_0.txt
+
+#include <boost/int128.hpp>
+#include <boost/core/lightweight_test.hpp>
+#include <cstddef>
+#include <cstdint>
+#include <limits>
+#include <random>
+#include <type_traits>
+
+using boost::int128::ckd_add;
+using boost::int128::ckd_sub;
+using boost::int128::ckd_mul;
+using boost::int128::int128_t;
+using boost::int128::uint128_t;
+
+constexpr std::size_t N {4096};
+static std::mt19937_64 rng {42};
+static std::uniform_int_distribution<std::uint64_t> dist {0, UINT64_MAX};
+
+// Small magnitudes exercise the no-overflow path for narrow targets, where a
+// purely full-range distribution would almost always overflow.
+static std::uniform_int_distribution<int> small_dist {-1000, 1000};
+
+//
+// Oracle-based testing for the standard integer types. Addition and subtraction
+// are checked against __builtin_add_overflow / __builtin_sub_overflow, which
+// implement the C23 contract exactly (exact result, wrapped into the
+// destination, true on overflow) and so are an independent reference.
+//
+// Multiplication uses a hand-rolled reference instead. __builtin_mul_overflow
+// returns the wrong result for signed operands with an unsigned destination on
+// GCC 7, and on Clang it lowers a 128-bit checked multiply to __muloti4, a
+// compiler-rt symbol that is not always linked. ref_std_mul_overflow forms the
+// exact product from 32-bit limbs (no 128-bit type, no runtime helper) so it is
+// correct and links on every supported toolchain.
+//
+#if defined(__GNUC__) || defined(__clang__)
+
+// 64x64 -> 128 bit unsigned product, returned as hi:lo, built from 32-bit limbs.
+// This needs neither a 128-bit type nor a runtime helper such as __muloti4, so
+// it links on every target including 32-bit ones.
+static void mul_64_to_128(const std::uint64_t a, const std::uint64_t b,
+                          std::uint64_t& hi, std::uint64_t& lo) noexcept
+{
+    const std::uint64_t mask {UINT64_C(0xFFFFFFFF)};
+    const std::uint64_t a0 {a & mask}; // low 32 bits of a
+    const std::uint64_t a1 {a >> 32};  // high 32 bits of a
+    const std::uint64_t b0 {b & mask}; // low 32 bits of b
+    const std::uint64_t b1 {b >> 32};  // high 32 bits of b
+    // Four 32x32 partial products; each fits in 64 bits.
+    const std::uint64_t p00 {a0 * b0};
+    const std::uint64_t p01 {a0 * b1};
+    const std::uint64_t p10 {a1 * b0};
+    const std::uint64_t p11 {a1 * b1};
+    // Middle column: a sum of three 32-bit terms, so it cannot overflow 64 bits; its upper half carries into hi.
+    const std::uint64_t mid {(p00 >> 32) + (p01 & mask) + (p10 & mask)};
+    lo = (p00 & mask) | (mid << 32);
+    hi = p11 + (p01 >> 32) + (p10 >> 32) + (mid >> 32);
+}
+
+// Signedness usable for the standard integer types and, through the
+// specializations in the 128-bit section below, the native extended types.
+template <typename T>
+struct oracle_is_signed : std::is_signed<T> {}; // primary template: defer to the standard trait
+
+template <typename T, std::enable_if_t<oracle_is_signed<T>::value, int> = 0>
+std::uint64_t std_magnitude(const T value, bool& negative) noexcept
+{
+    negative = value < 0; // sign is reported through the out-parameter
+    const std::uint64_t image {static_cast<std::uint64_t>(value)}; // value converted modulo 2^64
+    return negative ? (std::uint64_t{0} - image) : image; // 0 - image negates modulo 2^64, giving |value|
+}
+
+template <typename T, std::enable_if_t<!oracle_is_signed<T>::value, int> = 0>
+std::uint64_t std_magnitude(const T value, bool& negative) noexcept
+{
+    negative = false; // unsigned overload: the value is its own magnitude
+    return static_cast<std::uint64_t>(value);
+}
+
+template <typename R, std::enable_if_t<!oracle_is_signed<R>::value, int> = 0>
+bool oracle_overflows_std(const std::uint64_t magnitude, const bool negative) noexcept
+{
+    const std::uint64_t r_max {static_cast<std::uint64_t>((std::numeric_limits<R>::max)())};
+    return negative ? (magnitude != 0U) : (magnitude > r_max); // unsigned R: only "negative zero" or a value up to R's max fits
+}
+
+template <typename R, std::enable_if_t<oracle_is_signed<R>::value, int> = 0>
+bool oracle_overflows_std(const std::uint64_t magnitude, const bool negative) noexcept
+{
+    const std::uint64_t r_max {static_cast<std::uint64_t>((std::numeric_limits<R>::max)())};
+    const std::uint64_t min_magnitude {r_max + 1U}; // magnitude of R's minimum, taken as max + 1
+    return negative ? (magnitude > min_magnitude) : (magnitude > r_max); // signed R: compare against the bound for the result's sign
+}
+
+// Independent reference for the C23 ckd_mul contract on the standard integer
+// types: forms the exact product, wraps it into *r, and reports whether the
+// destination cannot represent the exact value.
+template <typename A, typename B, typename R>
+bool ref_std_mul_overflow(const A a, const B b, R* r) noexcept
+{
+    bool a_negative {};
+    bool b_negative {};
+    const std::uint64_t a_magnitude {std_magnitude(a, a_negative)};
+    const std::uint64_t b_magnitude {std_magnitude(b, b_negative)};
+    // Multiply the magnitudes as unsigned values; hi:lo is the 128-bit product.
+    std::uint64_t hi {};
+    std::uint64_t lo {};
+    mul_64_to_128(a_magnitude, b_magnitude, hi, lo);
+    // The product is negative when exactly one operand is; negate the low word modulo 2^64 before narrowing to R.
+    const bool negative {a_negative != b_negative};
+    const std::uint64_t wrapped {negative ? (std::uint64_t{0} - lo) : lo};
+    *r = static_cast<R>(wrapped);
+
+    if (hi != 0U) // the magnitude needs more than 64 bits, so no standard integer destination can hold it
+    {
+        return true;
+    }
+
+    return oracle_overflows_std<R>(lo, negative); // otherwise compare the 64-bit magnitude against R's range
+}
+
+template <typename T1, typename T2, typename T3, typename Ref, typename Ckd>
+void check_op(const T2 lhs, const T3 rhs, Ref ref_overflow, Ckd ckd_overflow)
+{
+    T1 expected {};
+    const bool expected_overflow {ref_overflow(lhs, rhs, &expected)}; // reference is called as (lhs, rhs, out)
+
+    T1 got {};
+    const bool got_overflow {ckd_overflow(&got, lhs, rhs)}; // function under test is called as (out, lhs, rhs)
+
+    BOOST_TEST_EQ(got_overflow, expected_overflow);
+    BOOST_TEST(got == expected); // the stored result is compared whether or not overflow was reported
+}
+
+template <typename T1, typename T2, typename T3, typename Ref, typename Ckd>
+void fuzz_op(Ref ref_overflow, Ckd ckd_overflow)
+{
+    for (std::size_t i {0}; i < N; ++i) // each pass checks full/full, small/small, full/small and small/full operand mixes
+    {
+        check_op<T1, T2, T3>(static_cast<T2>(dist(rng)),       static_cast<T3>(dist(rng)),       ref_overflow, ckd_overflow);
+        check_op<T1, T2, T3>(static_cast<T2>(small_dist(rng)), static_cast<T3>(small_dist(rng)), ref_overflow, ckd_overflow);
+        check_op<T1, T2, T3>(static_cast<T2>(dist(rng)),       static_cast<T3>(small_dist(rng)), ref_overflow, ckd_overflow);
+        check_op<T1, T2, T3>(static_cast<T2>(small_dist(rng)), static_cast<T3>(dist(rng)),       ref_overflow, ckd_overflow);
+    }
+}
+
+template <typename Ref, typename Ckd>
+void fuzz_all_triples(Ref ref_overflow, Ckd ckd_overflow)
+{   // Template arguments are <destination, lhs, rhs>, covering same-type, narrowing, widening and mixed-sign triples.
+    fuzz_op<std::int32_t,  std::int32_t,  std::int32_t >(ref_overflow, ckd_overflow);
+    fuzz_op<std::uint32_t, std::uint32_t, std::uint32_t>(ref_overflow, ckd_overflow);
+    fuzz_op<std::int8_t,   std::int32_t,  std::int32_t >(ref_overflow, ckd_overflow);
+    fuzz_op<std::uint8_t,  std::int32_t,  std::int32_t >(ref_overflow, ckd_overflow);
+    fuzz_op<std::int16_t,  std::int16_t,  std::uint16_t>(ref_overflow, ckd_overflow);
+    fuzz_op<std::int64_t,  std::int32_t,  std::uint32_t>(ref_overflow, ckd_overflow);
+    fuzz_op<std::uint64_t, std::int64_t,  std::int64_t >(ref_overflow, ckd_overflow);
+    fuzz_op<std::int32_t,  std::int64_t,  std::int64_t >(ref_overflow, ckd_overflow);
+    fuzz_op<std::uint32_t, std::int8_t,   std::int8_t  >(ref_overflow, ckd_overflow);
+    fuzz_op<std::int64_t,  std::uint64_t, std::uint64_t>(ref_overflow, ckd_overflow);
+    fuzz_op<std::uint16_t, std::int64_t,  std::int32_t >(ref_overflow, ckd_overflow);
+}
+
+void test_standard_oracle()
+{
+    fuzz_all_triples( // ckd_add against __builtin_add_overflow
+        [](auto a, auto b, auto* r) { return __builtin_add_overflow(a, b, r); },
+        [](auto* r, auto a, auto b) { return ckd_add(r, a, b); });
+
+    fuzz_all_triples( // ckd_sub against __builtin_sub_overflow
+        [](auto a, auto b, auto* r) { return __builtin_sub_overflow(a, b, r); },
+        [](auto* r, auto a, auto b) { return ckd_sub(r, a, b); });
+
+    fuzz_all_triples( // ckd_mul against the hand-rolled limb reference
+        [](auto a, auto b, auto* r) { return ref_std_mul_overflow(a, b, r); },
+        [](auto* r, auto a, auto b) { return ckd_mul(r, a, b); });
+}
+
+#else
+
+void test_standard_oracle() {} // Neither __GNUC__ nor __clang__ is defined, so the builtin-based oracle is compiled out.
+
+#endif
+
+//
+// Oracle-based testing at the full 128-bit width using the native compiler
+// type. Addition and subtraction again use the builtins; multiplication uses
+// ref_native_mul_overflow, which assembles the 256-bit product from 64-bit limb
+// products so that no 128-bit multiply (hence no __muloti4) is emitted. This is
+// the only place products genuinely exceed 128 bits, exercising the width check.
+//
+#if defined(__SIZEOF_INT128__) && (defined(__GNUC__) || defined(__clang__))
+
+static uint128_t lib_u(const unsigned __int128 v)
+{
+    return uint128_t{static_cast<std::uint64_t>(v >> 64), static_cast<std::uint64_t>(v)}; // upper 64 bits first, then lower 64 bits
+}
+
+static int128_t lib_s(const __int128 v)
+{
+    return static_cast<int128_t>(lib_u(static_cast<unsigned __int128>(v))); // via the unsigned library type, then converted to int128_t
+}
+
+static unsigned __int128 rand_native()
+{   // Two 64-bit draws fill the two halves; the operands of | may be evaluated in either order.
+    return (static_cast<unsigned __int128>(dist(rng)) << 64) | static_cast<unsigned __int128>(dist(rng));
+}
+
+// std::is_signed is not guaranteed to recognise the extended 128-bit integer
+// types under a strict -std flag, so their signedness is stated explicitly.
+template <>
+struct oracle_is_signed<__int128> : std::true_type {};
+
+template <>
+struct oracle_is_signed<unsigned __int128> : std::false_type {};
+
+template <typename T, std::enable_if_t<oracle_is_signed<T>::value, int> = 0>
+unsigned __int128 native_magnitude(const T value, bool& negative) noexcept
+{
+    negative = value < 0; // sign is reported through the out-parameter
+    const unsigned __int128 image {static_cast<unsigned __int128>(value)}; // value converted modulo 2^128
+    return negative ? (static_cast<unsigned __int128>(0) - image) : image; // 0 - image negates modulo 2^128, giving |value|
+}
+
+template <typename T, std::enable_if_t<!oracle_is_signed<T>::value, int> = 0>
+unsigned __int128 native_magnitude(const T value, bool& negative) noexcept
+{
+    negative = false; // unsigned overload: the value is its own magnitude
+    return static_cast<unsigned __int128>(value);
+}
+
+template <typename R, std::enable_if_t<!oracle_is_signed<R>::value, int> = 0>
+bool oracle_overflows_128(const unsigned __int128 magnitude, const bool negative) noexcept
+{
+    // A magnitude that fits in 128 bits fits an unsigned 128-bit target exactly;
+    // only a non-zero negative value is unrepresentable.
+    return negative && magnitude != 0U; // R only selects this overload; its limits are not consulted
+}
+
+template <typename R, std::enable_if_t<oracle_is_signed<R>::value, int> = 0>
+bool oracle_overflows_128(const unsigned __int128 magnitude, const bool negative) noexcept
+{
+    const unsigned __int128 positive_max {(static_cast<unsigned __int128>(1) << 127) - 1}; // 2^127 - 1
+    const unsigned __int128 negative_max {static_cast<unsigned __int128>(1) << 127};       // 2^127, largest negative magnitude
+    return negative ? (magnitude > negative_max) : (magnitude > positive_max);
+}
+
+// Independent reference for the C23 ckd_mul contract at the full 128-bit width.
+// The 256-bit product is assembled from 64-bit limb products so that no 128-bit
+// multiply (and therefore no __muloti4) is emitted; only native add, shift, and
+// compare on unsigned __int128 are used.
+template <typename A, typename B, typename R>
+bool ref_native_mul_overflow(const A a, const B b, R* r) noexcept
+{
+    bool a_negative {};
+    bool b_negative {};
+    const unsigned __int128 a_magnitude {native_magnitude(a, a_negative)};
+    const unsigned __int128 b_magnitude {native_magnitude(b, b_negative)};
+    // Split each 128-bit magnitude into a low (x0) and a high (x1) 64-bit limb.
+    const std::uint64_t a0 {static_cast<std::uint64_t>(a_magnitude)};
+    const std::uint64_t a1 {static_cast<std::uint64_t>(a_magnitude >> 64)};
+    const std::uint64_t b0 {static_cast<std::uint64_t>(b_magnitude)};
+    const std::uint64_t b1 {static_cast<std::uint64_t>(b_magnitude >> 64)};
+    // Four 64x64 -> 128 limb products, each returned as a hi/lo pair.
+    std::uint64_t h00 {};
+    std::uint64_t l00 {};
+    std::uint64_t h01 {};
+    std::uint64_t l01 {};
+    std::uint64_t h10 {};
+    std::uint64_t l10 {};
+    std::uint64_t h11 {};
+    std::uint64_t l11 {};
+    mul_64_to_128(a0, b0, h00, l00);
+    mul_64_to_128(a0, b1, h01, l01);
+    mul_64_to_128(a1, b0, h10, l10);
+    mul_64_to_128(a1, b1, h11, l11);
+    // Reassemble each hi/lo pair into a single 128-bit partial product.
+    const unsigned __int128 p00 {(static_cast<unsigned __int128>(h00) << 64) | l00};
+    const unsigned __int128 p01 {(static_cast<unsigned __int128>(h01) << 64) | l01};
+    const unsigned __int128 p10 {(static_cast<unsigned __int128>(h10) << 64) | l10};
+    const unsigned __int128 p11 {(static_cast<unsigned __int128>(h11) << 64) | l11};
+
+    // product = p11 * 2^128 + (p01 + p10) * 2^64 + p00, split into a low and a
+    // high 128-bit half with the carries tracked explicitly.
+    const unsigned __int128 cross {p01 + p10};
+    const bool cross_carry {cross < p01}; // the sum wrapped past 2^128
+    const unsigned __int128 low128 {p00 + (cross << 64)};
+    const bool low_carry {low128 < p00}; // the low half wrapped past 2^128
+    const unsigned __int128 high128 {p11 + (cross >> 64) +
+                                     (static_cast<unsigned __int128>(cross_carry) << 64) +
+                                     static_cast<unsigned __int128>(low_carry)};
+    // The product is negative when exactly one operand is; negate the low half modulo 2^128 before storing it.
+    const bool negative {a_negative != b_negative};
+    const unsigned __int128 wrapped {negative ? (static_cast<unsigned __int128>(0) - low128) : low128};
+    *r = static_cast<R>(wrapped);
+
+    if (high128 != 0U) // the magnitude needs more than 128 bits
+    {
+        return true;
+    }
+
+    return oracle_overflows_128<R>(low128, negative); // otherwise compare the 128-bit magnitude against R's range
+}
+
+template <typename Ref, typename Ckd>
+void native_fuzz(Ref ref_overflow, Ckd ckd_overflow)
+{
+    for (std::size_t i {0}; i < N; ++i)
+    {
+        const unsigned __int128 ua {rand_native()};
+        const unsigned __int128 ub {rand_native()};
+        const __int128 sa {static_cast<__int128>(ua)}; // signed operands reuse the same random draws
+        const __int128 sb {static_cast<__int128>(ub)};
+
+        // uint128_t target, unsigned operands
+        {
+            unsigned __int128 ref {};
+            const bool ref_of {ref_overflow(ua, ub, &ref)};
+            uint128_t got {};
+            const bool got_of {ckd_overflow(&got, lib_u(ua), lib_u(ub))};
+            BOOST_TEST_EQ(got_of, ref_of);
+            BOOST_TEST(got == lib_u(ref));
+        }
+
+        // int128_t target, signed operands
+        {
+            __int128 ref {};
+            const bool ref_of {ref_overflow(sa, sb, &ref)};
+            int128_t got {};
+            const bool got_of {ckd_overflow(&got, lib_s(sa), lib_s(sb))};
+            BOOST_TEST_EQ(got_of, ref_of);
+            BOOST_TEST(got == lib_s(ref));
+        }
+
+        // int128_t target, mixed-sign operands (unsigned lhs, signed rhs)
+        {
+            __int128 ref {};
+            const bool ref_of {ref_overflow(ua, sb, &ref)};
+            int128_t got {};
+            const bool got_of {ckd_overflow(&got, lib_u(ua), lib_s(sb))};
+            BOOST_TEST_EQ(got_of, ref_of);
+            BOOST_TEST(got == lib_s(ref));
+        }
+
+        // uint128_t target, mixed-sign operands (signed lhs, unsigned rhs)
+        {
+            unsigned __int128 ref {};
+            const bool ref_of {ref_overflow(sa, ub, &ref)};
+            uint128_t got {};
+            const bool got_of {ckd_overflow(&got, lib_s(sa), lib_u(ub))};
+            BOOST_TEST_EQ(got_of, ref_of);
+            BOOST_TEST(got == lib_u(ref));
+        }
+    }
+}
+
+void test_native_oracle()
+{
+    // Each ref_* lambda is the reference operation; the matching lib_* lambda is the library call under test.
+    const auto ref_add {[](auto x, auto y, auto* out) { return __builtin_add_overflow(x, y, out); }};
+    const auto lib_add {[](auto* out, auto x, auto y) { return ckd_add(out, x, y); }};
+    const auto ref_sub {[](auto x, auto y, auto* out) { return __builtin_sub_overflow(x, y, out); }};
+    const auto lib_sub {[](auto* out, auto x, auto y) { return ckd_sub(out, x, y); }};
+    const auto ref_mul {[](auto x, auto y, auto* out) { return ref_native_mul_overflow(x, y, out); }};
+    const auto lib_mul {[](auto* out, auto x, auto y) { return ckd_mul(out, x, y); }};
+
+    native_fuzz(ref_add, lib_add);
+    native_fuzz(ref_sub, lib_sub);
+    native_fuzz(ref_mul, lib_mul);
+}
+
+#else
+
+void test_native_oracle() {} // no-op stand-in so main() can call it when the native-oracle branch above is compiled out
+
+#endif
+
+//
+// Hand-verified edge cases that run on every platform, including those without
+// a native 128-bit type.
+//
+constexpr auto u_max {(std::numeric_limits<uint128_t>::max)()};
+constexpr auto i_max {(std::numeric_limits<int128_t>::max)()};
+constexpr auto i_min {(std::numeric_limits<int128_t>::min)()};
+
+void test_add_edges() // hand-verified ckd_add boundary cases
+{
+    uint128_t u {0};   // cases are ordered so each checked result differs from u's previous value
+    BOOST_TEST_EQ(ckd_add(&u, uint128_t{5}, int128_t{-3}), false);
+    BOOST_TEST(u == uint128_t{2});
+    BOOST_TEST_EQ(ckd_add(&u, u_max, uint128_t{1}), true);   // 2^128 wraps to 0
+    BOOST_TEST(u == uint128_t{0});
+    BOOST_TEST_EQ(ckd_add(&u, uint128_t{3}, int128_t{-5}), true);   // -2 wraps
+    BOOST_TEST(u == u_max - uint128_t{1});
+    BOOST_TEST_EQ(ckd_add(&u, u_max, u_max), true);          // carry past 2^128
+
+    int128_t i {0};    // same ordering rule, so a skipped store cannot pass by coincidence
+    BOOST_TEST_EQ(ckd_add(&i, u_max, uint128_t{0}), true);   // 2^128 - 1 unfit in signed
+    BOOST_TEST(i == int128_t{-1});
+    BOOST_TEST_EQ(ckd_add(&i, i_max, int128_t{1}), true);    // INT128_MAX + 1 -> INT128_MIN
+    BOOST_TEST(i == i_min);
+    BOOST_TEST_EQ(ckd_add(&i, i_max, i_min), false);
+    BOOST_TEST(i == int128_t{-1});
+}
+
+void test_sub_edges() // hand-verified ckd_sub boundary cases
+{
+    uint128_t u {0};
+    BOOST_TEST_EQ(ckd_sub(&u, uint128_t{0}, uint128_t{1}), true);   // -1 wraps to 2^128 - 1
+    BOOST_TEST(u == u_max);
+    BOOST_TEST_EQ(ckd_sub(&u, uint128_t{5}, uint128_t{3}), false);
+    BOOST_TEST(u == uint128_t{2});
+    BOOST_TEST_EQ(ckd_sub(&u, u_max, int128_t{-1}), true);          // 2^128 wraps to 0
+    BOOST_TEST(u == uint128_t{0});
+
+    int128_t i {0};    // cases are ordered so each checked result differs from i's previous value
+    BOOST_TEST_EQ(ckd_sub(&i, i_min, int128_t{1}), true);           // INT128_MIN - 1 -> INT128_MAX
+    BOOST_TEST(i == i_max);
+    BOOST_TEST_EQ(ckd_sub(&i, i_max, i_max), false);
+    BOOST_TEST(i == int128_t{0});
+    BOOST_TEST_EQ(ckd_sub(&i, i_max, int128_t{-1}), true);          // -> INT128_MIN
+    BOOST_TEST(i == i_min);
+    BOOST_TEST_EQ(ckd_sub(&i, i_min, i_min), false);
+    BOOST_TEST(i == int128_t{0});
+
+    // Narrow targets.
+    std::int32_t r32 {0};
+    BOOST_TEST_EQ(ckd_sub(&r32, int128_t{1000}, int128_t{2000}), false);
+    BOOST_TEST_EQ(r32, -1000);
+
+    std::uint8_t r8 {0};
+    BOOST_TEST_EQ(ckd_sub(&r8, uint128_t{0}, uint128_t{1}), true);  // -1 wraps mod 256
+    BOOST_TEST_EQ(static_cast<int>(r8), 255);
+}
+
+void test_mul_edges() // hand-verified ckd_mul boundary cases
+{
+    int128_t i {0};
+    BOOST_TEST_EQ(ckd_mul(&i, i_min, int128_t{-1}), true);          // 2^127 wraps to INT128_MIN
+    BOOST_TEST(i == i_min);
+    BOOST_TEST_EQ(ckd_mul(&i, i_max, int128_t{2}), true);           // 2^128 - 2 -> -2
+    BOOST_TEST(i == int128_t{-2});
+    BOOST_TEST_EQ(ckd_mul(&i, i_min, int128_t{1}), false);
+    BOOST_TEST(i == i_min);
+    BOOST_TEST_EQ(ckd_mul(&i, int128_t{-3}, int128_t{4}), false);
+    BOOST_TEST(i == int128_t{-12});
+    BOOST_TEST_EQ(ckd_mul(&i, int128_t{0}, int128_t{-7}), false);
+    BOOST_TEST(i == int128_t{0});
+
+    uint128_t u {0};   // cases are ordered so each checked result differs from u's previous value
+    BOOST_TEST_EQ(ckd_mul(&u, u_max, uint128_t{2}), true);          // 2^129 - 2 wraps
+    BOOST_TEST(u == u_max - uint128_t{1});
+    const uint128_t two_64 {1U, 0U};                                // 2^64
+    BOOST_TEST_EQ(ckd_mul(&u, two_64, two_64), true);               // 2^128 wraps to 0
+    BOOST_TEST(u == uint128_t{0});
+    BOOST_TEST_EQ(ckd_mul(&u, uint128_t{6}, uint128_t{7}), false);
+    BOOST_TEST(u == uint128_t{42});
+    BOOST_TEST_EQ(ckd_mul(&u, uint128_t{0}, u_max), false);
+    BOOST_TEST(u == uint128_t{0});
+    BOOST_TEST_EQ(ckd_mul(&u, u_max, int128_t{-1}), true);          // negative result in unsigned
+    BOOST_TEST(u == uint128_t{1});
+
+    // Narrow targets.
+    std::int32_t r32 {0};
+    BOOST_TEST_EQ(ckd_mul(&r32, int128_t{1000}, int128_t{1000}), false);
+    BOOST_TEST_EQ(r32, 1000000);
+
+    std::uint8_t r8 {0};
+    BOOST_TEST_EQ(ckd_mul(&r8, uint128_t{20}, uint128_t{20}), true);    // 400 wraps mod 256
+    BOOST_TEST_EQ(static_cast<int>(r8), 144);
+
+    std::int8_t r8s {0};
+    BOOST_TEST_EQ(ckd_mul(&r8s, int128_t{-5}, int128_t{20}), false);    // -100 still fits in int8_t
+    BOOST_TEST_EQ(static_cast<int>(r8s), -100);
+}
+
+//
+// constexpr usability of ckd_add, ckd_sub and ckd_mul.
+//
+
+#if defined(__GNUC__) && __GNUC__ <= 7 && !defined(__clang__) && !defined(__SIZEOF_INT128__)
+#  define BOOST_INT128_TEST_CKD_NO_CONSTEXPR_128
+#endif
+
+constexpr bool add_overflows_int_max() // overflow flag returned by ckd_add for INT_MAX + 1 into an int target
+{
+    int r {0};
+    return ckd_add(&r, (std::numeric_limits<int>::max)(), 1);
+}
+
+constexpr bool sub_overflows_int_min() // overflow flag returned by ckd_sub for INT_MIN - 1 into an int target
+{
+    int r {0};
+    return ckd_sub(&r, (std::numeric_limits<int>::min)(), 1);
+}
+
+constexpr bool mul_overflows_int_max() // overflow flag returned by ckd_mul for INT_MAX * 2 into an int target
+{
+    int r {0};
+    return ckd_mul(&r, (std::numeric_limits<int>::max)(), 2);
+}
+
+constexpr int sub_value() // value stored by ckd_sub for 5 - 3; the overflow flag is discarded
+{
+    int r {0};
+    ckd_sub(&r, 5, 3);
+    return r;
+}
+
+constexpr int mul_value() // value stored by ckd_mul for 6 * 7; the overflow flag is discarded
+{
+    int r {0};
+    ckd_mul(&r, 6, 7);
+    return r;
+}
+
+constexpr bool mul_overflows_i128_min() // overflow flag returned by ckd_mul for INT128_MIN * -1 into an int128_t target
+{
+    int128_t r {0};
+    return ckd_mul(&r, (std::numeric_limits<int128_t>::min)(), int128_t{-1});
+}
+
+#ifndef BOOST_INT128_TEST_CKD_NO_CONSTEXPR_128
+
+// MSVC 14.1 warns of integral overflow
+#ifdef _MSC_VER
+#  pragma warning(push)
+#  pragma warning(disable: 4307)
+#endif
+
+void test_constexpr() // every check is a static_assert, so a failure is a compile error rather than a runtime report
+{
+    static_assert(add_overflows_int_max(),  "INT_MAX + 1 overflows int");
+    static_assert(sub_overflows_int_min(),  "INT_MIN - 1 overflows int");
+    static_assert(mul_overflows_int_max(),  "INT_MAX * 2 overflows int");
+    static_assert(sub_value() == 2,         "5 - 3 == 2");
+    static_assert(mul_value() == 42,        "6 * 7 == 42");
+    static_assert(mul_overflows_i128_min(), "INT128_MIN * -1 overflows int128_t");
+}
+
+#ifdef _MSC_VER
+#  pragma warning(pop)
+#endif
+
+#endif
+
+int main() // runs the oracle comparisons, then the hand-verified edge cases
+{
+    test_standard_oracle();
+    test_native_oracle();
+    test_add_edges();
+    test_sub_edges();
+    test_mul_edges();
+
+    #ifndef BOOST_INT128_TEST_CKD_NO_CONSTEXPR_128
+    test_constexpr(); // only defined when the macro is not set (same guard as its definition)
+    #endif
+
+    return boost::report_errors(); // exit status reflects whether any BOOST_TEST* check failed
+}
diff --git a/test/test_consteval_funcs.cpp b/test/test_consteval_funcs.cpp
index afab09c4..64f1e79d 100644
--- a/test/test_consteval_funcs.cpp
+++ b/test/test_consteval_funcs.cpp
@@ -2,9 +2,14 @@
 // Distributed under the Boost Software License, Version 1.0.
 // https://www.boost.org/LICENSE_1_0.txt
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
 #include <boost/int128.hpp>
 
+// Only warns on MSVC 14.1
+#ifdef _MSC_VER
+#  pragma warning(push)
+#  pragma warning(disable:4307)
+#endif
+
 #if defined(__cpp_consteval) && __cpp_consteval >= 201811L
 #  define BOOST_INT128_CONSTEVAL consteval
 #else
diff --git a/test/test_countl_one.cu b/test/test_countl_one.cu
index ed76e92f..9bf23038 100644
--- a/test/test_countl_one.cu
+++ b/test/test_countl_one.cu
@@ -3,8 +3,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <vector>
 #include <random>
diff --git a/test/test_countl_zero.cu b/test/test_countl_zero.cu
index 829584de..6f211909 100644
--- a/test/test_countl_zero.cu
+++ b/test/test_countl_zero.cu
@@ -3,8 +3,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <vector>
 #include <random>
diff --git a/test/test_countr_one.cu b/test/test_countr_one.cu
index 9024cc29..8f9c0439 100644
--- a/test/test_countr_one.cu
+++ b/test/test_countr_one.cu
@@ -3,8 +3,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <vector>
 #include <random>
diff --git a/test/test_countr_zero.cu b/test/test_countr_zero.cu
index 2cbf5b7d..4c6e5b34 100644
--- a/test/test_countr_zero.cu
+++ b/test/test_countr_zero.cu
@@ -3,8 +3,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <vector>
 #include <random>
diff --git a/test/test_cross_type_assign.cpp b/test/test_cross_type_assign.cpp
new file mode 100644
index 00000000..1bd0919a
--- /dev/null
+++ b/test/test_cross_type_assign.cpp
@@ -0,0 +1,272 @@
+// Copyright 2026 Matt Borland
+// Distributed under the Boost Software License, Version 1.0.
+// https://www.boost.org/LICENSE_1_0.txt
+
+#include <boost/int128.hpp>
+#include <boost/core/lightweight_test.hpp>
+#include <type_traits>
+#include <utility>
+#include <limits>
+#include <cmath>
+
+using namespace boost::int128;
+
+void test_implicit_conversion_traits() // compile-time only: every check is a static_assert on a type trait
+{
+    static_assert(std::is_convertible<int128_t, uint128_t>::value, "int128_t -> uint128_t should be implicit");
+    static_assert(std::is_convertible<uint128_t, int128_t>::value, "uint128_t -> int128_t should be implicit");
+    static_assert(std::is_assignable<int128_t&, uint128_t>::value, "uint128_t should be assignable to int128_t");
+    static_assert(std::is_assignable<uint128_t&, int128_t>::value, "int128_t should be assignable to uint128_t");
+
+    // Implicit conversions to builtin integer types (matches __int128 behavior)
+    static_assert(std::is_convertible<int128_t, int>::value, "int128_t -> int should be implicit");
+    static_assert(std::is_convertible<int128_t, unsigned int>::value, "int128_t -> unsigned int should be implicit");
+    static_assert(std::is_convertible<int128_t, std::int64_t>::value, "int128_t -> int64_t should be implicit");
+    static_assert(std::is_convertible<int128_t, std::uint64_t>::value, "int128_t -> uint64_t should be implicit");
+    static_assert(std::is_convertible<uint128_t, int>::value, "uint128_t -> int should be implicit");
+    static_assert(std::is_convertible<uint128_t, unsigned int>::value, "uint128_t -> unsigned int should be implicit");
+
+    // Implicit conversions to floating-point types
+    static_assert(std::is_convertible<int128_t, float>::value, "int128_t -> float should be implicit");
+    static_assert(std::is_convertible<int128_t, double>::value, "int128_t -> double should be implicit");
+    static_assert(std::is_convertible<uint128_t, float>::value, "uint128_t -> float should be implicit");
+    static_assert(std::is_convertible<uint128_t, double>::value, "uint128_t -> double should be implicit");
+
+#if defined(BOOST_INT128_HAS_INT128)
+    // Implicit conversions to the builtin 128-bit types (only when the compiler provides them)
+    static_assert(std::is_convertible<int128_t, detail::builtin_i128>::value, "int128_t -> __int128 should be implicit");
+    static_assert(std::is_convertible<int128_t, detail::builtin_u128>::value, "int128_t -> unsigned __int128 should be implicit");
+    static_assert(std::is_convertible<uint128_t, detail::builtin_i128>::value, "uint128_t -> __int128 should be implicit");
+    static_assert(std::is_convertible<uint128_t, detail::builtin_u128>::value, "uint128_t -> unsigned __int128 should be implicit");
+#endif
+}
+
+void test_implicit_conversions_runtime() // copy-initializes builtin types from the 128-bit types and checks the values
+{
+    const int128_t i {0, 42U}; // constructor arguments are (high, low)
+
+    const int as_int = i;
+    BOOST_TEST_EQ(as_int, 42);
+
+    const std::uint64_t as_u64 = i;
+    BOOST_TEST_EQ(as_u64, 42U);
+
+    const double as_double = i;
+    BOOST_TEST_EQ(static_cast<int>(as_double), 42); // compared after conversion back to int
+
+    const uint128_t u {0U, 100U};
+    const unsigned int as_uint = u;
+    BOOST_TEST_EQ(as_uint, 100U);
+
+    const float as_float = u;
+    BOOST_TEST_EQ(static_cast<int>(as_float), 100); // compared after conversion back to int
+
+#if defined(BOOST_INT128_HAS_INT128)
+    const detail::builtin_i128 as_native_i = int128_t{1, 2U};
+    BOOST_TEST(as_native_i == ((static_cast<detail::builtin_i128>(1) << 64) | 2)); // high word lands in bits 64..127
+
+    const detail::builtin_u128 as_native_u = uint128_t{3U, 4U};
+    BOOST_TEST(as_native_u == ((static_cast<detail::builtin_u128>(3) << 64) | 4));
+#endif
+}
+
+void test_uint_to_int_construction() // int128_t built from a uint128_t must carry over both 64-bit words unchanged
+{
+    const uint128_t u {1U, 42U}; // high = 1, low = 42
+
+    // Direct-list-initialization from an lvalue
+    const int128_t a {u};
+    BOOST_TEST_EQ(a.low, u.low);
+    BOOST_TEST_EQ(static_cast<std::uint64_t>(a.high), u.high);
+
+    // Copy-initialization (implicit conversion)
+    const int128_t b = u;
+    BOOST_TEST_EQ(b.low, u.low);
+    BOOST_TEST_EQ(static_cast<std::uint64_t>(b.high), u.high);
+
+    // Construction from an rvalue
+    uint128_t u_movable {1U, 42U};
+    const int128_t c {std::move(u_movable)};
+    BOOST_TEST_EQ(c.low, 42U);
+    BOOST_TEST_EQ(c.high, 1);
+}
+
+void test_int_to_uint_construction() // uint128_t built from an int128_t must carry over both 64-bit words unchanged
+{
+    const int128_t i {-1, 0xFFFFFFFFFFFFFFFFULL}; // high = -1, low = all ones
+
+    const uint128_t a {i}; // direct-list-initialization from an lvalue
+    BOOST_TEST_EQ(a.low, i.low);
+    BOOST_TEST_EQ(a.high, static_cast<std::uint64_t>(i.high));
+
+    const uint128_t b = i; // copy-initialization (implicit conversion)
+    BOOST_TEST_EQ(b.low, i.low);
+    BOOST_TEST_EQ(b.high, static_cast<std::uint64_t>(i.high));
+
+    int128_t i_movable {-1, 0xFFFFFFFFFFFFFFFFULL};
+    const uint128_t c {std::move(i_movable)}; // construction from an rvalue
+    BOOST_TEST_EQ(c.high, 0xFFFFFFFFFFFFFFFFULL);
+    BOOST_TEST_EQ(c.low, 0xFFFFFFFFFFFFFFFFULL);
+}
+
+void test_uint_to_int_assignment() // assigning a uint128_t to an int128_t must carry over both 64-bit words
+{
+    const uint128_t u {7U, 99U}; // high = 7, low = 99
+
+    // Assignment from an lvalue via implicit conversion
+    int128_t a {};
+    a = u;
+    BOOST_TEST_EQ(a.low, 99U);
+    BOOST_TEST_EQ(a.high, 7);
+
+    // Assignment from a temporary via implicit conversion
+    int128_t b {};
+    b = uint128_t{7U, 99U};
+    BOOST_TEST_EQ(b.low, 99U);
+    BOOST_TEST_EQ(b.high, 7);
+}
+
+void test_int_to_uint_assignment() // assigning an int128_t to a uint128_t must carry over both 64-bit words
+{
+    const int128_t i {-2, 0x1234U}; // negative high word, so the unsigned high word is compared via a cast
+
+    uint128_t a {};
+    a = i; // assignment from an lvalue
+    BOOST_TEST_EQ(a.low, 0x1234U);
+    BOOST_TEST_EQ(a.high, static_cast<std::uint64_t>(-2));
+
+    uint128_t b {};
+    b = int128_t{-2, 0x1234U}; // assignment from a temporary
+    BOOST_TEST_EQ(b.low, 0x1234U);
+    BOOST_TEST_EQ(b.high, static_cast<std::uint64_t>(-2));
+}
+
+void test_constexpr_cross_type() // cross-type construction must be usable in constant expressions
+{
+    constexpr uint128_t u {1U, 42U};
+    constexpr int128_t a {u}; // uint128_t -> int128_t at compile time
+    static_assert(a.low == 42U, "constexpr cross-type construction");
+
+    constexpr int128_t i {-1, 7U};
+    constexpr uint128_t b {i}; // int128_t -> uint128_t at compile time
+    static_assert(b.low == 7U, "constexpr cross-type construction");
+}
+
+template <typename Float> // Float: the floating-point source type; main() instantiates float, double and long double
+void test_uint_from_float()
+{
+    // Basic non-negative values
+    BOOST_TEST_EQ(uint128_t{Float{0}}.low, 0U);
+    BOOST_TEST_EQ(uint128_t{Float{0}}.high, 0U);
+    BOOST_TEST_EQ(uint128_t{Float{42}}.low, 42U);
+    BOOST_TEST_EQ(uint128_t{Float{42}}.high, 0U);
+
+    // Truncation toward zero. Use Float{N}/Float{D} rather than a double literal
+    // so the test compiles cleanly for float and long double without precision warnings.
+    BOOST_TEST_EQ((uint128_t{Float{37} / Float{10}}.low), 3U);  // ~3.7 -> 3
+    BOOST_TEST_EQ((uint128_t{Float{99} / Float{100}}.low), 0U); // ~0.99 -> 0
+
+    // NaN -> 0
+    const Float nan {std::numeric_limits<Float>::quiet_NaN()};
+    BOOST_TEST_EQ(uint128_t{nan}.low, 0U);
+    BOOST_TEST_EQ(uint128_t{nan}.high, 0U);
+
+    // Negative -> 0 (matches libgcc)
+    BOOST_TEST_EQ(uint128_t{Float{-1}}.low, 0U);
+    BOOST_TEST_EQ(uint128_t{Float{-1}}.high, 0U);
+
+    // Saturation on overflow: infinity (or any value >= 2^128) -> UINT128_MAX.
+    // For float, 2^128 itself is +infinity since the 8-bit exponent saturates.
+    const Float two_64 {static_cast<Float>(UINT64_C(1) << 32) * static_cast<Float>(UINT64_C(1) << 32)}; // 2^64 = 2^32 * 2^32, used further down
+    const uint128_t saturated {std::numeric_limits<Float>::infinity()};
+    BOOST_TEST_EQ(saturated.low, UINT64_MAX);
+    BOOST_TEST_EQ(saturated.high, UINT64_MAX);
+
+    // 2^127 should fit (representable in float, double, long double)
+    const Float two_127 {two_64 * static_cast<Float>(UINT64_C(1) << 63)};
+    const uint128_t large {two_127};
+    BOOST_TEST_EQ(large.low, 0U);
+    BOOST_TEST_EQ(large.high, UINT64_C(1) << 63);
+
+    // Exact conversion of a representable value that needs the high word (2^64)
+    const Float round_trip_src {two_64};  // 2^64
+    const uint128_t round_trip {round_trip_src};
+    BOOST_TEST_EQ(round_trip.low, 0U);
+    BOOST_TEST_EQ(round_trip.high, 1U);
+}
+
+template <typename Float> // Float: the floating-point source type; main() instantiates float, double and long double
+void test_int_from_float()
+{
+    // Basic positive and negative
+    BOOST_TEST_EQ(int128_t{Float{0}}.low, 0U);
+    BOOST_TEST_EQ(int128_t{Float{0}}.high, 0);
+    BOOST_TEST_EQ(int128_t{Float{42}}.low, 42U);
+    BOOST_TEST_EQ(int128_t{Float{-42}}.low, static_cast<std::uint64_t>(-42));
+    BOOST_TEST_EQ(int128_t{Float{-42}}.high, -1);
+
+    // Truncation toward zero (see note in test_uint_from_float on the literal style).
+    BOOST_TEST_EQ((int128_t{Float{37} / Float{10}}.low), 3U);                                 // ~3.7 -> 3
+    BOOST_TEST_EQ((int128_t{Float{-37} / Float{10}}.low), static_cast<std::uint64_t>(-3));    // ~-3.7 -> -3
+
+    // NaN -> 0
+    const Float nan {std::numeric_limits<Float>::quiet_NaN()};
+    BOOST_TEST_EQ(int128_t{nan}.low, 0U);
+    BOOST_TEST_EQ(int128_t{nan}.high, 0);
+
+    // Positive saturation: f >= 2^127 -> INT128_MAX
+    const Float two_64 {static_cast<Float>(UINT64_C(1) << 32) * static_cast<Float>(UINT64_C(1) << 32)};
+    const Float two_127 {two_64 * static_cast<Float>(UINT64_C(1) << 63)};
+    const int128_t pos_sat {two_127};
+    BOOST_TEST_EQ(pos_sat.high, (std::numeric_limits<std::int64_t>::max)());
+    BOOST_TEST_EQ(pos_sat.low, UINT64_MAX);
+
+    // Negative saturation: f <= -2^127 -> INT128_MIN
+    const int128_t neg_sat {-two_127};
+    BOOST_TEST_EQ(neg_sat.high, (std::numeric_limits<std::int64_t>::min)());
+    BOOST_TEST_EQ(neg_sat.low, 0U);
+
+    // 2^126 is below the 2^127 boundary and must convert exactly instead of saturating.
+    const int128_t near_max {two_127 / Float{2}};  // 2^126
+    BOOST_TEST_EQ(near_max.high, INT64_C(1) << 62);  // signed literal: high is the signed word
+    BOOST_TEST_EQ(near_max.low, 0U);
+
+    // A negative power of two exercises the two's-complement path: -2^64 is high = -1, low = 0
+    const int128_t neg_round_trip {-two_64};  // -2^64
+    BOOST_TEST_EQ(neg_round_trip.low, 0U);
+    BOOST_TEST_EQ(neg_round_trip.high, -1);
+}
+
+void test_constexpr_float_construction() // construction from double must be usable in constant expressions
+{
+    constexpr uint128_t u {42.5};
+    static_assert(u.low == 42U, "constexpr uint from double"); // fractional part is dropped
+
+    constexpr int128_t i {-7.9};
+    static_assert(i.high == -1, "constexpr int from double sign"); // a negative result has an all-ones high word
+
+    // NaN -> 0 is exercised at runtime in test_uint_from_float / test_int_from_float.
+    // It cannot be constant-evaluated on GCC 9, which rejects NaN comparisons in
+    // constexpr contexts.
+}
+
+int main() // integer cross-type checks first, then the floating-point construction checks
+{
+    test_implicit_conversion_traits();
+    test_uint_to_int_construction();
+    test_int_to_uint_construction();
+    test_uint_to_int_assignment();
+    test_int_to_uint_assignment();
+    test_constexpr_cross_type();
+    test_implicit_conversions_runtime();
+
+    test_uint_from_float<float>();
+    test_uint_from_float<double>();
+    test_uint_from_float<long double>();
+    test_int_from_float<float>();
+    test_int_from_float<double>();
+    test_int_from_float<long double>();
+    test_constexpr_float_construction();
+
+    return boost::report_errors(); // exit status reflects whether any BOOST_TEST* check failed
+}
diff --git a/test/test_div_primitives.cpp b/test/test_div_primitives.cpp
new file mode 100644
index 00000000..694f0fef
--- /dev/null
+++ b/test/test_div_primitives.cpp
@@ -0,0 +1,266 @@
+// Copyright 2025 Matt Borland
+// Distributed under the Boost Software License, Version 1.0.
+// https://www.boost.org/LICENSE_1_0.txt
+
+// Validates the low-level division building blocks in detail/common_div.hpp:
+//   * udiv_2by1 / divlu : 128/64 -> 64-bit quotient + remainder
+//   * div3by2           : 128/128 (divisor >= 2^64) -> single 64-bit quotient + 128-bit remainder
+//
+// div3by2 is cross-checked against the independent 32-bit-limb Knuth Algorithm D
+// (impl::knuth_divide), which is a completely separate implementation, so this check is valid
+// on every platform. Where a native 128-bit integer exists it is also used as an oracle.
+
+#include <boost/int128/int128.hpp>
+#include <boost/int128/cstdlib.hpp>
+#include <boost/int128/iostream.hpp>
+#include <boost/int128/detail/common_div.hpp>
+#include <boost/core/lightweight_test.hpp>
+#include <random>
+#include <cstdint>
+
+using namespace boost::int128;
+
+static std::mt19937_64 rng(0xC0FFEEULL);
+static std::uniform_int_distribution<std::uint64_t> dist(0, UINT64_MAX);
+
+// Independent oracle for 128/128 division with divisor >= 2^64, using the 32-bit-limb
+// Knuth Algorithm D that the library retains. Writes the quotient to quot and the remainder to rem.
+static void knuth_oracle(const std::uint64_t uh, const std::uint64_t ul,
+                         const std::uint64_t vh, const std::uint64_t vl,
+                         uint128_t& quot, uint128_t& rem)
+{
+    const uint128_t u_val {uh, ul}; // dividend assembled from (high, low)
+    const uint128_t v_val {vh, vl}; // divisor assembled from (high, low)
+
+    if (u_val < v_val) // dividend smaller than divisor: answered directly, knuth_divide is not called
+    {
+        quot = uint128_t{UINT64_C(0)};
+        rem = u_val;
+        return;
+    }
+
+    std::uint32_t u[4] {};
+    std::uint32_t v[4] {};
+    std::uint32_t q[4] {};
+
+    const auto m {detail::impl::to_words(u_val, u)}; // m, n: values returned by to_words; presumably the word counts in use (TODO confirm)
+    const auto n {detail::impl::to_words(v_val, v)};
+
+    detail::impl::knuth_divide<true>(u, m, v, n, q);
+
+    quot = detail::impl::from_words<uint128_t>(q);
+    rem = detail::impl::from_words<uint128_t>(u); // the remainder is read back from the dividend buffer after the call
+}
+
+static void check_div3by2(const std::uint64_t uh, const std::uint64_t ul, // dividend (high, low)
+                          const std::uint64_t vh, const std::uint64_t vl) // divisor (high, low); callers keep vh != 0
+{
+    std::uint64_t rem_hi {};
+    std::uint64_t rem_lo {};
+    const auto q {detail::div3by2<true>(uh, ul, vh, vl, rem_hi, rem_lo)}; // remainder comes back through rem_hi / rem_lo
+
+    uint128_t expected_q {};
+    uint128_t expected_r {};
+    knuth_oracle(uh, ul, vh, vl, expected_q, expected_r);
+
+    // The quotient always fits in 64 bits when the divisor is >= 2^64
+    BOOST_TEST_EQ(expected_q.high, UINT64_C(0));
+    BOOST_TEST_EQ(q, expected_q.low);
+    BOOST_TEST_EQ(uint128_t(rem_hi, rem_lo), expected_r);
+}
+
+static void test_div3by2_random()
+{
+    // The draw order (uh, ul, vh, vl) is part of the test: it fixes which seeded values each operand gets.
+    constexpr int trial_count {2000000};
+
+    for (int trial {0}; trial != trial_count; ++trial)
+    {
+        const std::uint64_t dividend_hi {dist(rng)};
+        const std::uint64_t dividend_lo {dist(rng)};
+        const std::uint64_t raw_divisor_hi {dist(rng)};
+        const std::uint64_t divisor_lo {dist(rng)};
+
+        // divisor must be >= 2^64 for div3by2, so a zero high word is replaced by 1
+        const std::uint64_t divisor_hi {raw_divisor_hi == 0 ? UINT64_C(1) : raw_divisor_hi};
+
+        check_div3by2(dividend_hi, dividend_lo, divisor_hi, divisor_lo);
+    }
+}
+
+static void test_div3by2_edges() // exhaustive cross product of boundary words for dividend and divisor
+{
+    const std::uint64_t test_words[] {
+        UINT64_C(0), UINT64_C(1), UINT64_C(2), UINT64_C(3),
+        UINT64_C(0x7FFFFFFFFFFFFFFF), UINT64_C(0x8000000000000000),
+        UINT64_C(0xFFFFFFFFFFFFFFFF), UINT64_C(0x0123456789ABCDEF),
+        UINT64_C(0xFFFFFFFF), UINT64_C(0x100000000)
+    };
+
+    for (const auto vh : test_words)
+    {
+        if (vh == 0)
+        {
+            continue; // div3by2 requires vh != 0
+        }
+
+        for (const auto vl : test_words)
+        {
+            for (const auto uh : test_words)
+            {
+                for (const auto ul : test_words)
+                {
+                    check_div3by2(uh, ul, vh, vl); // includes dividends smaller than the divisor
+                }
+            }
+        }
+    }
+
+    // abs(INT128_MIN) == 2^127 as a dividend, divided by a range of >= 2^64 divisors
+    for (const auto vl : test_words)
+    {
+        check_div3by2(UINT64_C(0x8000000000000000), UINT64_C(0), UINT64_C(0x8000000000000001), vl);
+        check_div3by2(UINT64_C(0x8000000000000000), UINT64_C(0), UINT64_C(0xFFFFFFFFFFFFFFFF), vl);
+    }
+}
+
+#if defined(BOOST_INT128_HAS_INT128)
+
+// Construct dividends of the exact form V*q + offset to stress the correction / add-back path,
+// where the single-digit quotient estimate is most likely to be one too large.
+static void test_div3by2_boundary()
+{
+    const std::uint64_t div_hi[] {
+        UINT64_C(1), UINT64_C(0x8000000000000000), UINT64_C(0xFFFFFFFFFFFFFFFF),
+        UINT64_C(0x0123456789ABCDEF), UINT64_C(0x00000000FFFFFFFF)
+    };
+    const std::uint64_t div_lo[] {
+        UINT64_C(0), UINT64_C(1), UINT64_C(0xFFFFFFFFFFFFFFFF), UINT64_C(0xDEADBEEFCAFEBABE)
+    };
+    const std::uint64_t quotients[] {
+        UINT64_C(1), UINT64_C(2), UINT64_C(7), UINT64_C(0xFFFFFFFF),
+        UINT64_C(0xFFFFFFFFFFFFFFFF), UINT64_C(0x8000000000000000), UINT64_C(0x123456789)
+    };
+
+    for (const auto vh : div_hi)
+    {
+        for (const auto vl : div_lo)
+        {
+            const detail::builtin_u128 v_val {(static_cast<detail::builtin_u128>(vh) << 64) | vl};
+
+            for (const auto q : quotients)
+            {
+                const detail::builtin_u128 prod {v_val * q}; // depends only on (V, q), so computed once per quotient
+
+                // skip combinations where V*q already overflows 128 bits
+                if (q != 0 && (prod / q) != v_val)
+                {
+                    continue;
+                }
+
+                // offsets just below the divisor are where the maximum remainder lives
+                const detail::builtin_u128 offsets[] {
+                    detail::builtin_u128{0}, detail::builtin_u128{1}, v_val - 1, v_val >> 1
+                };
+
+                for (const auto off : offsets)
+                {
+                    const detail::builtin_u128 u_val {prod + off};
+                    if (u_val < prod)
+                    {
+                        continue; // offset pushed us past 2^128
+                    }
+
+                    const auto uh {static_cast<std::uint64_t>(u_val >> 64)};
+                    const auto ul {static_cast<std::uint64_t>(u_val)};
+
+                    std::uint64_t rem_hi {};
+                    std::uint64_t rem_lo {};
+                    const auto got_q {detail::div3by2<true>(uh, ul, vh, vl, rem_hi, rem_lo)};
+
+                    BOOST_TEST_EQ(got_q, static_cast<std::uint64_t>(u_val / v_val)); // native division is the oracle
+                    BOOST_TEST_EQ(uint128_t(rem_hi, rem_lo), static_cast<uint128_t>(u_val % v_val));
+                }
+            }
+        }
+    }
+}
+
+static void check_2by1(const std::uint64_t u1, const std::uint64_t u0, const std::uint64_t d) // (u1:u0) / d; callers pass d != 0 and u1 < d
+{
+    const detail::builtin_u128 full {(static_cast<detail::builtin_u128>(u1) << 64) | u0};
+    const auto expected_q {static_cast<std::uint64_t>(full / d)}; // native 128-bit division is the oracle
+    const auto expected_r {static_cast<std::uint64_t>(full % d)};
+
+    std::uint64_t r {};
+    const auto q {detail::udiv_2by1(u1, u0, d, r)};
+    BOOST_TEST_EQ(q, expected_q);
+    BOOST_TEST_EQ(r, expected_r);
+
+    // divlu is the portable fallback that udiv_2by1 uses off x86-64 / MSVC; test it directly too
+    std::uint64_t r2 {};
+    const auto q2 {detail::divlu(u1, u0, d, r2)};
+    BOOST_TEST_EQ(q2, expected_q);
+    BOOST_TEST_EQ(r2, expected_r);
+}
+
+static void test_udiv_2by1_random()
+{
+    // Random 128/64 divisions; every operand triple is handed to check_2by1.
+    constexpr int trial_count {2000000};
+
+    for (int trial {0}; trial != trial_count; ++trial)
+    {
+        // Draw order (low word, divisor, high word) fixes which seeded values each operand gets.
+        const std::uint64_t low_word {dist(rng)};
+        const std::uint64_t raw_divisor {dist(rng)};
+        const std::uint64_t divisor {raw_divisor == 0 ? UINT64_C(1) : raw_divisor}; // never divide by zero
+        const std::uint64_t high_word {dist(rng) % divisor}; // precondition: high word < divisor
+
+        check_2by1(high_word, low_word, divisor);
+    }
+}
+
+static void test_udiv_2by1_edges() // boundary divisors crossed with boundary low words, at three high-word positions
+{
+    const std::uint64_t divisors[] {
+        UINT64_C(1), UINT64_C(2), UINT64_C(3), UINT64_C(10),
+        UINT64_C(0xFFFFFFFF), UINT64_C(0x100000000), UINT64_C(0x80000000),
+        UINT64_C(0x7FFFFFFFFFFFFFFF), UINT64_C(0x8000000000000000), UINT64_C(0xFFFFFFFFFFFFFFFF)
+    };
+    const std::uint64_t lows[] {
+        UINT64_C(0), UINT64_C(1), UINT64_C(0x80000000),
+        UINT64_C(0xFFFFFFFF), UINT64_C(0xFFFFFFFFFFFFFFFF)
+    };
+
+    for (const auto d : divisors)
+    {
+        for (const auto u0 : lows)
+        {
+            check_2by1(UINT64_C(0), u0, d);        // u1 == 0
+            check_2by1(d - 1, u0, d);              // u1 == d - 1 (maximal)
+            if (d > 1)                             // for d == 1, d / 2 == 0 repeats the u1 == 0 case
+            {
+                check_2by1(d / 2, u0, d);
+            }
+        }
+    }
+}
+
+#endif // BOOST_INT128_HAS_INT128
+
+int main() // the Knuth-checked tests run everywhere; the natively checked ones need a builtin 128-bit type
+{
+    test_div3by2_random();
+    test_div3by2_edges();
+
+    #if defined(BOOST_INT128_HAS_INT128)
+
+    test_div3by2_boundary();
+    test_udiv_2by1_random();
+    test_udiv_2by1_edges();
+
+    #endif
+
+    return boost::report_errors(); // exit status reflects whether any BOOST_TEST* check failed
+}
diff --git a/test/test_fail_sign_compare.cpp b/test/test_fail_sign_compare.cpp
deleted file mode 100644
index 5dd13370..00000000
--- a/test/test_fail_sign_compare.cpp
+++ /dev/null
@@ -1,58 +0,0 @@
-// Copyright 2025 Matt Borland
-// Distributed under the Boost Software License, Version 1.0.
-// https://www.boost.org/LICENSE_1_0.txt
-
-#define BOOST_INT128_ALLOW_SIGN_COMPARE
-
-// Only sign compare means that we should still be failing to compile on operations
-
-#include <boost/int128/int128.hpp>
-#include <boost/core/lightweight_test.hpp>
-
-void test_u128()
-{
-    boost::int128::uint128_t x {5U};
-    BOOST_TEST(x > 4);
-    BOOST_TEST(x >= 4);
-    BOOST_TEST(x == 5);
-    BOOST_TEST(x != 0);
-    BOOST_TEST(x <= 5);
-    BOOST_TEST(x < 10);
-
-    x *= 2;
-    BOOST_TEST(x == 10);
-    x += 2;
-    BOOST_TEST(x == 12);
-    x -= 2;
-    BOOST_TEST(x == 10);
-    x /= 2;
-    BOOST_TEST(x == 5);
-}
-
-void test_int128()
-{
-    boost::int128::int128_t x {5};
-    BOOST_TEST(x > 4U);
-    BOOST_TEST(x >= 4U);
-    BOOST_TEST(x == 5U);
-    BOOST_TEST(x != 3U);
-    BOOST_TEST(x <= 5U);
-    BOOST_TEST(x < 10U);
-
-    x *= 2U;
-    BOOST_TEST(x == 10U);
-    x += 2U;
-    BOOST_TEST(x == 12U);
-    x -= 2U;
-    BOOST_TEST(x == 10U);
-    x /= 2U;
-    BOOST_TEST(x == 5U);
-}
-
-int main()
-{
-    test_u128();
-    test_int128();
-
-    return boost::report_errors();
-}
diff --git a/test/test_from_chars_bases.cpp b/test/test_from_chars_bases.cpp
new file mode 100644
index 00000000..4e637c3a
--- /dev/null
+++ b/test/test_from_chars_bases.cpp
@@ -0,0 +1,248 @@
+// Copyright 2026 Matt Borland
+// Distributed under the Boost Software License, Version 1.0.
+// https://www.boost.org/LICENSE_1_0.txt
+
+// Exercises boost::int128::detail::from_chars across every base it supports
+// (2..36) for both int128_t and uint128_t. Locks in the fix to the per-iteration
+// overflow threshold (the spurious overflow_value <<= 1 was masking MAX+1
+// overflow in mini_from_chars).
+
+#ifndef BOOST_INT128_BUILD_MODULE
+
+#include <boost/int128/int128.hpp>
+#include <boost/int128/detail/mini_from_chars.hpp>
+
+#else
+
+import boost.int128;
+
+#endif
+
+#include <boost/core/lightweight_test.hpp>
+
+#include <algorithm>
+#include <cerrno>
+#include <limits>
+#include <string>
+
+namespace {
+
+using boost::int128::int128_t;
+using boost::int128::uint128_t;
+
+// Format a uint128_t into a base-N string (lowercase). Self-contained so the
+// test does not pull in boost::charconv just to generate inputs.
+std::string format_unsigned(uint128_t value, int base)
+{
+    if (value == uint128_t{0U})
+    {
+        return "0";
+    }
+
+    const auto ubase {static_cast<unsigned>(base)};
+    std::string out;
+    while (value != uint128_t{0U})
+    {
+        const auto digit {static_cast<unsigned>(value % ubase)};
+        const char c {digit < 10U ? static_cast<char>('0' + digit)
+                                  : static_cast<char>('a' + digit - 10U)};
+        out.push_back(c);
+        value /= ubase;
+    }
+    std::reverse(out.begin(), out.end());
+    return out;
+}
+
+std::string format_signed(int128_t value, int base)
+{
+    if (value == int128_t{0})
+    {
+        return "0";
+    }
+
+    if (value == (std::numeric_limits<int128_t>::min)())
+    {
+        // |INT128_MIN| does not fit in int128_t; do the magnitude in uint128_t.
+        const uint128_t magnitude {uint128_t{1} << 127U};
+        std::string out {format_unsigned(magnitude, base)};
+        out.insert(out.begin(), '-');
+        return out;
+    }
+
+    if (value < int128_t{0})
+    {
+        std::string out {format_unsigned(static_cast<uint128_t>(-value), base)};
+        out.insert(out.begin(), '-');
+        return out;
+    }
+
+    return format_unsigned(static_cast<uint128_t>(value), base);
+}
+
+inline std::string format_value(int128_t value, int base)
+{
+    return format_signed(value, base);
+}
+
+inline std::string format_value(uint128_t value, int base)
+{
+    return format_unsigned(value, base);
+}
+
+template <typename T>
+void check_roundtrip(T expected, int base)
+{
+    const std::string s {format_value(expected, base)};
+
+    T parsed {};
+    const auto r {boost::int128::detail::from_chars(s.data(), s.data() + s.size(), parsed, base)};
+
+    BOOST_TEST_LT(r, 0);
+    BOOST_TEST(parsed == expected);
+}
+
+template <typename T>
+void check_overflow(const std::string& s, int base)
+{
+    T parsed {};
+    const auto r {boost::int128::detail::from_chars(s.data(), s.data() + s.size(), parsed, base)};
+
+    BOOST_TEST_EQ(r, EDOM);
+}
+
+void test_uint128_all_bases()
+{
+    constexpr auto max_value {(std::numeric_limits<uint128_t>::max)()};
+
+    for (int base {2}; base <= 36; ++base)
+    {
+        // Canonical small values.
+        check_roundtrip<uint128_t>(uint128_t{0U}, base);
+        check_roundtrip<uint128_t>(uint128_t{1U}, base);
+        check_roundtrip<uint128_t>(static_cast<uint128_t>(static_cast<unsigned>(base) - 1U), base);
+        check_roundtrip<uint128_t>(static_cast<uint128_t>(static_cast<unsigned>(base)), base);
+
+        // A handful of mid-range values that span the per-base digit window.
+        check_roundtrip<uint128_t>(uint128_t{42U}, base);
+        check_roundtrip<uint128_t>(uint128_t{1234567890U}, base);
+        check_roundtrip<uint128_t>(uint128_t{0xFFFFFFFFFFFFFFFFULL}, base);
+        check_roundtrip<uint128_t>(uint128_t{1U} << 100U, base);
+
+        // The boundary itself parses correctly.
+        check_roundtrip<uint128_t>(max_value, base);
+
+        // MAX with any extra digit appended is at least MAX * base, which
+        // overflows uint128_t for every base in [2, 36].
+        const auto max_str {format_unsigned(max_value, base)};
+        check_overflow<uint128_t>(max_str + "0", base);
+    }
+}
+
+void test_int128_all_bases()
+{
+    constexpr auto max_value {(std::numeric_limits<int128_t>::max)()};
+    constexpr auto min_value {(std::numeric_limits<int128_t>::min)()};
+
+    for (int base {2}; base <= 36; ++base)
+    {
+        check_roundtrip<int128_t>(int128_t{0}, base);
+        check_roundtrip<int128_t>(int128_t{1}, base);
+        check_roundtrip<int128_t>(int128_t{-1}, base);
+        check_roundtrip<int128_t>(int128_t{42}, base);
+        check_roundtrip<int128_t>(int128_t{-42}, base);
+        check_roundtrip<int128_t>(int128_t{1234567890}, base);
+        check_roundtrip<int128_t>(int128_t{-1234567890}, base);
+
+        // Both signed boundaries parse correctly.
+        check_roundtrip<int128_t>(max_value, base);
+        check_roundtrip<int128_t>(min_value, base);
+
+        // Append a digit to push past the magnitude bound on each side.
+        const auto max_str {format_signed(max_value, base)};
+        check_overflow<int128_t>(max_str + "0", base);
+
+        const auto min_str {format_signed(min_value, base)};
+        check_overflow<int128_t>(min_str + "0", base);
+    }
+}
+
+void test_decimal_boundaries()
+{
+    // Tight base-10 boundary cases: the spurious <<= 1 in the threshold made
+    // these silently produce wrong values instead of returning EDOM.
+
+    // UINT128_MAX exactly.
+    {
+        const std::string s {"340282366920938463463374607431768211455"};
+        uint128_t v {};
+        const auto r {boost::int128::detail::from_chars(s.data(), s.data() + s.size(), v)};
+        BOOST_TEST_LT(r, 0);
+        BOOST_TEST(v == (std::numeric_limits<uint128_t>::max)());
+    }
+
+    // UINT128_MAX + 1.
+    check_overflow<uint128_t>("340282366920938463463374607431768211456", 10);
+
+    // INT128_MAX exactly.
+    {
+        const std::string s {"170141183460469231731687303715884105727"};
+        int128_t v {};
+        const auto r {boost::int128::detail::from_chars(s.data(), s.data() + s.size(), v)};
+        BOOST_TEST_LT(r, 0);
+        BOOST_TEST(v == (std::numeric_limits<int128_t>::max)());
+    }
+
+    // INT128_MAX + 1.
+    check_overflow<int128_t>("170141183460469231731687303715884105728", 10);
+
+    // INT128_MIN exactly.
+    {
+        const std::string s {"-170141183460469231731687303715884105728"};
+        int128_t v {};
+        const auto r {boost::int128::detail::from_chars(s.data(), s.data() + s.size(), v)};
+        BOOST_TEST_LT(r, 0);
+        BOOST_TEST(v == (std::numeric_limits<int128_t>::min)());
+    }
+
+    // INT128_MIN - 1.
+    check_overflow<int128_t>("-170141183460469231731687303715884105729", 10);
+}
+
+void test_invalid_inputs()
+{
+    // Empty range is EINVAL.
+    {
+        const char* s {""};
+        uint128_t v {};
+        const auto r {boost::int128::detail::from_chars(s, s, v)};
+        BOOST_TEST_EQ(r, EINVAL);
+    }
+
+    // Lone sign is EINVAL.
+    {
+        const std::string s {"-"};
+        int128_t v {};
+        const auto r {boost::int128::detail::from_chars(s.data(), s.data() + s.size(), v)};
+        BOOST_TEST_EQ(r, EINVAL);
+    }
+
+    // Leading sign on the unsigned overload is EINVAL.
+    {
+        const std::string s {"-1"};
+        uint128_t v {};
+        const auto r {boost::int128::detail::from_chars(s.data(), s.data() + s.size(), v)};
+        BOOST_TEST_EQ(r, EINVAL);
+    }
+}
+
+} // anonymous namespace
+
+int main()
+{
+    test_uint128_all_bases();
+    test_int128_all_bases();
+    test_decimal_boundaries();
+    test_invalid_inputs();
+
+    return boost::report_errors();
+}
diff --git a/test/test_has_single_bit.cu b/test/test_has_single_bit.cu
index fcaa8ddd..3dd4985a 100644
--- a/test/test_has_single_bit.cu
+++ b/test/test_has_single_bit.cu
@@ -3,8 +3,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <vector>
 #include <random>
diff --git a/test/test_hash.cpp b/test/test_hash.cpp
new file mode 100644
index 00000000..a3b54c5e
--- /dev/null
+++ b/test/test_hash.cpp
@@ -0,0 +1,200 @@
+// Copyright 2026 Matt Borland
+// Distributed under the Boost Software License, Version 1.0.
+// https://www.boost.org/LICENSE_1_0.txt
+
+#include <boost/core/lightweight_test.hpp>
+
+#ifndef BOOST_INT128_BUILD_MODULE
+
+#include <boost/int128/int128.hpp>
+#include <boost/int128/hash.hpp>
+
+#else
+
+import boost.int128;
+
+#endif
+
+#include <cstdint>
+#include <functional>
+#include <random>
+#include <unordered_map>
+#include <unordered_set>
+
+void test_uint128_equivalent_hashes()
+{
+    using boost::int128::uint128_t;
+
+    std::hash<uint128_t> hasher {};
+
+    // Two objects built from the same (high, low) words must hash identically
+    const uint128_t a {UINT64_C(0xDEADBEEF), UINT64_C(0xCAFEBABE)};
+    const uint128_t b {UINT64_C(0xDEADBEEF), UINT64_C(0xCAFEBABE)};
+    BOOST_TEST_EQ(hasher(a), hasher(b));
+
+    // Equality across assignment
+    uint128_t c {};
+    c = a;
+    BOOST_TEST_EQ(hasher(a), hasher(c));
+
+    // Zero hashes to a stable value
+    const uint128_t zero1 {};
+    const uint128_t zero2 {0};
+    BOOST_TEST_EQ(hasher(zero1), hasher(zero2));
+
+    // Values constructed from a small integer match equivalent two-word form
+    const uint128_t small_a {42};
+    const uint128_t small_b {0, 42};
+    BOOST_TEST_EQ(hasher(small_a), hasher(small_b));
+}
+
+void test_int128_equivalent_hashes()
+{
+    using boost::int128::int128_t;
+
+    std::hash<int128_t> hasher {};
+
+    const int128_t a {INT64_C(-1), UINT64_C(0xCAFEBABE)};
+    const int128_t b {INT64_C(-1), UINT64_C(0xCAFEBABE)};
+    BOOST_TEST_EQ(hasher(a), hasher(b));
+
+    int128_t c {};
+    c = a;
+    BOOST_TEST_EQ(hasher(a), hasher(c));
+
+    const int128_t zero1 {};
+    const int128_t zero2 {0};
+    BOOST_TEST_EQ(hasher(zero1), hasher(zero2));
+
+    const int128_t pos_a {42};
+    const int128_t pos_b {0, 42};
+    BOOST_TEST_EQ(hasher(pos_a), hasher(pos_b));
+}
+
+void test_int128_negative_no_collision_with_absolute()
+{
+    using boost::int128::int128_t;
+
+    std::hash<int128_t> hasher {};
+
+    // hash(-x) must not equal hash(x) for non-zero x
+    for (std::int64_t i {1}; i <= 1024; ++i)
+    {
+        const int128_t pos {i};
+        const int128_t neg {-i};
+        BOOST_TEST_NE(hasher(pos), hasher(neg));
+    }
+
+    // Larger magnitudes including values that span beyond 64 bits
+    const int128_t big_pos {INT64_C(0x0000000100000000), UINT64_C(0)};
+    const int128_t big_neg {-big_pos};
+    BOOST_TEST_NE(hasher(big_pos), hasher(big_neg));
+
+    // +1 and -1: the low words are negations of each other (mod 2^64), yet the hashes must differ
+    const int128_t one {1};
+    const int128_t minus_one {-1};
+    BOOST_TEST_NE(hasher(one), hasher(minus_one));
+
+    // Re-checks the +1 / -1 pair under different names; NOTE: INT128_MIN is not constructed here
+    const int128_t edge_pos {1};
+    const int128_t edge_neg {-1};
+    BOOST_TEST_NE(hasher(edge_pos), hasher(edge_neg));
+}
+
+void test_hash_distribution_uint128()
+{
+    using boost::int128::uint128_t;
+
+    std::hash<uint128_t> hasher {};
+    std::unordered_set<std::size_t> seen {};
+
+    // A few thousand random values should produce nearly distinct hashes
+    std::mt19937_64 rng {42};
+    constexpr int num_samples {4096};
+    for (int i {0}; i < num_samples; ++i)
+    {
+        const uint128_t v {rng(), rng()};
+        seen.insert(hasher(v));
+    }
+
+    // Allow some collisions (size_t may be 32-bit on some platforms) but require high uniqueness
+    BOOST_TEST_GT(seen.size(), static_cast<std::size_t>(num_samples - 16));
+}
+
+void test_hash_distribution_int128()
+{
+    using boost::int128::int128_t;
+
+    std::hash<int128_t> hasher {};
+    std::unordered_set<std::size_t> seen {};
+
+    std::mt19937_64 rng {123};
+    constexpr int num_samples {4096};
+    for (int i {0}; i < num_samples; ++i)
+    {
+        const int128_t v {static_cast<std::int64_t>(rng()), rng()};
+        seen.insert(hasher(v));
+    }
+
+    BOOST_TEST_GT(seen.size(), static_cast<std::size_t>(num_samples - 16));
+}
+
+void test_use_in_unordered_map_uint128()
+{
+    using boost::int128::uint128_t;
+
+    std::unordered_map<uint128_t, int> map {};
+    map[uint128_t{0, 1}] = 1;
+    map[uint128_t{0, 2}] = 2;
+    map[uint128_t{1, 0}] = 3;
+    map[uint128_t{UINT64_MAX, UINT64_MAX}] = 4;
+
+    BOOST_TEST_EQ((map[uint128_t{0, 1}]), 1);
+    BOOST_TEST_EQ((map[uint128_t{0, 2}]), 2);
+    BOOST_TEST_EQ((map[uint128_t{1, 0}]), 3);
+    BOOST_TEST_EQ((map[uint128_t{UINT64_MAX, UINT64_MAX}]), 4);
+    BOOST_TEST_EQ(map.size(), (std::size_t {4}));
+}
+
+void test_use_in_unordered_map_int128()
+{
+    using boost::int128::int128_t;
+
+    std::unordered_map<int128_t, int> map {};
+    map[int128_t{0, 1}] = 1;
+    map[int128_t{-1, UINT64_MAX}] = 2;
+    map[int128_t{1, 0}] = 3;
+    map[int128_t{-1, 0}] = 4;
+
+    BOOST_TEST_EQ((map[int128_t{0, 1}]), 1);
+    BOOST_TEST_EQ((map[int128_t{-1, UINT64_MAX}]), 2);
+    BOOST_TEST_EQ((map[int128_t{1, 0}]), 3);
+    BOOST_TEST_EQ((map[int128_t{-1, 0}]), 4);
+    BOOST_TEST_EQ(map.size(), (std::size_t {4}));
+}
+
+void test_high_low_swap_not_colliding()
+{
+    using boost::int128::uint128_t;
+
+    std::hash<uint128_t> hasher {};
+
+    // hash({a, b}) should not equal hash({b, a}) in general
+    const uint128_t a {UINT64_C(0x1111), UINT64_C(0x2222)};
+    const uint128_t b {UINT64_C(0x2222), UINT64_C(0x1111)};
+    BOOST_TEST_NE(hasher(a), hasher(b));
+}
+
+int main()
+{
+    test_uint128_equivalent_hashes();
+    test_int128_equivalent_hashes();
+    test_int128_negative_no_collision_with_absolute();
+    test_hash_distribution_uint128();
+    test_hash_distribution_int128();
+    test_use_in_unordered_map_uint128();
+    test_use_in_unordered_map_int128();
+    test_high_low_swap_not_colliding();
+
+    return boost::report_errors();
+}
diff --git a/test/test_i128.cpp b/test/test_i128.cpp
index d52c2e59..5dd982ff 100644
--- a/test/test_i128.cpp
+++ b/test/test_i128.cpp
@@ -2,10 +2,6 @@
 // Distributed under the Boost Software License, Version 1.0.
 // https://www.boost.org/LICENSE_1_0.txt
 
-#ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION
-#  define BOOST_INT128_ALLOW_SIGN_CONVERSION
-#endif
-
 #ifndef BOOST_INT128_BUILD_MODULE
 
 #include <boost/int128/detail/int128_imp.hpp>
@@ -81,7 +77,16 @@ IntType get_root_max()
 template <typename IntType>
 IntType get_root_min()
 {
-    return static_cast<IntType>(std::sqrt(std::numeric_limits<IntType>::min()));
+    // numeric_limits<IntType>::min() is negative for signed IntType, so sqrt() would be
+    // NaN and the cast UB; use the negative of the positive root instead.
+    BOOST_INT128_IF_CONSTEXPR (std::is_signed<IntType>::value)
+    {
+        return static_cast<IntType>(-get_root_max<IntType>());
+    }
+    else
+    {
+        return static_cast<IntType>(0);
+    }
 }
 
 #include <boost/random/uniform_int_distribution.hpp>
diff --git a/test/test_ipow.cpp b/test/test_ipow.cpp
new file mode 100644
index 00000000..ec30c452
--- /dev/null
+++ b/test/test_ipow.cpp
@@ -0,0 +1,238 @@
+// Copyright 2026 Matt Borland
+// Distributed under the Boost Software License, Version 1.0.
+// https://www.boost.org/LICENSE_1_0.txt
+
+#if defined(__GNUC__) && __GNUC__ == 7 && defined(__i386__)
+
+// 32-bit GCC-7 fails with: "error: constexpr loop iteration count exceeds limit of 262144"
+
+int main() { return 0; }
+
+#else
+
+#ifndef BOOST_INT128_BUILD_MODULE
+
+#include <boost/int128.hpp>
+
+#else
+
+import boost.int128;
+
+#endif
+
+#include <boost/core/lightweight_test.hpp>
+#include <cstdint>
+#include <limits>
+
+using namespace boost::int128;
+
+namespace {
+
+// Naive reference implementation used to cross-check the squaring loop.
+template <typename T>
+constexpr T ipow_ref(T base, std::uint64_t exp) noexcept
+{
+    T result {1};
+
+    for (std::uint64_t i {0}; i < exp; ++i)
+    {
+        result *= base;
+    }
+
+    return result;
+}
+
+} // namespace
+
+void test_uint128_ipow_basic()
+{
+    BOOST_TEST_EQ(ipow(uint128_t{0}, 0U), uint128_t{1});
+    BOOST_TEST_EQ(ipow(uint128_t{1}, 0U), uint128_t{1});
+    BOOST_TEST_EQ(ipow(uint128_t{42}, 0U), uint128_t{1});
+    BOOST_TEST_EQ(ipow(uint128_t{0}, 1U), uint128_t{0});
+    BOOST_TEST_EQ(ipow(uint128_t{0}, 5U), uint128_t{0});
+    BOOST_TEST_EQ(ipow(uint128_t{1}, 1000U), uint128_t{1});
+    BOOST_TEST_EQ(ipow(uint128_t{42}, 1U), uint128_t{42});
+
+    BOOST_TEST_EQ(ipow(uint128_t{2}, 0U), uint128_t{1});
+    BOOST_TEST_EQ(ipow(uint128_t{2}, 1U), uint128_t{2});
+    BOOST_TEST_EQ(ipow(uint128_t{2}, 2U), uint128_t{4});
+    BOOST_TEST_EQ(ipow(uint128_t{2}, 10U), uint128_t{1024});
+    BOOST_TEST_EQ(ipow(uint128_t{3}, 5U), uint128_t{243});
+    BOOST_TEST_EQ(ipow(uint128_t{10}, 9U), uint128_t{UINT64_C(1000000000)});
+    BOOST_TEST_EQ(ipow(uint128_t{10}, 18U), uint128_t{UINT64_C(1000000000000000000)});
+}
+
+void test_uint128_ipow_power_of_two()
+{
+    // 2^k fills bit k, so we can hit every bit position up to 127.
+    for (std::uint64_t k {0}; k < 64; ++k)
+    {
+        const uint128_t expected {static_cast<std::uint64_t>(1) << k};
+        BOOST_TEST_EQ(ipow(uint128_t{2}, k), expected);
+    }
+
+    for (std::uint64_t k {64}; k < 128; ++k)
+    {
+        const uint128_t expected {static_cast<std::uint64_t>(1) << (k - 64), 0U};
+        BOOST_TEST_EQ(ipow(uint128_t{2}, k), expected);
+    }
+
+    // 2^128 wraps to 0 in uint128 arithmetic.
+    BOOST_TEST_EQ(ipow(uint128_t{2}, 128U), uint128_t{0});
+    BOOST_TEST_EQ(ipow(uint128_t{2}, 200U), uint128_t{0});
+}
+
+void test_uint128_ipow_large()
+{
+    // 10^38 is the largest power of 10 that fits in 128 bits.
+    // 10^38 = 100000000000000000000000000000000000000.
+    const uint128_t ten_pow_38 {UINT64_C(0x4B3B4CA85A86C47A), UINT64_C(0x098A224000000000)};
+    BOOST_TEST_EQ(ipow(uint128_t{10}, 38U), ten_pow_38);
+
+    // Cross-check a range of bases against the naive reference for small
+    // exponents where the result is hand-verifiable through repeated mul.
+    for (std::uint64_t base {2}; base < 8; ++base)
+    {
+        for (std::uint64_t exp {0}; exp < 12; ++exp)
+        {
+            BOOST_TEST_EQ(ipow(uint128_t{base}, exp), ipow_ref(uint128_t{base}, exp));
+        }
+    }
+}
+
+void test_uint128_ipow_wrap()
+{
+    // Squaring 2^64 yields 2^128 which wraps to 0.
+    const uint128_t two_pow_64 {1U, 0U};
+    BOOST_TEST_EQ(ipow(two_pow_64, 2U), uint128_t{0});
+
+    // (2^64 - 1)^2 mod 2^128 = 2^128 - 2^65 + 1, which has a known bit pattern.
+    const uint128_t u64_max {(std::numeric_limits<std::uint64_t>::max)()};
+    const uint128_t expected {UINT64_C(0xFFFFFFFFFFFFFFFE), 1U};
+    BOOST_TEST_EQ(ipow(u64_max, 2U), expected);
+
+    // Any even base raised to a sufficiently large power wraps to 0, because
+    // the product accumulates at least 128 factors of 2.
+    BOOST_TEST_EQ(ipow(uint128_t{4}, 64U), uint128_t{0});
+    BOOST_TEST_EQ(ipow(uint128_t{6}, 200U), uint128_t{0});
+}
+
+void test_uint128_ipow_identities()
+{
+    // a^(b+c) == a^b * a^c (under wrap modulo 2^128).
+    const uint128_t a {UINT64_C(0xDEADBEEF)};
+    BOOST_TEST_EQ(ipow(a, 7U), ipow(a, 3U) * ipow(a, 4U));
+    BOOST_TEST_EQ(ipow(a, 20U), ipow(a, 13U) * ipow(a, 7U));
+
+    // (a*b)^e == a^e * b^e.
+    const uint128_t aa {7};
+    const uint128_t bb {11};
+    BOOST_TEST_EQ(ipow(aa * bb, 6U), ipow(aa, 6U) * ipow(bb, 6U));
+
+    // (a^b)^c == a^(b*c).
+    BOOST_TEST_EQ(ipow(ipow(uint128_t{3}, 4U), 5U), ipow(uint128_t{3}, 4U * 5U));
+}
+
+void test_int128_ipow_basic()
+{
+    BOOST_TEST_EQ(ipow(int128_t{0}, 0U), int128_t{1});
+    BOOST_TEST_EQ(ipow(int128_t{1}, 0U), int128_t{1});
+    BOOST_TEST_EQ(ipow(int128_t{-1}, 0U), int128_t{1});
+    BOOST_TEST_EQ(ipow(int128_t{0}, 5U), int128_t{0});
+    BOOST_TEST_EQ(ipow(int128_t{42}, 1U), int128_t{42});
+
+    BOOST_TEST_EQ(ipow(int128_t{2}, 10U), int128_t{1024});
+    BOOST_TEST_EQ(ipow(int128_t{3}, 5U), int128_t{243});
+    BOOST_TEST_EQ(ipow(int128_t{10}, 18U), int128_t{INT64_C(1000000000000000000)});
+}
+
+void test_int128_ipow_negative_base()
+{
+    // Even exponents are non-negative, odd exponents preserve the sign.
+    BOOST_TEST_EQ(ipow(int128_t{-2}, 0U), int128_t{1});
+    BOOST_TEST_EQ(ipow(int128_t{-2}, 1U), int128_t{-2});
+    BOOST_TEST_EQ(ipow(int128_t{-2}, 2U), int128_t{4});
+    BOOST_TEST_EQ(ipow(int128_t{-2}, 3U), int128_t{-8});
+    BOOST_TEST_EQ(ipow(int128_t{-2}, 10U), int128_t{1024});
+    BOOST_TEST_EQ(ipow(int128_t{-3}, 5U), int128_t{-243});
+
+    BOOST_TEST_EQ(ipow(int128_t{-1}, 100U), int128_t{1});
+    BOOST_TEST_EQ(ipow(int128_t{-1}, 101U), int128_t{-1});
+
+    BOOST_TEST_EQ(ipow(int128_t{-10}, 18U), int128_t{INT64_C(1000000000000000000)});
+    BOOST_TEST_EQ(ipow(int128_t{-10}, 17U), int128_t{INT64_C(-100000000000000000)});
+}
+
+void test_int128_ipow_large()
+{
+    // 10^38 still fits in int128_t (signed max is roughly 1.7e38).
+    const int128_t ten_pow_38 {static_cast<int128_t>(uint128_t{UINT64_C(0x4B3B4CA85A86C47A), UINT64_C(0x098A224000000000)})};
+    BOOST_TEST_EQ(ipow(int128_t{10}, 38U), ten_pow_38);
+    BOOST_TEST_EQ(ipow(int128_t{-10}, 38U), ten_pow_38);
+
+    // Cross-check small bases against the naive reference.
+    for (std::int64_t base {-7}; base < 8; ++base)
+    {
+        for (std::uint64_t exp {0}; exp < 12; ++exp)
+        {
+            BOOST_TEST_EQ(ipow(int128_t{base}, exp), ipow_ref(int128_t{base}, exp));
+        }
+    }
+}
+
+void test_int128_ipow_identities()
+{
+    const int128_t a {12345};
+    BOOST_TEST_EQ(ipow(a, 7U), ipow(a, 3U) * ipow(a, 4U));
+    BOOST_TEST_EQ(ipow(ipow(int128_t{3}, 4U), 5U), ipow(int128_t{3}, 4U * 5U));
+
+    // Sign behaves multiplicatively.
+    BOOST_TEST_EQ(ipow(int128_t{-7}, 3U) * ipow(int128_t{-7}, 4U), ipow(int128_t{-7}, 7U));
+}
+
+#ifdef _MSC_VER
+#  pragma warning(push)
+#  pragma warning(disable : 4307) // integral constant overflow
+#  pragma warning(disable : 4308) // negative integral constant converted to unsigned type
+#endif
+
+void test_constexpr_ipow()
+{
+    constexpr uint128_t r1 {ipow(uint128_t{2}, 10U)};
+    static_assert(r1 == uint128_t{1024}, "ipow constexpr uint128 small case");
+
+    constexpr uint128_t r2 {ipow(uint128_t{10}, 18U)};
+    static_assert(r2 == uint128_t{UINT64_C(1000000000000000000)}, "ipow constexpr uint128 18 digits");
+
+    constexpr int128_t r3 {ipow(int128_t{-3}, 5U)};
+    static_assert(r3 == int128_t{-243}, "ipow constexpr int128 negative base odd exp");
+
+    constexpr int128_t r4 {ipow(int128_t{-3}, 4U)};
+    static_assert(r4 == int128_t{81}, "ipow constexpr int128 negative base even exp");
+
+    constexpr uint128_t r5 {ipow(uint128_t{2}, 128U)};
+    static_assert(r5 == uint128_t{0}, "ipow constexpr uint128 wrap to zero");
+}
+
+#ifdef _MSC_VER
+#  pragma warning(pop)
+#endif
+
+int main()
+{
+    test_uint128_ipow_basic();
+    test_uint128_ipow_power_of_two();
+    test_uint128_ipow_large();
+    test_uint128_ipow_wrap();
+    test_uint128_ipow_identities();
+    test_int128_ipow_basic();
+    test_int128_ipow_negative_base();
+    test_int128_ipow_large();
+    test_int128_ipow_identities();
+    test_constexpr_ipow();
+
+    return boost::report_errors();
+}
+
+#endif
diff --git a/test/test_isqrt.cpp b/test/test_isqrt.cpp
new file mode 100644
index 00000000..5d485e31
--- /dev/null
+++ b/test/test_isqrt.cpp
@@ -0,0 +1,278 @@
+// Copyright 2026 Matt Borland
+// Distributed under the Boost Software License, Version 1.0.
+// https://www.boost.org/LICENSE_1_0.txt
+
+#if defined(__GNUC__) && __GNUC__ == 7 && defined(__i386__)
+
+// 32-bit GCC-7 fails with: "error: constexpr loop iteration count exceeds limit of 262144"
+
+int main() { return 0; }
+
+#else
+
+#ifndef BOOST_INT128_BUILD_MODULE
+
+#include <boost/int128.hpp>
+
+#else
+
+import boost.int128;
+
+#endif
+
+#include <boost/core/lightweight_test.hpp>
+#include <cstdint>
+#include <limits>
+
+using namespace boost::int128;
+
+namespace {
+
+// Naive bit-by-bit reference. Independent of the Newton implementation under
+// test, so it serves as a ground truth for cross-checking.
+constexpr uint128_t isqrt_ref(uint128_t n) noexcept
+{
+    if (n < 2U)
+    {
+        return n;
+    }
+
+    uint128_t res {0};
+    uint128_t bit {uint128_t{1} << 126};
+
+    while (bit > n)
+    {
+        bit >>= 2;
+    }
+
+    while (bit != 0U)
+    {
+        if (n >= res + bit)
+        {
+            n -= res + bit;
+            res = (res >> 1) + bit;
+        }
+        else
+        {
+            res >>= 1;
+        }
+
+        bit >>= 2;
+    }
+
+    return res;
+}
+
+// Verify the defining invariant: r == isqrt(n) iff r*r <= n < (r+1)^2.
+// Computed with overflow-safe comparisons so callers can pass values near the
+// uint128 upper bound.
+void check_invariant(const uint128_t n)
+{
+    const uint128_t r {isqrt(n)};
+
+    BOOST_TEST(r * r <= n);
+
+    const uint128_t r_plus_1 {r + 1U};
+
+    // (r+1)^2 may overflow uint128 when r is close to 2^64; in that case the
+    // invariant n < (r+1)^2 is trivially satisfied since n fits in 128 bits.
+    if (r_plus_1 != 0U && r_plus_1 <= ((std::numeric_limits<uint128_t>::max)() / r_plus_1))
+    {
+        BOOST_TEST(n < r_plus_1 * r_plus_1);
+    }
+}
+
+} // namespace
+
+void test_uint128_isqrt_small()
+{
+    BOOST_TEST_EQ(isqrt(uint128_t{0}), uint128_t{0});
+    BOOST_TEST_EQ(isqrt(uint128_t{1}), uint128_t{1});
+    BOOST_TEST_EQ(isqrt(uint128_t{2}), uint128_t{1});
+    BOOST_TEST_EQ(isqrt(uint128_t{3}), uint128_t{1});
+    BOOST_TEST_EQ(isqrt(uint128_t{4}), uint128_t{2});
+    BOOST_TEST_EQ(isqrt(uint128_t{5}), uint128_t{2});
+    BOOST_TEST_EQ(isqrt(uint128_t{8}), uint128_t{2});
+    BOOST_TEST_EQ(isqrt(uint128_t{9}), uint128_t{3});
+    BOOST_TEST_EQ(isqrt(uint128_t{10}), uint128_t{3});
+    BOOST_TEST_EQ(isqrt(uint128_t{15}), uint128_t{3});
+    BOOST_TEST_EQ(isqrt(uint128_t{16}), uint128_t{4});
+    BOOST_TEST_EQ(isqrt(uint128_t{99}), uint128_t{9});
+    BOOST_TEST_EQ(isqrt(uint128_t{100}), uint128_t{10});
+    BOOST_TEST_EQ(isqrt(uint128_t{101}), uint128_t{10});
+
+    // Exhaustive cross-check against the reference for every small n.
+    for (std::uint64_t i {0}; i < 200; ++i)
+    {
+        BOOST_TEST_EQ(isqrt(uint128_t{i}), isqrt_ref(uint128_t{i}));
+    }
+}
+
+void test_uint128_isqrt_perfect_squares()
+{
+    // Small squares k^2 for k in [0, 10000), together with the neighbours
+    // k^2 - 1 and (k+1)^2 - 1 on either side of each square boundary.
+    for (std::uint64_t k {0}; k < 10000; ++k)
+    {
+        const uint128_t kk {k};
+        BOOST_TEST_EQ(isqrt(kk * kk), kk);
+
+        if (k > 0)
+        {
+            BOOST_TEST_EQ(isqrt(kk * kk - 1U), kk - 1U);
+            BOOST_TEST_EQ(isqrt(kk * kk + 2U * kk), kk);  // (k+1)^2 - 1
+        }
+    }
+
+    // Powers of two as bases: k = 2^i for i in [0, 63] - largest exact square
+    // is (2^63)^2 = 2^126.
+    for (int i {0}; i < 64; ++i)
+    {
+        const uint128_t k {uint128_t{1} << i};
+        BOOST_TEST_EQ(isqrt(k * k), k);
+    }
+
+    // Largest representable perfect square: (2^64 - 1)^2 = 2^128 - 2^65 + 1.
+    const uint128_t k_max {(std::numeric_limits<std::uint64_t>::max)()};
+    BOOST_TEST_EQ(isqrt(k_max * k_max), k_max);
+}
+
+void test_uint128_isqrt_bit_boundaries()
+{
+    // 2^(2k) has integer square root 2^k.
+    for (int k {0}; k < 64; ++k)
+    {
+        const uint128_t n {uint128_t{1} << (2 * k)};
+        BOOST_TEST_EQ(isqrt(n), uint128_t{1} << k);
+    }
+
+    // 2^(2k+1) has integer square root floor(2^(k+0.5)) = floor(sqrt(2) * 2^k).
+    // Check the invariant holds rather than hard-coding the value.
+    for (int k {0}; k < 63; ++k)
+    {
+        check_invariant(uint128_t{1} << (2 * k + 1));
+    }
+
+    // Just below and just above bit boundaries.
+    for (int k {2}; k < 128; ++k)
+    {
+        const uint128_t boundary {uint128_t{1} << k};
+        check_invariant(boundary - 1U);
+        check_invariant(boundary);
+        check_invariant(boundary + 1U);
+    }
+}
+
+void test_uint128_isqrt_extreme()
+{
+    // uint128 max: (2^64)^2 = 2^128 is not representable, so isqrt(2^128 - 1) = 2^64 - 1.
+    const uint128_t u128_max {(std::numeric_limits<uint128_t>::max)()};
+    const uint128_t u64_max {(std::numeric_limits<std::uint64_t>::max)()};
+    BOOST_TEST_EQ(isqrt(u128_max), u64_max);
+
+    // 2^128 - 2^65 + 1 = (2^64 - 1)^2 - exact square at the very top.
+    BOOST_TEST_EQ(isqrt(u64_max * u64_max), u64_max);
+
+    // One above the largest representable square: still has isqrt = 2^64 - 1.
+    BOOST_TEST_EQ(isqrt(u64_max * u64_max + 1U), u64_max);
+
+    // A handful of large hand-picked values, cross-checked against the bit-by-
+    // bit reference.
+    const uint128_t big_a {UINT64_C(0x0123456789ABCDEF), UINT64_C(0xFEDCBA9876543210)};
+    const uint128_t big_b {UINT64_C(0xDEADBEEFCAFEBABE), UINT64_C(0x0123456789ABCDEF)};
+    const uint128_t big_c {UINT64_C(0x8000000000000000), 0U};
+
+    BOOST_TEST_EQ(isqrt(big_a), isqrt_ref(big_a));
+    BOOST_TEST_EQ(isqrt(big_b), isqrt_ref(big_b));
+    BOOST_TEST_EQ(isqrt(big_c), isqrt_ref(big_c));
+
+    check_invariant(big_a);
+    check_invariant(big_b);
+    check_invariant(big_c);
+    check_invariant(u128_max);
+}
+
+void test_int128_isqrt()
+{
+    BOOST_TEST_EQ(isqrt(int128_t{0}), int128_t{0});
+    BOOST_TEST_EQ(isqrt(int128_t{1}), int128_t{1});
+    BOOST_TEST_EQ(isqrt(int128_t{2}), int128_t{1});
+    BOOST_TEST_EQ(isqrt(int128_t{100}), int128_t{10});
+    BOOST_TEST_EQ(isqrt(int128_t{144}), int128_t{12});
+    BOOST_TEST_EQ(isqrt(int128_t{INT64_C(1000000000000000000)}), int128_t{INT64_C(1000000000)});
+
+    // int128 max = 2^127 - 1. floor(sqrt) = floor(2^63.5), roughly 1.3044e19.
+    // Use the unsigned implementation as the source of truth.
+    const int128_t i128_max {(std::numeric_limits<int128_t>::max)()};
+    BOOST_TEST_EQ(isqrt(i128_max), static_cast<int128_t>(isqrt(static_cast<uint128_t>(i128_max))));
+
+    // Negative inputs are documented to return 0.
+    BOOST_TEST_EQ(isqrt(int128_t{-1}), int128_t{0});
+    BOOST_TEST_EQ(isqrt(int128_t{-100}), int128_t{0});
+    BOOST_TEST_EQ(isqrt((std::numeric_limits<int128_t>::min)()), int128_t{0});
+}
+
+void test_isqrt_against_ipow()
+{
+    // isqrt(ipow(k, 2)) == k for any k whose square fits.
+    for (std::uint64_t k {0}; k < 1000; ++k)
+    {
+        BOOST_TEST_EQ(isqrt(ipow(uint128_t{k}, 2U)), uint128_t{k});
+    }
+
+    // isqrt is monotonically non-decreasing.
+    uint128_t prev {0};
+
+    for (std::uint64_t i {0}; i < 1000; ++i)
+    {
+        const uint128_t curr {isqrt(uint128_t{i})};
+        BOOST_TEST(curr >= prev);
+        prev = curr;
+    }
+}
+
+#ifdef _MSC_VER
+#  pragma warning(push)
+#  pragma warning(disable : 4307) // integral constant overflow
+#  pragma warning(disable : 4308) // negative integral constant converted to unsigned type
+#endif
+
+void test_constexpr_isqrt()
+{
+    constexpr uint128_t r1 {isqrt(uint128_t{0})};
+    static_assert(r1 == uint128_t{0}, "isqrt(0) constexpr");
+
+    constexpr uint128_t r2 {isqrt(uint128_t{1})};
+    static_assert(r2 == uint128_t{1}, "isqrt(1) constexpr");
+
+    constexpr uint128_t r3 {isqrt(uint128_t{100})};
+    static_assert(r3 == uint128_t{10}, "isqrt(100) constexpr");
+
+    constexpr uint128_t r4 {isqrt(uint128_t{UINT64_C(1000000000000000000)})};
+    static_assert(r4 == uint128_t{UINT64_C(1000000000)}, "isqrt(10^18) constexpr");
+
+    constexpr int128_t r5 {isqrt(int128_t{-5})};
+    static_assert(r5 == int128_t{0}, "isqrt negative constexpr");
+
+    constexpr int128_t r6 {isqrt(int128_t{12321})};
+    static_assert(r6 == int128_t{111}, "isqrt(12321) constexpr");
+}
+
+#ifdef _MSC_VER
+#  pragma warning(pop)
+#endif
+
+int main()
+{
+    test_uint128_isqrt_small();
+    test_uint128_isqrt_perfect_squares();
+    test_uint128_isqrt_bit_boundaries();
+    test_uint128_isqrt_extreme();
+    test_int128_isqrt();
+    test_isqrt_against_ipow();
+    test_constexpr_isqrt();
+
+    return boost::report_errors();
+}
+
+#endif
diff --git a/test/test_mixed_arithmetic.cpp b/test/test_mixed_arithmetic.cpp
deleted file mode 100644
index f4dd5f42..00000000
--- a/test/test_mixed_arithmetic.cpp
+++ /dev/null
@@ -1,20 +0,0 @@
-// Copyright 2025 Matt Borland
-// Distributed under the Boost Software License, Version 1.0.
-// https://www.boost.org/LICENSE_1_0.txt
-
-#include <boost/int128.hpp>
-#include <boost/core/lightweight_test.hpp>
-
-int main()
-{
-    constexpr boost::int128::uint128_t lhs {3};
-    constexpr boost::int128::int128_t rhs {-3};
-
-    BOOST_TEST(lhs + rhs == 0);
-    BOOST_TEST(lhs - rhs == -6);
-    BOOST_TEST(lhs * rhs == -9);
-    BOOST_TEST(lhs / rhs == -1);
-    BOOST_TEST(lhs % rhs == 0);
-
-    return boost::report_errors();
-}
diff --git a/test/test_mixed_type_ops.cpp b/test/test_mixed_type_ops.cpp
deleted file mode 100644
index 372d007c..00000000
--- a/test/test_mixed_type_ops.cpp
+++ /dev/null
@@ -1,21 +0,0 @@
-// Copyright 2025 Matt Borland
-// Distributed under the Boost Software License, Version 1.0.
-// https://www.boost.org/LICENSE_1_0.txt
-
-#include <boost/int128.hpp>
-#include <boost/core/lightweight_test.hpp>
-
-int main()
-{
-    constexpr boost::int128::uint128_t lhs {3};
-    constexpr boost::int128::int128_t rhs {-3};
-
-    BOOST_TEST(lhs == rhs);
-    BOOST_TEST(lhs != rhs);
-    BOOST_TEST(lhs < rhs);
-    BOOST_TEST(lhs <= rhs);
-    BOOST_TEST(lhs > rhs);
-    BOOST_TEST(lhs >= rhs);
-
-    return boost::report_errors();
-}
diff --git a/test/test_mixed_type_sign_compare.cpp b/test/test_mixed_type_sign_compare.cpp
index ebd99a88..b2e868f3 100644
--- a/test/test_mixed_type_sign_compare.cpp
+++ b/test/test_mixed_type_sign_compare.cpp
@@ -2,8 +2,6 @@
 // Distributed under the Boost Software License, Version 1.0.
 // https://www.boost.org/LICENSE_1_0.txt
 
-#define BOOST_INT128_ALLOW_SIGN_COMPARE
-
 #include <boost/int128.hpp>
 #include <boost/core/lightweight_test.hpp>
 #include <random>
@@ -11,17 +9,22 @@
 #ifdef __GNUC__
 #  pragma GCC diagnostic push
 #  pragma GCC diagnostic ignored "-Wsign-compare"
+#  pragma GCC diagnostic ignored "-Wsign-conversion"
 #endif
 
+#ifdef BOOST_INT128_HAS_INT128
+
 static std::mt19937_64 rng{42};
 static std::uniform_int_distribution<std::uint64_t> u_dist{0, UINT64_MAX};
-static std::uniform_int_distribution<std::int64_t> i_dist{0, INT64_MAX};
+static std::uniform_int_distribution<std::int64_t> i_dist{INT64_MIN, INT64_MAX};
 static constexpr std::size_t N {1024U};
 
 using namespace boost::int128;
 
 void test_left_unsigned()
 {
+    using boost::int128::detail::builtin_u128;
+
     for (std::size_t i {0}; i < N; ++i)
     {
         const auto lhs {u_dist(rng)};
@@ -30,27 +33,37 @@ void test_left_unsigned()
         const uint128_t lib_lhs {lhs};
         const int128_t lib_rhs {rhs};
 
-        BOOST_TEST_EQ(lib_lhs == lib_rhs, lhs == static_cast<std::uint64_t>(rhs));
-        BOOST_TEST_EQ(lib_lhs != lib_rhs, lhs != static_cast<std::uint64_t>(rhs));
-        BOOST_TEST_EQ(lib_lhs > lib_rhs, lhs > static_cast<std::uint64_t>(rhs));
-        BOOST_TEST_EQ(lib_lhs >= lib_rhs, lhs >= static_cast<std::uint64_t>(rhs));
-        BOOST_TEST_EQ(lib_lhs < lib_rhs, lhs < static_cast<std::uint64_t>(rhs));
-        BOOST_TEST_EQ(lib_lhs <= lib_rhs, lhs <= static_cast<std::uint64_t>(rhs));
-    }
+        // Builtin oracle: same-rank int128/uint128 -> both promote to unsigned __int128
+        const builtin_u128 builtin_lhs {lhs};
+        const builtin_u128 builtin_rhs = static_cast<builtin_u128>(static_cast<__int128>(rhs));
 
-    const uint128_t lhs {42u};
-    const int128_t rhs {-42};
+        BOOST_TEST_EQ(lib_lhs == lib_rhs, builtin_lhs == builtin_rhs);
+        BOOST_TEST_EQ(lib_lhs != lib_rhs, builtin_lhs != builtin_rhs);
+        BOOST_TEST_EQ(lib_lhs > lib_rhs,  builtin_lhs >  builtin_rhs);
+        BOOST_TEST_EQ(lib_lhs >= lib_rhs, builtin_lhs >= builtin_rhs);
+        BOOST_TEST_EQ(lib_lhs < lib_rhs,  builtin_lhs <  builtin_rhs);
+        BOOST_TEST_EQ(lib_lhs <= lib_rhs, builtin_lhs <= builtin_rhs);
+    }
 
-    BOOST_TEST_EQ(lhs == rhs, false);
-    BOOST_TEST_EQ(lhs != rhs, true);
-    BOOST_TEST_EQ(lhs < rhs, false);
-    BOOST_TEST_EQ(lhs <= rhs, false);
-    BOOST_TEST_EQ(lhs > rhs, true);
-    BOOST_TEST_EQ(lhs >= rhs, true);
+    // Edge cases that the old deviations would have answered differently
+    {
+        const uint128_t lhs {42u};
+        const int128_t rhs {-42};
+
+        // Builtin: int128_t(-42) -> unsigned huge; 42 vs huge
+        BOOST_TEST_EQ(lhs == rhs, false);
+        BOOST_TEST_EQ(lhs != rhs, true);
+        BOOST_TEST_EQ(lhs <  rhs, true);   // 42 < huge
+        BOOST_TEST_EQ(lhs <= rhs, true);
+        BOOST_TEST_EQ(lhs >  rhs, false);
+        BOOST_TEST_EQ(lhs >= rhs, false);
+    }
 }
 
 void test_right_unsigned()
 {
+    using boost::int128::detail::builtin_u128;
+
     for (std::size_t i {0}; i < N; ++i)
     {
         const auto lhs {i_dist(rng)};
@@ -59,29 +72,41 @@ void test_right_unsigned()
         const int128_t lib_lhs {lhs};
         const uint128_t lib_rhs {rhs};
 
-        BOOST_TEST_EQ(lib_lhs == lib_rhs, static_cast<std::uint64_t>(lhs) == rhs);
-        BOOST_TEST_EQ(lib_lhs != lib_rhs, static_cast<std::uint64_t>(lhs) != rhs);
-        BOOST_TEST_EQ(lib_lhs > lib_rhs, static_cast<std::uint64_t>(lhs) > rhs);
-        BOOST_TEST_EQ(lib_lhs >= lib_rhs, static_cast<std::uint64_t>(lhs) >= rhs);
-        BOOST_TEST_EQ(lib_lhs < lib_rhs, static_cast<std::uint64_t>(lhs) < rhs);
-        BOOST_TEST_EQ(lib_lhs <= lib_rhs, static_cast<std::uint64_t>(lhs) <= rhs);
-    }
+        const builtin_u128 builtin_lhs = static_cast<builtin_u128>(static_cast<__int128>(lhs));
+        const builtin_u128 builtin_rhs {rhs};
 
-    const int128_t lhs {-42};
-    const uint128_t rhs {42u};
+        BOOST_TEST_EQ(lib_lhs == lib_rhs, builtin_lhs == builtin_rhs);
+        BOOST_TEST_EQ(lib_lhs != lib_rhs, builtin_lhs != builtin_rhs);
+        BOOST_TEST_EQ(lib_lhs > lib_rhs,  builtin_lhs >  builtin_rhs);
+        BOOST_TEST_EQ(lib_lhs >= lib_rhs, builtin_lhs >= builtin_rhs);
+        BOOST_TEST_EQ(lib_lhs < lib_rhs,  builtin_lhs <  builtin_rhs);
+        BOOST_TEST_EQ(lib_lhs <= lib_rhs, builtin_lhs <= builtin_rhs);
+    }
 
-    BOOST_TEST_EQ(lhs == rhs, false);
-    BOOST_TEST_EQ(lhs != rhs, true);
-    BOOST_TEST_EQ(lhs < rhs, true);
-    BOOST_TEST_EQ(lhs <= rhs, true);
-    BOOST_TEST_EQ(lhs > rhs, false);
-    BOOST_TEST_EQ(lhs >= rhs, false);
+    {
+        const int128_t lhs {-42};
+        const uint128_t rhs {42u};
+
+        // Builtin: int128_t(-42) -> unsigned huge; huge vs 42
+        BOOST_TEST_EQ(lhs == rhs, false);
+        BOOST_TEST_EQ(lhs != rhs, true);
+        BOOST_TEST_EQ(lhs <  rhs, false);  // huge not < 42
+        BOOST_TEST_EQ(lhs <= rhs, false);
+        BOOST_TEST_EQ(lhs >  rhs, true);
+        BOOST_TEST_EQ(lhs >= rhs, true);
+    }
 }
 
+#endif // BOOST_INT128_HAS_INT128
+
 int main()
 {
+    #ifdef BOOST_INT128_HAS_INT128 // both tests are only defined when the builtin 128-bit oracle exists
+
     test_left_unsigned();
     test_right_unsigned();
 
+    #endif // BOOST_INT128_HAS_INT128
+
     return boost::report_errors();
 }
diff --git a/test/test_mixed_type_sign_conversion.cpp b/test/test_mixed_type_sign_conversion.cpp
index 79f6106c..8c5f41ad 100644
--- a/test/test_mixed_type_sign_conversion.cpp
+++ b/test/test_mixed_type_sign_conversion.cpp
@@ -2,8 +2,6 @@
 // Distributed under the Boost Software License, Version 1.0.
 // https://www.boost.org/LICENSE_1_0.txt
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <boost/int128.hpp>
 #include <boost/core/lightweight_test.hpp>
 #include <random>
@@ -15,48 +13,65 @@
 #  pragma GCC diagnostic ignored "-Wsign-conversion"
 #endif
 
+#ifdef BOOST_INT128_HAS_INT128
+
 static std::mt19937_64 rng{42};
-static std::uniform_int_distribution<std::uint64_t> u_dist{0, static_cast<std::uint64_t>(std::sqrt(UINT64_MAX))};
-static std::uniform_int_distribution<std::int64_t> i_dist{0, static_cast<std::int64_t>(std::sqrt(INT64_MAX))};
+// Use sqrt-bounded 64-bit magnitudes (the oracle below is builtin_u128, where products of sign-extended
+// negative values wrap modulo 2^128), and cover negative signed values to exercise the sign-extension path.
+static std::uniform_int_distribution<std::uint64_t> u_dist{1, static_cast<std::uint64_t>(std::sqrt(UINT64_MAX))};
+static std::uniform_int_distribution<std::int64_t> i_dist{
+    -static_cast<std::int64_t>(std::sqrt(INT64_MAX)),
+    static_cast<std::int64_t>(std::sqrt(INT64_MAX))};
 static constexpr std::size_t N {1024U};
 
 using namespace boost::int128;
 
 void test()
 {
+    using boost::int128::detail::builtin_u128;
+
     for (std::size_t i {0}; i < N; ++i)
     {
         const auto u_val {u_dist(rng)};
         const auto i_val {i_dist(rng)};
-
-        if (u_val > static_cast<std::uint64_t>(i_val))
+        if (i_val == 0)
         {
-            const uint128_t lhs {u_val};
-            const int128_t rhs {i_val};
-
-            BOOST_TEST_EQ(lhs + rhs, u_val + static_cast<std::uint64_t>(i_val));
-            BOOST_TEST_EQ(lhs - rhs, u_val - static_cast<std::uint64_t>(i_val));
-            BOOST_TEST_EQ(lhs * rhs, u_val * static_cast<std::uint64_t>(i_val));
-            BOOST_TEST_EQ(lhs / rhs, u_val / static_cast<std::uint64_t>(i_val));
-            BOOST_TEST_EQ(lhs % rhs, u_val % static_cast<std::uint64_t>(i_val));
+            continue;  // skip divide/modulo by zero
         }
-        else
+
+        const uint128_t lhs_u {u_val};
+        const int128_t rhs_i {i_val};
+
+        // Builtin oracle: both operands promoted to unsigned __int128
+        const builtin_u128 builtin_lhs {u_val};
+        const builtin_u128 builtin_rhs = static_cast<builtin_u128>(static_cast<__int128>(i_val)); // sign-extend, then reinterpret as unsigned
+
+        BOOST_TEST_EQ(lhs_u + rhs_i, uint128_t{builtin_lhs + builtin_rhs});
+        BOOST_TEST_EQ(lhs_u - rhs_i, uint128_t{builtin_lhs - builtin_rhs});
+        BOOST_TEST_EQ(lhs_u * rhs_i, uint128_t{builtin_lhs * builtin_rhs});
+        BOOST_TEST_EQ(lhs_u / rhs_i, uint128_t{builtin_lhs / builtin_rhs});
+        BOOST_TEST_EQ(lhs_u % rhs_i, uint128_t{builtin_lhs % builtin_rhs});
+
+        // Reverse operand order (signed on the left, unsigned on the right)
+        if (u_val == 0)
         {
-            const int128_t lhs {i_val};
-            const uint128_t rhs {u_val};
-
-            BOOST_TEST_EQ(lhs + rhs, static_cast<std::uint64_t>(i_val) + u_val);
-            BOOST_TEST_EQ(lhs - rhs, static_cast<std::uint64_t>(i_val) - u_val);
-            BOOST_TEST_EQ(lhs * rhs, static_cast<std::uint64_t>(i_val) * u_val);
-            BOOST_TEST_EQ(lhs / rhs, static_cast<std::uint64_t>(i_val) / u_val);
-            BOOST_TEST_EQ(lhs % rhs, static_cast<std::uint64_t>(i_val) % u_val);
+            continue;  // not reachable while u_dist's lower bound is 1; kept as a divide-by-zero guard
         }
+        BOOST_TEST_EQ(rhs_i + lhs_u, uint128_t{builtin_rhs + builtin_lhs});
+        BOOST_TEST_EQ(rhs_i - lhs_u, uint128_t{builtin_rhs - builtin_lhs});
+        BOOST_TEST_EQ(rhs_i * lhs_u, uint128_t{builtin_rhs * builtin_lhs});
+        BOOST_TEST_EQ(rhs_i / lhs_u, uint128_t{builtin_rhs / builtin_lhs});
+        BOOST_TEST_EQ(rhs_i % lhs_u, uint128_t{builtin_rhs % builtin_lhs});
     }
 }
 
+#endif // BOOST_INT128_HAS_INT128
+
 int main()
 {
+#ifdef BOOST_INT128_HAS_INT128 // test() is only defined when the builtin 128-bit oracle exists
     test();
+#endif // BOOST_INT128_HAS_INT128
 
     return boost::report_errors();
-}
\ No newline at end of file
+}
diff --git a/test/test_num_digits.cpp b/test/test_num_digits.cpp
index f7a62785..e709b4d0 100644
--- a/test/test_num_digits.cpp
+++ b/test/test_num_digits.cpp
@@ -2,10 +2,6 @@
 // Distributed under the Boost Software License, Version 1.0.
 // https://www.boost.org/LICENSE_1_0.txt
 
-#ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION
-#  define BOOST_INT128_ALLOW_SIGN_CONVERSION
-#endif
-
 #include <boost/int128.hpp>
 #include <boost/int128/charconv.hpp>
 #include <boost/core/lightweight_test.hpp>
diff --git a/test/test_popcount.cu b/test/test_popcount.cu
index 5ee24511..a819ee09 100644
--- a/test/test_popcount.cu
+++ b/test/test_popcount.cu
@@ -3,8 +3,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <vector>
 #include <random>
diff --git a/test/test_powm.cpp b/test/test_powm.cpp
new file mode 100644
index 00000000..267d793b
--- /dev/null
+++ b/test/test_powm.cpp
@@ -0,0 +1,221 @@
+// Copyright 2026 Matt Borland
+// Distributed under the Boost Software License, Version 1.0.
+// https://www.boost.org/LICENSE_1_0.txt
+
+#if defined(__GNUC__) && __GNUC__ == 7 && defined(__i386__)
+
+// 32-bit GCC-7 fails with: "error: constexpr loop iteration count exceeds limit of 262144"
+
+int main() { return 0; }
+
+#else
+
+#ifndef BOOST_INT128_BUILD_MODULE
+
+#include <boost/int128.hpp>
+
+#else
+
+import boost.int128;
+
+#endif
+
+#include <boost/core/lightweight_test.hpp>
+#include <cstdint>
+#include <limits>
+
+using namespace boost::int128;
+
+void test_uint128_powm_basic() // Small hand-checked values plus the degenerate exponent/base/modulus cases.
+{
+    BOOST_TEST_EQ(powm(uint128_t{2}, uint128_t{10}, uint128_t{1000}), uint128_t{24}); // 2^10 = 1024
+    BOOST_TEST_EQ(powm(uint128_t{3}, uint128_t{5}, uint128_t{7}), uint128_t{5}); // 3^5 = 243 = 34*7 + 5
+    BOOST_TEST_EQ(powm(uint128_t{7}, uint128_t{13}, uint128_t{19}), uint128_t{7}); // 7^3 = 343 ~ 1 (mod 19)
+    BOOST_TEST_EQ(powm(uint128_t{5}, uint128_t{10}, uint128_t{13}), uint128_t{12}); // 5^2 = 25 ~ -1 (mod 13)
+    // 10^9 ~ -7 (mod 1000000007), so 10^18 ~ 49.
+    BOOST_TEST_EQ(powm(uint128_t{10}, uint128_t{18}, uint128_t{1000000007}), uint128_t{49});
+
+    // exp == 0
+    BOOST_TEST_EQ(powm(uint128_t{5}, uint128_t{0}, uint128_t{1000}), uint128_t{1});
+    BOOST_TEST_EQ(powm(uint128_t{0}, uint128_t{0}, uint128_t{7}), uint128_t{1}); // 0^0 is expected to be 1
+
+    // base == 0
+    BOOST_TEST_EQ(powm(uint128_t{0}, uint128_t{5}, uint128_t{7}), uint128_t{0});
+    BOOST_TEST_EQ(powm(uint128_t{0}, uint128_t{1}, uint128_t{7}), uint128_t{0});
+
+    // base == 1
+    BOOST_TEST_EQ(powm(uint128_t{1}, uint128_t{1000}, uint128_t{42}), uint128_t{1});
+
+    // exp == 1
+    BOOST_TEST_EQ(powm(uint128_t{42}, uint128_t{1}, uint128_t{100}), uint128_t{42});
+
+    // m == 1 - every residue modulo 1 is 0
+    BOOST_TEST_EQ(powm(uint128_t{42}, uint128_t{17}, uint128_t{1}), uint128_t{0});
+
+    // m == 0 - documented to return 0
+    BOOST_TEST_EQ(powm(uint128_t{42}, uint128_t{17}, uint128_t{0}), uint128_t{0});
+
+    // Base larger than the modulus must be reduced first; the expected value is computed in 64-bit arithmetic.
+    BOOST_TEST_EQ(powm(uint128_t{1234567}, uint128_t{2}, uint128_t{1000}), uint128_t{(1234567ULL * 1234567ULL) % 1000ULL});
+}
+
+void test_uint128_powm_power_of_two_modulus() // Moduli 2^7, 2^32, 2^64 and 2^127.
+{
+    BOOST_TEST_EQ(powm(uint128_t{3}, uint128_t{10}, uint128_t{128}), uint128_t{41}); // 3^10 = 59049 = 461*128 + 41
+    BOOST_TEST_EQ(powm(uint128_t{2}, uint128_t{7}, uint128_t{128}), uint128_t{0}); // 2^7 == 128
+    BOOST_TEST_EQ(powm(uint128_t{2}, uint128_t{6}, uint128_t{128}), uint128_t{64});
+
+    const uint128_t m32 {static_cast<std::uint64_t>(1) << 32}; // 2^32
+    BOOST_TEST_EQ(powm(uint128_t{2}, uint128_t{32}, m32), uint128_t{0});
+    BOOST_TEST_EQ(powm(uint128_t{3}, uint128_t{20}, m32), uint128_t{3486784401ULL}); // 3^20 < 2^32, so unreduced
+
+    const uint128_t m64 {1U, 0U}; // 2^64 (high word 1, low word 0)
+    BOOST_TEST_EQ(powm(uint128_t{2}, uint128_t{64}, m64), uint128_t{0});
+    const uint128_t high_bit {0U, static_cast<std::uint64_t>(1) << 63}; // 2^63
+    BOOST_TEST_EQ(powm(uint128_t{2}, uint128_t{63}, m64), high_bit);
+
+    const uint128_t m127 {static_cast<std::uint64_t>(1) << 63, 0U}; // 2^127
+    BOOST_TEST_EQ(powm(uint128_t{2}, uint128_t{127}, m127), uint128_t{0});
+    BOOST_TEST_EQ(powm(uint128_t{2}, uint128_t{126}, m127), m127 >> 1); // m127 >> 1 == 2^126
+}
+
+void test_uint128_powm_fermat_64bit() // Fermat's little theorem, a^(p-1) ~ 1 (mod p), for primes below 2^64.
+{
+    // Mersenne prime p = 2^61 - 1.
+    const uint128_t p {UINT64_C(2305843009213693951)};
+
+    for (std::uint64_t a {2}; a < 25; ++a)
+    {
+        BOOST_TEST_EQ(powm(uint128_t{a}, p - 1U, p), uint128_t{1}); // a < p, so a is not a multiple of p
+    }
+
+    // Common 32-bit primes that show up in competitive math problems.
+    const uint128_t mod_a {UINT64_C(1000000007)};
+    const uint128_t mod_b {UINT64_C(998244353)};
+
+    for (std::uint64_t a {2}; a < 10; ++a)
+    {
+        BOOST_TEST_EQ(powm(uint128_t{a}, mod_a - 1U, mod_a), uint128_t{1});
+        BOOST_TEST_EQ(powm(uint128_t{a}, mod_b - 1U, mod_b), uint128_t{1});
+    }
+}
+
+void test_uint128_powm_fermat_128bit() // Fermat's little theorem with a modulus that needs all but the top bit.
+{
+    // Mersenne prime p = 2^127 - 1 = 170141183460469231731687303715884105727.
+    const uint128_t p {UINT64_C(0x7FFFFFFFFFFFFFFF), UINT64_C(0xFFFFFFFFFFFFFFFF)}; // (high word, low word)
+
+    for (std::uint64_t a {2}; a < 6; ++a)
+    {
+        BOOST_TEST_EQ(powm(uint128_t{a}, p - 1U, p), uint128_t{1}); // a^(p-1) ~ 1 (mod p)
+    }
+
+    // a^p mod p == a mod p for prime p (Fermat).
+    const uint128_t big_base {UINT64_C(0x0123456789ABCDEF), UINT64_C(0xFEDCBA9876543210)}; // below p: high word < 0x7FFF...
+    BOOST_TEST_EQ(powm(big_base, p, p), big_base % p);
+}
+
+void test_uint128_powm_properties() // Algebraic identities of modular exponentiation, for a small and a >64-bit modulus.
+{
+    // (a^(b+c)) mod m == ((a^b)(a^c)) mod m
+    const uint128_t m {UINT64_C(1000000007)};
+    const uint128_t a {42};
+    const uint128_t b {17};
+    const uint128_t c {23};
+    BOOST_TEST_EQ(powm(a, b + c, m), (powm(a, b, m) * powm(a, c, m)) % m); // residues are < m < 2^30, so the product fits
+
+    // (a^e * b^e) mod m == ((a*b)^e) mod m
+    const uint128_t aa {7};
+    const uint128_t bb {11};
+    const uint128_t e {30};
+    BOOST_TEST_EQ(powm(aa * bb, e, m), (powm(aa, e, m) * powm(bb, e, m)) % m);
+
+    // The exponent-sum identity again, with a modulus above 2^64 (full-128-bit code path).
+    const uint128_t big_m {1U, UINT64_C(0xDEADBEEFCAFEBABE)}; // 2^64 + 0xDEADBEEFCAFEBABE
+    const uint128_t big_a {UINT64_C(0x00000000DEADBEEF), UINT64_C(0xBEEFCAFEFACEFEED)}; // larger than big_m (high word > 1)
+    const uint128_t b1 {13};
+    const uint128_t b2 {21};
+    BOOST_TEST_EQ(powm(big_a, b1 + b2, big_m), detail::mulmod_shift(powm(big_a, b1, big_m), powm(big_a, b2, big_m), big_m)); // NOTE(review): mulmod_shift presumably forms (x * y) mod m without 128-bit overflow -- confirm
+}
+
+void test_uint128_powm_extreme() // Moduli at the 64-bit and 128-bit boundaries, each with a hand-derivable result.
+{
+    // Modulus exactly 2^64 - 1: not a power of two, exercises the small-m path
+    // at its upper boundary. Since 2^64 ~ 1 (mod 2^64 - 1), 2^64 mod m == 1.
+    const uint128_t m_u64max {(std::numeric_limits<std::uint64_t>::max)()};
+    BOOST_TEST_EQ(powm(uint128_t{2}, uint128_t{64}, m_u64max), uint128_t{1});
+    BOOST_TEST_EQ(powm(uint128_t{3}, uint128_t{0}, m_u64max), uint128_t{1});
+
+    // 2^65 - 1 forces the general 128-bit path. 2^65 ~ 1 (mod 2^65 - 1).
+    const uint128_t m65 {1U, UINT64_C(0xFFFFFFFFFFFFFFFF)};
+    BOOST_TEST_EQ(powm(uint128_t{2}, uint128_t{65}, m65), uint128_t{1});
+    BOOST_TEST_EQ(powm(uint128_t{2}, uint128_t{130}, m65), uint128_t{1}); // 2^130 == (2^65)^2
+    BOOST_TEST_EQ(powm(uint128_t{2}, uint128_t{64}, m65), (uint128_t{1U, 0U})); // 2^64 < m65, so unreduced
+    BOOST_TEST_EQ(powm(uint128_t{4}, uint128_t{65}, m65), uint128_t{1}); // 4^65 == 2^130
+
+    // Full 128-bit modulus with high bit set so addmod must take the overflow
+    // branch when doubling values near m.
+    const uint128_t m_high {UINT64_C(0x8000000000000001), 0U}; // 2^127 + 2^64
+    BOOST_TEST_EQ(powm(uint128_t{1}, uint128_t{1234567}, m_high), uint128_t{1});
+    BOOST_TEST_EQ(powm(uint128_t{0}, uint128_t{1234567}, m_high), uint128_t{0});
+    BOOST_TEST_EQ(powm(m_high - 1U, uint128_t{2}, m_high), uint128_t{1}); // (m - 1)^2 ~ 1 (mod m); unlike bases 0 and 1, operands sit just below m
+
+    // Small base whose square still fits in 128 bits: hand-verifiable, yet still routed through the shift-and-add path.
+    BOOST_TEST_EQ(powm(uint128_t{10}, uint128_t{4}, m_high), uint128_t{10000});
+}
+
+void test_int128_powm() // Signed overload: negative bases, INT128_MIN, and the documented invalid-argument results.
+{
+    BOOST_TEST_EQ(powm(int128_t{2}, int128_t{10}, int128_t{1000}), int128_t{24});
+    BOOST_TEST_EQ(powm(int128_t{3}, int128_t{5}, int128_t{7}), int128_t{5});
+
+    // Negative bases reduce to non-negative residues.
+    BOOST_TEST_EQ(powm(int128_t{-3}, int128_t{2}, int128_t{5}), int128_t{4}); // (-3)^2 = 9 = 1*5 + 4
+    BOOST_TEST_EQ(powm(int128_t{-3}, int128_t{3}, int128_t{5}), int128_t{3}); // (-3)^3 = -27 = -6*5 + 3
+    BOOST_TEST_EQ(powm(int128_t{-1}, int128_t{100}, int128_t{1000}), int128_t{1}); // even exponent
+    BOOST_TEST_EQ(powm(int128_t{-1}, int128_t{101}, int128_t{1000}), int128_t{999}); // odd exponent: -1 ~ 999 (mod 1000)
+
+    // Negative base where the magnitude is a multiple of m reduces to 0.
+    BOOST_TEST_EQ(powm(int128_t{-10}, int128_t{5}, int128_t{5}), int128_t{0});
+
+    // INT128_MIN handling: abs() preserves the bit pattern, which maps to 2^127
+    // when reinterpreted as uint128_t. 2^127 mod 5 == 3 (since 2^4 ~ 1 (mod 5)
+    // gives 2^127 = 2^(4*31+3) ~ 8 ~ 3), so the negative residue is 5 - 3 = 2.
+    BOOST_TEST_EQ(powm((std::numeric_limits<int128_t>::min)(), int128_t{1}, int128_t{5}), int128_t{2});
+    // 2^127 mod 7 == 2 (since 2^3 ~ 1 (mod 7), 2^127 = 2^(3*42+1) ~ 2), so the
+    // residue of -2^127 mod 7 is 7 - 2 = 5.
+    BOOST_TEST_EQ(powm((std::numeric_limits<int128_t>::min)(), int128_t{1}, int128_t{7}), int128_t{5});
+
+    // Invalid arguments are documented to return 0.
+    BOOST_TEST_EQ(powm(int128_t{2}, int128_t{10}, int128_t{0}), int128_t{0}); // zero modulus
+    BOOST_TEST_EQ(powm(int128_t{2}, int128_t{10}, int128_t{-5}), int128_t{0}); // negative modulus
+    BOOST_TEST_EQ(powm(int128_t{2}, int128_t{-1}, int128_t{5}), int128_t{0}); // negative exponent
+}
+
+void test_constexpr_powm() // Compile-time checks: each powm call below must be usable in a constant expression.
+{
+    constexpr uint128_t r1 {powm(uint128_t{2}, uint128_t{10}, uint128_t{1000})}; // 2^10 = 1024
+    static_assert(r1 == uint128_t{24}, "powm constexpr small case");
+
+    constexpr uint128_t r2 {powm(uint128_t{3}, uint128_t{10}, uint128_t{128})}; // 3^10 = 59049 = 461*128 + 41
+    static_assert(r2 == uint128_t{41}, "powm constexpr power-of-two modulus");
+
+    constexpr int128_t r3 {powm(int128_t{-3}, int128_t{3}, int128_t{5})}; // (-3)^3 = -27 = -6*5 + 3
+    static_assert(r3 == int128_t{3}, "powm constexpr signed");
+}
+
+int main() // Runs each powm test group, then returns the result of boost::report_errors().
+{
+    test_uint128_powm_basic();
+    test_uint128_powm_power_of_two_modulus();
+    test_uint128_powm_fermat_64bit();
+    test_uint128_powm_fermat_128bit();
+    test_uint128_powm_properties();
+    test_uint128_powm_extreme();
+    test_int128_powm();
+    test_constexpr_powm(); // checks are static_asserts, so the call itself does no runtime work
+
+    return boost::report_errors();
+}
+
+#endif
diff --git a/test/test_rotl.cu b/test/test_rotl.cu
index ab3bd01b..6056552f 100644
--- a/test/test_rotl.cu
+++ b/test/test_rotl.cu
@@ -3,8 +3,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <vector>
 #include <random>
diff --git a/test/test_rotr.cu b/test/test_rotr.cu
index 09f2c68e..619bab6f 100644
--- a/test/test_rotr.cu
+++ b/test/test_rotr.cu
@@ -3,8 +3,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <vector>
 #include <random>
diff --git a/test/test_sign_compare.cpp b/test/test_sign_compare.cpp
index aae0c686..b6314a12 100644
--- a/test/test_sign_compare.cpp
+++ b/test/test_sign_compare.cpp
@@ -2,8 +2,6 @@
 // Distributed under the Boost Software License, Version 1.0.
 // https://www.boost.org/LICENSE_1_0.txt
 
-#define BOOST_INT128_ALLOW_SIGN_COMPARE
-
 #include <boost/int128/int128.hpp>
 #include <boost/core/lightweight_test.hpp>
 
diff --git a/test/test_signed_add.cu b/test/test_signed_add.cu
index 57369852..c691671f 100644
--- a/test/test_signed_add.cu
+++ b/test/test_signed_add.cu
@@ -4,8 +4,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <iomanip>
 #include <vector>
diff --git a/test/test_signed_add_sat.cu b/test/test_signed_add_sat.cu
index 45b45116..aef72c4b 100644
--- a/test/test_signed_add_sat.cu
+++ b/test/test_signed_add_sat.cu
@@ -3,8 +3,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <vector>
 #include <random>
diff --git a/test/test_signed_and.cu b/test/test_signed_and.cu
index 95fb3fec..36622e5a 100644
--- a/test/test_signed_and.cu
+++ b/test/test_signed_and.cu
@@ -3,8 +3,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <iomanip>
 #include <vector>
diff --git a/test/test_signed_cstdlib_div.cu b/test/test_signed_cstdlib_div.cu
index a8445ef5..0c30a435 100644
--- a/test/test_signed_cstdlib_div.cu
+++ b/test/test_signed_cstdlib_div.cu
@@ -3,8 +3,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <vector>
 #include <random>
diff --git a/test/test_signed_div.cu b/test/test_signed_div.cu
index eb10a192..04957feb 100644
--- a/test/test_signed_div.cu
+++ b/test/test_signed_div.cu
@@ -4,8 +4,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <iomanip>
 #include <vector>
diff --git a/test/test_signed_div_sat.cu b/test/test_signed_div_sat.cu
index 804d4dc9..27ab307f 100644
--- a/test/test_signed_div_sat.cu
+++ b/test/test_signed_div_sat.cu
@@ -3,8 +3,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <vector>
 #include <random>
diff --git a/test/test_signed_eq.cu b/test/test_signed_eq.cu
index 4f7156c1..7713a3f4 100644
--- a/test/test_signed_eq.cu
+++ b/test/test_signed_eq.cu
@@ -4,8 +4,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <iomanip>
 #include <vector>
diff --git a/test/test_signed_from_chars.cu b/test/test_signed_from_chars.cu
index 66d67eb0..d868fb4c 100644
--- a/test/test_signed_from_chars.cu
+++ b/test/test_signed_from_chars.cu
@@ -3,8 +3,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <iomanip>
 #include <vector>
diff --git a/test/test_signed_from_chars_bases.cu b/test/test_signed_from_chars_bases.cu
index 69b175fb..929d29b7 100644
--- a/test/test_signed_from_chars_bases.cu
+++ b/test/test_signed_from_chars_bases.cu
@@ -3,8 +3,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <iomanip>
 #include <vector>
diff --git a/test/test_signed_gcd.cu b/test/test_signed_gcd.cu
index 7d5c8434..358dfa5a 100644
--- a/test/test_signed_gcd.cu
+++ b/test/test_signed_gcd.cu
@@ -3,8 +3,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <vector>
 #include <random>
diff --git a/test/test_signed_ge.cu b/test/test_signed_ge.cu
index efe510ea..76e154e4 100644
--- a/test/test_signed_ge.cu
+++ b/test/test_signed_ge.cu
@@ -4,8 +4,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <iomanip>
 #include <vector>
diff --git a/test/test_signed_gt.cu b/test/test_signed_gt.cu
index 820b0797..e4a136e0 100644
--- a/test/test_signed_gt.cu
+++ b/test/test_signed_gt.cu
@@ -4,8 +4,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <iomanip>
 #include <vector>
diff --git a/test/test_signed_lcm.cu b/test/test_signed_lcm.cu
index bafe559d..cb6ab9fa 100644
--- a/test/test_signed_lcm.cu
+++ b/test/test_signed_lcm.cu
@@ -3,8 +3,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <vector>
 #include <random>
diff --git a/test/test_signed_le.cu b/test/test_signed_le.cu
index d2d67ce6..96645085 100644
--- a/test/test_signed_le.cu
+++ b/test/test_signed_le.cu
@@ -4,8 +4,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <iomanip>
 #include <vector>
diff --git a/test/test_signed_left_shift.cu b/test/test_signed_left_shift.cu
index 89cf0a67..279d327d 100644
--- a/test/test_signed_left_shift.cu
+++ b/test/test_signed_left_shift.cu
@@ -3,8 +3,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <iomanip>
 #include <vector>
diff --git a/test/test_signed_literals.cu b/test/test_signed_literals.cu
index 8723b06c..1e7ff273 100644
--- a/test/test_signed_literals.cu
+++ b/test/test_signed_literals.cu
@@ -3,8 +3,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <iomanip>
 #include <boost/int128.hpp>
diff --git a/test/test_signed_lt.cu b/test/test_signed_lt.cu
index c4094c4d..6cd4ef89 100644
--- a/test/test_signed_lt.cu
+++ b/test/test_signed_lt.cu
@@ -4,8 +4,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <iomanip>
 #include <vector>
diff --git a/test/test_signed_midpoint.cu b/test/test_signed_midpoint.cu
index 5ee28d71..e974745c 100644
--- a/test/test_signed_midpoint.cu
+++ b/test/test_signed_midpoint.cu
@@ -3,8 +3,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <vector>
 #include <random>
diff --git a/test/test_signed_mod.cu b/test/test_signed_mod.cu
index cbda3580..325c94ea 100644
--- a/test/test_signed_mod.cu
+++ b/test/test_signed_mod.cu
@@ -4,8 +4,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <iomanip>
 #include <vector>
diff --git a/test/test_signed_mul.cu b/test/test_signed_mul.cu
index 1c9a12fd..4ff6828a 100644
--- a/test/test_signed_mul.cu
+++ b/test/test_signed_mul.cu
@@ -4,8 +4,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <iomanip>
 #include <vector>
diff --git a/test/test_signed_mul_sat.cu b/test/test_signed_mul_sat.cu
index 569e583c..ce1e4f1b 100644
--- a/test/test_signed_mul_sat.cu
+++ b/test/test_signed_mul_sat.cu
@@ -3,8 +3,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <vector>
 #include <random>
diff --git a/test/test_signed_ne.cu b/test/test_signed_ne.cu
index 6c34a111..6e256401 100644
--- a/test/test_signed_ne.cu
+++ b/test/test_signed_ne.cu
@@ -4,8 +4,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <iomanip>
 #include <vector>
diff --git a/test/test_signed_not.cu b/test/test_signed_not.cu
index 5dc285c6..7bfc6a88 100644
--- a/test/test_signed_not.cu
+++ b/test/test_signed_not.cu
@@ -3,8 +3,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <iomanip>
 #include <vector>
diff --git a/test/test_signed_or.cu b/test/test_signed_or.cu
index 7bcf7a6e..cc73d6a8 100644
--- a/test/test_signed_or.cu
+++ b/test/test_signed_or.cu
@@ -3,8 +3,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <iomanip>
 #include <vector>
diff --git a/test/test_signed_right_shift.cu b/test/test_signed_right_shift.cu
index c606ddec..73f4aedd 100644
--- a/test/test_signed_right_shift.cu
+++ b/test/test_signed_right_shift.cu
@@ -3,8 +3,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <iomanip>
 #include <vector>
@@ -44,7 +42,7 @@ int main(void)
     cuda_managed_ptr<unsigned> shift_vector(numElements);
     cuda_managed_ptr<test_type> output_vector(numElements);
 
-    // Include negative values — right shift of negative signed integers is
+    // Include negative values -- right shift of negative signed integers is
     // implementation-defined (arithmetic shift) but not UB
     boost::random::uniform_int_distribution<test_type> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
     std::uniform_int_distribution<unsigned> shift_dist {0U, 127U};
diff --git a/test/test_signed_sub.cu b/test/test_signed_sub.cu
index dd48db3a..5fc8339a 100644
--- a/test/test_signed_sub.cu
+++ b/test/test_signed_sub.cu
@@ -4,8 +4,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <iomanip>
 #include <vector>
diff --git a/test/test_signed_sub_sat.cu b/test/test_signed_sub_sat.cu
index 7dd40f30..aac56f0c 100644
--- a/test/test_signed_sub_sat.cu
+++ b/test/test_signed_sub_sat.cu
@@ -3,8 +3,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <vector>
 #include <random>
diff --git a/test/test_signed_to_chars.cu b/test/test_signed_to_chars.cu
index 20a6a944..064dcd50 100644
--- a/test/test_signed_to_chars.cu
+++ b/test/test_signed_to_chars.cu
@@ -3,8 +3,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <iomanip>
 #include <vector>
diff --git a/test/test_signed_to_chars_bases.cu b/test/test_signed_to_chars_bases.cu
index 15733649..aa6faaac 100644
--- a/test/test_signed_to_chars_bases.cu
+++ b/test/test_signed_to_chars_bases.cu
@@ -3,8 +3,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <iomanip>
 #include <vector>
diff --git a/test/test_signed_to_unsigned_conversion.cu b/test/test_signed_to_unsigned_conversion.cu
index 5073f0f6..35cd558f 100644
--- a/test/test_signed_to_unsigned_conversion.cu
+++ b/test/test_signed_to_unsigned_conversion.cu
@@ -3,8 +3,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <iomanip>
 #include <vector>
diff --git a/test/test_signed_xor.cu b/test/test_signed_xor.cu
index ff11af14..bcd70dce 100644
--- a/test/test_signed_xor.cu
+++ b/test/test_signed_xor.cu
@@ -3,8 +3,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <iomanip>
 #include <vector>
diff --git a/test/test_stream.cpp b/test/test_stream.cpp
index b6ebea8b..532f868d 100644
--- a/test/test_stream.cpp
+++ b/test/test_stream.cpp
@@ -2,10 +2,6 @@
 // Distributed under the Boost Software License, Version 1.0.
 // https://www.boost.org/LICENSE_1_0.txt
 
-#ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION
-#  define BOOST_INT128_ALLOW_SIGN_CONVERSION
-#endif
-
 #include <boost/int128.hpp>
 #include <boost/core/lightweight_test.hpp>
 #include <iostream>
diff --git a/test/test_u128.cpp b/test/test_u128.cpp
index 81a9b5c7..925f2c73 100644
--- a/test/test_u128.cpp
+++ b/test/test_u128.cpp
@@ -2,10 +2,6 @@
 // Distributed under the Boost Software License, Version 1.0.
 // https://www.boost.org/LICENSE_1_0.txt
 
-#ifndef BOOST_INT128_ALLOW_SIGN_CONVERSION
-#  define BOOST_INT128_ALLOW_SIGN_CONVERSION
-#endif
-
 #include <boost/int128.hpp>
 #include <boost/core/lightweight_test.hpp>
 #include <boost/mp11.hpp>
@@ -71,7 +67,16 @@ T get_root_max()
 template <typename T>
 T get_root_min()
 {
-    return static_cast<T>(std::sqrt(std::numeric_limits<T>::min()));
+    // numeric_limits<T>::min() is negative for signed T, so sqrt() of it would be NaN and
+    // casting that NaN to T is UB; return the negated positive root instead.
+    BOOST_INT128_IF_CONSTEXPR (std::is_signed<T>::value)
+    {
+        return static_cast<T>(-get_root_max<T>()); // signed T: negated result of get_root_max<T>()
+    }
+    else
+    {
+        return static_cast<T>(0); // otherwise use 0, the minimum of an unsigned T
+    }
 }
 
 #include <boost/random/uniform_int_distribution.hpp>
@@ -1256,7 +1261,6 @@ void test_spot_div(IntType value, IntType value2)
     static_assert(sizeof(decltype(value2 / emulated_value)) ==
                   sizeof(decltype(value2 / builtin_value)), "Mismatch Return Types");
 
-
     // The tested values are pulled out unlike the regular test
     // so that it's easier to read the values with GDB
 
diff --git a/test/test_u128_no_sign_conv.cpp b/test/test_u128_no_sign_conv.cpp
index 55d08a31..729161ac 100644
--- a/test/test_u128_no_sign_conv.cpp
+++ b/test/test_u128_no_sign_conv.cpp
@@ -66,7 +66,16 @@ T get_root_max()
 template <typename T>
 T get_root_min()
 {
-    return static_cast<T>(std::sqrt(std::numeric_limits<T>::min()));
+    // numeric_limits<T>::min() is negative for signed T, so sqrt() of it would be NaN and
+    // casting that NaN to T is UB; return the negated positive root instead.
+    BOOST_INT128_IF_CONSTEXPR (std::is_signed<T>::value)
+    {
+        return static_cast<T>(-get_root_max<T>()); // signed T: negated result of get_root_max<T>()
+    }
+    else
+    {
+        return static_cast<T>(0); // otherwise use 0, the minimum of an unsigned T
+    }
 }
 
 #include <boost/random/uniform_int_distribution.hpp>
diff --git a/test/test_unsigned_add.cu b/test/test_unsigned_add.cu
index 59368281..443295be 100644
--- a/test/test_unsigned_add.cu
+++ b/test/test_unsigned_add.cu
@@ -4,8 +4,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <iomanip>
 #include <vector>
diff --git a/test/test_unsigned_add_sat.cu b/test/test_unsigned_add_sat.cu
index 3cfc0317..57b5d43e 100644
--- a/test/test_unsigned_add_sat.cu
+++ b/test/test_unsigned_add_sat.cu
@@ -3,8 +3,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <vector>
 #include <random>
diff --git a/test/test_unsigned_and.cu b/test/test_unsigned_and.cu
index 7ced87e1..8e9f1bbd 100644
--- a/test/test_unsigned_and.cu
+++ b/test/test_unsigned_and.cu
@@ -3,8 +3,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <iomanip>
 #include <vector>
diff --git a/test/test_unsigned_cstdlib_div.cu b/test/test_unsigned_cstdlib_div.cu
index 62ccae81..f957e63e 100644
--- a/test/test_unsigned_cstdlib_div.cu
+++ b/test/test_unsigned_cstdlib_div.cu
@@ -3,8 +3,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <vector>
 #include <random>
diff --git a/test/test_unsigned_div.cu b/test/test_unsigned_div.cu
index fb3070a2..e1f47941 100644
--- a/test/test_unsigned_div.cu
+++ b/test/test_unsigned_div.cu
@@ -4,8 +4,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <iomanip>
 #include <vector>
diff --git a/test/test_unsigned_div_sat.cu b/test/test_unsigned_div_sat.cu
index 9f76b869..c6a69fb9 100644
--- a/test/test_unsigned_div_sat.cu
+++ b/test/test_unsigned_div_sat.cu
@@ -3,8 +3,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <vector>
 #include <random>
diff --git a/test/test_unsigned_eq.cu b/test/test_unsigned_eq.cu
index c2c1d415..b3a7dca9 100644
--- a/test/test_unsigned_eq.cu
+++ b/test/test_unsigned_eq.cu
@@ -4,8 +4,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <iomanip>
 #include <vector>
diff --git a/test/test_unsigned_from_chars.cu b/test/test_unsigned_from_chars.cu
index 727dcfa9..ba5de8f1 100644
--- a/test/test_unsigned_from_chars.cu
+++ b/test/test_unsigned_from_chars.cu
@@ -3,8 +3,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <iomanip>
 #include <vector>
diff --git a/test/test_unsigned_from_chars_bases.cu b/test/test_unsigned_from_chars_bases.cu
index 514e4cdc..6e45842e 100644
--- a/test/test_unsigned_from_chars_bases.cu
+++ b/test/test_unsigned_from_chars_bases.cu
@@ -3,8 +3,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <iomanip>
 #include <vector>
diff --git a/test/test_unsigned_gcd.cu b/test/test_unsigned_gcd.cu
index f23abe48..6b44cf60 100644
--- a/test/test_unsigned_gcd.cu
+++ b/test/test_unsigned_gcd.cu
@@ -3,8 +3,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <vector>
 #include <random>
diff --git a/test/test_unsigned_ge.cu b/test/test_unsigned_ge.cu
index 4803e307..ade4913c 100644
--- a/test/test_unsigned_ge.cu
+++ b/test/test_unsigned_ge.cu
@@ -4,8 +4,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <iomanip>
 #include <vector>
diff --git a/test/test_unsigned_gt.cu b/test/test_unsigned_gt.cu
index 0dd51292..d4234d91 100644
--- a/test/test_unsigned_gt.cu
+++ b/test/test_unsigned_gt.cu
@@ -4,8 +4,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <iomanip>
 #include <vector>
diff --git a/test/test_unsigned_lcm.cu b/test/test_unsigned_lcm.cu
index d586d58b..83fe2171 100644
--- a/test/test_unsigned_lcm.cu
+++ b/test/test_unsigned_lcm.cu
@@ -3,8 +3,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <vector>
 #include <random>
diff --git a/test/test_unsigned_le.cu b/test/test_unsigned_le.cu
index 4ef2d2b6..9d034350 100644
--- a/test/test_unsigned_le.cu
+++ b/test/test_unsigned_le.cu
@@ -4,8 +4,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <iomanip>
 #include <vector>
diff --git a/test/test_unsigned_left_shift.cu b/test/test_unsigned_left_shift.cu
index 053c054b..7778d935 100644
--- a/test/test_unsigned_left_shift.cu
+++ b/test/test_unsigned_left_shift.cu
@@ -3,8 +3,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <iomanip>
 #include <vector>
diff --git a/test/test_unsigned_literals.cu b/test/test_unsigned_literals.cu
index d3fad8c3..bbac3b21 100644
--- a/test/test_unsigned_literals.cu
+++ b/test/test_unsigned_literals.cu
@@ -3,8 +3,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <iomanip>
 #include <boost/int128.hpp>
diff --git a/test/test_unsigned_lt.cu b/test/test_unsigned_lt.cu
index 6394e773..07b7c1df 100644
--- a/test/test_unsigned_lt.cu
+++ b/test/test_unsigned_lt.cu
@@ -4,8 +4,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <iomanip>
 #include <vector>
diff --git a/test/test_unsigned_midpoint.cu b/test/test_unsigned_midpoint.cu
index e695b5ff..505af9e1 100644
--- a/test/test_unsigned_midpoint.cu
+++ b/test/test_unsigned_midpoint.cu
@@ -3,8 +3,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <vector>
 #include <random>
diff --git a/test/test_unsigned_mod.cu b/test/test_unsigned_mod.cu
index 56e31095..7dc0a627 100644
--- a/test/test_unsigned_mod.cu
+++ b/test/test_unsigned_mod.cu
@@ -4,8 +4,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <iomanip>
 #include <vector>
diff --git a/test/test_unsigned_mul.cu b/test/test_unsigned_mul.cu
index fb32b655..ef7d89dd 100644
--- a/test/test_unsigned_mul.cu
+++ b/test/test_unsigned_mul.cu
@@ -4,8 +4,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <iomanip>
 #include <vector>
diff --git a/test/test_unsigned_mul_sat.cu b/test/test_unsigned_mul_sat.cu
index 228ef806..81a125fe 100644
--- a/test/test_unsigned_mul_sat.cu
+++ b/test/test_unsigned_mul_sat.cu
@@ -3,8 +3,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <vector>
 #include <random>
diff --git a/test/test_unsigned_ne.cu b/test/test_unsigned_ne.cu
index 2356c75c..c9a02b41 100644
--- a/test/test_unsigned_ne.cu
+++ b/test/test_unsigned_ne.cu
@@ -4,8 +4,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <iomanip>
 #include <vector>
diff --git a/test/test_unsigned_not.cu b/test/test_unsigned_not.cu
index 809baf3c..9bfb08b7 100644
--- a/test/test_unsigned_not.cu
+++ b/test/test_unsigned_not.cu
@@ -3,8 +3,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <iomanip>
 #include <vector>
diff --git a/test/test_unsigned_or.cu b/test/test_unsigned_or.cu
index 45ebf30d..9399341f 100644
--- a/test/test_unsigned_or.cu
+++ b/test/test_unsigned_or.cu
@@ -3,8 +3,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <iomanip>
 #include <vector>
diff --git a/test/test_unsigned_right_shift.cu b/test/test_unsigned_right_shift.cu
index f81792f2..2dd59fba 100644
--- a/test/test_unsigned_right_shift.cu
+++ b/test/test_unsigned_right_shift.cu
@@ -3,8 +3,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <iomanip>
 #include <vector>
diff --git a/test/test_unsigned_sub.cu b/test/test_unsigned_sub.cu
index b4fc0f87..be22b2fc 100644
--- a/test/test_unsigned_sub.cu
+++ b/test/test_unsigned_sub.cu
@@ -4,8 +4,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <iomanip>
 #include <vector>
diff --git a/test/test_unsigned_sub_sat.cu b/test/test_unsigned_sub_sat.cu
index 73bf36d7..71856613 100644
--- a/test/test_unsigned_sub_sat.cu
+++ b/test/test_unsigned_sub_sat.cu
@@ -3,8 +3,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <vector>
 #include <random>
diff --git a/test/test_unsigned_to_chars.cu b/test/test_unsigned_to_chars.cu
index 25d4252a..76dd830f 100644
--- a/test/test_unsigned_to_chars.cu
+++ b/test/test_unsigned_to_chars.cu
@@ -3,8 +3,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <iomanip>
 #include <vector>
diff --git a/test/test_unsigned_to_chars_bases.cu b/test/test_unsigned_to_chars_bases.cu
index 2a4545a2..b67c2bb5 100644
--- a/test/test_unsigned_to_chars_bases.cu
+++ b/test/test_unsigned_to_chars_bases.cu
@@ -3,8 +3,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <iomanip>
 #include <vector>
diff --git a/test/test_unsigned_to_signed_conversion.cu b/test/test_unsigned_to_signed_conversion.cu
index 016e7a5d..67195383 100644
--- a/test/test_unsigned_to_signed_conversion.cu
+++ b/test/test_unsigned_to_signed_conversion.cu
@@ -3,8 +3,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <iomanip>
 #include <vector>
diff --git a/test/test_unsigned_xor.cu b/test/test_unsigned_xor.cu
index 8201a432..1416fc87 100644
--- a/test/test_unsigned_xor.cu
+++ b/test/test_unsigned_xor.cu
@@ -3,8 +3,6 @@
 //  Boost Software License, Version 1.0. (See accompanying file
 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 
-#define BOOST_INT128_ALLOW_SIGN_CONVERSION
-
 #include <iostream>
 #include <iomanip>
 #include <vector>
diff --git a/test/test_x64_msvc_div.cpp b/test/test_x64_msvc_div.cpp
index 1d3f1e73..40cbb70c 100644
--- a/test/test_x64_msvc_div.cpp
+++ b/test/test_x64_msvc_div.cpp
@@ -2,93 +2,118 @@
 // Distributed under the Boost Software License, Version 1.0.
 // https://www.boost.org/LICENSE_1_0.txt
 
+// On MSVC x64 the division building blocks use the hardware intrinsics _udiv128 (via udiv_2by1)
+// and _umul128 (via umul, inside div3by2). This validates that intrinsic path against
+// intrinsic-free references on the same inputs: the portable Hacker's Delight divlu, and the
+// 32-bit-limb Knuth Algorithm D. Those references are in turn checked against a native 128-bit
+// integer on the platforms that have one (see test_div_primitives.cpp), so agreement here pins
+// down the MSVC intrinsic wiring specifically.
+
 #include <boost/int128.hpp>
+#include <boost/int128/detail/common_div.hpp>
 #include <boost/core/lightweight_test.hpp>
 #include <random>
 
 #if defined(_M_AMD64) && !defined(__GNUC__) && !defined(__clang__) && _MSC_VER >= 1920
 
+using boost::int128::uint128_t;
+
 static std::mt19937_64 rng{42};
-static constexpr std::size_t N{1024U};
+static constexpr std::size_t N{4096U};
 static std::uniform_int_distribution<std::uint64_t> dist{UINT64_C(0), UINT64_MAX};
-static std::uniform_int_distribution<std::uint32_t> dist32{UINT32_C(0), UINT32_MAX};
 
-void test_two_words()
+// Independent reference for 128/128 (divisor >= 2^64): the 32-bit-limb Knuth Algorithm D, which
+// uses no 64-bit-divide or multiply intrinsics. Divides {uh, ul} by {vh, vl} into quot and rem.
+static void knuth_oracle(const std::uint64_t uh, const std::uint64_t ul,
+                         const std::uint64_t vh, const std::uint64_t vl,
+                         uint128_t& quot, uint128_t& rem)
 {
-    for (std::size_t i{}; i < N; ++i)
-    {
-        boost::int128::uint128_t lhs{dist(rng), dist(rng)};
-        boost::int128::uint128_t rhs{dist(rng), dist(rng)};
+    const uint128_t u_val{uh, ul}; // dividend, built from its (high, low) words
+    const uint128_t v_val{vh, vl}; // divisor, built from its (high, low) words
 
-        // Guarantee lhs is greater than rhs
-        if (lhs < rhs)
-        {
-            std::swap(lhs, rhs);
-        }
+    if (u_val < v_val) // dividend below divisor: quotient is 0 and the remainder is the dividend
+    {
+        quot = uint128_t{UINT64_C(0)};
+        rem = u_val;
+        return;
+    }
 
+    std::uint32_t u[4]{}; // dividend limbs; read back below as the remainder
+    std::uint32_t v[4]{}; // divisor limbs
+    std::uint32_t q[4]{}; // quotient limbs
 
-        boost::int128::uint128_t remainder{};
-        const auto quotient{boost::int128::detail::impl::div_mod_msvc<true>(lhs, rhs, remainder)};
+    const auto m{boost::int128::detail::impl::to_words(u_val, u)}; // presumably the limb count used by u -- confirm in common_div.hpp
+    const auto n{boost::int128::detail::impl::to_words(v_val, v)}; // presumably the limb count used by v -- confirm in common_div.hpp
 
-        boost::int128::uint128_t knuth_remainder{};
-        const auto knuth_quotient{boost::int128::detail::knuth_div(lhs, rhs, knuth_remainder)};
+    boost::int128::detail::impl::knuth_divide<true>(u, m, v, n, q); // quotient is taken from q and the remainder from u below
 
-        BOOST_TEST_EQ(remainder, knuth_remainder);
-        BOOST_TEST_EQ(quotient, knuth_quotient);
-    }
+    quot = boost::int128::detail::impl::from_words<uint128_t>(q);
+    rem = boost::int128::detail::impl::from_words<uint128_t>(u);
 }
 
-void test_four_by_three()
+// _udiv128 (udiv_2by1) versus the portable divlu, for 128/64 -> 64.
+void test_udiv_2by1()
 {
     for (std::size_t i{}; i < N; ++i)
     {
-        boost::int128::uint128_t lhs{dist(rng), dist(rng)};
-        boost::int128::uint128_t rhs{dist32(rng), dist(rng)};
+        const auto u0{dist(rng)}; // presumably the low dividend word (u1 being the high word) -- confirm against udiv_2by1
+        auto d{dist(rng)};
+        if (d == 0) // never divide by zero
+        {
+            d = 1;
+        }
+        const auto u1{dist(rng) % d}; // precondition u1 < d (keeps the quotient within 64 bits)
 
-        boost::int128::uint128_t remainder{};
-        const auto quotient{boost::int128::detail::impl::div_mod_msvc<true>(lhs, rhs, remainder)};
+        std::uint64_t r_intrin{};
+        const auto q_intrin{boost::int128::detail::udiv_2by1(u1, u0, d, r_intrin)}; // path under test
 
-        boost::int128::uint128_t knuth_remainder{};
-        const auto knuth_quotient{boost::int128::detail::knuth_div(lhs, rhs, knuth_remainder)};
+        std::uint64_t r_soft{};
+        const auto q_soft{boost::int128::detail::divlu(u1, u0, d, r_soft)}; // reference on the same inputs
 
-        BOOST_TEST_EQ(remainder, knuth_remainder);
-        BOOST_TEST_EQ(quotient, knuth_quotient);
+        BOOST_TEST_EQ(q_intrin, q_soft);
+        BOOST_TEST_EQ(r_intrin, r_soft);
     }
+}
 
-    // The biggest gap we can have between 2 word unsigned values
-    {
-        constexpr auto lhs{(std::numeric_limits<boost::int128::uint128_t>::max)()};
-        constexpr boost::int128::uint128_t rhs{1,0};
+static void check_div3by2(const std::uint64_t uh, const std::uint64_t ul, // dividend as (high, low) words
+                          const std::uint64_t vh, const std::uint64_t vl) // divisor as (high, low) words
+{
+    std::uint64_t rh{};
+    std::uint64_t rl{};
+    const auto q{boost::int128::detail::div3by2<true>(uh, ul, vh, vl, rh, rl)}; // path under test; remainder words come back in rh/rl
 
-        boost::int128::uint128_t remainder{};
-        const auto quotient{boost::int128::detail::impl::div_mod_msvc<true>(lhs, rhs, remainder)};
+    uint128_t expected_q{};
+    uint128_t expected_r{};
+    knuth_oracle(uh, ul, vh, vl, expected_q, expected_r); // reference quotient and remainder for the same inputs
 
-        boost::int128::uint128_t knuth_remainder{};
-        const auto knuth_quotient{boost::int128::detail::knuth_div(lhs, rhs, knuth_remainder)};
+    BOOST_TEST_EQ(expected_q.high, UINT64_C(0)); // the reference quotient must fit in the low 64 bits
+    BOOST_TEST_EQ(q, expected_q.low);
+    BOOST_TEST_EQ(uint128_t(rh, rl), expected_r);
+}
 
-        BOOST_TEST_EQ(remainder, knuth_remainder);
-        BOOST_TEST_EQ(quotient, knuth_quotient);
-    }
-    // And again for signed
+// _udiv128 + _umul128 (div3by2) versus the 32-bit-limb Knuth reference, for 128/128 -> 64.
+void test_div3by2()
+{
+    for (std::size_t i{}; i < N; ++i)
     {
-        constexpr auto lhs{static_cast<boost::int128::uint128_t>((std::numeric_limits<boost::int128::int128_t>::max)())};
-        constexpr boost::int128::uint128_t rhs{1,0};
-
-        boost::int128::uint128_t remainder{};
-        const auto quotient{boost::int128::detail::impl::div_mod_msvc<true>(lhs, rhs, remainder)};
-
-        boost::int128::uint128_t knuth_remainder{};
-        const auto knuth_quotient{boost::int128::detail::knuth_div(lhs, rhs, knuth_remainder)};
-
-        BOOST_TEST_EQ(remainder, knuth_remainder);
-        BOOST_TEST_EQ(quotient, knuth_quotient);
+        auto vh{dist(rng)};
+        if (vh == 0)
+        {
+            vh = 1; // div3by2 requires divisor >= 2^64
+        }
+        check_div3by2(dist(rng), dist(rng), vh, dist(rng)); // random dividend over a divisor with a nonzero high word
     }
+
+    // Fixed cases dividing by exactly 2^64 ({1, 0}): the uint128_t maximum as dividend, and the
+    // int128_t maximum (2^127 - 1), both carried over from the previous version of this test.
+    check_div3by2(UINT64_MAX, UINT64_MAX, UINT64_C(1), UINT64_C(0));
+    check_div3by2(UINT64_C(0x7FFFFFFFFFFFFFFF), UINT64_MAX, UINT64_C(1), UINT64_C(0));
 }
 
 int main()
 {
-    test_two_words();
-    test_four_by_three();
+    test_udiv_2by1(); // udiv_2by1 (_udiv128) against the portable divlu
+    test_div3by2(); // div3by2 (_udiv128 + _umul128) against the 32-bit-limb Knuth reference
 
     return boost::report_errors();
 }
@@ -100,4 +125,4 @@ int main()
     return 0;
 }
 
-#endif 
+#endif