From cfd184704f24a8960a9756ccf721fdf3e425fc8e Mon Sep 17 00:00:00 2001 From: martinPavesio Date: Thu, 28 May 2026 20:11:39 -0300 Subject: [PATCH 1/3] Apply AS Platform build patches for Python 2.7 / ActiveState compatibility - setupinfo.py: route to AS-installed libxml2/libxslt via pkg-config - setupinfo.py: replace f-string in get_dotfile_version() with str.format() so setup.py can be invoked under Python 2.7 - setupinfo.py: use 'z' instead of 'zlib' in Windows libs.extend() for correct static zlib library name - versioninfo.py: use __file__ instead of sys.argv[0] for base directory detection so it works correctly when invoked via the AS wheel builder --- setupinfo.py | 66 +++++++++++++++++++++++++++++++++++++++++++++----- versioninfo.py | 2 +- 2 files changed, 61 insertions(+), 7 deletions(-) diff --git a/setupinfo.py b/setupinfo.py index 226715f00..d9722b3be 100644 --- a/setupinfo.py +++ b/setupinfo.py @@ -2,6 +2,7 @@ import io import os import os.path +import re import subprocess from setuptools.command.build_ext import build_ext as _build_ext @@ -15,6 +16,23 @@ except ImportError: CYTHON_INSTALLED = False +# ActiveState - Gather needed Environment Vars +def get_as_deps_dir(): + as_deps_dir = os.getenv('AS_DEPENDENCIES_DIR') + if sys.platform == 'win32': + return as_deps_dir + else: + return os.path.join(as_deps_dir, "usr") + + +def get_as_lib_dir(): + return os.path.join(get_as_deps_dir(), "lib") + + +def get_as_inc_dir(): + return os.path.join(get_as_deps_dir(), "include") + + EXT_MODULES = ["lxml.etree", "lxml.objectify"] COMPILED_MODULES = [ "lxml.builder", @@ -115,13 +133,22 @@ def ext_modules(static_include_dirs, static_library_dirs, if not check_build_dependencies(): raise RuntimeError("Dependency missing") + # ActiveState - This should ensure that the Headers and Libraries are present. 
+ LIBXML2_INCLUDE_PATH = os.path.join(get_as_inc_dir(), "libxml2") + LIBXSLT_INCLUDE_PATH = get_as_inc_dir() + AS_LIBRARY_PATH = get_as_lib_dir() + base_dir = get_base_dir() _include_dirs = _prefer_reldirs( base_dir, include_dirs(static_include_dirs) + [ SOURCE_PATH, INCLUDE_PACKAGE_PATH, + LIBXSLT_INCLUDE_PATH, + LIBXML2_INCLUDE_PATH, ]) - _library_dirs = _prefer_reldirs(base_dir, library_dirs(static_library_dirs)) + _library_dirs = _prefer_reldirs( + base_dir, library_dirs(static_library_dirs) + ) + [AS_LIBRARY_PATH] _cflags = cflags(static_cflags) _ldflags = ['-isysroot', get_xcode_isysroot()] if sys.platform == 'darwin' else None _define_macros = define_macros() @@ -304,7 +331,7 @@ def libraries(): libs = ['libxslt', 'libexslt', 'libxml2', 'iconv'] if OPTION_STATIC: libs = ['%s_a' % lib for lib in libs] - libs.extend(['zlib', 'WS2_32']) + libs.extend(['z', 'WS2_32']) elif OPTION_STATIC: libs = standard_libs else: @@ -425,7 +452,33 @@ def get_library_version(prog, libname=None): XML2_CONFIG = None XSLT_CONFIG = None + +def get_win_library_versions(): + # ActiveState - On Windows, the original LXML Windows build downloads the latest + # versions of libxml2 and libxslt and builds them locally. This is bad for + # reproducibility. We disable this mechanism and use our libraries, but without + # reliable *_config.sh scripts, we get the version information from + # LIBXML_DOTTED_VERSION in xmlversion.h and LIBXSLT_DOTTED_VERSION from xsltconfig.h. 
+ + xml2_file = os.path.join(get_as_inc_dir(), "libxml2", "libxml", "xmlversion.h") + xslt_file = os.path.join(get_as_inc_dir(), "libxslt", "xsltconfig.h") + xml2_version = get_dotfile_version("LIBXML", xml2_file) + xslt_version = get_dotfile_version("LIBXSLT", xslt_file) + return xml2_version, xslt_version + + +def get_dotfile_version(library, config_path): + with open(config_path, "r") as fh: + for line in fh: + m = re.search("#define {0}_DOTTED_VERSION \"([0-9.]*)\"".format(library), line) + if m is not None: + return m.group(1) + + def get_library_versions(): + if sys.platform.startswith('win'): + return get_win_library_versions() + global XML2_CONFIG, XSLT_CONFIG # Pre-built libraries @@ -560,7 +613,7 @@ def print_deprecated_option(name, new_name): print("WARN: Option '%s' is deprecated. Use '%s' instead." % (name, new_name)) -staticbuild = bool(os.environ.get('STATICBUILD', '')) +staticbuild = False # pick up any commandline options and/or env variables OPTION_WITHOUT_OBJECTIFY = has_option('without-objectify') OPTION_WITH_UNICODE_STRINGS = has_option('with-unicode-strings') @@ -574,15 +627,16 @@ def print_deprecated_option(name, new_name): OPTION_WITH_CLINES = has_option('with-clines') if OPTION_WITHOUT_CYTHON: CYTHON_INSTALLED = False -OPTION_STATIC = staticbuild or has_option('static') +OPTION_STATIC = False OPTION_DEBUG_GCC = has_option('debug-gcc') OPTION_SHOW_WARNINGS = has_option('warnings') OPTION_AUTO_RPATH = has_option('auto-rpath') OPTION_BUILD_LIBXML2XSLT = staticbuild or has_option('static-deps') if OPTION_BUILD_LIBXML2XSLT: OPTION_STATIC = True -OPTION_WITH_XML2_CONFIG = option_value('with-xml2-config') or option_value('xml2-config', deprecated_for='with-xml2-config') -OPTION_WITH_XSLT_CONFIG = option_value('with-xslt-config') or option_value('xslt-config', deprecated_for='with-xslt-config') +# ActiveState - look for libxml2 and libxslt in our dependencies dir. 
+OPTION_WITH_XML2_CONFIG = get_as_lib_dir() +OPTION_WITH_XSLT_CONFIG = get_as_lib_dir() OPTION_LIBXML2_VERSION = option_value('libxml2-version') OPTION_LIBXSLT_VERSION = option_value('libxslt-version') OPTION_LIBICONV_VERSION = option_value('libiconv-version') diff --git a/versioninfo.py b/versioninfo.py index 34c273f13..ed50ea557 100644 --- a/versioninfo.py +++ b/versioninfo.py @@ -78,4 +78,4 @@ def create_version_h(): def get_base_dir(): - return os.path.abspath(os.path.dirname(sys.argv[0])) + return os.path.abspath(os.path.dirname(__file__)) From 1bb4e280961d0b9eef7a901239adb141479ce3db Mon Sep 17 00:00:00 2001 From: martinPavesio Date: Thu, 28 May 2026 20:13:32 -0300 Subject: [PATCH 2/3] Backport CVE-2026-41066: set resolve_entities='internal' in lxml Cython sources CVE-2026-41066 (GHSA-vfmq-68hx-4jfw): iterparse() and ETCompatXMLParser() defaulted to resolve_entities=True, allowing XXE injection from untrusted XML. Upstream fix: lxml 6.1.0, commit ab431ea0 (LP#2146291). - iterparse.pxi: resolve_entities default True -> 'internal' - parser.pxi: ETCompatXMLParser resolve_entities default True -> 'internal' - proxy.pxi: cast &c_attribute.defaultValue to <const_xmlChar**> at the _fixThreadDictPtr call site to fix -Werror=incompatible-pointer-types on GCC 14 / libxml2 2.15.x (Rocky 9 builder) Cython 3 will regenerate etree.c from these sources at build time. 
--- src/lxml/iterparse.pxi | 4 ++-- src/lxml/parser.pxi | 4 ++-- src/lxml/proxy.pxi | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/lxml/iterparse.pxi b/src/lxml/iterparse.pxi index 2758b14d5..58b70d31f 100644 --- a/src/lxml/iterparse.pxi +++ b/src/lxml/iterparse.pxi @@ -44,7 +44,7 @@ cdef class iterparse: - remove_pis: discard processing instructions - strip_cdata: replace CDATA sections by normal text content (default: True) - compact: safe memory for short text content (default: True) - - resolve_entities: replace entities by their text value (default: True) + - resolve_entities: replace entities by their text value (default: 'internal' only) - huge_tree: disable security restrictions and support very deep trees and very long text content (only affects libxml2 2.7+) - html: parse input as HTML (default: XML) @@ -67,7 +67,7 @@ cdef class iterparse: def __init__(self, source, events=(u"end",), *, tag=None, attribute_defaults=False, dtd_validation=False, load_dtd=False, no_network=True, remove_blank_text=False, - compact=True, resolve_entities=True, remove_comments=False, + compact=True, resolve_entities='internal', remove_comments=False, remove_pis=False, strip_cdata=True, encoding=None, html=False, recover=None, huge_tree=False, collect_ids=True, XMLSchema schema=None): diff --git a/src/lxml/parser.pxi b/src/lxml/parser.pxi index e9f4bec7c..f078b27d3 100644 --- a/src/lxml/parser.pxi +++ b/src/lxml/parser.pxi @@ -1686,7 +1686,7 @@ cdef class ETCompatXMLParser(XMLParser): u"""ETCompatXMLParser(self, encoding=None, attribute_defaults=False, \ dtd_validation=False, load_dtd=False, no_network=True, \ ns_clean=False, recover=False, schema=None, \ - huge_tree=False, remove_blank_text=False, resolve_entities=True, \ + huge_tree=False, remove_blank_text=False, resolve_entities='internal', \ remove_comments=True, remove_pis=True, strip_cdata=True, \ target=None, compact=True) @@ -1700,7 +1700,7 @@ cdef class ETCompatXMLParser(XMLParser): def 
__init__(self, *, encoding=None, attribute_defaults=False, dtd_validation=False, load_dtd=False, no_network=True, ns_clean=False, recover=False, schema=None, - huge_tree=False, remove_blank_text=False, resolve_entities=True, + huge_tree=False, remove_blank_text=False, resolve_entities='internal', remove_comments=True, remove_pis=True, strip_cdata=True, target=None, compact=True): XMLParser.__init__(self, diff --git a/src/lxml/proxy.pxi b/src/lxml/proxy.pxi index 16148ed1d..5f10f2f00 100644 --- a/src/lxml/proxy.pxi +++ b/src/lxml/proxy.pxi @@ -574,7 +574,7 @@ cdef void fixThreadDictNamesForDtd(tree.xmlDtd* c_dtd, _fixThreadDictPtr(&c_element.content.prefix, c_src_dict, c_dict) c_attribute = c_element.attributes while c_attribute: - _fixThreadDictPtr(&c_attribute.defaultValue, c_src_dict, c_dict) + _fixThreadDictPtr(<const_xmlChar**>&c_attribute.defaultValue, c_src_dict, c_dict) _fixThreadDictPtr(&c_attribute.name, c_src_dict, c_dict) _fixThreadDictPtr(&c_attribute.prefix, c_src_dict, c_dict) _fixThreadDictPtr(&c_attribute.elem, c_src_dict, c_dict) From 40095083958321b5bd9a97ccafa43a7bb6c54d5b Mon Sep 17 00:00:00 2001 From: martinPavesio Date: Thu, 28 May 2026 20:13:48 -0300 Subject: [PATCH 3/3] Release 5.0.2+security.1 Version string: 5.0.2+security.1 (PEP 440 local identifier) Git tag: 5.0.2.1 CHANGES.txt updated with security release notes. --- CHANGES.txt | 28 ++++++++++++++++++++++++++++ src/lxml/__init__.py | 2 +- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/CHANGES.txt b/CHANGES.txt index c9d8a291d..3bc6c92fa 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -2,6 +2,34 @@ lxml changelog ============== +5.0.2+security.1 (2026-05-28) +============================== + +Security +-------- + +* CVE-2026-41066: ``iterparse()`` and ``ETCompatXMLParser()`` now default to + ``resolve_entities='internal'`` instead of ``True``, preventing XXE injection + from untrusted XML input. Fix applied in Cython source (``iterparse.pxi``, + ``parser.pxi``). 
(GHSA-vfmq-68hx-4jfw) + +Bug fixes +--------- + +* Fixed ``-Werror=incompatible-pointer-types`` compiler error on Rocky 9 / GCC 14 + with libxml2 2.15.x: added explicit Cython cast for ``c_attribute.defaultValue`` + in ``proxy.pxi``. + +* Fixed Windows static linking: changed ``'zlib'`` to ``'z'`` in + ``setupinfo.py`` ``libs.extend()`` call. + +* Fixed ``versioninfo.py`` to use ``__file__`` instead of ``sys.argv[0]`` + for base directory detection when invoked via the AS builder. + +* Fixed ``setupinfo.py`` f-string in ``get_dotfile_version()`` to use + ``str.format()`` for Python 2.7 compatibility. + + 5.0.2 (2024-03-28) ================== diff --git a/src/lxml/__init__.py b/src/lxml/__init__.py index 27614df3a..13756ca4c 100644 --- a/src/lxml/__init__.py +++ b/src/lxml/__init__.py @@ -1,6 +1,6 @@ # this is a package -__version__ = "5.0.2" +__version__ = "5.0.2+security.1" def get_include():