diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..dbb7756454d10140b6f3aeea2f80f3ef1206b847
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+nltk-3.8.1.tar.gz
diff --git a/nltk-3.8.1-CVE-2025-14009.patch b/nltk-3.8.1-CVE-2025-14009.patch
new file mode 100644
index 0000000000000000000000000000000000000000..73de053c275508d03b63788bb1f574add881b600
--- /dev/null
+++ b/nltk-3.8.1-CVE-2025-14009.patch
@@ -0,0 +1,303 @@
+diff --git a/nltk/downloader.py b/nltk/downloader.py
+index e5542be..98d2290 100644
+--- a/nltk/downloader.py
++++ b/nltk/downloader.py
+@@ -2229,15 +2229,27 @@ def _md5_hexdigest(fp):
+ # this when we build the index, anyway.
+ def unzip(filename, root, verbose=True):
+     """
+-    Extract the contents of the zip file ``filename`` into the
+-    directory ``root``.
++    Securely extract the contents of the zip file ``filename`` into
++    the directory ``root``. Raises Exception(ErrorMessage.message)
++    on failure (keeps previous behavior).
+     """
+     for message in _unzip_iter(filename, root, verbose):
+         if isinstance(message, ErrorMessage):
+-            raise Exception(message)
++            raise Exception(message.message)
+ 
+ 
+-def _unzip_iter(filename, root, verbose=True):
++def _unzip_iter(
++    filename, root, verbose=True, max_unzipped_bytes=1 * 1024 * 1024 * 1024
++):
++    """
++    Secure replacement for ZipFile.extractall that:
++      - defends against path traversal, absolute paths, drive letters
++      - blocks symlinks inside the archive
++      - enforces a total-uncompressed-bytes limit (simple zip-bomb protection)
++      - extracts into a temp dir and then moves contents into place
++
++    Yields ProgressMessage(progress:int) and ErrorMessage instances.
++    """
+     if verbose:
+         sys.stdout.write("Unzipping %s" % os.path.split(filename)[1])
+         sys.stdout.flush()
+@@ -2251,10 +2263,238 @@ def _unzip_iter(filename, root, verbose=True):
+         yield ErrorMessage(filename, e)
+         return
+ 
+-    zf.extractall(root)
++    # Ensure root exists (create if necessary)
++    try:
++        os.makedirs(root, exist_ok=True)
++    except Exception as e:
++        zf.close()
++        yield ErrorMessage(filename, f"Could not create extraction root {root!r}: {e}")
++        return
+ 
+-    if verbose:
+-        print()
++    root_real = os.path.realpath(root)
++    tmpdir = None
++    total_unzipped = 0
++
++    try:
++        # compute total uncompressed size for progress reporting (best-effort)
++        try:
++            total_uncompressed = sum(mi.file_size for mi in zf.infolist())
++            if total_uncompressed <= 0:
++                total_uncompressed = None
++        except Exception:
++            total_uncompressed = None
++
++        # create a temp extraction dir next to final root for atomic move;
++        # mkdtemp yields a fresh, unpredictable, owner-only (0o700) directory
++        import tempfile
++
++        parent = os.path.dirname(root_real) or "."
++        tmpdir = tempfile.mkdtemp(prefix=".tmp_nltk_unzip_", dir=parent)
++
++        for member in zf.infolist():
++            name = member.filename
++
++            # skip empty or non-string names
++            if not isinstance(name, str) or name.strip() == "":
++                continue
++
++            # Normalize path separators
++            norm_name = name.replace("\\", "/")
++
++            # Reject absolute paths and drive-letter components
++            if norm_name.startswith("/") or norm_name.startswith("\\"):
++                zf.close()
++                try:
++                    shutil.rmtree(tmpdir)
++                except Exception:
++                    pass
++                yield ErrorMessage(
++                    filename, f"Unsafe zip entry blocked (absolute path): {name!r}"
++                )
++                return
++            if ":" in norm_name.split("/")[0]:
++                zf.close()
++                try:
++                    shutil.rmtree(tmpdir)
++                except Exception:
++                    pass
++                yield ErrorMessage(
++                    filename, f"Unsafe zip entry blocked (drive spec): {name!r}"
++                )
++                return
++
++            # Reject traversal segments
++            parts = [p for p in norm_name.split("/") if p and p != "."]
++            if ".." in parts:
++                zf.close()
++                try:
++                    shutil.rmtree(tmpdir)
++                except Exception:
++                    pass
++                yield ErrorMessage(
++                    filename, f"Unsafe zip entry blocked (path traversal): {name!r}"
++                )
++                return
++
++            # Build the target path under tmpdir
++            target_path = os.path.join(tmpdir, *parts)
++            target_dir = os.path.dirname(target_path)
++            if target_dir:
++                os.makedirs(target_dir, exist_ok=True)
++
++            # Detect symlink entries (best-effort; unix external_attr)
++            is_symlink = False
++            try:
++                if hasattr(member, "external_attr"):
++                    mode = (member.external_attr >> 16) & 0xFFFF
++                    # 0o120000 is symlink in unix file types
++                    if (mode & 0o170000) == 0o120000:
++                        is_symlink = True
++            except Exception:
++                is_symlink = False
++
++            if is_symlink:
++                zf.close()
++                try:
++                    shutil.rmtree(tmpdir)
++                except Exception:
++                    pass
++                yield ErrorMessage(
++                    filename, f"Unsafe zip entry blocked (symlink): {name!r}"
++                )
++                return
++
++            # Extract member safely streaming (don't load whole file into memory)
++            try:
++                if member.is_dir():
++                    os.makedirs(target_path, exist_ok=True)
++                else:
++                    with zf.open(member, "r") as src, open(target_path, "wb") as dst:
++                        while True:
++                            chunk = src.read(1024 * 16)
++                            if not chunk:
++                                break
++                            dst.write(chunk)
++                            total_unzipped += len(chunk)
++                            if total_unzipped > max_unzipped_bytes:
++                                raise ValueError(
++                                    "Exceeded maximum allowed unzipped bytes"
++                                )
++
++                # preserve rwx permission bits only (drop setuid/setgid/sticky)
++                try:
++                    if hasattr(member, "external_attr"):
++                        mode = (member.external_attr >> 16) & 0o777
++                        if mode:
++                            os.chmod(target_path, mode)
++                except Exception:
++                    # best-effort only
++                    pass
++
++                # Emit progress as a ProgressMessage (0-100)
++                if total_uncompressed:
++                    percent = int(
++                        min(100, (total_unzipped * 100) // total_uncompressed)
++                    )
++                    yield ProgressMessage(percent)
++                else:
++                    # unknown total size: emit small progress nudges
++                    yield ProgressMessage(0)
++
++            except Exception as e:
++                zf.close()
++                try:
++                    shutil.rmtree(tmpdir)
++                except Exception:
++                    pass
++                yield ErrorMessage(filename, f"Error extracting member {name!r}: {e}")
++                return
++
++        # Verify nothing in tmpdir escapes root once moved (component-wise check)
++        for walk_root, dirs, files in os.walk(tmpdir):
++            for entry in dirs + files:
++                src = os.path.join(walk_root, entry)
++                rel = os.path.relpath(src, tmpdir)
++                dest = os.path.join(root, rel)
++                dest_real = os.path.realpath(dest)
++                if os.path.commonpath([root_real, dest_real]) != root_real:
++                    zf.close()
++                    try:
++                        shutil.rmtree(tmpdir)
++                    except Exception:
++                        pass
++                    yield ErrorMessage(
++                        filename, f"Unsafe extraction path detected for {rel!r}"
++                    )
++                    return
++
++        # Move/merge tmpdir contents into final root atomically-ish
++        for entry in os.listdir(tmpdir):
++            src_entry = os.path.join(tmpdir, entry)
++            dst_entry = os.path.join(root, entry)
++            if os.path.exists(dst_entry):
++                # If both are dirs, merge; otherwise replace
++                if os.path.isdir(dst_entry) and os.path.isdir(src_entry):
++                    for top, _, names in os.walk(src_entry):
++                        rel = os.path.relpath(top, src_entry)
++                        target_top = (
++                            os.path.join(dst_entry, rel) if rel != "." else dst_entry
++                        )
++                        os.makedirs(target_top, exist_ok=True)
++                        for name in names:
++                            s = os.path.join(top, name)
++                            d = os.path.join(target_top, name)
++                            if os.path.exists(d):
++                                try:
++                                    if os.path.isdir(d):
++                                        shutil.rmtree(d)
++                                    else:
++                                        os.remove(d)
++                                except Exception:
++                                    pass
++                            shutil.move(s, d)
++                    try:
++                        shutil.rmtree(src_entry)
++                    except Exception:
++                        pass
++                else:
++                    try:
++                        if os.path.isdir(dst_entry):
++                            shutil.rmtree(dst_entry)
++                        else:
++                            os.remove(dst_entry)
++                    except Exception:
++                        pass
++                    shutil.move(src_entry, dst_entry)
++            else:
++                shutil.move(src_entry, dst_entry)
++
++        # final progress = 100
++        yield ProgressMessage(100)
++
++        # Clean up tempdir
++        try:
++            if os.path.exists(tmpdir):
++                shutil.rmtree(tmpdir)
++        except Exception:
++            pass
++
++        zf.close()
++        if verbose:
++            print()
++
++    except Exception as e:
++        try:
++            zf.close()
++        except Exception:
++            pass
++        try:
++            if tmpdir and os.path.exists(tmpdir):
++                shutil.rmtree(tmpdir)
++        except Exception:
++            pass
++        yield ErrorMessage(filename, f"Unexpected error extracting zip: {e}")
++        return
+ 
+ 
+ ######################################################################
+diff --git a/nltk/tag/mapping.py b/nltk/tag/mapping.py
+index c09467b..6eae8b3 100644
+--- a/nltk/tag/mapping.py
++++ b/nltk/tag/mapping.py
+@@ -32,7 +32,7 @@ X - other: foreign words, typos, abbreviations
+ from collections import defaultdict
+ from os.path import join
+ 
+-from nltk.data import load
++from nltk.data import load, normalize_resource_url
+ 
+ _UNIVERSAL_DATA = "taggers/universal_tagset"
+ _UNIVERSAL_TAGS = (
+@@ -56,7 +56,10 @@ _MAPPINGS = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: "UNK")))
+ 
+ 
+ def _load_universal_map(fileid):
+-    contents = load(join(_UNIVERSAL_DATA, fileid + ".map"), format="text")
++    resource = normalize_resource_url(
++        f"nltk:{_UNIVERSAL_DATA.rstrip('/')}/{fileid.lstrip('/')}.map"
++    )
++    contents = load(resource, format="text")
+ 
+     # When mapping to the Universal Tagset,
+     # map unknown inputs to 'X' not 'UNK'
diff --git a/python-nltk.spec b/python-nltk.spec
index 826fe3264894be2cab6a3c1238f3c5ee55c7e871..9f1d0d19b5a5d2f1ac201abb7984a0cfa36405dd 100644
--- a/python-nltk.spec
+++ b/python-nltk.spec
@@ -3,11 +3,14 @@
 Summary: Natural Language Toolkit
 Name: python-nltk
 Version: 3.8.1
-Release: 8%{?dist}
+Release: 9%{?dist}
 License: ASL 2.0 and GPLv2+ with exceptions
 URL: http://www.nltk.org/
 Source0: https://github.com/nltk/nltk/archive/%{version}.tar.gz#/%{mod_name}-%{version}.tar.gz
 
+# CVE-2025-14009: Zip-Slip vulnerability fix
+Patch0001: nltk-3.8.1-CVE-2025-14009.patch
+
 BuildArch: noarch
 
 %description
@@ -33,7 +36,7 @@ research projects.
 This package provides the Python 3 build of NLTK.
 
 %prep
-%autosetup -n %{mod_name}-%{version}
+%autosetup -p1 -n %{mod_name}-%{version}
 
 for f in $(grep -Frl '/usr/bin/env' %{mod_name}); do
   sed -i -e "s|^#!/usr/bin/env python$|#!%{__python3}|g" $f
@@ -59,6 +62,10 @@ done
 
 
 %changelog
+* Wed Mar 11 2026 ZoeDong - 3.8.1-9
+- [Type] security
+- [DESC] Fix CVE-2025-14009 vulnerability
+
 * Wed Jun 11 2025 bbrucezhang - 3.8.1-8
 - Rebuilt for loongarch64
 