From 83d0d473d6d5a8d59875b0af581d27267e804ba9 Mon Sep 17 00:00:00 2001
From: Ross Burton
Date: Tue, 29 Apr 2025 16:29:30 +0100
Subject: python3: backport the full fix for importlib scanning invalid distributions

Even with our fixes in deterministic_imports.patch, the importlib.metadata
package scan was still returning Distribution objects for empty directories.
This interacts badly with rebuilds when recipes are changing, because when a
recipe is removed from the sysroot the directories are not removed[1].

In particular this breaks python3-meson-python-native rebuilds when Meson
upgrades from 1.7 to 1.8: the site-packages directory has an empty
meson-1.7.dist-info/ and a populated meson-1.8.dist-info/. Whilst it's
deterministic to return the empty 1.7 first, this breaks pypa/build as it
looks through the distributions in order.

We had discussed this with upstream previously, and there is a more
comprehensive fix upstream (actually in importlib_metadata, not cpython)
which ensures that valid distribution objects are listed first. So we can
drop our patch and replace it with a backport to fix these rebuilds.

[1] oe-core 4f94d929639 ("sstate/staging: Handle directory creation race issue")

(From OE-Core rev: 73de8daa6293403f5b92d313af32882c47bce396)

Signed-off-by: Ross Burton
Signed-off-by: Richard Purdie
---
 .../python/python3/valid-dists.patch | 160 +++++++++++++++++++++
 1 file changed, 160 insertions(+)
 create mode 100644 meta/recipes-devtools/python/python3/valid-dists.patch

diff --git a/meta/recipes-devtools/python/python3/valid-dists.patch b/meta/recipes-devtools/python/python3/valid-dists.patch
new file mode 100644
index 0000000000..1b2c078c21
--- /dev/null
+++ b/meta/recipes-devtools/python/python3/valid-dists.patch
@@ -0,0 +1,160 @@
+From a65c29adc027b3615154cab73aaedd58a6aa23da Mon Sep 17 00:00:00 2001
+From: "Jason R. Coombs"
+Date: Tue, 23 Jul 2024 08:36:16 -0400
+Subject: [PATCH] Prioritize valid dists to invalid dists when retrieving by
+ name.
+
+Closes python/importlib_metadata#489
+
+Upstream-Status: Backport [https://github.com/python/importlib_metadata/commit/a65c29adc027b3615154cab73aaedd58a6aa23da]
+Signed-off-by: Ross Burton
+
+diff --git i/Lib/importlib/metadata/__init__.py w/Lib/importlib/metadata/__init__.py
+index 8ce62dd864f..085378caabc 100644
+--- i/Lib/importlib/metadata/__init__.py
++++ w/Lib/importlib/metadata/__init__.py
+@@ -21,7 +21,7 @@
+ from . import _meta
+ from ._collections import FreezableDefaultDict, Pair
+ from ._functools import method_cache, pass_none
+-from ._itertools import always_iterable, unique_everseen
++from ._itertools import always_iterable, bucket, unique_everseen
+ from ._meta import PackageMetadata, SimplePath
+ 
+ from contextlib import suppress
+@@ -404,7 +404,7 @@ def from_name(cls, name: str) -> Distribution:
+         if not name:
+             raise ValueError("A distribution name is required.")
+         try:
+-            return next(iter(cls.discover(name=name)))
++            return next(iter(cls._prefer_valid(cls.discover(name=name))))
+         except StopIteration:
+             raise PackageNotFoundError(name)
+ 
+@@ -428,6 +428,16 @@ def discover(
+             resolver(context) for resolver in cls._discover_resolvers()
+         )
+ 
++    @staticmethod
++    def _prefer_valid(dists: Iterable[Distribution]) -> Iterable[Distribution]:
++        """
++        Prefer (move to the front) distributions that have metadata.
++
++        Ref python/importlib_resources#489.
++ """ ++ buckets = bucket(dists, lambda dist: bool(dist.metadata)) ++ return itertools.chain(buckets[True], buckets[False]) ++ + @staticmethod + def at(path: str | os.PathLike[str]) -> Distribution: + """Return a Distribution for the indicated metadata path. +diff --git i/Lib/importlib/metadata/_itertools.py w/Lib/importlib/metadata/_itertools.py +index d4ca9b9140e..79d37198ce7 100644 +--- i/Lib/importlib/metadata/_itertools.py ++++ w/Lib/importlib/metadata/_itertools.py +@@ -1,3 +1,4 @@ ++from collections import defaultdict, deque + from itertools import filterfalse + + +@@ -71,3 +72,100 @@ def always_iterable(obj, base_type=(str, bytes)): + return iter(obj) + except TypeError: + return iter((obj,)) ++ ++ ++# Copied from more_itertools 10.3 ++class bucket: ++ """Wrap *iterable* and return an object that buckets the iterable into ++ child iterables based on a *key* function. ++ ++ >>> iterable = ['a1', 'b1', 'c1', 'a2', 'b2', 'c2', 'b3'] ++ >>> s = bucket(iterable, key=lambda x: x[0]) # Bucket by 1st character ++ >>> sorted(list(s)) # Get the keys ++ ['a', 'b', 'c'] ++ >>> a_iterable = s['a'] ++ >>> next(a_iterable) ++ 'a1' ++ >>> next(a_iterable) ++ 'a2' ++ >>> list(s['b']) ++ ['b1', 'b2', 'b3'] ++ ++ The original iterable will be advanced and its items will be cached until ++ they are used by the child iterables. This may require significant storage. ++ ++ By default, attempting to select a bucket to which no items belong will ++ exhaust the iterable and cache all values. ++ If you specify a *validator* function, selected buckets will instead be ++ checked against it. ++ ++ >>> from itertools import count ++ >>> it = count(1, 2) # Infinite sequence of odd numbers ++ >>> key = lambda x: x % 10 # Bucket by last digit ++ >>> validator = lambda x: x in {1, 3, 5, 7, 9} # Odd digits only ++ >>> s = bucket(it, key=key, validator=validator) ++ >>> 2 in s ++ False ++ >>> list(s[2]) ++ [] ++ ++ """ ++ ++ def __init__(self, iterable, key, validator=None): ++ self._it = iter(iterable) ++ self._key = key ++ self._cache = defaultdict(deque) ++ self._validator = validator or (lambda x: True) ++ ++ def __contains__(self, value): ++ if not self._validator(value): ++ return False ++ ++ try: ++ item = next(self[value]) ++ except StopIteration: ++ return False ++ else: ++ self._cache[value].appendleft(item) ++ ++ return True ++ ++ def _get_values(self, value): ++ """ ++ Helper to yield items from the parent iterator that match *value*. ++ Items that don't match are stored in the local cache as they ++ are encountered. ++ """ ++ while True: ++ # If we've cached some items that match the target value, emit ++ # the first one and evict it from the cache. ++ if self._cache[value]: ++ yield self._cache[value].popleft() ++ # Otherwise we need to advance the parent iterator to search for ++ # a matching item, caching the rest. ++ else: ++ while True: ++ try: ++ item = next(self._it) ++ except StopIteration: ++ return ++ item_value = self._key(item) ++ if item_value == value: ++ yield item ++ break ++ elif self._validator(item_value): ++ self._cache[item_value].append(item) ++ ++ def __iter__(self): ++ for item in self._it: ++ item_value = self._key(item) ++ if self._validator(item_value): ++ self._cache[item_value].append(item) ++ ++ yield from self._cache.keys() ++ ++ def __getitem__(self, value): ++ if not self._validator(value): ++ return iter(()) ++ ++ return self._get_values(value) -- cgit v1.2.3-54-g00ecf