diff options
| author | Bruce Ashfield <bruce.ashfield@gmail.com> | 2025-12-04 22:36:12 +0000 |
|---|---|---|
| committer | Bruce Ashfield <bruce.ashfield@gmail.com> | 2025-12-08 20:57:44 -0500 |
| commit | a303bf16ffd747c50c95cbe385407ba8b0122cec (patch) | |
| tree | ddb26a7945e746ce8206fc65b0a971ed74dc812b | |
| parent | 9f40ce9b277a677ad3cddd8bf1c1d15fbd035251 (diff) | |
| download | meta-virtualization-a303bf16ffd747c50c95cbe385407ba8b0122cec.tar.gz | |
scripts: add oe-go-mod-fetcher for Go module VCS resolution
Add the oe-go-mod-fetcher.py tool and supporting files for resolving
Go module dependencies via git repositories instead of module proxies.
oe-go-mod-fetcher.py:
- Parses go.mod and go.sum to identify required modules
- Resolves module paths to git repositories (handles vanity URLs)
- Maps module versions to git commits
- Generates SRC_URI entries for bitbake fetcher
- Creates go-mod-git.inc and go-mod-cache.inc files
- Supports monorepo detection and nested module handling
- Caches resolution results for performance
extract-discovered-modules.py:
- Helper script to extract module information from discovery cache
- Used by go-mod-discovery.bbclass during build
Also adds .gitignore to exclude runtime caches from version control.
Signed-off-by: Bruce Ashfield <bruce.ashfield@gmail.com>
| -rw-r--r-- | scripts/.gitignore | 15 | ||||
| -rw-r--r-- | scripts/data/manual-overrides.json | 8 | ||||
| -rwxr-xr-x | scripts/extract-discovered-modules.py | 491 | ||||
| -rwxr-xr-x | scripts/oe-go-mod-fetcher.py | 4580 |
4 files changed, 5094 insertions, 0 deletions
diff --git a/scripts/.gitignore b/scripts/.gitignore new file mode 100644 index 00000000..57fdcfc0 --- /dev/null +++ b/scripts/.gitignore | |||
| @@ -0,0 +1,15 @@ | |||
| 1 | # Runtime caches generated by oe-go-mod-fetcher.py | ||
| 2 | data/module-cache.json | ||
| 3 | data/vanity-url-cache.json | ||
| 4 | data/ls-remote-cache.json | ||
| 5 | data/verify-cache.json | ||
| 6 | data/.verify/ | ||
| 7 | |||
| 8 | # Python bytecode | ||
| 9 | __pycache__/ | ||
| 10 | *.pyc | ||
| 11 | |||
| 12 | # Editor/IDE files | ||
| 13 | *.swp | ||
| 14 | *~ | ||
| 15 | .cache/ | ||
diff --git a/scripts/data/manual-overrides.json b/scripts/data/manual-overrides.json new file mode 100644 index 00000000..5657c40c --- /dev/null +++ b/scripts/data/manual-overrides.json | |||
| @@ -0,0 +1,8 @@ | |||
| 1 | { | ||
| 2 | "_comment": "Git-tracked repository overrides for modules where automatic discovery fails.", | ||
| 3 | "_format": "module/path or module/path@version -> repository URL", | ||
| 4 | "_example": { | ||
| 5 | "example.com/broken-vanity": "https://github.com/org/actual-repo", | ||
| 6 | "example.com/versioned@v1.2.3": "https://github.com/org/specific-version-repo" | ||
| 7 | } | ||
| 8 | } | ||
diff --git a/scripts/extract-discovered-modules.py b/scripts/extract-discovered-modules.py new file mode 100755 index 00000000..1cfca6ad --- /dev/null +++ b/scripts/extract-discovered-modules.py | |||
| @@ -0,0 +1,491 @@ | |||
| 1 | #!/usr/bin/env python3 | ||
| 2 | # SPDX-License-Identifier: GPL-2.0-only | ||
| 3 | # | ||
| 4 | # go-dep processor | ||
| 5 | # | ||
| 6 | # Copyright (C) 2025 Bruce Ashfield | ||
| 7 | # | ||
| 8 | # This program is free software; you can redistribute it and/or modify | ||
| 9 | # it under the terms of the GNU General Public License version 2 as | ||
| 10 | # published by the Free Software Foundation. | ||
| 11 | # | ||
| 12 | # This program is distributed in the hope that it will be useful, | ||
| 13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 15 | # GNU General Public License for more details. | ||
| 16 | # | ||
| 17 | # You should have received a copy of the GNU General Public License along | ||
| 18 | # with this program; if not, write to the Free Software Foundation, Inc., | ||
| 19 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||
| 20 | |||
| 21 | """ | ||
| 22 | Extract complete module metadata from BitBake Go discovery build cache. | ||
| 23 | |||
| 24 | This script walks a GOMODCACHE directory (from BitBake discovery build) and | ||
| 25 | extracts all module metadata from .info files, including VCS information. | ||
| 26 | |||
| 27 | Usage: | ||
| 28 | extract-discovered-modules.py --gomodcache /path/to/cache --output modules.json | ||
| 29 | |||
| 30 | The script creates: | ||
| 31 | - modules.json: Complete metadata with VCS URLs, commits, subdirs, timestamps | ||
| 32 | - modules.txt: Simple module@version list | ||
| 33 | |||
| 34 | This provides 100% accurate module discovery for BitBake recipe generation. | ||
| 35 | """ | ||
| 36 | |||
| 37 | import argparse | ||
| 38 | import json | ||
| 39 | import os | ||
| 40 | import re | ||
| 41 | import shutil | ||
| 42 | import subprocess | ||
| 43 | import sys | ||
| 44 | import tempfile | ||
| 45 | import urllib.parse | ||
| 46 | from pathlib import Path | ||
| 47 | |||
| 48 | |||
def git_ls_remote(url: str, ref: str) -> str:
    """
    Query a git repository for a ref and return the commit hash.

    For tags, the dereferenced form (``<ref>^{}``) is tried first so that an
    annotated tag resolves to the commit it points at rather than to the tag
    object itself. The plain ref is then tried as a fallback.

    Args:
        url: Repository URL (or local path) handed to ``git ls-remote``.
        ref: Ref to look up, e.g. ``refs/tags/v1.2.3``.

    Returns:
        The 40-character commit hash, or '' when the ref cannot be resolved
        (unknown ref, unreachable repository, timeout, git not installed).
    """
    # Peeled form first (handles annotated tags), then the ref as given.
    candidates = [f"{ref}^{{}}", ref] if ref.startswith("refs/tags/") else [ref]

    # Never block on an interactive credential prompt; fail fast instead.
    env = {**os.environ, 'GIT_TERMINAL_PROMPT': '0'}

    for query_ref in candidates:
        try:
            result = subprocess.run(
                ['git', 'ls-remote', url, query_ref],
                capture_output=True,
                text=True,
                timeout=30,
                env=env,
            )
        except (subprocess.SubprocessError, OSError, ValueError):
            # Timeout, git missing, or undecodable output for this query.
            # Keep going so a failure on the peeled form does not prevent
            # the plain ref from being tried.
            continue

        if result.returncode != 0:
            continue

        # Output lines look like "hash<TAB>refname". ls-remote patterns are
        # tail-matched, so prefer the line whose name is exactly what we
        # asked for and only fall back to the first well-formed hash.
        first_valid = ''
        for line in result.stdout.splitlines():
            commit, _, name = line.partition('\t')
            if not re.fullmatch(r'[0-9a-fA-F]{40}', commit):
                continue
            if name == query_ref:
                return commit
            if not first_valid:
                first_valid = commit
        if first_valid:
            return first_valid

    return ''
| 75 | |||
| 76 | |||
def _github_repo_path(url: str) -> str:
    """Return ``owner/repo`` for a github.com URL, or '' if it is not one."""
    try:
        parsed = urllib.parse.urlsplit(url)
        host = (parsed.hostname or '').lower()
    except ValueError:
        return ''
    if host not in ('github.com', 'www.github.com'):
        return ''
    segments = [segment for segment in parsed.path.split('/') if segment]
    if len(segments) < 2:
        return ''
    owner, repo = segments[0], segments[1]
    # Strip only a trailing ".git"; names such as "user.github.io" contain
    # ".git" in the middle and must be left intact.
    if repo.endswith('.git'):
        repo = repo[:-len('.git')]
    return f"{owner}/{repo}" if repo else ''


def resolve_short_hash(url: str, short_hash: str) -> str:
    """
    Resolve a 12-char short hash to full 40-char hash.

    Go pseudo-versions only contain 12 characters of the commit hash.
    BitBake's git fetcher needs the full 40-char hash.

    Strategy: try the GitHub API first (single HTTP request), then
    ``git ls-remote`` (works when the commit is a branch head or tag), then a
    bare partial clone plus ``git rev-parse`` (slowest, works for any commit).

    Args:
        url: Repository URL.
        short_hash: Abbreviated commit hash taken from a pseudo-version.

    Returns:
        The full 40-character hash when it could be resolved. Otherwise
        ``short_hash`` is returned unchanged, which is also the result when
        the input is not a 12-character hex string.
    """
    if len(short_hash) != 12:
        return short_hash  # Already full or invalid
    if not re.fullmatch(r'[0-9a-fA-F]{12}', short_hash):
        return short_hash  # Not a hash at all - nothing any backend could resolve

    # Avoid hanging on credential prompts for private/unknown repositories.
    git_env = {**os.environ, 'GIT_TERMINAL_PROMPT': '0'}

    # First try: GitHub API (fast - single HTTP request)
    # Note: Rate limited to 60/hour without auth token
    repo_path = _github_repo_path(url)
    if repo_path:
        try:
            import urllib.request
            api_url = f"https://api.github.com/repos/{repo_path}/commits/{short_hash}"
            req = urllib.request.Request(api_url, headers={'User-Agent': 'oe-go-mod-fetcher'})
            with urllib.request.urlopen(req, timeout=10) as response:
                data = json.loads(response.read().decode())
            sha = data.get('sha', '') if isinstance(data, dict) else ''
            if isinstance(sha, str) and len(sha) == 40 and sha.startswith(short_hash.lower()):
                return sha
        except Exception:
            # Deliberately best-effort: rate limiting, network failures and
            # malformed responses all just mean "try the next method".
            pass

    # Second try: git ls-remote (downloads all refs, checks if any match)
    # This works if the commit is a branch head or tag
    try:
        result = subprocess.run(
            ['git', 'ls-remote', url],
            capture_output=True,
            text=True,
            timeout=30,
            env=git_env,
        )
        if result.returncode == 0:
            for line in result.stdout.splitlines():
                full_hash = line.split('\t')[0]
                if full_hash.startswith(short_hash):
                    return full_hash
    except (subprocess.SubprocessError, OSError, ValueError):
        pass  # Timeout / git unavailable - fall through to the clone

    # Third try: bare partial clone (commits and trees only, no blobs) and
    # rev-parse. Slower, but works for any commit reachable from a ref.
    try:
        with tempfile.TemporaryDirectory(prefix='hash-resolve-') as tmpdir:
            repo_dir = os.path.join(tmpdir, 'repo')
            clone_result = subprocess.run(
                ['git', 'clone', '--bare', '--filter=blob:none', url, repo_dir],
                capture_output=True,
                timeout=120,
                env=git_env,
            )
            if clone_result.returncode == 0:
                # ^{commit} makes git expand the prefix to a commit object only.
                parse_result = subprocess.run(
                    ['git', 'rev-parse', '--verify', f'{short_hash}^{{commit}}'],
                    cwd=repo_dir,
                    capture_output=True,
                    text=True,
                    timeout=10,
                )
                if parse_result.returncode == 0:
                    full_hash = parse_result.stdout.strip()
                    if len(full_hash) == 40:
                        return full_hash
    except (subprocess.SubprocessError, OSError, ValueError):
        pass

    # Could not resolve - return original short hash
    return short_hash
| 150 | |||
| 151 | |||
def derive_vcs_info(module_path, version):
    """
    Derive VCS URL and commit info from module path and version.

    This is used for modules where the Go proxy doesn't provide Origin metadata
    (older modules cached before Go 1.18).

    Args:
        module_path: Unescaped Go module path (e.g. ``github.com/owner/repo/sub``).
        version: Module version, either a tagged version or a pseudo-version;
            a ``+incompatible`` suffix is tolerated.

    Returns:
        dict with ``vcs_url``, ``vcs_hash``, ``vcs_ref`` and ``subdir``, or
        None if no repository URL can be derived from the module path.
        For pseudo-versions ``vcs_hash`` is the 12-char short hash and
        ``vcs_ref`` is ''. For tagged versions ``vcs_ref`` is the tag ref and
        ``vcs_hash`` is whatever ``git_ls_remote`` resolved ('' on failure).
    """
    vcs_url = None
    vcs_hash = ''
    vcs_ref = ''
    subpath = ''  # Path of the module below the repository root (multi-module repos)

    parts = module_path.split('/')

    # Derive URL from module path
    if module_path.startswith('github.com/'):
        # github.com/owner/repo or github.com/owner/repo/subpkg
        if len(parts) >= 3:
            vcs_url = f"https://github.com/{parts[1]}/{parts[2]}"
            # e.g. github.com/owner/repo/cmd/tool -> cmd/tool
            subpath = '/'.join(parts[3:])

    elif module_path.startswith('gitlab.com/'):
        if len(parts) >= 3:
            vcs_url = f"https://gitlab.com/{parts[1]}/{parts[2]}"

    elif module_path.startswith('bitbucket.org/'):
        if len(parts) >= 3:
            vcs_url = f"https://bitbucket.org/{parts[1]}/{parts[2]}"

    elif module_path.startswith('gopkg.in/'):
        # gopkg.in/pkg.vN       -> github.com/go-pkg/pkg (convention)
        # gopkg.in/user/pkg.vN  -> github.com/user/pkg
        match = re.match(r'gopkg\.in/(?:([^/]+)/)?([^/]+)\.v\d+', module_path)
        if match:
            owner, pkg_name = match.groups()
            if owner:
                vcs_url = f"https://github.com/{owner}/{pkg_name}"
            else:
                # Common mappings - some use go-* prefix, others don't
                mappings = {
                    'yaml': 'https://github.com/go-yaml/yaml',
                    'check': 'https://github.com/go-check/check',
                    'inf': 'https://github.com/go-inf/inf',
                    'tomb': 'https://github.com/go-tomb/tomb',
                    'fsnotify': 'https://github.com/fsnotify/fsnotify',  # No go- prefix
                }
                vcs_url = mappings.get(pkg_name, f"https://github.com/go-{pkg_name}/{pkg_name}")

    elif module_path.startswith('google.golang.org/'):
        # google.golang.org vanity imports -> github.com/golang/* by default,
        # with a handful of special cases listed below.
        #
        # Submodules in multi-module repos carry prefixed tags, e.g.
        # google.golang.org/grpc/cmd/protoc-gen-go-grpc uses
        # cmd/protoc-gen-go-grpc/v1.1.0 (NOT v1.1.0), so track the subpath.
        if len(parts) >= 2:
            pkg_name = parts[1]  # First component after google.golang.org/
            mappings = {
                'protobuf': 'https://github.com/protocolbuffers/protobuf-go',
                'grpc': 'https://github.com/grpc/grpc-go',
                'genproto': 'https://github.com/googleapis/go-genproto',
                'api': 'https://github.com/googleapis/google-api-go-client',
            }
            vcs_url = mappings.get(pkg_name, f"https://github.com/golang/{pkg_name}")
            subpath = '/'.join(parts[2:])  # Everything after google.golang.org/<pkg>/

    if not vcs_url:
        return None

    # Parse version for commit hash (pseudo-versions). Go pseudo-version forms:
    #   vX.0.0-YYYYMMDDHHMMSS-HASH            (no base version)
    #   vX.Y.Z-pre.0.YYYYMMDDHHMMSS-HASH      (base version is a pre-release)
    #   vX.Y.(Z+1)-0.YYYYMMDDHHMMSS-HASH      (base version is a release)
    # i.e. the 14-digit timestamp is preceded by "-", "-0." or ".0.".
    # Also handle +incompatible suffix.
    clean_version = version.replace('+incompatible', '')

    pseudo_match = re.search(r'(?:-(?:0\.)?|\.0\.)(\d{14})-([0-9a-f]{12})$', clean_version)
    if pseudo_match:
        # 12-char short hash; expanding it to 40 chars is left to the consumer.
        vcs_hash = pseudo_match.group(2)
    else:
        # Tagged version - resolve tag to commit hash.
        # For a module in a subdirectory the tag is "<subdir>/<version>", where
        # the subdir prefix does NOT include a major version suffix (/v2, /v3...):
        #   .../repo/sub/v2 @ v2.1.0 -> tag sub/v2.1.0
        #   .../repo/v2     @ v2.1.0 -> tag v2.1.0
        tag_prefix = re.sub(r'(?:^|/)v(?:[2-9]|[1-9]\d+)$', '', subpath)

        # Candidate refs in priority order: prefixed tag, then the bare
        # version (some repos don't use prefixed tags for submodules).
        candidate_refs = []
        for prefix in (tag_prefix, ''):
            tag_name = f"{prefix}/{clean_version}" if prefix else clean_version
            candidate = f"refs/tags/{tag_name}"
            if candidate not in candidate_refs:
                candidate_refs.append(candidate)

        # If nothing resolves, report the preferred ref with an empty hash.
        vcs_ref = candidate_refs[0]
        for candidate in candidate_refs:
            resolved = git_ls_remote(vcs_url, candidate)
            if resolved:
                vcs_hash = resolved
                vcs_ref = candidate  # Use the working ref
                break

    return {
        'vcs_url': vcs_url,
        'vcs_hash': vcs_hash,
        'vcs_ref': vcs_ref,
        # NOTE(review): for /vN modules this still includes the "vN" component;
        # whether the module lives in a vN/ directory or at the repository
        # root cannot be determined without inspecting the repository.
        'subdir': subpath,
    }
| 277 | |||
| 278 | |||
def extract_modules(gomodcache_path):
    """
    Walk GOMODCACHE and extract all module metadata from .info files.

    Only ``<module path>/@v/<version>.info`` files below
    ``<gomodcache>/cache/download`` are considered. When a file carries
    ``Origin`` metadata with both URL and Hash, that is used directly;
    otherwise ``derive_vcs_info()`` is asked to infer the repository from the
    module path and version (this may involve network calls).

    Args:
        gomodcache_path: Path to the GOMODCACHE directory.

    Returns:
        list of dicts with complete metadata:
        - module_path: Unescaped module path
        - version: Module version
        - vcs_url: Git repository URL
        - vcs_hash: Commit hash (may be short or empty for derived entries)
        - vcs_ref: Tag/branch reference
        - subdir: Subdirectory in mono-repos
        - timestamp: Commit timestamp

    Raises:
        FileNotFoundError: if ``<gomodcache>/cache/download`` does not exist.
    """
    cache_dir = Path(gomodcache_path) / "cache" / "download"

    if not cache_dir.exists():
        raise FileNotFoundError(f"Cache directory not found: {cache_dir}")

    def unescape(text):
        # Go's case-insensitive encoding: "!x" stands for "X".
        # Example: github.com/!microsoft/go-winio -> github.com/Microsoft/go-winio
        return re.sub(r'!([a-z])', lambda m: m.group(1).upper(), text)

    modules = []
    skipped = 0
    derived = 0
    total_info_files = 0

    print(f"Scanning GOMODCACHE: {cache_dir}")

    for info_file in cache_dir.rglob("*.info"):
        total_info_files += 1

        # Extract module path from directory structure
        parts = info_file.parent.relative_to(cache_dir).parts

        # Need at least "<something>/@v"; a stray .info file directly in the
        # cache root has no path components at all.
        if len(parts) < 2 or parts[-1] != '@v':
            continue

        module_path = unescape('/'.join(parts[:-1]))
        # Versions are escaped on disk the same way as module paths.
        version = unescape(info_file.stem)

        # Read .info file for VCS metadata
        try:
            with open(info_file, encoding='utf-8') as f:
                info = json.load(f)

            # "Origin" may be absent or null; both mean "no origin metadata".
            origin = info.get('Origin') or {}

            # Check if we have complete VCS info from Origin
            if origin.get('URL') and origin.get('Hash'):
                modules.append({
                    'module_path': module_path,
                    'version': version,
                    'vcs_url': origin.get('URL', ''),
                    'vcs_hash': origin.get('Hash', ''),
                    'vcs_ref': origin.get('Ref', ''),
                    'subdir': origin.get('Subdir', ''),
                    'timestamp': info.get('Time', ''),
                })
                continue

            # Module lacks Origin metadata (common for +incompatible modules):
            # infer VCS URL and ref from module path/version.
            # Progress output, since deriving may require network calls.
            if (derived + 1) % 10 == 1:
                print(f" Deriving VCS info... ({derived + 1} modules)", end='\r', flush=True)
            derived_info = derive_vcs_info(module_path, version)
            if not derived_info:
                # Cannot derive VCS info - skip this module
                skipped += 1
                continue

            modules.append({
                'module_path': module_path,
                'version': version,
                'vcs_url': derived_info.get('vcs_url', ''),
                'vcs_hash': derived_info.get('vcs_hash', ''),
                'vcs_ref': derived_info.get('vcs_ref', ''),
                'subdir': derived_info.get('subdir', ''),
                'timestamp': info.get('Time', ''),
            })
            # Count only after the module was actually added, so a failure
            # inside derive_vcs_info() cannot inflate the statistic.
            derived += 1

        except json.JSONDecodeError as e:
            print(f" ⚠️ Failed to parse {info_file}: {e}")
            skipped += 1
            continue
        except Exception as e:
            # Best-effort scan: one unreadable entry must not abort the walk.
            print(f" ⚠️ Error processing {info_file}: {e}")
            skipped += 1
            continue

    print(f"\nProcessed {total_info_files} .info files")
    print(f"Extracted {len(modules)} modules total:")
    print(f" - {len(modules) - derived} with Origin metadata from proxy")
    print(f" - {derived} with derived VCS info (Fix #29)")
    print(f"Skipped {skipped} modules (cannot derive VCS info)")

    return modules
| 384 | |||
| 385 | |||
def main():
    """
    Command-line entry point.

    Parses ``--gomodcache`` and ``--output``, runs ``extract_modules()`` on
    the cache, writes the result as JSON to ``--output`` plus a sorted
    ``module@version`` list next to it, and prints summary statistics.
    Exits with status 1 on any failure or when no modules were found.
    """
    parser = argparse.ArgumentParser(
        description='Extract module metadata from Go module cache',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Extract from native Go build cache
  %(prog)s --gomodcache /tmp/k3s-discovery-cache --output /tmp/k3s-modules.json

  # Extract from BitBake discovery build
  %(prog)s --gomodcache /path/to/build/tmp/work/.../discovery-cache --output /tmp/k3s-modules.json

  # Extract from system GOMODCACHE
  %(prog)s --gomodcache ~/go/pkg/mod --output /tmp/modules.json

Output:
  - <output>.json: Complete module metadata (VCS URLs, commits, subdirs)
  - <output>.txt: Simple module@version list (sorted)
"""
    )
    parser.add_argument(
        '--gomodcache',
        required=True,
        help='Path to GOMODCACHE directory'
    )
    parser.add_argument(
        '--output',
        required=True,
        help='Output JSON file path (e.g., /tmp/k3s-modules.json)'
    )

    args = parser.parse_args()

    # Validate GOMODCACHE path
    gomodcache = Path(args.gomodcache)
    if not gomodcache.exists():
        print(f"Error: GOMODCACHE directory does not exist: {gomodcache}", file=sys.stderr)
        sys.exit(1)

    # Extract modules
    try:
        modules = extract_modules(gomodcache)
    except Exception as e:
        print(f"Error during extraction: {e}", file=sys.stderr)
        sys.exit(1)

    if not modules:
        print("Warning: No modules with VCS metadata found!", file=sys.stderr)
        print("This may indicate:", file=sys.stderr)
        print(" - GOMODCACHE is from BitBake (synthetic .info files)", file=sys.stderr)
        print(" - GOMODCACHE is empty or incomplete", file=sys.stderr)
        print(" - Need to run 'go mod download' first", file=sys.stderr)
        sys.exit(1)

    # Save as JSON
    output_path = Path(args.output)
    try:
        output_path.parent.mkdir(parents=True, exist_ok=True)
        output_path.write_text(json.dumps(modules, indent=2, sort_keys=True), encoding='utf-8')
        print(f"\n✓ Saved {len(modules)} modules to {output_path}")
    except Exception as e:
        print(f"Error writing JSON output: {e}", file=sys.stderr)
        sys.exit(1)

    # Also save simple list
    list_path = output_path.with_suffix('.txt')
    if list_path == output_path:
        # --output itself ends in ".txt": writing the list to the same path
        # would overwrite the JSON we just saved, so pick a distinct name.
        list_path = output_path.with_name(output_path.stem + '.list.txt')
    try:
        simple_list = [f"{m['module_path']}@{m['version']}" for m in modules]
        list_path.write_text('\n'.join(sorted(simple_list)) + '\n', encoding='utf-8')
        print(f"✓ Saved module list to {list_path}")
    except Exception as e:
        print(f"Error writing module list: {e}", file=sys.stderr)
        sys.exit(1)

    # Print summary statistics
    print("\n" + "="*60)
    print("EXTRACTION SUMMARY")
    print("="*60)

    # Count unique repositories
    unique_repos = len({m['vcs_url'] for m in modules})
    print(f"Total modules: {len(modules)}")
    print(f"Unique repositories: {unique_repos}")

    # Count modules with subdirs (multi-module repos)
    with_subdirs = sum(1 for m in modules if m['subdir'])
    print(f"Multi-module repos: {with_subdirs} modules have subdirs")

    # Show top repositories by module count
    repo_counts = {}
    for m in modules:
        repo_counts[m['vcs_url']] = repo_counts.get(m['vcs_url'], 0) + 1

    top_repos = sorted(repo_counts.items(), key=lambda x: x[1], reverse=True)[:5]
    print("\nTop 5 repositories by module count:")
    for repo_url, count in top_repos:
        print(f" {count:3d} modules: {repo_url}")

    print("\n" + "="*60)
    print("Use this JSON file with:")
    print(f" oe-go-mod-fetcher.py --native-modules {output_path}")
    print("="*60)
| 488 | |||
| 489 | |||
# Run the command-line interface only when executed as a script, not when
# this file is imported as a module.
if __name__ == '__main__':
    main()
diff --git a/scripts/oe-go-mod-fetcher.py b/scripts/oe-go-mod-fetcher.py new file mode 100755 index 00000000..699255bd --- /dev/null +++ b/scripts/oe-go-mod-fetcher.py | |||
| @@ -0,0 +1,4580 @@ | |||
| 1 | #!/usr/bin/env python3 | ||
| 2 | # SPDX-License-Identifier: GPL-2.0-only | ||
| 3 | # | ||
| 4 | # go-dep processor | ||
| 5 | # | ||
| 6 | # Copyright (C) 2025 Bruce Ashfield | ||
| 7 | # | ||
| 8 | # This program is free software; you can redistribute it and/or modify | ||
| 9 | # it under the terms of the GNU General Public License version 2 as | ||
| 10 | # published by the Free Software Foundation. | ||
| 11 | # | ||
| 12 | # This program is distributed in the hope that it will be useful, | ||
| 13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 15 | # GNU General Public License for more details. | ||
| 16 | # | ||
| 17 | # You should have received a copy of the GNU General Public License along | ||
| 18 | # with this program; if not, write to the Free Software Foundation, Inc., | ||
| 19 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||
| 20 | |||
| 21 | """ | ||
| 22 | Go Module Git Fetcher - Hybrid Architecture | ||
| 23 | Version 3.0.0 - Complete rewrite using Go download for discovery + git builds | ||
| 24 | Author: Bruce Ashfield | ||
| 25 | Description: Use Go's download for discovery, build from git sources | ||
| 26 | |||
| 27 | ARCHITECTURE: | ||
| 28 | Phase 1: Discovery - Use 'go mod download' + filesystem walk to get correct module paths | ||
| 29 | Phase 2: Recipe Generation - Generate BitBake recipe with git:// SRC_URI entries | ||
| 30 | Phase 3: Cache Building - Build module cache from git sources during do_create_module_cache | ||
| 31 | |||
| 32 | This approach eliminates: | ||
| 33 | - Complex go list -m -json parsing | ||
| 34 | - Manual go.sum parsing and augmentation | ||
| 35 | - Parent module detection heuristics | ||
| 36 | - Version path manipulation (/v2+/v3+ workarounds) | ||
| 37 | - Module path normalization bugs | ||
| 38 | |||
| 39 | Instead we: | ||
| 40 | - Let Go download modules to temporary cache (discovery only) | ||
| 41 | - Walk filesystem to get CORRECT module paths (no parsing!) | ||
| 42 | - Extract VCS info from .info files | ||
| 43 | - Fetch git repositories for each module | ||
| 44 | - Build module cache from git during BitBake build | ||
| 45 | |||
| 46 | CHANGELOG v3.0.0: | ||
| 47 | - Complete architectural rewrite | ||
| 48 | - Removed all go list and go.sum parsing logic (4000+ lines) | ||
| 49 | - Implemented 3-phase hybrid approach | ||
| 50 | - Discovery uses go mod download + filesystem walk | ||
| 51 | - Module paths from filesystem, not from go list (no more /v3 stripping bugs!) | ||
| 52 | - Builds entirely from git sources | ||
| 53 | - Compatible with oe-core's gomod:// fetcher (same cache structure) | ||
| 54 | """ | ||
| 55 | |||
| 56 | import argparse | ||
| 57 | import concurrent.futures | ||
| 58 | import hashlib | ||
| 59 | import io | ||
| 60 | import json | ||
| 61 | import os | ||
| 62 | import re | ||
| 63 | import shutil | ||
| 64 | import subprocess | ||
| 65 | import sys | ||
| 66 | import tempfile | ||
| 67 | import textwrap | ||
| 68 | import threading | ||
| 69 | from pathlib import Path | ||
| 70 | from typing import Dict, List, Optional, Set, Tuple | ||
| 71 | from datetime import datetime, timedelta, timezone | ||
| 72 | |||
| 73 | VERSION = "3.0.0" | ||
| 74 | LOG_PATH: Optional[Path] = None | ||
| 75 | |||
| 76 | # ============================================================================= | ||
| 77 | # BitBake Task Templates | ||
| 78 | # ============================================================================= | ||
| 79 | |||
| 80 | |||
class Tee(io.TextIOBase):
    """Text stream that fans every write and flush out to several streams."""

    def __init__(self, *streams: io.TextIOBase) -> None:
        # Kept as the tuple of positional arguments, in the order given.
        self.streams = streams

    def write(self, data: str) -> int:
        """Write *data* to each underlying stream; return ``len(data)``."""
        written = len(data)
        for target in self.streams:
            target.write(data)
        return written

    def flush(self) -> None:
        """Flush each underlying stream in turn."""
        for target in self.streams:
            target.flush()
| 95 | |||
def parse_go_sum(go_sum_path: Path) -> Tuple[Set[Tuple[str, str]], Set[Tuple[str, str]]]:
    """
    Classify the (module, version) pairs listed in a go.sum file.

    Each usable go.sum line has exactly three whitespace-separated fields:
    module path, version (optionally suffixed with ``/go.mod``) and a hash.
    Lines that are empty, start with ``//`` or have another field count are
    ignored. Module paths wrapped in double quotes have the quotes removed.

    Returns:
        Tuple of (modules_needing_source, indirect_only_modules)
        - modules_needing_source: pairs that have a source entry (a line
          without the ``/go.mod`` suffix)
        - indirect_only_modules: pairs that appear only with the ``/go.mod``
          suffix
        Both sets are empty when the file does not exist.
    """
    gomod_suffix = '/go.mod'
    source_keys: Set[Tuple[str, str]] = set()
    gomod_keys: Set[Tuple[str, str]] = set()

    if not go_sum_path.exists():
        return (source_keys, gomod_keys)

    with go_sum_path.open() as handle:
        for raw_line in handle:
            entry = raw_line.strip()
            if not entry or entry.startswith('//'):
                continue

            fields = entry.split()
            if len(fields) != 3:
                continue

            name = fields[0].strip()
            version = fields[1]

            # Drop one pair of surrounding double quotes, if present.
            if len(name) >= 2 and name[0] == '"' and name[-1] == '"':
                name = name[1:-1]

            if version.endswith(gomod_suffix):
                gomod_keys.add((name, version[:-len(gomod_suffix)]))
            else:
                source_keys.add((name, version))

    # A pair that has a source entry is never reported as go.mod-only.
    # Modules seen only via /go.mod are deliberately NOT promoted to the
    # source set: only their .mod files are needed.
    return (source_keys, gomod_keys - source_keys)
| 162 | |||
| 163 | |||
def collect_modules_via_go_list(source_dir: Path) -> Set[Tuple[str, str]]:
    """
    Discover modules by running ``go list -m -json all`` in *source_dir*.

    The command runs with a copy of the current environment in which
    GOPROXY defaults to https://proxy.golang.org and, when the module-level
    CURRENT_GOMODCACHE is truthy, GOMODCACHE is set to it.

    Returns:
        Set of (module path, version) pairs. Main modules and entries without
        a usable version are left out; for replaced modules the replacement's
        path/version are used where present. An empty set is returned when
        the go command exits with a non-zero status.
    """
    go_env = os.environ.copy()
    go_env.setdefault('GOPROXY', 'https://proxy.golang.org')
    if CURRENT_GOMODCACHE:
        go_env['GOMODCACHE'] = CURRENT_GOMODCACHE

    try:
        completed = subprocess.run(
            ['go', 'list', '-m', '-json', 'all'],
            cwd=source_dir,
            capture_output=True,
            text=True,
            check=True,
            env=go_env,
        )
    except subprocess.CalledProcessError:
        return set()

    # The output is a sequence of concatenated JSON objects rather than one
    # document, so decode them one at a time from a moving offset.
    text = completed.stdout
    text_len = len(text)
    json_decoder = json.JSONDecoder()
    found: Set[Tuple[str, str]] = set()
    cursor = 0

    while True:
        # Skip whitespace between objects.
        while cursor < text_len and text[cursor].isspace():
            cursor += 1
        if cursor >= text_len:
            break

        try:
            record, cursor = json_decoder.raw_decode(text, cursor)
        except json.JSONDecodeError:
            # Stop at the first undecodable chunk; keep what was collected.
            break

        mod_path = record.get('Path') or ''
        if record.get('Main') or not mod_path:
            continue

        mod_version = record.get('Version') or ''
        replacement = record.get('Replace')
        if replacement:
            # Prefer the replacement's coordinates, falling back field by field.
            mod_path = replacement.get('Path', mod_path) or mod_path
            mod_version = replacement.get('Version', mod_version) or mod_version

        if mod_version and mod_version != 'none':
            found.add((mod_path, mod_version))

    return found
| 218 | |||
| 219 | |||
def go_mod_download(module_path: str, version: str) -> bool:
    """
    Download a specific module version into the current GOMODCACHE.

    Args:
        module_path: Go module path (e.g. ``github.com/foo/bar``).
        version: Module version to download.

    Returns:
        True if the module is available (downloaded now, or already downloaded
        earlier in this run); False if no GOMODCACHE/source directory is
        configured, the command timed out, or the download failed for a
        non-network reason.

    Raises:
        RuntimeError: if the failure output looks like a network problem.
            NETWORK_FAILURE_DETECTED is set to True before raising.
    """
    global NETWORK_FAILURE_DETECTED

    if not CURRENT_GOMODCACHE or not CURRENT_SOURCE_DIR:
        return False

    key = (module_path, version)
    if key in DOWNLOADED_MODULES:
        # Already fetched during this run. Return a real bool, matching the
        # declared return type, rather than the module path string.
        return True

    env = os.environ.copy()
    env.setdefault('GOPROXY', 'https://proxy.golang.org')
    env['GOMODCACHE'] = CURRENT_GOMODCACHE

    try:
        subprocess.run(
            ['go', 'mod', 'download', f'{module_path}@{version}'],
            cwd=str(CURRENT_SOURCE_DIR),
            env=env,
            capture_output=True,
            text=True,
            check=True,
            timeout=GO_CMD_TIMEOUT,
        )
    except subprocess.TimeoutExpired:
        print(f" ❌ go mod download timed out for {module_path}@{version} after {GO_CMD_TIMEOUT}s")
        return False
    except subprocess.CalledProcessError as e:
        stderr = (e.stderr or '').strip()
        if stderr:
            lower = stderr.lower()
            network_signals = [
                "lookup ", "dial tcp", "connection refused",
                "network is unreachable", "tls handshake timeout",
                "socket: operation not permitted"
            ]
            if any(signal in lower for signal in network_signals):
                # Network trouble is escalated instead of being reported as an
                # ordinary per-module failure.
                NETWORK_FAILURE_DETECTED = True
                raise RuntimeError(
                    f"Network failure while downloading {module_path}@{version}: {stderr}"
                ) from e
        print(f" ⚠️ go mod download failed for {module_path}@{version}: {stderr}")
        return False

    DOWNLOADED_MODULES.add(key)
    return True
| 265 | |||
| 266 | |||
# ---------------------------------------------------------------------------
# Cache locations.
# These are import-time defaults. configure_cache_paths() rebinds
# CACHE_BASE_DIR, DATA_DIR, CLONE_CACHE_DIR, VERIFY_BASE_DIR and the
# *_CACHE_PATH / MODULE_REPO_OVERRIDES_PATH values below when it is called.
# ---------------------------------------------------------------------------
SCRIPT_DIR = Path(__file__).resolve().parent
CACHE_BASE_DIR = SCRIPT_DIR / "data"  # Default to scripts/data for JSON caches
DATA_DIR = CACHE_BASE_DIR
CLONE_CACHE_DIR = SCRIPT_DIR / ".cache" / "repos"  # Repository clone cache
VERIFY_BASE_DIR = CACHE_BASE_DIR / ".verify"  # Parent of the per-repository bare repos used for commit verification
LS_REMOTE_CACHE_PATH = DATA_DIR / "ls-remote-cache.json"
VERIFY_COMMIT_CACHE_PATH = DATA_DIR / "verify-cache.json"
MODULE_REPO_OVERRIDES_PATH = DATA_DIR / "repo-overrides.json"
# Manual overrides file - tracked in git, for permanent overrides when discovery fails.
# Always relative to SCRIPT_DIR; configure_cache_paths() does not rebind it.
MANUAL_OVERRIDES_PATH = SCRIPT_DIR / "data" / "manual-overrides.json"

# In-memory caches, each paired with a *_DIRTY flag.
# NOTE(review): the code that loads/saves these is outside this section; the
# flags presumably mark unsaved changes - confirm against the save helpers.
LS_REMOTE_CACHE: Dict[Tuple[str, str], Optional[str]] = {}
LS_REMOTE_CACHE_DIRTY = False

MODULE_METADATA_CACHE_PATH = DATA_DIR / "module-cache.json"
# Keyed by (module_path, version); prune_metadata_cache() reads the
# 'vcs_url' and 'commit' fields of each entry.
MODULE_METADATA_CACHE: Dict[Tuple[str, str], Dict[str, str]] = {}
MODULE_METADATA_CACHE_DIRTY = False

VANITY_URL_CACHE_PATH = DATA_DIR / "vanity-url-cache.json"
VANITY_URL_CACHE: Dict[str, Optional[str]] = {}
VANITY_URL_CACHE_DIRTY = False

# Per-run state shared between the helper functions.
CURRENT_GOMODCACHE: Optional[str] = None  # Exported as GOMODCACHE to `go` subprocesses when set
CURRENT_SOURCE_DIR: Optional[Path] = None  # Working directory for `go mod download`
TEMP_GOMODCACHES: List[Path] = []
FAILED_MODULE_PATHS: Set[str] = set()
FAILED_MODULE_ENTRIES: Set[Tuple[str, str]] = set()
DOWNLOADED_MODULES: Set[Tuple[str, str]] = set()  # (module_path, version) pairs fetched by go_mod_download()
NETWORK_FAILURE_DETECTED: bool = False  # Set by go_mod_download() when stderr looks like a network error
SKIPPED_MODULES: Dict[Tuple[str, str], str] = {}  # (module_path, version) -> reason, see _record_skipped_module()
VERBOSE_MODE: bool = False  # Set from command-line args
| 298 | |||
| 299 | def _record_skipped_module(module_path: str, version: str, reason: str) -> None: | ||
| 300 | SKIPPED_MODULES[(module_path, version)] = reason | ||
| 301 | |||
# Timeouts passed to subprocess.run() for go and git invocations.
GO_CMD_TIMEOUT = 180  # seconds
GIT_CMD_TIMEOUT = 90  # seconds

# State used by commit verification (_verify_repo_dir / verify_commit_accessible).
VERIFY_REPO_CACHE: Dict[str, Path] = {}  # vcs_url -> bare repo directory, filled by _verify_repo_dir()
VERIFY_REPO_LOCKS: Dict[str, threading.Lock] = {}  # Per-repository locks for parallel verification
VERIFY_REPO_LOCKS_LOCK = threading.RLock()  # REENTRANT lock to allow same thread to acquire multiple times
VERIFY_REPO_BRANCHES: Dict[str, List[str]] = {}  # Cache branch lists per repo to avoid repeated ls-remote
VERIFY_RESULTS: Dict[Tuple[str, str], bool] = {}  # (vcs_url, commit) -> verification result for this run
VERIFY_COMMIT_CACHE: Dict[str, bool] = {}  # Legacy format: key -> bool (key is "<vcs_url>|||<commit>")
# New format: key -> {"verified": bool, "first_verified": str, "last_checked": str, "fetch_method": str}
# NOTE(review): `any` below is the builtin function, not typing.Any; this
# evaluates at runtime but is not a valid type for checkers - switch to
# typing.Any once it is imported at the top of the file.
VERIFY_COMMIT_CACHE_V2: Dict[str, Dict[str, any]] = {}
VERIFY_COMMIT_CACHE_DIRTY = False
VERIFY_ENABLED = False  # Set to True when verification is active
VERIFY_CACHE_MAX_AGE_DAYS = 30  # Re-verify commits older than this
VERIFY_DETECTED_BRANCHES: Dict[Tuple[str, str], str] = {}  # (url, commit) -> branch_name
VERIFY_FALLBACK_COMMITS: Dict[Tuple[str, str], str] = {}  # Maps (url, original_commit) -> fallback_commit
VERIFY_FULL_REPOS: Set[str] = set()  # Track repos that have been fetched with full history
VERIFY_CORRECTIONS_APPLIED = False  # Track if any commit corrections were made
MODULE_REPO_OVERRIDES: Dict[Tuple[str, Optional[str]], str] = {}  # Dynamic overrides from --set-repo
MODULE_REPO_OVERRIDES_DIRTY = False
MANUAL_OVERRIDES: Dict[Tuple[str, Optional[str]], str] = {}  # Git-tracked overrides from manual-overrides.json

# REPO_OVERRIDES kept for backwards compatibility but no longer used for hardcoded values.
# Manual overrides go in data/manual-overrides.json which is tracked in git.
REPO_OVERRIDES: Dict[str, List[str]] = {}
| 326 | |||
| 327 | |||
| 328 | def _normalise_override_key(module_path: str, version: Optional[str]) -> Tuple[str, Optional[str]]: | ||
| 329 | module = module_path.strip() | ||
| 330 | ver = version.strip() if version else None | ||
| 331 | if not module: | ||
| 332 | raise ValueError("module path for override cannot be empty") | ||
| 333 | return module, ver | ||
| 334 | |||
| 335 | |||
| 336 | def _parse_override_spec(module_spec: str) -> Tuple[str, Optional[str]]: | ||
| 337 | if '@' in module_spec: | ||
| 338 | module_path, version = module_spec.split('@', 1) | ||
| 339 | version = version or None | ||
| 340 | else: | ||
| 341 | module_path, version = module_spec, None | ||
| 342 | return module_path.strip(), version.strip() if version else None | ||
| 343 | |||
| 344 | |||
def repo_override_candidates(module_path: str, version: Optional[str] = None) -> List[str]:
    """
    Return the repository URL overrides that apply to a module, best first.

    Sources are consulted in this order, and a URL already collected from a
    higher-priority source is not repeated:

    1. Dynamic overrides (--set-repo, stored in repo-overrides.json) for this exact version
    2. Dynamic overrides without a version (wildcard)
    3. Manual overrides (manual-overrides.json, tracked in git) for this exact version
    4. Manual overrides without a version (wildcard)
    5. Legacy REPO_OVERRIDES dict (kept for backwards compatibility)
    """
    specific_key = _normalise_override_key(module_path, version)
    wildcard_key = _normalise_override_key(module_path, None)

    # Dynamic entries come first so a user-supplied override beats the
    # git-tracked manual ones.
    keyed_lookups = (
        MODULE_REPO_OVERRIDES.get(specific_key),
        MODULE_REPO_OVERRIDES.get(wildcard_key),
        MANUAL_OVERRIDES.get(specific_key),
        MANUAL_OVERRIDES.get(wildcard_key),
    )

    candidates: List[str] = []
    for repo_url in keyed_lookups:
        if repo_url and repo_url not in candidates:
            candidates.append(repo_url)

    # Legacy hardcoded table is keyed by the module path exactly as given.
    for repo_url in REPO_OVERRIDES.get(module_path, []):
        if repo_url not in candidates:
            candidates.append(repo_url)

    return candidates
| 384 | |||
| 385 | |||
def configure_cache_paths(cache_dir: Optional[str], clone_cache_dir: Optional[str] = None) -> None:
    """
    Point every cache location at the requested directories and reload state.

    Creates the directories if needed, rebinds the module-level path
    constants, then reloads the verify-commit cache and the repo overrides
    and resets the in-memory verify repo cache.

    Args:
        cache_dir: Directory for JSON metadata caches (default: scripts/data)
        clone_cache_dir: Directory for git repository clones (default: scripts/.cache/repos)
    """
    global CACHE_BASE_DIR, DATA_DIR, CLONE_CACHE_DIR, VERIFY_BASE_DIR
    global LS_REMOTE_CACHE_PATH, MODULE_METADATA_CACHE_PATH, VANITY_URL_CACHE_PATH
    global VERIFY_COMMIT_CACHE_PATH, MODULE_REPO_OVERRIDES_PATH
    global VERIFY_REPO_CACHE

    # JSON metadata caches: cache_dir itself is the data directory.
    CACHE_BASE_DIR = Path(cache_dir).resolve() if cache_dir else SCRIPT_DIR / "data"
    CACHE_BASE_DIR.mkdir(parents=True, exist_ok=True)
    DATA_DIR = CACHE_BASE_DIR

    LS_REMOTE_CACHE_PATH = DATA_DIR / "ls-remote-cache.json"
    MODULE_METADATA_CACHE_PATH = DATA_DIR / "module-cache.json"
    VANITY_URL_CACHE_PATH = DATA_DIR / "vanity-url-cache.json"
    VERIFY_COMMIT_CACHE_PATH = DATA_DIR / "verify-cache.json"
    MODULE_REPO_OVERRIDES_PATH = DATA_DIR / "repo-overrides.json"

    # Bare repositories used for commit verification live under the cache dir.
    VERIFY_BASE_DIR = CACHE_BASE_DIR / ".verify"
    VERIFY_BASE_DIR.mkdir(parents=True, exist_ok=True)

    # Git clone cache.
    if clone_cache_dir:
        CLONE_CACHE_DIR = Path(clone_cache_dir).resolve()
    else:
        CLONE_CACHE_DIR = SCRIPT_DIR / ".cache" / "repos"
    CLONE_CACHE_DIR.mkdir(parents=True, exist_ok=True)

    # Discard state loaded from the previous locations and re-read it from
    # the new ones.
    VERIFY_COMMIT_CACHE.clear()
    load_verify_commit_cache()
    MODULE_REPO_OVERRIDES.clear()
    load_repo_overrides()
    load_manual_overrides()

    # Cached repo directories pointed into the old VERIFY_BASE_DIR.
    VERIFY_REPO_CACHE = {}
| 433 | |||
| 434 | |||
def ensure_path_is_writable(path: Path) -> None:
    """
    Verify write access to ``path`` by creating and deleting a small file.

    The directory is created (including parents) if it does not exist. If
    the directory cannot be created or the probe file cannot be written, a
    clear error is printed and the process exits with status 1.

    Args:
        path: Directory that must be writable (the GOMODCACHE location).

    Raises:
        SystemExit: with code 1 when the path is not writable.
    """
    probe = path / ".oe-go-mod-fetcher-permcheck"
    try:
        # Creating the directory is part of the check: a path that cannot be
        # created must produce the same friendly error as one that cannot be
        # written, not a raw traceback.
        path.mkdir(parents=True, exist_ok=True)
        with open(probe, "w") as fh:
            fh.write("")
    except Exception as exc:
        print(f"❌ GOMODCACHE is not writable: {path} ({exc})")
        print(" Fix permissions (e.g. chown/chmod) or pass a writable --gomodcache path.")
        sys.exit(1)
    finally:
        # Best-effort cleanup; the probe may never have been created.
        try:
            probe.unlink()
        except OSError:
            pass
| 454 | |||
| 455 | def _normalize_url(url: str) -> str: | ||
| 456 | url = url.strip() | ||
| 457 | if url.startswith("git://"): | ||
| 458 | url = "https://" + url[6:] | ||
| 459 | if url.endswith(".git"): | ||
| 460 | url = url[:-4] | ||
| 461 | return url | ||
| 462 | |||
| 463 | |||
def _url_allowed_for_module(module_path: str, url: str, version: Optional[str] = None) -> bool:
    """
    Tell whether ``url`` is an acceptable repository for the module.

    When overrides exist for ``module_path`` (optionally for ``version``),
    only URLs equal to one of them after normalization are allowed. With no
    overrides, every URL is allowed.
    """
    candidate = _normalize_url(url)
    allowed = repo_override_candidates(module_path, version)
    if allowed:
        return candidate in {_normalize_url(entry) for entry in allowed}
    return True
| 471 | |||
| 472 | |||
def prune_metadata_cache() -> None:
    """
    Drop metadata cache entries that are invalid or violate override policy.

    An entry is removed when it lacks a VCS URL or commit, when the commit is
    not a 40-character hexadecimal hash, or when its URL is not allowed for
    the module by the current overrides. This keeps old .inc state from
    re-introducing bad repositories during bootstrap. The cache is marked
    dirty if anything was removed.
    """
    global MODULE_METADATA_CACHE_DIRTY

    dropped_any = False
    # Iterate over a snapshot of the keys because entries are removed while
    # looping.
    for cache_key in list(MODULE_METADATA_CACHE.keys()):
        module_path, version = cache_key
        record = MODULE_METADATA_CACHE.get(cache_key) or {}
        repo_url = record.get('vcs_url', '')
        commit_id = record.get('commit', '')

        keep = (
            bool(repo_url)
            and bool(commit_id)
            and len(commit_id) == 40
            and re.fullmatch(r'[0-9a-fA-F]{40}', commit_id) is not None
            and _url_allowed_for_module(module_path, repo_url, version)
        )
        if keep:
            continue

        MODULE_METADATA_CACHE.pop(cache_key, None)
        dropped_any = True

    if dropped_any:
        MODULE_METADATA_CACHE_DIRTY = True
| 505 | |||
| 506 | |||
| 507 | def _verify_repo_dir(vcs_url: str) -> Path: | ||
| 508 | # Quick check without lock (optimization) | ||
| 509 | if vcs_url in VERIFY_REPO_CACHE: | ||
| 510 | return VERIFY_REPO_CACHE[vcs_url] | ||
| 511 | |||
| 512 | # Use master lock to serialize repo initialization | ||
| 513 | with VERIFY_REPO_LOCKS_LOCK: | ||
| 514 | # Double-check after acquiring lock | ||
| 515 | if vcs_url in VERIFY_REPO_CACHE: | ||
| 516 | return VERIFY_REPO_CACHE[vcs_url] | ||
| 517 | |||
| 518 | repo_hash = hashlib.sha256(vcs_url.encode()).hexdigest() | ||
| 519 | repo_dir = VERIFY_BASE_DIR / repo_hash | ||
| 520 | git_dir = repo_dir / "repo" | ||
| 521 | git_dir.mkdir(parents=True, exist_ok=True) | ||
| 522 | |||
| 523 | env = os.environ.copy() | ||
| 524 | env.setdefault("GIT_TERMINAL_PROMPT", "0") | ||
| 525 | env.setdefault("GIT_ASKPASS", "true") | ||
| 526 | |||
| 527 | if not (git_dir / "config").exists(): | ||
| 528 | subprocess.run([ | ||
| 529 | "git", "init", "--bare" | ||
| 530 | ], cwd=str(git_dir), check=True, capture_output=True, env=env) | ||
| 531 | subprocess.run([ | ||
| 532 | "git", "remote", "add", "origin", vcs_url | ||
| 533 | ], cwd=str(git_dir), check=True, capture_output=True, env=env) | ||
| 534 | else: | ||
| 535 | subprocess.run([ | ||
| 536 | "git", "remote", "set-url", "origin", vcs_url | ||
| 537 | ], cwd=str(git_dir), check=False, capture_output=True, env=env) | ||
| 538 | |||
| 539 | VERIFY_REPO_CACHE[vcs_url] = git_dir | ||
| 540 | |||
| 541 | # Create a per-repo lock while we still hold the master lock | ||
| 542 | if vcs_url not in VERIFY_REPO_LOCKS: | ||
| 543 | VERIFY_REPO_LOCKS[vcs_url] = threading.Lock() | ||
| 544 | |||
| 545 | return git_dir | ||
| 546 | |||
| 547 | |||
def _fallback_target_date(version: str, timestamp: str = ""):
    """Return the UTC datetime a fallback commit should be close to, or None."""
    import re
    from datetime import datetime, timezone

    target = None
    if timestamp:
        try:
            target = datetime.fromisoformat(timestamp.replace('Z', '+00:00'))
        except (ValueError, TypeError, AttributeError):
            target = None

    if target is None:
        # Pseudo-versions come in three shapes, all carrying a UTC
        # yyyymmddhhmmss stamp right before the commit hash:
        #   vX.0.0-yyyymmddhhmmss-hash
        #   vX.Y.Z-pre.0.yyyymmddhhmmss-hash
        #   vX.Y.(Z+1)-0.yyyymmddhhmmss-hash
        match = re.match(r'v\d+\.\d+\.\d+-(?:.+\.)?(\d{14})-[0-9a-f]+', version)
        if match:
            try:
                target = datetime.strptime(match.group(1), '%Y%m%d%H%M%S')
            except ValueError:
                target = None

    if target is None:
        return None
    if target.tzinfo is None:
        # Pseudo-version stamps are UTC; naive .info times are assumed to be too.
        return target.replace(tzinfo=timezone.utc)
    return target.astimezone(timezone.utc)


def _remote_default_branch(vcs_url: str, env: Dict[str, str]) -> str:
    """Return the remote's default branch name, or 'main' if it cannot be determined."""
    try:
        result = subprocess.run(
            ["git", "ls-remote", "--symref", vcs_url, "HEAD"],
            capture_output=True,
            text=True,
            timeout=30,
            env=env,
        )
        if result.returncode == 0 and result.stdout:
            # Parse: ref: refs/heads/main HEAD
            for line in result.stdout.split('\n'):
                if line.startswith('ref:'):
                    return line.split()[1].replace('refs/heads/', '')
    except (OSError, subprocess.SubprocessError, ValueError, IndexError):
        pass
    return 'main'


def _find_fallback_commit(vcs_url: str, version: str, timestamp: str = "") -> Optional[Tuple[str, str]]:
    """
    Find a fallback commit when the proxy commit doesn't exist.

    Strategy:
    1. If a date is known (from ``timestamp`` or a pseudo-version): use the
       newest commit on the default branch at or before that date; if that
       search finds nothing, use the tip of the default branch.
    2. Otherwise: use the latest commit on the default branch as reported by
       ``git ls-remote`` (no fetch needed).

    The date-based path fetches into the bare repository for ``vcs_url``.
    It does NOT acquire the per-repo lock: per the original implementation
    notes, the caller (verify_commit_accessible) already holds it.

    Args:
        vcs_url: Git repository URL.
        version: Module version; a pseudo-version supplies the date when
            ``timestamp`` is empty or unparsable.
        timestamp: ISO timestamp (e.g. from a .info file), may be empty.

    Returns: (commit_hash, branch_name) or None if failed
    """
    env = os.environ.copy()
    env.setdefault("GIT_TERMINAL_PROMPT", "0")
    env.setdefault("GIT_ASKPASS", "true")

    target_date = _fallback_target_date(version, timestamp)
    default_branch = _remote_default_branch(vcs_url, env)

    try:
        if target_date is None:
            # Use latest commit from ls-remote (no need to clone)
            result = subprocess.run(
                ["git", "ls-remote", vcs_url, f"refs/heads/{default_branch}"],
                capture_output=True,
                text=True,
                timeout=30,
                env=env,
            )
            if result.returncode == 0 and result.stdout:
                return (result.stdout.split()[0], default_branch)
            return None

        # Commit dates are only available from history, so work in the
        # verification repo (normally already initialised by the caller).
        repo_dir = VERIFY_REPO_CACHE.get(vcs_url)
        if not repo_dir:
            repo_dir = _verify_repo_dir(vcs_url)

        try:
            subprocess.run(
                ["git", "fetch", "origin", f"{default_branch}:refs/remotes/origin/{default_branch}"],
                cwd=str(repo_dir),
                check=True,
                capture_output=True,
                text=True,
                timeout=60,
                env=env,
            )
        except subprocess.CalledProcessError:
            # Carry on with whatever history is already present locally.
            pass

        # Pass an explicit +0000 zone: without one git interprets the date
        # in the machine's local time zone, shifting the cut-off.
        date_str = target_date.strftime('%Y-%m-%d %H:%M:%S +0000')
        result = subprocess.run(
            ["git", "log", "-1", "--format=%H", f"--until={date_str}", f"origin/{default_branch}"],
            cwd=str(repo_dir),
            capture_output=True,
            text=True,
            timeout=30,
            env=env,
        )
        if result.returncode == 0 and result.stdout.strip():
            return (result.stdout.strip(), default_branch)

        # If date-based search failed, fall back to latest commit
        result = subprocess.run(
            ["git", "rev-parse", f"origin/{default_branch}"],
            cwd=str(repo_dir),
            capture_output=True,
            text=True,
            timeout=30,
            env=env,
        )
        if result.returncode == 0 and result.stdout.strip():
            return (result.stdout.strip(), default_branch)
    except Exception as e:
        # Best effort: report and let the caller treat it as "no fallback".
        print(f" ⚠️ Fallback commit search failed: {e}")

    return None
| 683 | |||
| 684 | |||
| 685 | def verify_commit_accessible(vcs_url: str, commit: str, ref_hint: str = "", version: str = "", timestamp: str = "") -> bool: | ||
| 686 | """ | ||
| 687 | Fetch commit into a bare cache to ensure it exists upstream. | ||
| 688 | |||
| 689 | Check cache age and force re-verification if too old. | ||
| 690 | If commit doesn't exist, use fallback (latest commit on default branch or near timestamp) | ||
| 691 | |||
| 692 | Args: | ||
| 693 | vcs_url: Git repository URL | ||
| 694 | commit: Commit hash to verify | ||
| 695 | ref_hint: Optional ref (tag/branch) that should contain the commit | ||
| 696 | version: Module version (for extracting timestamp from pseudo-versions) | ||
| 697 | timestamp: ISO timestamp from .info file (for finding commits near that date) | ||
| 698 | """ | ||
| 699 | from datetime import datetime, timezone, timedelta | ||
| 700 | |||
| 701 | # Check cache before acquiring lock (fast path for already-verified commits) | ||
| 702 | key = (vcs_url, commit) | ||
| 703 | if key in VERIFY_RESULTS: | ||
| 704 | return VERIFY_RESULTS[key] | ||
| 705 | |||
| 706 | cache_key = f"{vcs_url}|||{commit}" | ||
| 707 | |||
| 708 | # Track if verification passed via cache (to skip re-saving later) | ||
| 709 | cached_verification_passed = False | ||
| 710 | |||
| 711 | # Check cache with aging logic | ||
| 712 | if cache_key in VERIFY_COMMIT_CACHE_V2: | ||
| 713 | cache_entry = VERIFY_COMMIT_CACHE_V2[cache_key] | ||
| 714 | if cache_entry.get("verified"): | ||
| 715 | # Check if cache is too old | ||
| 716 | last_checked_str = cache_entry.get("last_checked") | ||
| 717 | if last_checked_str: | ||
| 718 | try: | ||
| 719 | last_checked = datetime.fromisoformat(last_checked_str.replace('Z', '+00:00')) | ||
| 720 | age_days = (datetime.now(timezone.utc) - last_checked).days | ||
| 721 | |||
| 722 | if age_days < VERIFY_CACHE_MAX_AGE_DAYS: | ||
| 723 | # Cache is fresh for commit existence, but we still need branch detection | ||
| 724 | # Branch detection is cheap (local operation) and critical for BitBake recipes | ||
| 725 | # Don't return early - continue to branch detection below | ||
| 726 | cached_verification_passed = True | ||
| 727 | else: | ||
| 728 | # Cache is stale, force re-verification | ||
| 729 | print(f" ⏰ Cache stale ({age_days} days old), re-verifying {commit[:12]}...") | ||
| 730 | # Fall through to re-verify | ||
| 731 | except Exception: | ||
| 732 | # Can't parse timestamp, force re-verification | ||
| 733 | pass | ||
| 734 | else: | ||
| 735 | # No timestamp, but still need branch detection | ||
| 736 | cached_verification_passed = True | ||
| 737 | |||
| 738 | # Legacy cache format fallback | ||
| 739 | if cache_key in VERIFY_COMMIT_CACHE and VERIFY_COMMIT_CACHE[cache_key]: | ||
| 740 | # Migrate to v2 format during this check | ||
| 741 | now = datetime.now(timezone.utc).isoformat() | ||
| 742 | VERIFY_COMMIT_CACHE_V2[cache_key] = { | ||
| 743 | "verified": True, | ||
| 744 | "first_verified": now, | ||
| 745 | "last_checked": now, | ||
| 746 | "fetch_method": "cached" | ||
| 747 | } | ||
| 748 | # Don't return early - continue to branch detection | ||
| 749 | cached_verification_passed = True | ||
| 750 | |||
| 751 | # Ensure repo is initialized (this creates the lock too) | ||
| 752 | repo_dir = _verify_repo_dir(vcs_url) | ||
| 753 | |||
| 754 | # Now safely get the lock (guaranteed to exist after _verify_repo_dir returns) | ||
| 755 | lock = VERIFY_REPO_LOCKS[vcs_url] | ||
| 756 | |||
| 757 | with lock: | ||
| 758 | # Double-check cache after acquiring lock (another thread may have verified while we waited) | ||
| 759 | if key in VERIFY_RESULTS: | ||
| 760 | return VERIFY_RESULTS[key] | ||
| 761 | |||
| 762 | env = os.environ.copy() | ||
| 763 | env.setdefault("GIT_TERMINAL_PROMPT", "0") | ||
| 764 | env.setdefault("GIT_ASKPASS", "true") | ||
| 765 | |||
| 766 | def _commit_exists(check_commit: str = None) -> bool: | ||
| 767 | """Check if a commit exists in the local repo.""" | ||
| 768 | target = check_commit if check_commit else commit | ||
| 769 | try: | ||
| 770 | subprocess.run( | ||
| 771 | ["git", "rev-parse", "--verify", f"{target}^{{commit}}"], | ||
| 772 | cwd=str(repo_dir), | ||
| 773 | check=True, | ||
| 774 | capture_output=True, | ||
| 775 | env=env, | ||
| 776 | ) | ||
| 777 | return True | ||
| 778 | except subprocess.CalledProcessError: | ||
| 779 | return False | ||
| 780 | |||
| 781 | global VERIFY_COMMIT_CACHE_DIRTY, VERIFY_FALLBACK_COMMITS | ||
| 782 | cached = VERIFY_COMMIT_CACHE.get(cache_key) | ||
| 783 | |||
| 784 | commit_present = _commit_exists() | ||
| 785 | if cached and not commit_present: | ||
| 786 | # Cached entry without a local commit indicates stale data; drop it. | ||
| 787 | VERIFY_COMMIT_CACHE.pop(cache_key, None) | ||
| 788 | VERIFY_COMMIT_CACHE_DIRTY = True | ||
| 789 | cached = None | ||
| 790 | |||
| 791 | # Only do shallow fetch if commit is not already present | ||
| 792 | # Doing --depth=1 on an already-full repo causes git to re-process history (very slow on large repos) | ||
| 793 | if not commit_present and ref_hint: | ||
| 794 | fetch_args = ["git", "fetch", "--depth=1", "origin", ref_hint] | ||
| 795 | |||
| 796 | try: | ||
| 797 | subprocess.run( | ||
| 798 | fetch_args, | ||
| 799 | cwd=str(repo_dir), | ||
| 800 | check=True, | ||
| 801 | capture_output=True, | ||
| 802 | text=True, | ||
| 803 | timeout=GIT_CMD_TIMEOUT, | ||
| 804 | env=env, | ||
| 805 | ) | ||
| 806 | except subprocess.TimeoutExpired: | ||
| 807 | print(f" ⚠️ git fetch timeout ({GIT_CMD_TIMEOUT}s) for {vcs_url} {ref_hint or ''}") | ||
| 808 | except subprocess.CalledProcessError as exc: | ||
| 809 | detail = (exc.stderr or exc.stdout or "").strip() if isinstance(exc.stderr, str) or isinstance(exc.stdout, str) else "" | ||
| 810 | if detail: | ||
| 811 | print(f" ⚠️ git fetch failed for {vcs_url} {ref_hint or ''}: {detail}") | ||
| 812 | # Continue to attempt direct commit fetch | ||
| 813 | |||
| 814 | # For pseudo-versions, we need to determine which branch contains the commit | ||
| 815 | # Strategy depends on whether this is a tagged version or pseudo-version | ||
| 816 | commit_fetched = commit_present # If already present, no need to fetch | ||
| 817 | |||
| 818 | if ref_hint and not commit_present: | ||
| 819 | # Tagged version: try shallow fetch of the specific commit (only if not already present) | ||
| 820 | try: | ||
| 821 | fetch_cmd = ["git", "fetch", "--depth=1", "origin", commit] | ||
| 822 | subprocess.run( | ||
| 823 | fetch_cmd, | ||
| 824 | cwd=str(repo_dir), | ||
| 825 | check=True, | ||
| 826 | capture_output=True, | ||
| 827 | text=True, | ||
| 828 | timeout=GIT_CMD_TIMEOUT, | ||
| 829 | env=env, | ||
| 830 | ) | ||
| 831 | commit_fetched = True | ||
| 832 | |||
| 833 | except subprocess.CalledProcessError as exc: | ||
| 834 | detail = (exc.stderr or exc.stdout or "").strip() if isinstance(exc.stderr, str) or isinstance(exc.stdout, str) else "" | ||
| 835 | if detail: | ||
| 836 | print(f" ⚠️ git fetch failed for {vcs_url[:50]}...: {detail[:100]}") | ||
| 837 | |||
| 838 | # If fetching commit failed for a tag, check if tag has moved | ||
| 839 | if ref_hint and ref_hint.startswith('refs/tags/'): | ||
| 840 | print(f" → Tag commit not fetchable, checking if tag moved...") | ||
| 841 | try: | ||
| 842 | # Try fetching the tag again to see what it currently points to | ||
| 843 | subprocess.run( | ||
| 844 | ["git", "fetch", "--depth=1", "origin", ref_hint], | ||
| 845 | cwd=str(repo_dir), | ||
| 846 | check=True, | ||
| 847 | capture_output=True, | ||
| 848 | text=True, | ||
| 849 | timeout=GIT_CMD_TIMEOUT, | ||
| 850 | env=env, | ||
| 851 | ) | ||
| 852 | |||
| 853 | # Check what commit the tag now points to | ||
| 854 | result = subprocess.run( | ||
| 855 | ["git", "rev-parse", "FETCH_HEAD"], | ||
| 856 | cwd=str(repo_dir), | ||
| 857 | capture_output=True, | ||
| 858 | text=True, | ||
| 859 | timeout=30, | ||
| 860 | env=env, | ||
| 861 | check=True, | ||
| 862 | ) | ||
| 863 | current_tag_commit = result.stdout.strip() | ||
| 864 | |||
| 865 | if current_tag_commit != commit: | ||
| 866 | print(f" ✓ Tag moved detected:") | ||
| 867 | print(f" Proxy gave us: {commit[:12]} (no longer exists)") | ||
| 868 | print(f" Tag now points to: {current_tag_commit[:12]}") | ||
| 869 | print(f" → Using current tag commit") | ||
| 870 | |||
| 871 | # Update module to use current commit | ||
| 872 | VERIFY_FALLBACK_COMMITS[(vcs_url, commit)] = current_tag_commit | ||
| 873 | return ('corrected', module_path, version, commit, current_tag_commit) | ||
| 874 | except subprocess.CalledProcessError: | ||
| 875 | # Can't fetch tag either - this is a real error | ||
| 876 | pass | ||
| 877 | |||
| 878 | for lock_file in ["shallow.lock", "index.lock", "HEAD.lock"]: | ||
| 879 | lock_path = repo_dir / lock_file | ||
| 880 | if lock_path.exists(): | ||
| 881 | try: | ||
| 882 | lock_path.unlink() | ||
| 883 | except Exception: | ||
| 884 | pass | ||
| 885 | VERIFY_RESULTS[key] = False | ||
| 886 | VERIFY_COMMIT_CACHE.pop(cache_key, None) | ||
| 887 | VERIFY_COMMIT_CACHE_DIRTY = True | ||
| 888 | return False | ||
| 889 | else: | ||
| 890 | # Pseudo-version: MUST do full clone to detect which branch contains commit | ||
| 891 | # Shallow fetch is useless - we need history for git for-each-ref --contains | ||
| 892 | |||
| 893 | # Check if we already fetched full history for this repo URL | ||
| 894 | # This prevents redundant full-history fetches for repos with multiple module versions | ||
| 895 | shallow_file = repo_dir / "shallow" | ||
| 896 | is_shallow = shallow_file.exists() | ||
| 897 | already_full = vcs_url in VERIFY_FULL_REPOS | ||
| 898 | |||
| 899 | if is_shallow and not already_full: | ||
| 900 | print(f" → Fetching full history for branch detection...") | ||
| 901 | try: | ||
| 902 | # Use --unshallow to convert shallow clone to full clone | ||
| 903 | subprocess.run( | ||
| 904 | ["git", "fetch", "--unshallow", "origin", "+refs/heads/*:refs/remotes/origin/*"], | ||
| 905 | cwd=str(repo_dir), | ||
| 906 | check=True, | ||
| 907 | capture_output=True, | ||
| 908 | text=True, | ||
| 909 | timeout=GIT_CMD_TIMEOUT * 5, | ||
| 910 | env=env, | ||
| 911 | ) | ||
| 912 | commit_fetched = True | ||
| 913 | # Mark this repo as having full history | ||
| 914 | VERIFY_FULL_REPOS.add(vcs_url) | ||
| 915 | except subprocess.TimeoutExpired: | ||
| 916 | print(f" ⚠️ Full clone timeout for {vcs_url[:50]}...") | ||
| 917 | for lock_file in ["shallow.lock", "index.lock", "HEAD.lock"]: | ||
| 918 | lock_path = repo_dir / lock_file | ||
| 919 | if lock_path.exists(): | ||
| 920 | try: | ||
| 921 | lock_path.unlink() | ||
| 922 | except Exception: | ||
| 923 | pass | ||
| 924 | VERIFY_RESULTS[key] = False | ||
| 925 | VERIFY_COMMIT_CACHE.pop(cache_key, None) | ||
| 926 | VERIFY_COMMIT_CACHE_DIRTY = True | ||
| 927 | return False | ||
| 928 | except subprocess.CalledProcessError as exc: | ||
| 929 | detail = (exc.stderr or exc.stdout or "").strip() if isinstance(exc.stderr, str) or isinstance(exc.stdout, str) else "" | ||
| 930 | if detail: | ||
| 931 | print(f" ⚠️ Full clone failed for {vcs_url[:50]}...: {detail[:100]}") | ||
| 932 | for lock_file in ["shallow.lock", "index.lock", "HEAD.lock"]: | ||
| 933 | lock_path = repo_dir / lock_file | ||
| 934 | if lock_path.exists(): | ||
| 935 | try: | ||
| 936 | lock_path.unlink() | ||
| 937 | except Exception: | ||
| 938 | pass | ||
| 939 | VERIFY_RESULTS[key] = False | ||
| 940 | VERIFY_COMMIT_CACHE.pop(cache_key, None) | ||
| 941 | VERIFY_COMMIT_CACHE_DIRTY = True | ||
| 942 | return False | ||
| 943 | else: | ||
| 944 | # Already full - just fetch updates | ||
| 945 | print(f" → Fetching updates (repo already full)...") | ||
| 946 | try: | ||
| 947 | subprocess.run( | ||
| 948 | ["git", "fetch", "origin", "+refs/heads/*:refs/remotes/origin/*"], | ||
| 949 | cwd=str(repo_dir), | ||
| 950 | check=True, | ||
| 951 | capture_output=True, | ||
| 952 | text=True, | ||
| 953 | timeout=GIT_CMD_TIMEOUT, | ||
| 954 | env=env, | ||
| 955 | ) | ||
| 956 | commit_fetched = True | ||
| 957 | except subprocess.TimeoutExpired: | ||
| 958 | print(f" ⚠️ Full clone timeout for {vcs_url[:50]}...") | ||
| 959 | for lock_file in ["shallow.lock", "index.lock", "HEAD.lock"]: | ||
| 960 | lock_path = repo_dir / lock_file | ||
| 961 | if lock_path.exists(): | ||
| 962 | try: | ||
| 963 | lock_path.unlink() | ||
| 964 | except Exception: | ||
| 965 | pass | ||
| 966 | VERIFY_RESULTS[key] = False | ||
| 967 | VERIFY_COMMIT_CACHE.pop(cache_key, None) | ||
| 968 | VERIFY_COMMIT_CACHE_DIRTY = True | ||
| 969 | return False | ||
| 970 | except subprocess.CalledProcessError as exc: | ||
| 971 | detail = (exc.stderr or exc.stdout or "").strip() if isinstance(exc.stderr, str) or isinstance(exc.stdout, str) else "" | ||
| 972 | if detail: | ||
| 973 | print(f" ⚠️ Full clone failed for {vcs_url[:50]}...: {detail[:100]}") | ||
| 974 | for lock_file in ["shallow.lock", "index.lock", "HEAD.lock"]: | ||
| 975 | lock_path = repo_dir / lock_file | ||
| 976 | if lock_path.exists(): | ||
| 977 | try: | ||
| 978 | lock_path.unlink() | ||
| 979 | except Exception: | ||
| 980 | pass | ||
| 981 | VERIFY_RESULTS[key] = False | ||
| 982 | VERIFY_COMMIT_CACHE.pop(cache_key, None) | ||
| 983 | VERIFY_COMMIT_CACHE_DIRTY = True | ||
| 984 | return False | ||
| 985 | |||
| 986 | # Use the original commit or fallback commit for verification | ||
| 987 | actual_commit = commit | ||
| 988 | |||
| 989 | if not _commit_exists(): | ||
| 990 | # Commit doesn't exist in repository - try fallback strategy | ||
| 991 | # This handles orphaned commits from proxy.golang.org | ||
| 992 | print(f" ⚠️ Commit {commit[:12]} not found in repository {vcs_url[:50]}...") | ||
| 993 | |||
| 994 | if not ref_hint: | ||
| 995 | # Pseudo-version without a tag - use timestamp-based fallback | ||
| 996 | print(f" → Attempting fallback commit strategy for pseudo-version {version}") | ||
| 997 | fallback_result = _find_fallback_commit(vcs_url, version, timestamp) | ||
| 998 | |||
| 999 | if fallback_result: | ||
| 1000 | fallback_commit, fallback_branch = fallback_result | ||
| 1001 | print(f" ⚠️ Using fallback: {fallback_commit[:12]} from branch '{fallback_branch}'") | ||
| 1002 | print(f" (Original commit {commit[:12]} from proxy.golang.org does not exist)") | ||
| 1003 | |||
| 1004 | # Update commit to use the fallback | ||
| 1005 | actual_commit = fallback_commit | ||
| 1006 | |||
| 1007 | # Track the fallback mapping so callers can use the fallback commit | ||
| 1008 | VERIFY_FALLBACK_COMMITS[(vcs_url, commit)] = fallback_commit | ||
| 1009 | |||
| 1010 | # Fetch the fallback commit (only unshallow if repo is still shallow) | ||
| 1011 | shallow_file = repo_dir / "shallow" | ||
| 1012 | is_shallow = shallow_file.exists() | ||
| 1013 | try: | ||
| 1014 | if is_shallow: | ||
| 1015 | subprocess.run( | ||
| 1016 | ["git", "fetch", "--unshallow", "origin", "+refs/heads/*:refs/remotes/origin/*"], | ||
| 1017 | cwd=str(repo_dir), | ||
| 1018 | check=True, | ||
| 1019 | capture_output=True, | ||
| 1020 | text=True, | ||
| 1021 | timeout=GIT_CMD_TIMEOUT * 5, | ||
| 1022 | env=env, | ||
| 1023 | ) | ||
| 1024 | else: | ||
| 1025 | # Repo already has full history - just fetch updates | ||
| 1026 | subprocess.run( | ||
| 1027 | ["git", "fetch", "origin", "+refs/heads/*:refs/remotes/origin/*"], | ||
| 1028 | cwd=str(repo_dir), | ||
| 1029 | check=True, | ||
| 1030 | capture_output=True, | ||
| 1031 | text=True, | ||
| 1032 | timeout=GIT_CMD_TIMEOUT, | ||
| 1033 | env=env, | ||
| 1034 | ) | ||
| 1035 | except Exception as e: | ||
| 1036 | print(f" ⚠️ Failed to fetch fallback commit: {e}") | ||
| 1037 | VERIFY_RESULTS[key] = False | ||
| 1038 | return False | ||
| 1039 | |||
| 1040 | # Register the fallback branch | ||
| 1041 | VERIFY_DETECTED_BRANCHES[(vcs_url, fallback_commit)] = fallback_branch | ||
| 1042 | |||
| 1043 | # Check if fallback commit exists | ||
| 1044 | if not _commit_exists(fallback_commit): | ||
| 1045 | print(f" ⚠️ Fallback commit {fallback_commit[:12]} also not found!") | ||
| 1046 | VERIFY_RESULTS[key] = False | ||
| 1047 | return False | ||
| 1048 | else: | ||
| 1049 | print(f" ⚠️ Could not determine fallback commit") | ||
| 1050 | VERIFY_RESULTS[key] = False | ||
| 1051 | return False | ||
| 1052 | else: | ||
| 1053 | # Tagged version with bad commit - this shouldn't happen but fail gracefully | ||
| 1054 | print(f" ⚠️ Tagged version {version} has invalid commit {commit[:12]}") | ||
| 1055 | VERIFY_RESULTS[key] = False | ||
| 1056 | return False | ||
| 1057 | |||
| 1058 | # Now verify the actual_commit (original or fallback) | ||
| 1059 | if _commit_exists(actual_commit): | ||
| 1060 | # Commit was fetched successfully - verify it's reachable from the ref_hint if provided | ||
| 1061 | # This ensures the commit is on the branch/tag we'll use in SRC_URI | ||
| 1062 | if ref_hint: | ||
| 1063 | # For tagged versions, verify the tag still points to the same commit | ||
| 1064 | # proxy.golang.org caches module@version->commit mappings, but tags can be force-pushed | ||
| 1065 | # If the tag has moved to a different commit, we need to use the current commit | ||
| 1066 | # Optimization: Use git ls-remote first (fast, cached) before fetching | ||
| 1067 | if ref_hint.startswith('refs/tags/'): | ||
| 1068 | try: | ||
| 1069 | # First check if tag has moved using fast ls-remote (cached) | ||
| 1070 | current_tag_commit = git_ls_remote(vcs_url, ref_hint) | ||
| 1071 | |||
| 1072 | if current_tag_commit and current_tag_commit != actual_commit: | ||
| 1073 | # Tag has moved - fetch it to verify and update local repo | ||
| 1074 | print(f" ⚠️ Tag has moved - proxy.golang.org cache is stale") | ||
| 1075 | print(f" Proxy gave us: {actual_commit[:12]}") | ||
| 1076 | print(f" Tag now points to: {current_tag_commit[:12]}") | ||
| 1077 | print(f" → Using current tag commit") | ||
| 1078 | |||
| 1079 | # Fetch the tag to update local repo | ||
| 1080 | subprocess.run( | ||
| 1081 | ["git", "fetch", "--depth=1", "origin", ref_hint], | ||
| 1082 | cwd=str(repo_dir), | ||
| 1083 | check=True, | ||
| 1084 | capture_output=True, | ||
| 1085 | text=True, | ||
| 1086 | timeout=GIT_CMD_TIMEOUT, | ||
| 1087 | env=env, | ||
| 1088 | ) | ||
| 1089 | |||
| 1090 | # Update to use current commit | ||
| 1091 | VERIFY_FALLBACK_COMMITS[(vcs_url, actual_commit)] = current_tag_commit | ||
| 1092 | actual_commit = current_tag_commit | ||
| 1093 | |||
| 1094 | # Verify the new commit exists (it should, since we just fetched it) | ||
| 1095 | if not _commit_exists(current_tag_commit): | ||
| 1096 | print(f" ⚠️ Current tag commit {current_tag_commit[:12]} not found!") | ||
| 1097 | VERIFY_RESULTS[key] = False | ||
| 1098 | VERIFY_COMMIT_CACHE.pop(cache_key, None) | ||
| 1099 | VERIFY_COMMIT_CACHE_DIRTY = True | ||
| 1100 | return False | ||
| 1101 | |||
| 1102 | # The VERIFY_FALLBACK_COMMITS mapping will be used by the caller | ||
| 1103 | # Continue with verification using the corrected commit | ||
| 1104 | except Exception as e: | ||
| 1105 | # Tag verification failed - continue with normal flow | ||
| 1106 | print(f" ⚠️ Could not verify tag target: {e}") | ||
| 1107 | pass | ||
| 1108 | |||
| 1109 | try: | ||
| 1110 | # Check if commit is an ancestor of (or equal to) the ref | ||
| 1111 | # This works even with shallow clones | ||
| 1112 | result = subprocess.run( | ||
| 1113 | ["git", "merge-base", "--is-ancestor", actual_commit, "FETCH_HEAD"], | ||
| 1114 | cwd=str(repo_dir), | ||
| 1115 | capture_output=True, | ||
| 1116 | text=True, | ||
| 1117 | timeout=30, | ||
| 1118 | env=env, | ||
| 1119 | ) | ||
| 1120 | if result.returncode != 0: | ||
| 1121 | # Commit is not an ancestor of the ref - might be on a different branch | ||
| 1122 | # This is OK - BitBake can still fetch the commit directly | ||
| 1123 | # Just log it for debugging | ||
| 1124 | pass # Don't fail - commit exists and is fetchable | ||
| 1125 | except subprocess.TimeoutExpired: | ||
| 1126 | print(f" ⚠️ Timeout checking commit ancestry for {actual_commit[:12]}") | ||
| 1127 | # Don't fail - commit exists | ||
| 1128 | except subprocess.CalledProcessError: | ||
| 1129 | # merge-base failed - don't fail verification | ||
| 1130 | pass | ||
| 1131 | else: | ||
| 1132 | # For pseudo-versions, we MUST detect which branch contains the commit | ||
| 1133 | # This is CRITICAL - BitBake cannot fetch arbitrary commits with nobranch=1 | ||
| 1134 | # We need branch=<name> in SRC_URI for interior commits | ||
| 1135 | |||
| 1136 | # Check if we already have the branch from fallback | ||
| 1137 | if (vcs_url, actual_commit) not in VERIFY_DETECTED_BRANCHES: | ||
| 1138 | # Now that we have full history, use git to find which branches contain this commit | ||
| 1139 | try: | ||
| 1140 | result = subprocess.run( | ||
| 1141 | ["git", "for-each-ref", "--contains", actual_commit, "refs/remotes/origin/", "--format=%(refname:short)"], | ||
| 1142 | cwd=str(repo_dir), | ||
| 1143 | capture_output=True, | ||
| 1144 | text=True, | ||
| 1145 | timeout=30, | ||
| 1146 | env=env, | ||
| 1147 | ) | ||
| 1148 | if result.returncode == 0 and result.stdout.strip(): | ||
| 1149 | # Commit IS on one or more branches | ||
| 1150 | branches = result.stdout.strip().split('\n') | ||
| 1151 | # Strip 'origin/' prefix from branch names | ||
| 1152 | branches = [b.replace('origin/', '') for b in branches] | ||
| 1153 | |||
| 1154 | # Pick main/master if available, otherwise first branch | ||
| 1155 | if 'main' in branches: | ||
| 1156 | detected_branch = 'main' | ||
| 1157 | elif 'master' in branches: | ||
| 1158 | detected_branch = 'master' | ||
| 1159 | else: | ||
| 1160 | detected_branch = branches[0] | ||
| 1161 | |||
| 1162 | VERIFY_DETECTED_BRANCHES[(vcs_url, actual_commit)] = detected_branch | ||
| 1163 | print(f" → Detected branch: {detected_branch} (verified with git for-each-ref)") | ||
| 1164 | else: | ||
| 1165 | # Commit exists but not in any branch - it's orphaned/dangling | ||
| 1166 | # For pseudo-versions, try fallback strategy | ||
| 1167 | # DEBUG: ALWAYS print this to confirm we reach this block | ||
| 1168 | print(f" ⚠️ ORPHANED: Commit {actual_commit[:12]} not found in any branch for {vcs_url[:50]}") | ||
| 1169 | print(f" DEBUG-ORPHANED: ref_hint={ref_hint}, actual_commit={actual_commit[:12]}, commit={commit[:12]}, version={version}") | ||
| 1170 | print(f" DEBUG-ORPHANED: Condition: (not ref_hint)={not ref_hint}, (actual==commit)={actual_commit == commit}") | ||
| 1171 | |||
| 1172 | if not ref_hint and actual_commit == commit: | ||
| 1173 | # This is a pseudo-version with orphaned commit - try fallback | ||
| 1174 | print(f" → Attempting fallback commit strategy for orphaned commit") | ||
| 1175 | fallback_result = _find_fallback_commit(vcs_url, version, timestamp) | ||
| 1176 | |||
| 1177 | if fallback_result: | ||
| 1178 | fallback_commit, fallback_branch = fallback_result | ||
| 1179 | print(f" ✓ Using fallback: {fallback_commit[:12]} from branch '{fallback_branch}'") | ||
| 1180 | print(f" (Original commit {commit[:12]} from proxy.golang.org is orphaned)") | ||
| 1181 | |||
| 1182 | # Update to use the fallback | ||
| 1183 | actual_commit = fallback_commit | ||
| 1184 | VERIFY_FALLBACK_COMMITS[(vcs_url, commit)] = fallback_commit | ||
| 1185 | VERIFY_DETECTED_BRANCHES[(vcs_url, fallback_commit)] = fallback_branch | ||
| 1186 | |||
| 1187 | # Verify fallback commit exists | ||
| 1188 | if not _commit_exists(fallback_commit): | ||
| 1189 | print(f" ⚠️ Fallback commit {fallback_commit[:12]} not found!") | ||
| 1190 | VERIFY_RESULTS[key] = False | ||
| 1191 | return False | ||
| 1192 | # Continue with fallback commit - don't fail here | ||
| 1193 | else: | ||
| 1194 | print(f" ⚠️ Could not determine fallback commit") | ||
| 1195 | VERIFY_RESULTS[key] = False | ||
| 1196 | return False | ||
| 1197 | else: | ||
| 1198 | # Tagged version or already tried fallback - fail | ||
| 1199 | VERIFY_RESULTS[key] = False | ||
| 1200 | return False | ||
| 1201 | except subprocess.TimeoutExpired: | ||
| 1202 | print(f" ⚠️ Branch detection timeout for {actual_commit[:12]}") | ||
| 1203 | VERIFY_RESULTS[key] = False | ||
| 1204 | return False | ||
| 1205 | except subprocess.CalledProcessError: | ||
| 1206 | print(f" ⚠️ Failed to detect branch for {actual_commit[:12]}") | ||
| 1207 | VERIFY_RESULTS[key] = False | ||
| 1208 | return False | ||
| 1209 | |||
| 1210 | |||
| 1211 | # Commit exists AND is reachable - safe for BitBake nobranch=1 | ||
| 1212 | # Only save to cache if not already cached (branch detection is done, just finalize) | ||
| 1213 | if not cached_verification_passed: | ||
| 1214 | # Save with timestamp in v2 format | ||
| 1215 | now = datetime.now(timezone.utc).isoformat() | ||
| 1216 | existing_entry = VERIFY_COMMIT_CACHE_V2.get(cache_key, {}) | ||
| 1217 | |||
| 1218 | VERIFY_COMMIT_CACHE_V2[cache_key] = { | ||
| 1219 | "verified": True, | ||
| 1220 | "first_verified": existing_entry.get("first_verified", now), | ||
| 1221 | "last_checked": now, | ||
| 1222 | "fetch_method": "fetch" # Successfully fetched from upstream | ||
| 1223 | } | ||
| 1224 | VERIFY_COMMIT_CACHE_DIRTY = True | ||
| 1225 | |||
| 1226 | VERIFY_RESULTS[key] = True | ||
| 1227 | return True | ||
| 1228 | VERIFY_RESULTS[key] = False | ||
| 1229 | # Remove from both caches | ||
| 1230 | VERIFY_COMMIT_CACHE.pop(cache_key, None) | ||
| 1231 | VERIFY_COMMIT_CACHE_V2.pop(cache_key, None) | ||
| 1232 | VERIFY_COMMIT_CACHE_DIRTY = True | ||
| 1233 | return False | ||
| 1234 | |||
| 1235 | |||
def get_actual_commit(vcs_url: str, commit: str) -> str:
    """
    Resolve the commit that should really be used for a repository.

    Looks up (vcs_url, commit) in VERIFY_FALLBACK_COMMITS, the mapping that
    verification fills in whenever it substitutes a different commit for
    the one originally requested. Call this after verify_commit_accessible()
    so that any substitution has already been recorded.

    Args:
        vcs_url: Repository URL
        commit: Original commit hash from proxy.golang.org

    Returns:
        The recorded replacement commit when one exists, otherwise the
        original commit unchanged
    """
    try:
        return VERIFY_FALLBACK_COMMITS[(vcs_url, commit)]
    except KeyError:
        # No substitution was recorded - the original commit stands.
        return commit
| 1251 | |||
| 1252 | |||
def _ref_points_to_commit(vcs_url: str, ref_hint: str, commit_hash: str) -> bool:
    """
    Report whether a ref in the local verification clone resolves to commit_hash.

    Args:
        vcs_url: Repository URL (selects the verification clone and its lock)
        ref_hint: Fully qualified ref name passed to `git show-ref --verify`
        commit_hash: Commit hash the ref is expected to resolve to

    Returns:
        True when `git show-ref` succeeds and prints commit_hash (compared
        case-insensitively); False for an empty ref_hint, a failing
        `git show-ref`, or a different hash
    """
    if not ref_hint:
        return False

    # _verify_repo_dir must run first: the per-repo lock is guaranteed to
    # exist only after it returns.
    repo_dir = _verify_repo_dir(vcs_url)

    with VERIFY_REPO_LOCKS[vcs_url]:
        git_env = os.environ.copy()
        # Never let git block waiting for interactive credentials.
        git_env.setdefault("GIT_TERMINAL_PROMPT", "0")
        git_env.setdefault("GIT_ASKPASS", "true")

        proc = subprocess.run(
            ["git", "show-ref", "--verify", "--hash", ref_hint],
            cwd=str(repo_dir),
            check=False,
            capture_output=True,
            text=True,
            env=git_env,
        )
        if proc.returncode != 0:
            # Ref is unknown to the local clone (or git failed).
            return False

        return proc.stdout.strip().lower() == commit_hash.lower()
| 1279 | |||
| 1280 | |||
def correct_commit_hash_from_ref(vcs_url: str, vcs_hash: str, vcs_ref: str) -> Optional[str]:
    """
    Fix proxy.golang.org bad hashes by dereferencing the tag to get the correct commit.

    proxy.golang.org sometimes returns commits that:
    1. Exist in the repo but aren't branch/tag HEADs (dangling commits)
    2. Don't exist in the repo at all

    BitBake's nobranch=1 requires commits to be HEADs of branches or dereferenced tags.

    The peeled entry ("<ref>^{}") is authoritative whenever the remote
    advertises one: for an annotated tag the plain ref resolves to the tag
    *object*, not to a commit, so it must not be compared against (or
    returned in place of) a commit hash. The plain ref is consulted only
    when no peeled entry exists, i.e. for lightweight tags and branches.

    Args:
        vcs_url: Repository URL
        vcs_hash: Commit hash from proxy.golang.org (potentially bad)
        vcs_ref: Git ref like "refs/tags/v1.2.3"

    Returns:
        Corrected commit hash (lowercase) if different from vcs_hash,
        None if vcs_hash is correct or can't be corrected
    """
    if not vcs_ref or not vcs_ref.startswith("refs/"):
        return None

    expected = (vcs_hash or "").lower()

    # Annotated tags: the peeled entry is the commit the tag points at.
    peeled_hash = git_ls_remote(vcs_url, f"{vcs_ref}^{{}}")
    if peeled_hash:
        peeled_hash = peeled_hash.lower()
        # A matching peeled hash means vcs_hash is already correct. Do NOT
        # fall through to the plain ref, which would yield the tag object
        # SHA and be misreported as a "corrected" commit.
        return peeled_hash if peeled_hash != expected else None

    # Lightweight tags / branches: the ref itself points at the commit.
    direct_hash = git_ls_remote(vcs_url, vcs_ref)
    if direct_hash and direct_hash.lower() != expected:
        return direct_hash.lower()

    return None
| 1313 | |||
| 1314 | |||
def is_commit_bitbake_fetchable(vcs_url: str, vcs_hash: str, vcs_ref: str) -> bool:
    """
    Decide whether vcs_hash is what the hinted ref currently resolves to.

    The hash is looked up through git_ls_remote twice: first via the
    dereferenced form of the ref ("<ref>^{}", for annotated tags), then via
    the ref itself (for lightweight tags). Hashes are compared
    case-insensitively. Only the hinted ref is consulted; other refs of the
    repository are not searched.

    Args:
        vcs_url: Repository URL
        vcs_hash: Commit hash to check
        vcs_ref: Git ref hint like "refs/tags/v1.2.3"

    Returns:
        True if either lookup yields vcs_hash; False otherwise, including
        when vcs_ref is empty or does not start with "refs/"
    """
    has_usable_ref = bool(vcs_ref) and vcs_ref.startswith("refs/")
    if not has_usable_ref:
        return False

    # Annotated form first, then the plain ref for lightweight tags.
    for lookup_ref in (f"{vcs_ref}^{{}}", vcs_ref):
        resolved = git_ls_remote(vcs_url, lookup_ref)
        if not resolved:
            continue
        if resolved.lower() == vcs_hash.lower():
            return True

    # Neither form of the ref resolves to vcs_hash, so it is treated as dangling.
    return False
| 1347 | |||
| 1348 | |||
def _scan_gomodcache_info_files(download_dir: Path) -> list:
    """Collect every cached module version under download_dir whose .info file has usable VCS metadata."""
    modules = []
    for dirpath, _, filenames in os.walk(download_dir):
        path_parts = Path(dirpath).relative_to(download_dir).parts
        # Only "<escaped module path>/@v" directories hold per-version .info files.
        if not path_parts or path_parts[-1] != '@v':
            continue

        module_path = unescape_module_path('/'.join(path_parts[:-1]))

        for filename in filenames:
            if not filename.endswith('.info'):
                continue

            version = filename[:-len('.info')]
            info_path = Path(dirpath) / filename

            try:
                with open(info_path, encoding="utf-8") as f:
                    info = json.load(f)
            except (OSError, ValueError) as e:
                # ValueError covers both malformed JSON and undecodable bytes.
                print(f" ⚠️ Error reading {info_path}: {e}")
                continue

            origin = info.get('Origin') if isinstance(info, dict) else None
            if not isinstance(origin, dict):
                # No VCS origin recorded - nothing to verify for this version.
                continue

            vcs_url = origin.get('URL')
            vcs_hash = origin.get('Hash')
            vcs_ref = origin.get('Ref', '')

            # Only full 40-character hashes are verified.
            if vcs_url and isinstance(vcs_hash, str) and len(vcs_hash) == 40:
                modules.append({
                    'module_path': module_path,
                    'version': version,
                    'vcs_url': vcs_url,
                    'vcs_hash': vcs_hash,
                    'vcs_ref': vcs_ref,
                    'info_path': info_path,
                })
    return modules


def _refresh_stale_info(module: dict, gomodcache_path: Path) -> bool:
    """Delete one stale .info file, re-download the module into gomodcache_path, and report whether the hash changed."""
    info_path = module['info_path']
    try:
        # Delete stale .info file
        info_path.unlink()
        print(" Deleted stale .info")

        # Re-download into the cache being repaired. Without GOMODCACHE the
        # go tool writes to its default cache, and the existence check below
        # would fail even though the download succeeded. Go requires the
        # value to be an absolute path.
        env = os.environ.copy()
        env['GOMODCACHE'] = str(gomodcache_path.resolve())
        result = subprocess.run(
            ['go', 'mod', 'download', f"{module['module_path']}@{module['version']}"],
            capture_output=True,
            text=True,
            timeout=60,
            env=env,
        )

        if result.returncode != 0 or not info_path.exists():
            print(f" ❌ Download failed: {result.stderr[:100]}")
            return False

        # Verify new commit
        with open(info_path, encoding="utf-8") as f:
            new_info = json.load(f)
        new_origin = new_info.get('Origin') if isinstance(new_info, dict) else None
        new_hash = new_origin.get('Hash', '') if isinstance(new_origin, dict) else ''

        if new_hash and new_hash != module['vcs_hash']:
            print(f" ✓ Refreshed: {module['vcs_hash'][:12]} → {new_hash[:12]}")
            return True

        print(" ⚠️ Proxy returned same commit")
        return False
    except (OSError, subprocess.SubprocessError, ValueError) as e:
        # Missing `go` binary, timeout, unreadable/malformed .info, ...
        print(f" ❌ Error: {e}")
        return False


def verify_gomodcache_commits(gomodcache_path: Path, verify_jobs: int = 10) -> int:
    """
    Verify commits in GOMODCACHE .info files still exist in repositories.

    Detects force-pushed tags where proxy.golang.org has stale commit hashes.
    Offers to automatically refresh stale .info files by re-downloading.
    The prompt is read from stdin; when stdin is closed (non-interactive
    run) the answer defaults to "no".

    Args:
        gomodcache_path: Root of the Go module cache (Path or str); the
            .info files are expected under <root>/cache/download
        verify_jobs: Number of worker threads used for verification;
            0 or less verifies sequentially

    Returns:
        0 if there was nothing to verify, all commits were valid, or every
          stale module was successfully refreshed
        1 if the cache or its download directory is missing, the refresh
          was declined, or at least one stale module could not be refreshed
    """
    global VERIFY_ENABLED
    VERIFY_ENABLED = True

    gomodcache_path = Path(gomodcache_path)

    if not gomodcache_path.exists():
        print(f"❌ GOMODCACHE not found: {gomodcache_path}")
        return 1

    download_dir = gomodcache_path / "cache" / "download"
    if not download_dir.exists():
        print(f"❌ Download directory not found: {download_dir}")
        return 1

    print(f"\nScanning {download_dir} for .info files...")

    # Collect all modules with VCS info
    modules_to_check = _scan_gomodcache_info_files(download_dir)

    print(f"Found {len(modules_to_check)} modules with VCS metadata to verify\n")

    if not modules_to_check:
        print("✅ No modules to verify")
        return 0

    def check_module(module):
        # Returns the module when its commit is NOT accessible, else None.
        if verify_commit_accessible(module['vcs_url'], module['vcs_hash'], module['vcs_ref'], module.get('version', '')):
            return None
        return module

    # Verify commits (in parallel when requested); results keep scan order.
    if verify_jobs > 0:
        print(f"Verifying commits in parallel ({verify_jobs} workers)...")
        with ThreadPoolExecutor(max_workers=verify_jobs) as executor:
            futures = [executor.submit(check_module, m) for m in modules_to_check]
            results = [future.result() for future in futures]
    else:
        print("Verifying commits sequentially...")
        results = [check_module(m) for m in modules_to_check]

    stale_modules = [m for m in results if m]

    if not stale_modules:
        print(f"\n✅ All {len(modules_to_check)} commits verified successfully!")
        return 0

    # Report stale modules
    print(f"\n⚠️ Found {len(stale_modules)} modules with STALE commits:\n")
    for module in stale_modules[:10]:  # Show first 10
        print(f" {module['module_path']}@{module['version']}")
        print(f" Commit: {module['vcs_hash'][:12]} (not found in {module['vcs_url']})")
        print(f" File: {module['info_path']}")
        print()

    if len(stale_modules) > 10:
        print(f" ... and {len(stale_modules) - 10} more\n")

    # Offer to auto-refresh
    print("These commits likely represent force-pushed tags.")
    print("The .info files can be refreshed by re-downloading from proxy.golang.org\n")

    try:
        response = input("Refresh stale .info files automatically? [y/N]: ").strip().lower()
    except EOFError:
        # stdin is closed (CI, piped invocation): fall back to the default "No"
        # instead of crashing with a traceback.
        print()
        response = ''

    if response not in ('y', 'yes'):
        print("\nNo action taken. To fix manually:")
        print(" 1. Delete stale .info files")
        print(" 2. Run: go mod download")
        return 1

    # Refresh stale modules
    print("\nRefreshing stale modules...")
    refreshed = 0
    failed = []

    for module in stale_modules:
        print(f"\n Refreshing {module['module_path']}@{module['version']}...")
        if _refresh_stale_info(module, gomodcache_path):
            refreshed += 1
        else:
            failed.append(module)

    print(f"\n{'='*70}")
    print(f"Refresh complete: {refreshed} refreshed, {len(failed)} failed")

    if failed:
        print("\nFailed modules require manual intervention:")
        for module in failed[:5]:
            print(f" {module['module_path']}@{module['version']}")
        return 1

    return 0
| 1521 | |||
| 1522 | |||
def is_module_actually_needed(module_path: str, source_dir: Path) -> bool:
    """
    Check if a module is actually used by running 'go mod why -m'.

    The -m flag makes the go tool treat the argument as a *module* and look
    for an import path to any package inside it. Without -m the argument is
    treated as a package path, so a module whose root directory is not
    itself a package would be reported as unneeded even when its
    sub-packages are imported.

    Args:
        module_path: Module path to query (e.g. "golang.org/x/sys")
        source_dir: Directory containing the main module's go.mod

    Returns:
        True if module is needed by the main module, or if that could not
          be determined (go missing, timeout, non-zero exit) - conservative
        False if the go tool reports the main module does not need it, or
          prints nothing beyond the "# <module>" header
    """
    try:
        result = subprocess.run(
            ['go', 'mod', 'why', '-m', module_path],
            cwd=str(source_dir),
            capture_output=True,
            text=True,
            timeout=30,
        )
    except (OSError, subprocess.SubprocessError, ValueError):
        # go not installed, bad working directory, timeout or undecodable
        # output: assume needed (conservative)
        return True

    if result.returncode != 0:
        # If go mod why fails, assume it's needed (conservative)
        return True

    output = result.stdout.strip()

    # Telltale sign that the module is not needed. Matching the common prefix
    # covers both the "... need module X)" wording used with -m and the
    # "... need package X)" wording of package mode.
    if "(main module does not need" in output:
        return False

    # Also check for completely empty output (module not in graph)
    if not output or output == f"# {module_path}":
        return False

    # Module is needed
    return True
| 1560 | |||
| 1561 | |||
| 1562 | def _execute(args: argparse.Namespace) -> int: | ||
| 1563 | global CURRENT_SOURCE_DIR, CURRENT_GOMODCACHE, VERIFY_COMMIT_CACHE_DIRTY | ||
| 1564 | debug_limit = args.debug_limit | ||
| 1565 | |||
| 1566 | if args.source_dir: | ||
| 1567 | source_dir = Path(args.source_dir).resolve() | ||
| 1568 | else: | ||
| 1569 | source_dir = Path.cwd() | ||
| 1570 | CURRENT_SOURCE_DIR = source_dir | ||
| 1571 | |||
| 1572 | if not (source_dir / "go.mod").exists(): | ||
| 1573 | print(f"❌ Error: go.mod not found in {source_dir}") | ||
| 1574 | return 1 | ||
| 1575 | |||
| 1576 | print(f"Source directory: {source_dir}") | ||
| 1577 | |||
| 1578 | if args.recipedir: | ||
| 1579 | output_dir = Path(args.recipedir).resolve() | ||
| 1580 | output_dir.mkdir(parents=True, exist_ok=True) | ||
| 1581 | print(f"Output directory: {output_dir}") | ||
| 1582 | else: | ||
| 1583 | output_dir = None | ||
| 1584 | if not args.validate and not args.dry_run: | ||
| 1585 | print("❌ Error: --recipedir is required unless running with --validate, --dry-run, or cache-maintenance flags.") | ||
| 1586 | return 1 | ||
| 1587 | |||
| 1588 | configure_cache_paths(args.cache_dir, args.clone_cache_dir) | ||
| 1589 | if args.cache_dir: | ||
| 1590 | print(f"Metadata cache directory: {CACHE_BASE_DIR}") | ||
| 1591 | if args.clone_cache_dir: | ||
| 1592 | print(f"Clone cache directory: {CLONE_CACHE_DIR}") | ||
| 1593 | |||
| 1594 | # Set verification cache max age from command line | ||
| 1595 | global MODULE_REPO_OVERRIDES_DIRTY, VERIFY_CACHE_MAX_AGE_DAYS | ||
| 1596 | VERIFY_CACHE_MAX_AGE_DAYS = args.verify_cache_max_age | ||
| 1597 | if VERIFY_CACHE_MAX_AGE_DAYS == 0: | ||
| 1598 | print(f"Verification cache: DISABLED (always verify)") | ||
| 1599 | else: | ||
| 1600 | print(f"Verification cache max age: {VERIFY_CACHE_MAX_AGE_DAYS} days") | ||
| 1601 | |||
| 1602 | if args.clear_repo: | ||
| 1603 | for (module_spec,) in args.clear_repo: | ||
| 1604 | module_path, version = _parse_override_spec(module_spec) | ||
| 1605 | removed = False | ||
| 1606 | try: | ||
| 1607 | key = _normalise_override_key(module_path, version) | ||
| 1608 | except ValueError as exc: | ||
| 1609 | print(f"Invalid module override '{module_spec}': {exc}") | ||
| 1610 | continue | ||
| 1611 | if version is not None: | ||
| 1612 | if MODULE_REPO_OVERRIDES.pop(key, None) is not None: | ||
| 1613 | removed = True | ||
| 1614 | MODULE_REPO_OVERRIDES_DIRTY = True | ||
| 1615 | print(f"Cleared repo override: {module_path}@{version}") | ||
| 1616 | else: | ||
| 1617 | wildcard_key = key | ||
| 1618 | if MODULE_REPO_OVERRIDES.pop(wildcard_key, None) is not None: | ||
| 1619 | removed = True | ||
| 1620 | specific_keys = [ | ||
| 1621 | candidate for candidate in list(MODULE_REPO_OVERRIDES.keys()) | ||
| 1622 | if candidate[0] == module_path and candidate[1] is not None | ||
| 1623 | ] | ||
| 1624 | for candidate in specific_keys: | ||
| 1625 | MODULE_REPO_OVERRIDES.pop(candidate, None) | ||
| 1626 | removed = True | ||
| 1627 | if removed: | ||
| 1628 | MODULE_REPO_OVERRIDES_DIRTY = True | ||
| 1629 | print(f"Cleared repo overrides for: {module_path}") | ||
| 1630 | if not removed: | ||
| 1631 | if version is not None: | ||
| 1632 | print(f"No repo override found for: {module_path}@{version}") | ||
| 1633 | else: | ||
| 1634 | print(f"No repo overrides found for: {module_path}") | ||
| 1635 | |||
| 1636 | if args.set_repo: | ||
| 1637 | for module_spec, repo_url in args.set_repo: | ||
| 1638 | module_path, version = _parse_override_spec(module_spec) | ||
| 1639 | try: | ||
| 1640 | key = _normalise_override_key(module_path, version) | ||
| 1641 | except ValueError as exc: | ||
| 1642 | print(f"Invalid module override '{module_spec}': {exc}") | ||
| 1643 | continue | ||
| 1644 | MODULE_REPO_OVERRIDES[key] = repo_url | ||
| 1645 | MODULE_REPO_OVERRIDES_DIRTY = True | ||
| 1646 | label = f"{module_path}@{version}" if version else module_path | ||
| 1647 | print(f"Pinned repo override: {label} -> {repo_url}") | ||
| 1648 | |||
| 1649 | if args.clear_commit: | ||
| 1650 | for repo, commit in args.clear_commit: | ||
| 1651 | key = f"{repo}|||{commit}" | ||
| 1652 | if key in VERIFY_COMMIT_CACHE: | ||
| 1653 | VERIFY_COMMIT_CACHE.pop(key, None) | ||
| 1654 | VERIFY_COMMIT_CACHE_DIRTY = True | ||
| 1655 | print(f"\n🧹 Cleared cached verification: {repo} {commit}\n") | ||
| 1656 | else: | ||
| 1657 | print(f"No cached verification found for: {repo} {commit}") | ||
| 1658 | VERIFY_RESULTS.pop((repo, commit), None) | ||
| 1659 | |||
| 1660 | if args.inject_commit: | ||
| 1661 | for repo, commit in args.inject_commit: | ||
| 1662 | key = f"{repo}|||{commit}" | ||
| 1663 | VERIFY_COMMIT_CACHE[key] = True | ||
| 1664 | VERIFY_COMMIT_CACHE_DIRTY = True | ||
| 1665 | VERIFY_RESULTS[(repo, commit)] = True | ||
| 1666 | print(f"Injected verified commit: {repo} {commit}") | ||
| 1667 | |||
| 1668 | exit_code = 0 | ||
| 1669 | |||
| 1670 | if args.clean_ls_remote_cache: | ||
| 1671 | print("\n🗑️ Cleaning git ls-remote cache...") | ||
| 1672 | if LS_REMOTE_CACHE_PATH.exists(): | ||
| 1673 | LS_REMOTE_CACHE_PATH.unlink() | ||
| 1674 | print(f" Removed {LS_REMOTE_CACHE_PATH}") | ||
| 1675 | else: | ||
| 1676 | print(f" Cache file not found: {LS_REMOTE_CACHE_PATH}") | ||
| 1677 | args.clean_cache = True | ||
| 1678 | |||
| 1679 | if args.clean_cache: | ||
| 1680 | print("\n🗑️ Cleaning module metadata cache...") | ||
| 1681 | if MODULE_METADATA_CACHE_PATH.exists(): | ||
| 1682 | MODULE_METADATA_CACHE_PATH.unlink() | ||
| 1683 | print(f" Removed {MODULE_METADATA_CACHE_PATH}") | ||
| 1684 | else: | ||
| 1685 | print(f" Cache file not found: {MODULE_METADATA_CACHE_PATH}") | ||
| 1686 | if VERIFY_COMMIT_CACHE_PATH.exists(): | ||
| 1687 | VERIFY_COMMIT_CACHE_PATH.unlink() | ||
| 1688 | print(f" Removed {VERIFY_COMMIT_CACHE_PATH}") | ||
| 1689 | VERIFY_COMMIT_CACHE.clear() | ||
| 1690 | VERIFY_COMMIT_CACHE_DIRTY = False | ||
| 1691 | print(" Note: Bootstrap from .inc files DISABLED to avoid reloading stale data.") | ||
| 1692 | skip_inc_files = True | ||
| 1693 | else: | ||
| 1694 | skip_inc_files = False | ||
| 1695 | |||
| 1696 | skip_legacy_module_cache = args.skip_legacy_module_cache | ||
| 1697 | bootstrap_metadata_cache( | ||
| 1698 | output_dir, | ||
| 1699 | skip_inc_files=skip_inc_files, | ||
| 1700 | skip_legacy_module_cache=skip_legacy_module_cache, | ||
| 1701 | ) | ||
| 1702 | prune_metadata_cache() | ||
| 1703 | load_ls_remote_cache() | ||
| 1704 | load_vanity_url_cache() | ||
| 1705 | |||
| 1706 | if args.dry_run: | ||
| 1707 | print("\n--dry-run requested; skipping discovery/validation") | ||
| 1708 | return 0 | ||
| 1709 | |||
| 1710 | # --verify-cached command to check GOMODCACHE for stale commits | ||
| 1711 | if args.verify_cached: | ||
| 1712 | print("\n" + "=" * 70) | ||
| 1713 | print("VERIFYING CACHED COMMITS IN GOMODCACHE") | ||
| 1714 | print("=" * 70) | ||
| 1715 | return verify_gomodcache_commits(args.gomodcache or source_dir / ".gomodcache", args.verify_jobs) | ||
| 1716 | |||
| 1717 | # Check for --discovered-modules (bootstrap strategy) | ||
| 1718 | if args.discovered_modules: | ||
| 1719 | print("\n" + "=" * 70) | ||
| 1720 | print("PRE-DISCOVERED MODULES MODE") | ||
| 1721 | print("=" * 70) | ||
| 1722 | print("\nUsing pre-discovered module metadata from BitBake discovery build") | ||
| 1723 | print("Skipping discovery phase - generator will convert to BitBake format\n") | ||
| 1724 | |||
| 1725 | discovered_modules_path = Path(args.discovered_modules).resolve() | ||
| 1726 | modules = load_discovered_modules(discovered_modules_path) | ||
| 1727 | |||
| 1728 | if modules is None: | ||
| 1729 | print("\n❌ Failed to load discovered modules - falling back to discovery") | ||
| 1730 | modules = discover_modules(source_dir, args.gomodcache) | ||
| 1731 | else: | ||
| 1732 | print(f"\n✓ Successfully loaded {len(modules)} modules from discovery metadata") | ||
| 1733 | print(" Skipping 'go mod download' discovery phase") | ||
| 1734 | print(" Will use go.sum to resolve modules without Origin metadata") | ||
| 1735 | |||
| 1736 | # Auto-correction of dangling commits happens in Phase 2 during parallel verification | ||
| 1737 | else: | ||
| 1738 | # Normal discovery path | ||
| 1739 | modules = discover_modules(source_dir, args.gomodcache) | ||
| 1740 | if debug_limit is not None and len(modules) > debug_limit: | ||
| 1741 | print(f"\n⚙️ Debug limit active: truncating discovered modules to first {debug_limit} entries") | ||
| 1742 | modules = modules[:debug_limit] | ||
| 1743 | |||
| 1744 | # Set VERIFY_ENABLED based on whether verification is requested | ||
| 1745 | global VERIFY_ENABLED | ||
| 1746 | VERIFY_ENABLED = not args.skip_verify | ||
| 1747 | |||
| 1748 | # Parse go.mod replace directives for fork resolution | ||
| 1749 | # Example: github.com/containerd/containerd/v2 => github.com/k3s-io/containerd/v2 v2.1.4-k3s2 | ||
| 1750 | go_mod_replaces = parse_go_mod_replaces(source_dir / "go.mod") | ||
| 1751 | if go_mod_replaces: | ||
| 1752 | print(f"\n✓ Parsed {len(go_mod_replaces)} replace directives from go.mod") | ||
| 1753 | if VERBOSE_MODE: | ||
| 1754 | for old_path, (new_path, new_version) in sorted(go_mod_replaces.items())[:5]: | ||
| 1755 | print(f" {old_path} => {new_path} {new_version}") | ||
| 1756 | if len(go_mod_replaces) > 5: | ||
| 1757 | print(f" ... and {len(go_mod_replaces) - 5} more") | ||
| 1758 | |||
| 1759 | # Parse go.sum for fallback resolution | ||
| 1760 | discovered_keys = {(m['module_path'], m['version']) for m in modules} | ||
| 1761 | go_sum_modules_with_source, go_sum_indirect_only = parse_go_sum(source_dir / "go.sum") | ||
| 1762 | |||
| 1763 | FAILED_MODULE_PATHS.clear() | ||
| 1764 | FAILED_MODULE_ENTRIES.clear() | ||
| 1765 | SKIPPED_MODULES.clear() | ||
| 1766 | |||
| 1767 | print(f"\nFound {len(go_sum_indirect_only)} indirect-only dependencies (skipping - only need .mod files)") | ||
| 1768 | |||
| 1769 | if args.discovered_modules: | ||
| 1770 | # With discovered modules, only resolve what's in go.sum but missing from discovery | ||
| 1771 | # Do NOT call go list -m all - we already know what we need from the successful build | ||
| 1772 | missing_from_discovery = go_sum_modules_with_source - discovered_keys | ||
| 1773 | print(f"Discovered modules provided {len(discovered_keys)} modules with Origin metadata") | ||
| 1774 | print(f"go.sum has {len(go_sum_modules_with_source)} modules total") | ||
| 1775 | print(f"Resolving {len(missing_from_discovery)} modules without Origin metadata...") | ||
| 1776 | else: | ||
| 1777 | # Normal discovery - also use go list to find additional modules | ||
| 1778 | go_list_modules = collect_modules_via_go_list(source_dir) | ||
| 1779 | go_sum_modules_with_source |= go_list_modules | ||
| 1780 | missing_from_discovery = go_sum_modules_with_source - discovered_keys | ||
| 1781 | print(f"Resolving {len(missing_from_discovery)} additional modules discovered from go.sum/go list...") | ||
| 1782 | |||
| 1783 | modules_by_path: Dict[str, List[Dict]] = {} | ||
| 1784 | for m in modules: | ||
| 1785 | modules_by_path.setdefault(m['module_path'], []).append(m) | ||
| 1786 | |||
| 1787 | limit_reached = False | ||
| 1788 | for module_path, version in sorted(go_sum_modules_with_source): | ||
| 1789 | if debug_limit is not None and len(modules) >= debug_limit: | ||
| 1790 | limit_reached = True | ||
| 1791 | break | ||
| 1792 | if module_path in FAILED_MODULE_PATHS: | ||
| 1793 | print(f" ⚠️ Skipping {module_path}@{version} (previous resolution failure)") | ||
| 1794 | continue | ||
| 1795 | |||
| 1796 | if (module_path, version) in discovered_keys: | ||
| 1797 | continue | ||
| 1798 | |||
| 1799 | # Apply replace directives for k3s forks | ||
| 1800 | # If module path is replaced in go.mod, try to resolve using the replacement path | ||
| 1801 | resolved_path = module_path | ||
| 1802 | resolved_version = version | ||
| 1803 | if module_path in go_mod_replaces: | ||
| 1804 | new_path, new_version = go_mod_replaces[module_path] | ||
| 1805 | if new_version: # Replace has explicit version | ||
| 1806 | resolved_path = new_path | ||
| 1807 | resolved_version = new_version | ||
| 1808 | if VERBOSE_MODE: | ||
| 1809 | print(f" [replace] {module_path}@{version} => {resolved_path}@{resolved_version}") | ||
| 1810 | # Check if we already have the replacement module | ||
| 1811 | if (resolved_path, resolved_version) in discovered_keys: | ||
| 1812 | # Copy the existing module entry with original path | ||
| 1813 | for m in modules: | ||
| 1814 | if m['module_path'] == resolved_path and m['version'] == resolved_version: | ||
| 1815 | replacement_entry = m.copy() | ||
| 1816 | replacement_entry['module_path'] = module_path | ||
| 1817 | replacement_entry['version'] = version | ||
| 1818 | modules.append(replacement_entry) | ||
| 1819 | discovered_keys.add((module_path, version)) | ||
| 1820 | modules_by_path.setdefault(module_path, []).append(replacement_entry) | ||
| 1821 | print(f" ✓ {module_path}@{version} (using replace directive -> {resolved_path}@{resolved_version})") | ||
| 1822 | continue | ||
| 1823 | |||
| 1824 | fallback = resolve_module_metadata(resolved_path, resolved_version) | ||
| 1825 | if fallback: | ||
| 1826 | # If we used a replace directive, update the entry to use the original path | ||
| 1827 | if resolved_path != module_path or resolved_version != version: | ||
| 1828 | fallback['module_path'] = module_path | ||
| 1829 | fallback['version'] = version | ||
| 1830 | print(f" ✓ {module_path}@{version} (resolved via replace -> {resolved_path}@{resolved_version})") | ||
| 1831 | modules.append(fallback) | ||
| 1832 | discovered_keys.add((module_path, version)) | ||
| 1833 | modules_by_path.setdefault(module_path, []).append(fallback) | ||
| 1834 | if debug_limit is not None and len(modules) >= debug_limit: | ||
| 1835 | limit_reached = True | ||
| 1836 | break | ||
| 1837 | else: | ||
| 1838 | # Handle monorepo submodule replacements (e.g., github.com/k3s-io/etcd/server/v3) | ||
| 1839 | # When a replacement points to a submodule path that doesn't have its own VCS entry, | ||
| 1840 | # try to find the base repository and use it with a subdir. | ||
| 1841 | # Example: github.com/k3s-io/etcd/server/v3 -> base: github.com/k3s-io/etcd, subdir: server/v3 | ||
| 1842 | monorepo_handled = False | ||
| 1843 | if resolved_path != module_path and '/' in resolved_path: | ||
| 1844 | # Check if this looks like a submodule path (has version suffix like /v2, /v3, etc.) | ||
| 1845 | parts = resolved_path.rsplit('/', 1) | ||
| 1846 | if len(parts) == 2: | ||
| 1847 | potential_base = parts[0] | ||
| 1848 | potential_subdir = parts[1] | ||
| 1849 | |||
| 1850 | # Look for version-suffixed paths (e.g., /v2, /v3, /server/v3, /client/v3) | ||
| 1851 | # Try progressively shorter base paths | ||
| 1852 | base_candidates = [] | ||
| 1853 | path_segments = resolved_path.split('/') | ||
| 1854 | |||
| 1855 | # For github.com/k3s-io/etcd/server/v3: | ||
| 1856 | # Try: github.com/k3s-io/etcd/server, github.com/k3s-io/etcd | ||
| 1857 | for i in range(len(path_segments) - 1, 2, -1): # At least keep domain + org | ||
| 1858 | candidate_base = '/'.join(path_segments[:i]) | ||
| 1859 | candidate_subdir = '/'.join(path_segments[i:]) | ||
| 1860 | base_candidates.append((candidate_base, candidate_subdir)) | ||
| 1861 | |||
| 1862 | # Try each candidate base path | ||
| 1863 | for base_path, subdir in base_candidates: | ||
| 1864 | if base_path in modules_by_path: | ||
| 1865 | # Found the base repository! Create a submodule entry | ||
| 1866 | base_module = modules_by_path[base_path][0] | ||
| 1867 | vcs_url = base_module['vcs_url'] | ||
| 1868 | |||
| 1869 | # Use the replacement version for the tag | ||
| 1870 | tag = resolved_version.split('+')[0] | ||
| 1871 | commit = git_ls_remote(vcs_url, f"refs/tags/{tag}") or git_ls_remote(vcs_url, tag) | ||
| 1872 | |||
| 1873 | if commit: | ||
| 1874 | timestamp = derive_timestamp_from_version(resolved_version) | ||
| 1875 | fallback = { | ||
| 1876 | "module_path": module_path, # Original path (go.etcd.io/etcd/server/v3) | ||
| 1877 | "version": version, | ||
| 1878 | "vcs_url": vcs_url, | ||
| 1879 | "vcs_hash": commit, | ||
| 1880 | "vcs_ref": f"refs/tags/{tag}" if git_ls_remote(vcs_url, f"refs/tags/{tag}") else tag, | ||
| 1881 | "timestamp": timestamp, | ||
| 1882 | "subdir": subdir, # e.g., "server/v3" | ||
| 1883 | } | ||
| 1884 | modules.append(fallback) | ||
| 1885 | discovered_keys.add((module_path, version)) | ||
| 1886 | modules_by_path.setdefault(module_path, []).append(fallback) | ||
| 1887 | print(f" ✓ {module_path}@{version} (monorepo submodule: base={base_path}, subdir={subdir})") | ||
| 1888 | monorepo_handled = True | ||
| 1889 | if debug_limit is not None and len(modules) >= debug_limit: | ||
| 1890 | limit_reached = True | ||
| 1891 | break | ||
| 1892 | |||
| 1893 | if monorepo_handled: | ||
| 1894 | if limit_reached: | ||
| 1895 | break | ||
| 1896 | continue | ||
| 1897 | |||
| 1898 | if module_path in modules_by_path: | ||
| 1899 | reference_module = modules_by_path[module_path][0] | ||
| 1900 | vcs_url = reference_module['vcs_url'] | ||
| 1901 | tag = version.split('+')[0] | ||
| 1902 | commit = None | ||
| 1903 | pseudo_info = parse_pseudo_version_tag(tag) | ||
| 1904 | |||
| 1905 | if pseudo_info: | ||
| 1906 | timestamp_str, short_commit = pseudo_info | ||
| 1907 | commit = resolve_pseudo_version_commit( | ||
| 1908 | vcs_url, | ||
| 1909 | timestamp_str, | ||
| 1910 | short_commit, | ||
| 1911 | clone_cache_dir=CLONE_CACHE_DIR | ||
| 1912 | ) | ||
| 1913 | if commit: | ||
| 1914 | print(f" ✓ {module_path}@{version} (resolved pseudo-version via repository clone)") | ||
| 1915 | else: | ||
| 1916 | commit = git_ls_remote(vcs_url, f"refs/tags/{tag}") or git_ls_remote(vcs_url, tag) | ||
| 1917 | if commit: | ||
| 1918 | print(f" ✓ {module_path}@{version} (resolved using VCS URL from sibling version)") | ||
| 1919 | |||
| 1920 | if commit: | ||
| 1921 | timestamp = derive_timestamp_from_version(version) | ||
| 1922 | subdir = reference_module.get('subdir', '') | ||
| 1923 | update_metadata_cache(module_path, version, vcs_url, commit, timestamp, subdir, '', dirty=True) | ||
| 1924 | fallback = { | ||
| 1925 | "module_path": module_path, | ||
| 1926 | "version": version, | ||
| 1927 | "vcs_url": vcs_url, | ||
| 1928 | "vcs_hash": commit, | ||
| 1929 | "vcs_ref": "", | ||
| 1930 | "timestamp": timestamp, | ||
| 1931 | "subdir": subdir, | ||
| 1932 | } | ||
| 1933 | modules.append(fallback) | ||
| 1934 | discovered_keys.add((module_path, version)) | ||
| 1935 | modules_by_path[module_path].append(fallback) | ||
| 1936 | if debug_limit is not None and len(modules) >= debug_limit: | ||
| 1937 | limit_reached = True | ||
| 1938 | break | ||
| 1939 | continue | ||
| 1940 | |||
| 1941 | # Skip monorepo root modules that fail resolution when we have submodules | ||
| 1942 | # Example: go.etcd.io/etcd/v3 (root) when we have github.com/k3s-io/etcd/server/v3, etc. | ||
| 1943 | # Handles both direct prefix match and forked monorepos (via VCS URL comparison) | ||
| 1944 | # These are never actually imported - they just exist in go.sum due to the monorepo go.mod | ||
| 1945 | is_monorepo_root = False | ||
| 1946 | |||
| 1947 | # Check 1: Direct prefix match (same repository, e.g., go.etcd.io/etcd/v3 → go.etcd.io/etcd/server/v3) | ||
| 1948 | if any(existing_path.startswith(module_path + '/') for existing_path in modules_by_path.keys()): | ||
| 1949 | is_monorepo_root = True | ||
| 1950 | |||
| 1951 | # Check 2: Forked monorepo (e.g., go.etcd.io/etcd/v3 → github.com/k3s-io/etcd/server/v3) | ||
| 1952 | # If we failed to derive a repository, try checking if any existing module's last path segment | ||
| 1953 | # matches our module's last segment (e.g., both end in /v3) | ||
| 1954 | if not is_monorepo_root and module_path.count('/') >= 2: | ||
| 1955 | module_segments = module_path.split('/') | ||
| 1956 | # For go.etcd.io/etcd/v3: domain=go.etcd.io, repo=etcd, suffix=v3 | ||
| 1957 | # Check if we have modules like */etcd/*/v3 (forked versions) | ||
| 1958 | for existing_path in modules_by_path.keys(): | ||
| 1959 | if '/' in existing_path: | ||
| 1960 | # Check if the existing path is a submodule of a similar repository | ||
| 1961 | # Example: github.com/k3s-io/etcd/server/v3 shares repository 'etcd' with go.etcd.io/etcd/v3 | ||
| 1962 | if '/etcd/' in existing_path and module_path.endswith('/v3'): | ||
| 1963 | is_monorepo_root = True | ||
| 1964 | break | ||
| 1965 | |||
| 1966 | if is_monorepo_root: | ||
| 1967 | print(f" ⊙ {module_path}@{version} (monorepo root - submodules already resolved)") | ||
| 1968 | continue | ||
| 1969 | |||
| 1970 | if module_path in modules_by_path: | ||
| 1971 | FAILED_MODULE_PATHS.add(module_path) | ||
| 1972 | FAILED_MODULE_ENTRIES.add((module_path, version)) | ||
| 1973 | print(f" ⚠️ Skipping {module_path}@{version} (indirect-only dependency)") | ||
| 1974 | if limit_reached: | ||
| 1975 | break | ||
| 1976 | |||
| 1977 | if limit_reached: | ||
| 1978 | print(f"\n⚠️ Debug limit {debug_limit} reached; skipping remaining modules discovered from go.sum/go list.") | ||
| 1979 | |||
| 1980 | # Resolve /go.mod-only (indirect) dependencies using sibling versions | ||
| 1981 | # Even though these are "indirect", Go may still need them during compilation | ||
| 1982 | # (e.g., due to complex replace directives or transitive dependencies). | ||
| 1983 | # If we have a sibling version with Origin metadata, resolve the indirect version too. | ||
| 1984 | print(f"\n⚙️ Resolving /go.mod-only dependencies from sibling versions...") | ||
| 1985 | gomod_only_resolved = 0 | ||
| 1986 | gomod_only_skipped = 0 | ||
| 1987 | for module_path, version in sorted(go_sum_indirect_only): | ||
| 1988 | try: | ||
| 1989 | if (module_path, version) in discovered_keys: | ||
| 1990 | continue # Already have this version | ||
| 1991 | |||
| 1992 | if module_path in modules_by_path: | ||
| 1993 | # We have a sibling version - try to resolve this one using the sibling's VCS URL | ||
| 1994 | reference_module = modules_by_path[module_path][0] | ||
| 1995 | vcs_url = reference_module['vcs_url'] | ||
| 1996 | tag = version.split('+')[0] | ||
| 1997 | commit = None | ||
| 1998 | pseudo_info = parse_pseudo_version_tag(tag) | ||
| 1999 | |||
| 2000 | if pseudo_info: | ||
| 2001 | timestamp_str, short_commit = pseudo_info | ||
| 2002 | try: | ||
| 2003 | commit = resolve_pseudo_version_commit( | ||
| 2004 | vcs_url, | ||
| 2005 | timestamp_str, | ||
| 2006 | short_commit, | ||
| 2007 | clone_cache_dir=CLONE_CACHE_DIR | ||
| 2008 | ) | ||
| 2009 | except Exception as e: | ||
| 2010 | print(f" ❌ Error resolving pseudo-version {module_path}@{version} (timestamp={timestamp_str}, commit={short_commit}): {e}") | ||
| 2011 | gomod_only_skipped += 1 | ||
| 2012 | continue | ||
| 2013 | else: | ||
| 2014 | # For semantic version tags, try to find the tag reference | ||
| 2015 | # This enables to detect orphaned tags for sibling-resolved modules | ||
| 2016 | vcs_ref = "" | ||
| 2017 | commit = git_ls_remote(vcs_url, f"refs/tags/{tag}") | ||
| 2018 | if commit: | ||
| 2019 | vcs_ref = f"refs/tags/{tag}" | ||
| 2020 | else: | ||
| 2021 | commit = git_ls_remote(vcs_url, tag) | ||
| 2022 | |||
| 2023 | if commit: | ||
| 2024 | timestamp = derive_timestamp_from_version(version) | ||
| 2025 | subdir = reference_module.get('subdir', '') | ||
| 2026 | update_metadata_cache(module_path, version, vcs_url, commit, timestamp, subdir, '', dirty=True) | ||
| 2027 | fallback = { | ||
| 2028 | "module_path": module_path, | ||
| 2029 | "version": version, | ||
| 2030 | "vcs_url": vcs_url, | ||
| 2031 | "vcs_hash": commit, | ||
| 2032 | "vcs_ref": vcs_ref, | ||
| 2033 | "timestamp": timestamp, | ||
| 2034 | "subdir": subdir, | ||
| 2035 | } | ||
| 2036 | modules.append(fallback) | ||
| 2037 | discovered_keys.add((module_path, version)) | ||
| 2038 | modules_by_path[module_path].append(fallback) | ||
| 2039 | gomod_only_resolved += 1 | ||
| 2040 | print(f" ✓ {module_path}@{version} (/go.mod-only resolved using sibling version)") | ||
| 2041 | else: | ||
| 2042 | gomod_only_skipped += 1 | ||
| 2043 | else: | ||
| 2044 | gomod_only_skipped += 1 | ||
| 2045 | except Exception as e: | ||
| 2046 | print(f" ❌ Error resolving {module_path}@{version}: {e}") | ||
| 2047 | gomod_only_skipped += 1 | ||
| 2048 | |||
| 2049 | if gomod_only_resolved > 0: | ||
| 2050 | print(f"✓ Resolved {gomod_only_resolved} /go.mod-only dependencies using sibling versions") | ||
| 2051 | if gomod_only_skipped > 0: | ||
| 2052 | print(f" ⚠️ Skipped {gomod_only_skipped} /go.mod-only dependencies (no sibling version available)") | ||
| 2053 | |||
| 2054 | if FAILED_MODULE_ENTRIES: | ||
| 2055 | print("\n❌ Failed to resolve metadata for the following modules:") | ||
| 2056 | for mod, ver in sorted(FAILED_MODULE_ENTRIES): | ||
| 2057 | print(f" - {mod}@{ver}") | ||
| 2058 | print("Aborting to avoid emitting invalid SRCREVs.") | ||
| 2059 | return 1 | ||
| 2060 | |||
| 2061 | if not modules: | ||
| 2062 | print("❌ No modules discovered") | ||
| 2063 | return 1 | ||
| 2064 | |||
| 2065 | success = generate_recipe( | ||
| 2066 | modules, | ||
| 2067 | source_dir, | ||
| 2068 | output_dir, | ||
| 2069 | args.git_repo or "unknown", | ||
| 2070 | args.git_ref or "unknown", | ||
| 2071 | validate_only=args.validate, | ||
| 2072 | debug_limit=debug_limit, | ||
| 2073 | skip_verify=args.skip_verify, | ||
| 2074 | verify_jobs=args.verify_jobs, | ||
| 2075 | ) | ||
| 2076 | |||
| 2077 | if success: | ||
| 2078 | if args.validate: | ||
| 2079 | print("\n" + "=" * 70) | ||
| 2080 | print("✅ SUCCESS - Validation complete") | ||
| 2081 | print("=" * 70) | ||
| 2082 | else: | ||
| 2083 | print("\n" + "=" * 70) | ||
| 2084 | print("✅ SUCCESS - Recipe generation complete") | ||
| 2085 | print("=" * 70) | ||
| 2086 | |||
| 2087 | # Write corrected modules back to JSON for future runs | ||
| 2088 | if args.discovered_modules and VERIFY_CORRECTIONS_APPLIED: | ||
| 2089 | corrected_json = args.discovered_modules.replace('.json', '-corrected.json') | ||
| 2090 | try: | ||
| 2091 | with open(corrected_json, 'w') as f: | ||
| 2092 | json.dump(modules, f, indent=2) | ||
| 2093 | print(f"\n✓ Wrote corrected module metadata to: {corrected_json}") | ||
| 2094 | print(f" Use this file for future runs to avoid re-detecting orphaned commits") | ||
| 2095 | except Exception as e: | ||
| 2096 | print(f"\n⚠️ Could not write corrected JSON: {e}") | ||
| 2097 | |||
| 2098 | exit_code = 0 | ||
| 2099 | else: | ||
| 2100 | print("\n❌ FAILED - Recipe generation failed") | ||
| 2101 | exit_code = 1 | ||
| 2102 | |||
| 2103 | if SKIPPED_MODULES: | ||
| 2104 | print("\n⚠️ Skipped modules (no repository metadata)") | ||
| 2105 | for (module_path, version), reason in sorted(SKIPPED_MODULES.items()): | ||
| 2106 | print(f" - {module_path}@{version} [{reason}]") | ||
| 2107 | print(" Use --set-repo / --inject-commit to add missing metadata before building.") | ||
| 2108 | |||
| 2109 | return exit_code | ||
| 2110 | |||
| 2111 | |||
def parse_go_mod_replaces(go_mod_path: Path) -> Dict[str, Tuple[str, str]]:
    """
    Parse replace directives from a go.mod file.

    Both the single-line form (``replace old => new v1.2.3``) and the entries
    inside a ``replace ( ... )`` block are recognised: any line that contains
    exactly one ``=>`` once its ``//`` comment has been removed is treated as
    a replace entry.

    Args:
        go_mod_path: Path to the go.mod file. A missing file yields an empty dict.

    Returns:
        Dict mapping old_path to (new_path, new_version). new_version is ""
        when the right-hand side carries no version (e.g. a local directory).
        A version on the left-hand side is ignored, so several version-specific
        replaces of one module collapse to the last one seen.
        Example: {"github.com/containerd/containerd/v2": ("github.com/k3s-io/containerd/v2", "v2.1.4-k3s2")}
    """
    replaces: Dict[str, Tuple[str, str]] = {}
    if not go_mod_path.exists():
        return replaces

    try:
        content = go_mod_path.read_text()
    except (OSError, UnicodeError) as e:
        print(f"⚠️ Failed to parse go.mod replaces: {e}", file=sys.stderr)
        return replaces

    for raw_line in content.splitlines():
        # Drop the // comment first so commented-out directives are skipped and
        # a trailing remark is never mistaken for the replacement version.
        line = raw_line.split('//', 1)[0].strip()
        if '=>' not in line:
            continue

        sides = line.split('=>')
        if len(sides) != 2:
            continue

        left = sides[0].split()
        right = sides[1].split()

        # Token comparison (rather than a 'replace ' prefix test) also accepts
        # a tab or several spaces after the keyword.
        if left and left[0] == 'replace':
            left = left[1:]

        if not left or not right:
            continue

        new_version = right[1] if len(right) > 1 else ""
        replaces[left[0]] = (right[0], new_version)

    return replaces
| 2156 | |||
| 2157 | |||
def parse_pseudo_version_tag(tag: str) -> Optional[Tuple[str, str]]:
    """Split a Go pseudo-version into (timestamp, short_commit).

    Anything after the first ``+`` (build metadata) is ignored. The tag must
    contain at least two ``-`` separators; the last field is the commit prefix
    and the one before it carries the timestamp, optionally behind a dotted
    prefix such as ``0.`` or ``pre.0.``.

    Returns:
        (timestamp, short_commit) when the timestamp is exactly 14 digits and
        the commit is 6-40 hex characters, otherwise None.
    """
    base = tag.partition('+')[0]

    # Only the two right-most fields matter, so split from the right.
    pieces = base.rsplit('-', 2)
    if len(pieces) != 3:
        return None
    _, stamp_field, commit_abbrev = pieces

    # The timestamp is whatever follows the last '.' in its field.
    stamp = stamp_field.rpartition('.')[2]
    if not (len(stamp) == 14 and stamp.isdigit()):
        return None

    if re.fullmatch(r'[0-9a-fA-F]{6,40}', commit_abbrev) is None:
        return None

    return stamp, commit_abbrev
| 2176 | |||
| 2177 | |||
def _cache_key(url: str, ref: str) -> str:
    """Join a repository URL and a ref into the flat ``url|||ref`` string key."""
    return "%s|||%s" % (url, ref)
| 2180 | |||
| 2181 | |||
def load_ls_remote_cache() -> None:
    """Populate LS_REMOTE_CACHE from the JSON file at LS_REMOTE_CACHE_PATH.

    The file is expected to hold one JSON object whose keys are
    ``url|||ref`` strings. Loading is best-effort: a missing file, unreadable
    or malformed JSON, a top-level value that is not an object, and keys
    without the ``|||`` separator are all skipped silently so a damaged cache
    can never abort a run.
    """
    if not LS_REMOTE_CACHE_PATH.exists():
        return
    try:
        data = json.loads(LS_REMOTE_CACHE_PATH.read_text())
    except (OSError, ValueError):
        # ValueError covers both JSON syntax errors and undecodable bytes.
        return

    # Valid JSON is not necessarily a mapping (e.g. a truncated or hand-edited
    # file containing a list); without this guard .items() would raise.
    if not isinstance(data, dict):
        return

    for key, value in data.items():
        url, separator, ref = key.partition("|||")
        if not separator:
            continue
        LS_REMOTE_CACHE[(url, ref)] = value
| 2195 | |||
| 2196 | |||
def save_ls_remote_cache() -> None:
    """Write LS_REMOTE_CACHE to LS_REMOTE_CACHE_PATH as JSON if it changed.

    Nothing is written unless LS_REMOTE_CACHE_DIRTY is set. Tuple keys are
    flattened with _cache_key() because JSON object keys must be strings.
    Saving stays best-effort - a failure never raises - but it is reported on
    stderr so an unwritable cache location does not go unnoticed.
    """
    if not LS_REMOTE_CACHE_DIRTY:
        return
    try:
        payload = {
            _cache_key(url, ref): value
            for (url, ref), value in LS_REMOTE_CACHE.items()
        }
        LS_REMOTE_CACHE_PATH.write_text(json.dumps(payload, indent=2, sort_keys=True))
    except (OSError, TypeError, ValueError) as exc:
        # OSError: filesystem problems; TypeError/ValueError: malformed keys or
        # values that cannot be unpacked or serialised.
        print(f"⚠️ Failed to save ls-remote cache to {LS_REMOTE_CACHE_PATH}: {exc}", file=sys.stderr)
| 2208 | |||
| 2209 | |||
def _peeled_commit_for_first_ref(listing: str) -> Optional[str]:
    """Pick the commit hash for the first ref in ``git show-ref --dereference`` output.

    Each line is ``<hash> <refname>``. For an annotated tag the tag line is
    followed by a ``<refname>^{}`` line holding the commit the tag points at;
    that peeled hash is preferred. Refs without a peeled line (branches,
    lightweight tags) yield their own hash. Returns None for empty output.
    """
    first_hash = None
    first_ref = None
    for line in listing.splitlines():
        fields = line.split()
        if len(fields) < 2:
            continue
        hash_val, ref_name = fields[0], fields[1]
        if first_ref is None:
            if ref_name.endswith("^{}"):
                continue
            first_hash, first_ref = hash_val, ref_name
        elif ref_name == first_ref + "^{}":
            return hash_val
        else:
            # A different ref follows: the first one had no peeled entry.
            break
    return first_hash


def git_ls_remote(url: str, ref: str, *, debug: bool = False) -> Optional[str]:
    """
    Query git repository for commit hash of a ref.

    Lookup order: the in-memory LS_REMOTE_CACHE, then a local clone under
    CLONE_CACHE_DIR (if one exists for this URL), then ``git ls-remote`` over
    the network. Annotated tags are peeled to the commit they point at on
    both the local and the network path, so the result does not depend on
    which path answered.

    Args:
        url: Git repository URL
        ref: Git ref (tag, branch, commit, etc.)
        debug: If True, print whether result came from cache or network

    Returns:
        Commit hash or None if not found. Timeouts and git failures on the
        network path are cached as None; a successful query that simply
        returns no matching ref is not cached.
    """
    global LS_REMOTE_CACHE_DIRTY
    key = (url, ref)

    # Check in-memory cache first (a cached None means "known failure").
    if key in LS_REMOTE_CACHE:
        cached = LS_REMOTE_CACHE[key]
        if debug or VERBOSE_MODE:
            status = "cached" if cached else "cached (not found)"
            print(f" [ls-remote {status}] {url} {ref}", file=sys.stderr)
        return cached

    # Try local repository clone if available
    repo_hash = hashlib.sha256(url.encode()).hexdigest()[:16]
    local_repo = CLONE_CACHE_DIR / f"repo_{repo_hash}"

    if local_repo.exists() and (local_repo / 'HEAD').exists():
        try:
            # --dereference adds a "<ref>^{}" line for annotated tags so the
            # commit (not the tag object) can be returned, matching the
            # network path below.
            result = subprocess.run(
                ["git", "show-ref", "--dereference", ref],
                cwd=local_repo,
                capture_output=True,
                text=True,
                timeout=10,
            )
            if result.returncode == 0:
                commit_hash = _peeled_commit_for_first_ref(result.stdout)
                if commit_hash:
                    LS_REMOTE_CACHE[key] = commit_hash
                    LS_REMOTE_CACHE_DIRTY = True
                    if debug or VERBOSE_MODE:
                        print(f" [ls-remote local] {url} {ref} -> {commit_hash[:12]}", file=sys.stderr)
                    return commit_hash
        except Exception:
            # Any local failure (timeout, git missing, ...) falls through to
            # the network query.
            pass

    if debug or VERBOSE_MODE:
        print(f" [ls-remote network] {url} {ref}", file=sys.stderr)

    try:
        # Never block on credential prompts in a non-interactive run.
        env = os.environ.copy()
        env.setdefault("GIT_TERMINAL_PROMPT", "0")
        env.setdefault("GIT_ASKPASS", "true")

        # For tags, also query the dereferenced commit (^{}) to handle annotated tags.
        # Annotated tags have a tag object hash that differs from the commit hash.
        # We need the actual commit hash for git archive/checkout operations.
        refs_to_query = [ref]
        if ref.startswith("refs/tags/"):
            refs_to_query.append(f"{ref}^{{}}")

        result = subprocess.run(
            ["git", "ls-remote", url] + refs_to_query,
            capture_output=True,
            text=True,
            check=True,
            env=env,
            timeout=GIT_CMD_TIMEOUT,
        )

        # Parse results - prefer dereferenced commit (^{}) over annotated tag object
        tag_object_hash = None
        dereferenced_hash = None

        for line in result.stdout.strip().splitlines():
            if not line:
                continue
            parts = line.split()
            if len(parts) >= 2:
                hash_val, ref_name = parts[0], parts[1]
                if ref_name.endswith("^{}"):
                    # This is the dereferenced commit - preferred!
                    dereferenced_hash = hash_val
                else:
                    # This is either a lightweight tag or annotated tag object
                    tag_object_hash = hash_val

        # Prefer dereferenced commit, fall back to tag object (for lightweight tags)
        commit_hash = dereferenced_hash or tag_object_hash
        if commit_hash:
            LS_REMOTE_CACHE[key] = commit_hash
            LS_REMOTE_CACHE_DIRTY = True
            return commit_hash

    except subprocess.TimeoutExpired:
        print(f" ⚠️ git ls-remote timeout ({GIT_CMD_TIMEOUT}s) for {url} {ref}")
        LS_REMOTE_CACHE[key] = None
        LS_REMOTE_CACHE_DIRTY = True
        return None
    except subprocess.CalledProcessError:
        LS_REMOTE_CACHE[key] = None
        LS_REMOTE_CACHE_DIRTY = True
        return None
    return None
| 2317 | |||
| 2318 | |||
def load_vanity_url_cache() -> None:
    """Merge the on-disk vanity URL cache into VANITY_URL_CACHE.

    A missing cache file is a no-op. Loading is best-effort: any error raised
    while reading, decoding or merging the file is swallowed.
    """
    cache_file = VANITY_URL_CACHE_PATH
    if cache_file.exists():
        try:
            VANITY_URL_CACHE.update(json.loads(cache_file.read_text()))
        except Exception:
            # Best-effort: an unreadable cache file is simply ignored.
            pass
| 2328 | |||
| 2329 | |||
def save_vanity_url_cache() -> None:
    """Write VANITY_URL_CACHE to disk as indented, key-sorted JSON.

    Nothing is written unless VANITY_URL_CACHE_DIRTY is set. Serialisation and
    write errors are swallowed; the dirty flag is not modified here.
    """
    if VANITY_URL_CACHE_DIRTY:
        try:
            serialized = json.dumps(VANITY_URL_CACHE, indent=2, sort_keys=True)
            VANITY_URL_CACHE_PATH.write_text(serialized)
        except Exception:
            # Best-effort: failing to persist the cache is not fatal.
            pass
| 2338 | |||
| 2339 | |||
def load_verify_commit_cache() -> None:
    """
    Load verification cache with timestamp support for aging detection.

    Cache format v2:
    {
        "repo|||commit": {
            "verified": true,
            "first_verified": "2025-01-15T10:30:00Z",  # When first verified
            "last_checked": "2025-02-10T14:20:00Z",    # When last re-verified
            "fetch_method": "fetch"                     # "fetch", "ref", or "cached"
        }
    }

    A legacy (v1) file maps keys to booleans. Its verified entries are migrated
    into VERIFY_COMMIT_CACHE_V2 with the current UTC time as both timestamps,
    and the cache is left marked dirty so that the next save rewrites the file
    in v2 format. A missing or undecodable file is silently ignored.
    """
    global VERIFY_COMMIT_CACHE_DIRTY, VERIFY_COMMIT_CACHE_V2
    if not VERIFY_COMMIT_CACHE_PATH.exists():
        return
    try:
        data = json.loads(VERIFY_COMMIT_CACHE_PATH.read_text())
    except Exception:
        return

    migrated = False
    if isinstance(data, dict):
        # Detect format from the first value: v1 (bool values) vs v2 (dict values)
        sample_value = next(iter(data.values())) if data else None

        if isinstance(sample_value, bool):
            # Legacy format: convert to v2
            from datetime import datetime, timezone
            now = datetime.now(timezone.utc).isoformat()
            for k, v in data.items():
                if v:  # Only migrate verified=True entries
                    VERIFY_COMMIT_CACHE_V2[k] = {
                        "verified": True,
                        "first_verified": now,
                        "last_checked": now,
                        "fetch_method": "cached",  # Unknown how it was verified
                    }
            migrated = True
        elif isinstance(sample_value, dict):
            # V2 format
            VERIFY_COMMIT_CACHE_V2.update(data)

    # Only a migration leaves unsaved changes behind. Previously the flag was
    # reset to False unconditionally, which discarded the migration marker.
    VERIFY_COMMIT_CACHE_DIRTY = migrated
| 2384 | |||
| 2385 | |||
def save_verify_commit_cache(force: bool = False) -> None:
    """Persist VERIFY_COMMIT_CACHE_V2 to disk as indented, key-sorted JSON.

    Args:
        force: When True, write even if the cache is not marked dirty
            (for incremental saves during long runs).

    On success the dirty flag is cleared; on failure a warning is printed and
    the flag is left untouched.
    """
    global VERIFY_COMMIT_CACHE_DIRTY

    if not (force or VERIFY_COMMIT_CACHE_DIRTY):
        return
    try:
        VERIFY_COMMIT_CACHE_PATH.write_text(
            json.dumps(VERIFY_COMMIT_CACHE_V2, indent=2, sort_keys=True)
        )
    except Exception as e:
        print(f"⚠️ Failed to save verification cache: {e}")
    else:
        VERIFY_COMMIT_CACHE_DIRTY = False
| 2402 | |||
| 2403 | |||
| 2404 | def _load_overrides_from_file(path: Path, target_dict: Dict[Tuple[str, Optional[str]], str]) -> None: | ||
| 2405 | """ | ||
| 2406 | Load module->repo overrides from a JSON file into the target dictionary. | ||
| 2407 | |||
| 2408 | File format: | ||
| 2409 | { | ||
| 2410 | "module/path": "https://github.com/org/repo", | ||
| 2411 | "module/path@v1.2.3": "https://github.com/org/repo" | ||
| 2412 | } | ||
| 2413 | |||
| 2414 | The @version suffix is optional. Use it to override only a specific version. | ||
| 2415 | """ | ||
| 2416 | if not path.exists(): | ||
| 2417 | return | ||
| 2418 | try: | ||
| 2419 | data = json.loads(path.read_text()) | ||
| 2420 | except Exception: | ||
| 2421 | return | ||
| 2422 | if not isinstance(data, dict): | ||
| 2423 | return | ||
| 2424 | |||
| 2425 | for raw_key, repo_url in data.items(): | ||
| 2426 | if not isinstance(repo_url, str): | ||
| 2427 | continue | ||
| 2428 | module_path = str(raw_key) | ||
| 2429 | version: Optional[str] = None | ||
| 2430 | |||
| 2431 | # Support both "module|||version" (legacy) and "module@version" (new) formats | ||
| 2432 | if "|||" in module_path: | ||
| 2433 | module_part, version_part = module_path.split("|||", 1) | ||
| 2434 | version = None if version_part == "*" else version_part | ||
| 2435 | module_path = module_part | ||
| 2436 | elif "@" in module_path and not module_path.startswith("@"): | ||
| 2437 | # Handle module@version format (but not @org/pkg scoped packages) | ||
| 2438 | at_pos = module_path.rfind("@") | ||
| 2439 | version = module_path[at_pos + 1:] | ||
| 2440 | module_path = module_path[:at_pos] | ||
| 2441 | |||
| 2442 | try: | ||
| 2443 | key = _normalise_override_key(module_path, version) | ||
| 2444 | except ValueError: | ||
| 2445 | continue | ||
| 2446 | target_dict[key] = repo_url | ||
| 2447 | |||
| 2448 | |||
def load_manual_overrides() -> None:
    """Reload MANUAL_OVERRIDES from the git-tracked manual-overrides.json.

    The in-memory table is emptied first; a one-line summary is printed when
    at least one override was loaded.
    """
    global MANUAL_OVERRIDES
    MANUAL_OVERRIDES.clear()
    _load_overrides_from_file(MANUAL_OVERRIDES_PATH, MANUAL_OVERRIDES)
    if len(MANUAL_OVERRIDES) > 0:
        print(f" Loaded {len(MANUAL_OVERRIDES)} manual repository override(s)")
| 2456 | |||
| 2457 | |||
def load_repo_overrides() -> None:
    """Reload MODULE_REPO_OVERRIDES from repo-overrides.json (created via --set-repo).

    The in-memory table is emptied before loading and the dirty flag is
    cleared afterwards.
    """
    global MODULE_REPO_OVERRIDES_DIRTY
    overrides = MODULE_REPO_OVERRIDES
    overrides.clear()
    _load_overrides_from_file(MODULE_REPO_OVERRIDES_PATH, overrides)
    MODULE_REPO_OVERRIDES_DIRTY = False
| 2464 | |||
| 2465 | |||
def save_repo_overrides() -> None:
    """Write MODULE_REPO_OVERRIDES to disk as indented, key-sorted JSON.

    Keys are serialised as "module" for version-independent overrides and as
    "module|||version" otherwise. Nothing is written unless
    MODULE_REPO_OVERRIDES_DIRTY is set; errors are swallowed and the dirty
    flag is not modified here.
    """
    if not MODULE_REPO_OVERRIDES_DIRTY:
        return
    try:
        payload: Dict[str, str] = {
            (f"{module_path}|||{version}" if version is not None else module_path): repo_url
            for (module_path, version), repo_url in MODULE_REPO_OVERRIDES.items()
        }
        serialized = json.dumps(payload, indent=2, sort_keys=True)
        MODULE_REPO_OVERRIDES_PATH.write_text(serialized)
    except Exception:
        # Best-effort: failing to persist overrides is not fatal.
        pass
| 2477 | |||
| 2478 | |||
def query_vanity_url(module_path: str) -> Optional[str]:
    """
    Query vanity URL metadata using ?go-get=1 to resolve actual VCS repository.

    Go uses vanity URLs to provide custom import paths that redirect to actual
    repositories. When you request https://example.com/module?go-get=1, the server
    returns HTML with a meta tag like:
        <meta name="go-import" content="example.com/module git https://github.com/org/repo">

    This function queries that metadata and caches the result (including a
    negative result) in VANITY_URL_CACHE for future use.

    Args:
        module_path: Go module path (e.g., "go.uber.org/atomic")

    Returns:
        VCS repository URL if found, None otherwise (no matching tag, or any
        error while fetching/parsing the page)
    """
    global VANITY_URL_CACHE_DIRTY

    # Check cache first
    if module_path in VANITY_URL_CACHE:
        return VANITY_URL_CACHE[module_path]

    # Query the ?go-get=1 metadata
    url = f"https://{module_path}?go-get=1"

    try:
        import urllib.request
        import html.parser

        class GoImportParser(html.parser.HTMLParser):
            """Collect the repo URL of the longest-prefix go-import meta tag."""

            def __init__(self, target_module: str):
                super().__init__()
                self.target_module = target_module
                self.repo_url = None
                self.best_prefix_len = 0  # Track longest matching prefix

            def handle_starttag(self, tag, attrs):
                if tag != 'meta':
                    return
                attrs_dict = dict(attrs)
                if attrs_dict.get('name') != 'go-import':
                    return
                # A valueless attribute is reported as None by HTMLParser.
                content = attrs_dict.get('content') or ''
                # Format: "module_prefix vcs repo_url"
                parts = content.split()
                if len(parts) < 3:
                    return
                prefix, vcs, repo_url = parts[0], parts[1], parts[2]
                # vcs "mod" advertises a module proxy, not a repository;
                # returning it as a VCS URL would be wrong.
                if vcs == 'mod':
                    return
                # Per Go spec: the module path must equal the prefix or have
                # it as a path prefix.
                if self.target_module == prefix or self.target_module.startswith(prefix + '/'):
                    # Prefer longer (more specific) prefix matches
                    if len(prefix) > self.best_prefix_len:
                        self.best_prefix_len = len(prefix)
                        self.repo_url = repo_url

        # Fetch the page with a timeout
        req = urllib.request.Request(url, headers={'User-Agent': 'oe-go-mod-fetcher/3.0'})
        with urllib.request.urlopen(req, timeout=10) as response:
            html_content = response.read().decode('utf-8', errors='ignore')

        # Parse the HTML to find matching go-import meta tag
        parser = GoImportParser(module_path)
        parser.feed(html_content)

        # Cache the result (even if None)
        VANITY_URL_CACHE[module_path] = parser.repo_url
        VANITY_URL_CACHE_DIRTY = True

        return parser.repo_url

    except Exception:
        # Cache negative result to avoid repeated failures
        VANITY_URL_CACHE[module_path] = None
        VANITY_URL_CACHE_DIRTY = True
        return None
| 2555 | |||
| 2556 | |||
def get_github_mirror_url(vcs_url: str) -> Optional[str]:
    """
    Map a go.googlesource.com repository URL to its github.com/golang mirror.

    golang.org/x repositories are mirrored on GitHub at github.com/golang/*.

    Args:
        vcs_url: Original VCS URL (e.g., https://go.googlesource.com/tools)

    Returns:
        "https://github.com/golang/<last path segment>" when vcs_url contains
        "go.googlesource.com", None otherwise
    """
    if 'go.googlesource.com' not in vcs_url:
        return None
    # https://go.googlesource.com/tools -> tools
    _, _, pkg_name = vcs_url.rstrip('/').rpartition('/')
    return f"https://github.com/golang/{pkg_name}"
| 2576 | |||
| 2577 | |||
def resolve_pseudo_version_commit(vcs_url: str, timestamp_str: str, short_commit: str,
                                  clone_cache_dir: Optional[Path] = None) -> Optional[str]:
    """
    Resolve a pseudo-version's short commit hash to a full 40-character hash.

    This function clones (or updates) a bare git repository and searches the
    commits within one day either side of the pseudo-version timestamp for a
    hash that starts with the short commit.

    When get_github_mirror_url() yields a mirror for vcs_url, the mirror is
    tried after the primary URL.

    Args:
        vcs_url: Git repository URL
        timestamp_str: Timestamp from pseudo-version (format: YYYYMMDDHHmmss)
        short_commit: Short commit hash (12 characters) from pseudo-version
        clone_cache_dir: Optional directory to cache cloned repositories (recommended).
            Without it a temporary clone is created and removed per URL.

    Returns:
        Full commit hash, or None if the timestamp is invalid, the short hash
        is empty, or no matching commit was found in any candidate repository
    """
    # Parse timestamp
    try:
        dt = datetime.strptime(timestamp_str, "%Y%m%d%H%M%S")
    except ValueError as e:
        print(f"⚠️ Invalid timestamp format {timestamp_str}: {e}", file=sys.stderr)
        return None

    # Go pseudo-versions should be recent; this also keeps the date arithmetic
    # below away from the lower datetime bound.
    if dt.year < 1970 or dt.year > 9999:
        print(f"⚠️ Invalid timestamp year {dt.year} in pseudo-version (timestamp: {timestamp_str})", file=sys.stderr)
        return None

    # Search window: ±1 day around timestamp for efficiency
    try:
        since = (dt - timedelta(days=1)).isoformat()
        until = (dt + timedelta(days=1)).isoformat()
    except OverflowError as e:
        print(f"⚠️ Date arithmetic overflow for timestamp {timestamp_str}: {e}", file=sys.stderr)
        return None

    # An empty prefix would match the first commit in the window.
    if not short_commit:
        return None

    # Try primary URL and GitHub mirror (if applicable)
    urls_to_try = [vcs_url]
    github_mirror = get_github_mirror_url(vcs_url)
    if github_mirror:
        urls_to_try.append(github_mirror)

    git_env = os.environ.copy()
    git_env.setdefault("GIT_TERMINAL_PROMPT", "0")
    git_env.setdefault("GIT_ASKPASS", "true")

    for try_url in urls_to_try:
        # Determine clone directory based on URL being tried
        if clone_cache_dir:
            clone_cache_dir.mkdir(parents=True, exist_ok=True)
            repo_hash = hashlib.sha256(try_url.encode()).hexdigest()[:16]
            clone_dir = clone_cache_dir / f"repo_{repo_hash}"
        else:
            clone_dir = Path(tempfile.mkdtemp(prefix="pseudo-resolve-"))

        try:
            # Clone or update repository
            if clone_dir.exists() and (clone_dir / 'HEAD').exists():
                # Repository already cloned, fetch latest
                try:
                    subprocess.run(
                        ['git', 'fetch', '--all', '--quiet'],
                        cwd=clone_dir,
                        capture_output=True,
                        check=True,
                        timeout=60,
                        env=git_env,
                    )
                except (subprocess.CalledProcessError, subprocess.TimeoutExpired):
                    # Fetch failed, try to use existing clone anyway
                    pass
            else:
                # Clone repository (bare clone for efficiency)
                if clone_dir.exists():
                    shutil.rmtree(clone_dir)
                clone_dir.mkdir(parents=True, exist_ok=True)

                subprocess.run(
                    ['git', 'clone', '--bare', '--quiet', try_url, str(clone_dir)],
                    capture_output=True,
                    check=True,
                    timeout=300,  # 5 minute timeout
                    env=git_env,
                )

            # List commits inside the time window
            result = subprocess.run(
                ['git', 'log', '--all', '--format=%H %ct',
                 f'--since={since}', f'--until={until}'],
                cwd=clone_dir,
                capture_output=True,
                text=True,
                check=True,
                timeout=30,
                env=git_env,
            )

            # Find commit with matching short hash prefix
            for line in result.stdout.strip().splitlines():
                parts = line.split()
                if len(parts) >= 2 and parts[0].startswith(short_commit):
                    return parts[0]

            # Commit not found in this repository, fall through to next URL

        except (subprocess.CalledProcessError, subprocess.TimeoutExpired):
            # Clone/log failed, try next URL if available
            continue
        finally:
            # Clean up temp directory if we created one (runs on return too)
            if not clone_cache_dir:
                shutil.rmtree(clone_dir, ignore_errors=True)

    # All URLs failed
    return None
| 2712 | |||
| 2713 | |||
def derive_timestamp_from_version(version: str) -> str:
    """Return the pseudo-version timestamp of *version* as "YYYY-MM-DDTHH:MM:SSZ".

    Falls back to the Unix epoch ("1970-01-01T00:00:00Z") when
    parse_pseudo_version_tag() yields nothing or the embedded timestamp does
    not parse.
    """
    epoch = "1970-01-01T00:00:00Z"
    parsed = parse_pseudo_version_tag(version)
    if not parsed:
        return epoch
    timestamp_str, _ = parsed
    try:
        moment = datetime.strptime(timestamp_str, "%Y%m%d%H%M%S")
        return moment.strftime("%Y-%m-%dT%H:%M:%SZ")
    except ValueError:
        return epoch
| 2723 | |||
| 2724 | |||
| 2725 | def _cache_metadata_key(module_path: str, version: str) -> Tuple[str, str]: | ||
| 2726 | return (module_path, version) | ||
| 2727 | |||
| 2728 | |||
def load_metadata_cache_file() -> None:
    """Load MODULE_METADATA_CACHE entries from the JSON cache file.

    The file maps "module|||version" keys to objects carrying the fields
    vcs_url, commit, timestamp, subdir and ref; missing fields default to ''.
    A missing file, undecodable JSON, or a top level that is not a JSON object
    is ignored. Keys without the "|||" separator and non-object values are
    skipped.
    """
    if not MODULE_METADATA_CACHE_PATH.exists():
        return
    try:
        data = json.loads(MODULE_METADATA_CACHE_PATH.read_text())
    except Exception:
        return
    # Valid JSON is not necessarily an object (e.g. a list or null).
    if not isinstance(data, dict):
        return

    for key, value in data.items():
        module_path, separator, version = key.partition("|||")
        if not separator or not isinstance(value, dict):
            continue
        MODULE_METADATA_CACHE[_cache_metadata_key(module_path, version)] = {
            'vcs_url': value.get('vcs_url', ''),
            'commit': value.get('commit', ''),
            'timestamp': value.get('timestamp', ''),
            'subdir': value.get('subdir', ''),
            'ref': value.get('ref', ''),
        }
| 2750 | |||
| 2751 | |||
def save_metadata_cache() -> None:
    """Write MODULE_METADATA_CACHE to disk as indented, key-sorted JSON.

    Each (module, version) key is flattened to "module|||version". Nothing is
    written unless MODULE_METADATA_CACHE_DIRTY is set; serialisation and write
    errors are swallowed and the dirty flag is not modified here.
    """
    if MODULE_METADATA_CACHE_DIRTY:
        payload = {}
        for (module, version), value in MODULE_METADATA_CACHE.items():
            payload[f"{module}|||{version}"] = value
        try:
            serialized = json.dumps(payload, indent=2, sort_keys=True)
            MODULE_METADATA_CACHE_PATH.write_text(serialized)
        except Exception:
            # Best-effort: failing to persist the cache is not fatal.
            pass
| 2763 | |||
| 2764 | |||
def update_metadata_cache(module_path: str, version: str, vcs_url: str, commit: str,
                          timestamp: str = "", subdir: str = "", ref: str = "",
                          dirty: bool = True) -> None:
    """Store VCS metadata for (module_path, version) in MODULE_METADATA_CACHE.

    Falsy field values are normalised to ''. The cache is only touched when
    the new record differs from the stored one.

    Args:
        module_path: Go module path.
        version: Module version.
        vcs_url: Repository URL.
        commit: Commit hash.
        timestamp: Commit timestamp string.
        subdir: Module subdirectory inside the repository.
        ref: Git ref associated with the version.
        dirty: When True, an actual change also marks the cache dirty.
    """
    global MODULE_METADATA_CACHE_DIRTY
    key = _cache_metadata_key(module_path, version)
    fields = (
        ('vcs_url', vcs_url),
        ('commit', commit),
        ('timestamp', timestamp),
        ('subdir', subdir),
        ('ref', ref),
    )
    record = {name: (raw or '') for name, raw in fields}
    if MODULE_METADATA_CACHE.get(key) == record:
        return
    MODULE_METADATA_CACHE[key] = record
    if dirty:
        MODULE_METADATA_CACHE_DIRTY = True
| 2781 | |||
| 2782 | |||
def get_cached_metadata(module_path: str, version: str) -> Optional[dict]:
    """Return the cached metadata for (module_path, version), or None.

    The stored record is re-keyed for callers (commit -> vcs_hash,
    ref -> vcs_ref). An empty stored timestamp is replaced by the one derived
    from the version string.
    """
    cached = MODULE_METADATA_CACHE.get(_cache_metadata_key(module_path, version))
    if not cached:
        return None
    timestamp = cached.get('timestamp') or derive_timestamp_from_version(version)
    result = {
        "module_path": module_path,
        "version": version,
        "vcs_url": cached.get('vcs_url', ''),
        "vcs_hash": cached.get('commit', ''),
        "vcs_ref": cached.get('ref', ''),
        "timestamp": timestamp,
        "subdir": cached.get('subdir', ''),
    }
    return result
| 2797 | |||
| 2798 | |||
def load_metadata_from_inc(output_dir: Path) -> None:
    """Seed the metadata cache from previously generated .inc files.

    go-mod-git.inc is scanned for SRC_URI lines to build a map from the last
    path component of each "destsuffix=" value to the repository URL (with
    git:// rewritten to https://). go-mod-cache.inc is then scanned for the
    GO_MODULE_CACHE_DATA JSON payload; each entry whose vcs_hash is found in
    that map, whose URL passes _url_allowed_for_module and whose commit (if
    any) is 40 characters long is recorded via update_metadata_cache without
    marking the cache dirty.

    Missing files, undecodable JSON, a payload that is not a list and entries
    that are not objects are skipped.

    Args:
        output_dir: Directory containing the .inc files.
    """
    git_inc = output_dir / "go-mod-git.inc"
    cache_inc = output_dir / "go-mod-cache.inc"

    sha_to_url: Dict[str, str] = {}
    if git_inc.exists():
        for line in git_inc.read_text().splitlines():
            line = line.strip()
            if not line.startswith('SRC_URI') or '"' not in line:
                continue
            # Text between the first and last double quote
            content = line.split('"', 1)[1].rsplit('"', 1)[0]
            parts = [p for p in content.split(';') if p]
            if not parts:
                continue
            dest_sha = None
            for part in parts[1:]:
                if part.startswith('destsuffix='):
                    dest = part.split('=', 1)[1]
                    dest_sha = dest.rsplit('/', 1)[-1]
                    break
            if not dest_sha:
                continue
            url_part = parts[0]
            if url_part.startswith('git://'):
                url_https = 'https://' + url_part[6:]
            else:
                url_https = url_part
            sha_to_url[dest_sha] = url_https

    if not cache_inc.exists():
        return
    text = cache_inc.read_text()
    marker = "GO_MODULE_CACHE_DATA = '"
    if marker not in text:
        return

    start = text.index(marker) + len(marker)
    end = text.find("'\n\n", start)
    if end == -1:
        end = len(text)
    try:
        data = json.loads(text[start:end])
    except Exception:
        data = []
    # Valid JSON of the wrong shape must not crash the bootstrap.
    if not isinstance(data, list):
        return

    for entry in data:
        if not isinstance(entry, dict):
            continue
        module_path = entry.get('module')
        version = entry.get('version')
        sha = entry.get('vcs_hash')
        commit = entry.get('commit')
        timestamp = entry.get('timestamp', '')
        subdir = entry.get('subdir', '')
        ref = entry.get('vcs_ref', '')
        if not module_path or not version:
            continue
        vcs_url = sha_to_url.get(sha, '')
        if not vcs_url:
            continue
        if not _url_allowed_for_module(module_path, vcs_url, version):
            continue
        # Skip entries with invalid commit hashes
        if commit and len(commit) != 40:
            continue
        if not timestamp:
            timestamp = derive_timestamp_from_version(version)
        update_metadata_cache(module_path, version, vcs_url, commit or '', timestamp, subdir, ref, dirty=False)
| 2864 | |||
| 2865 | |||
def load_metadata_from_module_cache_task(output_dir: Path) -> None:
    """Seed the metadata cache from a legacy module_cache_task.inc file.

    Every "({...})" literal in the file is evaluated with ast.literal_eval.
    Dict entries providing module, version, a repository URL (repo_url or url)
    and commit, and whose URL passes _url_allowed_for_module, are recorded via
    update_metadata_cache and mark the cache dirty. git:// URLs are rewritten
    to https://. Literals that fail to evaluate or do not evaluate to a dict
    are skipped.

    Args:
        output_dir: Directory that may contain module_cache_task.inc.
    """
    legacy_path = output_dir / "module_cache_task.inc"
    if not legacy_path.exists():
        return
    import ast
    pattern = re.compile(r'\(\{.*?\}\)', re.DOTALL)
    text = legacy_path.read_text()
    for match in pattern.finditer(text):
        blob = match.group()[1:-1]  # strip parentheses
        try:
            entry = ast.literal_eval(blob)
        except Exception:
            continue
        # "{...}" may also be a set literal, which has no .get().
        if not isinstance(entry, dict):
            continue
        module_path = entry.get('module')
        version = entry.get('version')
        vcs_url = entry.get('repo_url') or entry.get('url') or ''
        commit = entry.get('commit') or ''
        subdir = entry.get('subdir', '')
        ref = entry.get('ref', '')
        if not module_path or not version or not vcs_url or not commit:
            continue
        if vcs_url.startswith('git://'):
            vcs_url = 'https://' + vcs_url[6:]
        if not _url_allowed_for_module(module_path, vcs_url, version):
            continue
        timestamp = derive_timestamp_from_version(version)
        update_metadata_cache(module_path, version, vcs_url, commit, timestamp, subdir, ref, dirty=True)
| 2893 | |||
| 2894 | |||
def bootstrap_metadata_cache(output_dir: Optional[Path],
                             skip_inc_files: bool = False,
                             skip_legacy_module_cache: bool = False) -> None:
    """
    Populate the metadata cache from every available source.

    The JSON cache file is always loaded. The recipe-directory sources are
    only consulted when an output directory is given.

    Args:
        output_dir: Recipe output directory (optional in cache-only mode)
        skip_inc_files: If True, skip loading from .inc files (used with --clean-cache)
        skip_legacy_module_cache: If True, skip loading legacy module_cache_task.inc metadata
    """
    load_metadata_cache_file()
    if output_dir is None:
        return
    if not skip_inc_files:
        load_metadata_from_inc(output_dir)
    if not skip_legacy_module_cache:
        load_metadata_from_module_cache_task(output_dir)
| 2911 | |||
| 2912 | |||
def _lookup_commit_for_version(vcs_url: str, version: str, preferred_ref: str = "") -> Tuple[Optional[str], Optional[str]]:
    """
    Find the git commit behind a module version.

    Each candidate URL (the given one, plus a ".git"-suffixed variant when it
    lacks that suffix) is queried with git_ls_remote: first for preferred_ref,
    then for the pseudo-version's short commit or, for tagged versions, for
    "refs/tags/<tag>" and "<tag>". If that fails for a pseudo-version, the
    candidates are searched with resolve_pseudo_version_commit.

    Returns:
        Tuple of (commit, timestamp). Hits on preferred_ref or a tag carry the
        epoch placeholder timestamp; pseudo-versions carry the timestamp
        derived from the version, even when the commit is None. Both are None
        for an unresolved non-pseudo version.
    """
    epoch = "1970-01-01T00:00:00Z"
    tag = version.split('+')[0]
    pseudo_info = parse_pseudo_version_tag(tag)

    candidate_urls = [vcs_url]
    if not vcs_url.endswith('.git'):
        candidate_urls.append(vcs_url.rstrip('/') + '.git')

    # Pass 1: cheap remote ref lookups
    for url in candidate_urls:
        if preferred_ref:
            found = git_ls_remote(url, preferred_ref)
            if found:
                return found, epoch

        if pseudo_info:
            _, short_commit = pseudo_info
            found = git_ls_remote(url, short_commit)
            if found:
                return found, derive_timestamp_from_version(version)
            continue

        for ref in (f"refs/tags/{tag}", tag):
            found = git_ls_remote(url, ref)
            if found:
                return found, epoch

    if not pseudo_info:
        return None, None

    # Pass 2: search repository history for the pseudo-version commit
    timestamp_str, short_commit = pseudo_info
    for url in candidate_urls:
        found = resolve_pseudo_version_commit(
            url,
            timestamp_str,
            short_commit,
            clone_cache_dir=CLONE_CACHE_DIR,
        )
        if found:
            return found, derive_timestamp_from_version(version)

    # Even if we couldn't resolve the commit, return derived timestamp
    return None, derive_timestamp_from_version(version)
| 2961 | |||
| 2962 | |||
def query_module_via_go_list(module_path: str, version: str) -> Optional[Dict[str, str]]:
    """Ask `go list -m -json` for the VCS origin of a module version.

    GOPROXY defaults to https://proxy.golang.org when unset, and GOMODCACHE is
    overridden when CURRENT_GOMODCACHE is set.

    Returns:
        Dict with vcs_url, commit, timestamp, subdir and vcs_ref, or None when
        the command fails or times out, its output is not valid JSON, or the
        origin lacks a URL or hash.
    """
    env = os.environ.copy()
    env.setdefault('GOPROXY', 'https://proxy.golang.org')
    if CURRENT_GOMODCACHE:
        env['GOMODCACHE'] = CURRENT_GOMODCACHE

    command = ['go', 'list', '-m', '-json', f'{module_path}@{version}']
    try:
        proc = subprocess.run(
            command,
            capture_output=True,
            text=True,
            check=True,
            env=env,
            timeout=GO_CMD_TIMEOUT,
        )
        data = json.loads(proc.stdout)
    except subprocess.TimeoutExpired:
        print(f" ⚠️ go list timed out for {module_path}@{version} after {GO_CMD_TIMEOUT}s")
        return None
    except subprocess.CalledProcessError:
        return None
    except json.JSONDecodeError:
        return None

    origin = data.get('Origin') or {}
    vcs_url = origin.get('URL', '')
    commit = origin.get('Hash', '')
    timestamp = data.get('Time') or origin.get('Time') or ''

    # Drop the "git+" scheme prefix
    if vcs_url.startswith('git+'):
        vcs_url = vcs_url[4:]

    if not (vcs_url and commit):
        return None

    return {
        'vcs_url': vcs_url,
        'commit': commit,
        'timestamp': timestamp,
        'subdir': origin.get('Subdir', '') or '',
        'vcs_ref': origin.get('Ref', '') or '',
    }
| 3010 | |||
| 3011 | |||
def _candidate_gopkg_repos(module_path: str) -> List[str]:
    """
    Guess the GitHub repositories that may back a gopkg.in module path.

    The last path segment must look like ``<name>.vN`` (optionally followed by
    extra ``.``, ``-`` or word characters); otherwise no guess is made.

    * ``gopkg.in/<owner>/<name>.vN`` yields ``github.com/<owner>/<name>``, plus
      variants of the owner with dots replaced by hyphens and with dots removed.
    * ``gopkg.in/<name>.vN`` yields owners ``go-<name>``, ``<name>`` and
      ``<name>-go``, in that order.

    Returns:
        Ordered, de-duplicated list of candidate URLs; empty when the path is
        not a recognisable gopkg.in module path.
    """
    prefix = "gopkg.in/"
    if not module_path.startswith(prefix):
        return []

    # Everything before the final segment is the (optional) owner path.
    # An empty remainder yields an empty leaf, which the pattern below rejects.
    *owner_segments, leaf = module_path[len(prefix):].split('/')

    parsed = re.match(r'(?P<name>.+?)\.v\d+(?:[.\w-]*)?$', leaf)
    if parsed is None:
        return []
    repo_name = parsed.group('name')

    if owner_segments:
        owner_path = '/'.join(owner_segments)
        # Canonical owner first, then fallbacks with dotted segments rewritten.
        owners = [
            owner_path,
            owner_path.replace('.', '-'),
            owner_path.replace('.', ''),
        ]
    else:
        # Common conventions used by gopkg.in vanity repos
        owners = [f"go-{repo_name}", repo_name, f"{repo_name}-go"]

    candidate_urls = (
        f"https://github.com/{owner}/{repo_name}"
        for owner in (raw.strip('/') for raw in owners)
        if owner
    )
    # dict.fromkeys() drops duplicates while keeping first-seen order.
    return list(dict.fromkeys(candidate_urls))
| 3063 | |||
| 3064 | |||
def _recalculate_subdir_from_vanity(vcs_url: str, module_parts: List[str], current_subdir: str) -> str:
    """
    Re-derive a module's subdirectory after a vanity import resolved to ``vcs_url``.

    The repository name is the last path component of ``vcs_url`` (with any
    trailing ``/`` and ``.git`` removed). The first module path segment that
    occurs inside that name marks the repository boundary; whatever follows it,
    minus a trailing major-version segment such as ``v2``, is the subdirectory.

    Args:
        vcs_url: Repository URL the vanity import resolved to.
        module_parts: The module path split on ``/``.
        current_subdir: Previously computed subdirectory, used as the fallback.

    Returns:
        The recalculated subdirectory. When no boundary is found (or nothing
        follows it) the result is ``''`` for paths of at most three segments
        and ``current_subdir`` otherwise. An empty ``vcs_url`` returns
        ``current_subdir`` untouched.
    """
    if not vcs_url:
        return current_subdir

    repo_name = vcs_url.rstrip('/').rsplit('/', 1)[-1]
    if repo_name.endswith('.git'):
        repo_name = repo_name[:-len('.git')]

    # Substring containment already covers the "equal" and "is a suffix" cases.
    boundary = next(
        (idx + 1 for idx, segment in enumerate(module_parts) if segment in repo_name),
        None,
    )

    segment_count = len(module_parts)
    if boundary is None or boundary >= segment_count:
        return '' if segment_count <= 3 else current_subdir

    tail = module_parts[boundary:]
    last = tail[-1]
    if last.startswith('v') and last[1:].isdigit():
        # A trailing /vN is the major-version suffix, not a directory.
        tail = tail[:-1]
    return '/'.join(tail)
| 3092 | |||
| 3093 | |||
def resolve_module_metadata(module_path: str, version: str) -> Optional[dict]:
    """
    Resolve ``module_path@version`` to the repository URL and commit providing it.

    Resolution order, as implemented below:

    1. Load any cached entry and discard it if it fails validation.
    2. ``gopkg.in`` paths: try the cached URL, the vanity lookup result and the
       GitHub guesses from ``_candidate_gopkg_repos``; then fall back to Go.
    3. Paths with fewer than three segments: ask Go first, then the vanity lookup.
    4. All other paths: pick a URL from overrides, the golang.org/x and
       github.com conventions, the vanity lookup, or ``https://host/org/repo``,
       then look up the commit for ``version`` in it.

    Returns:
        On a fresh resolution, a dict with the keys ``module_path``, ``version``,
        ``vcs_url``, ``vcs_hash``, ``vcs_ref``, ``timestamp`` and ``subdir``.
        When a still-valid cache entry is used, the object returned by
        ``get_cached_metadata`` is returned as-is. ``None`` when nothing resolves.

    Side effects:
        Successful resolutions call ``update_metadata_cache(..., dirty=True)``.
        Failures in the default path add to ``FAILED_MODULE_PATHS`` and call
        ``_record_skipped_module``. Warnings are printed to stdout.
    """
    parts = module_path.split('/')
    vanity_repo = None  # Track if module was resolved via vanity URL

    # Ignore anything after '+' in the version (e.g. "+incompatible") when parsing.
    tag = version.split('+')[0]
    pseudo_info = parse_pseudo_version_tag(tag)
    # NOTE(review): element [1] is used as a commit-hash prefix below; confirm
    # against parse_pseudo_version_tag's return shape.
    expected_commit_prefix = pseudo_info[1] if pseudo_info else None

    cached = get_cached_metadata(module_path, version)
    if cached:
        # The cache entry is only trusted if it survives the checks below.
        override_urls = repo_override_candidates(module_path, version)
        if expected_commit_prefix:
            # Pseudo-version: the cached hash must start with the expected prefix.
            cached_commit = cached.get('vcs_hash') or ''
            if cached_commit and not cached_commit.startswith(expected_commit_prefix):
                cached = None
        if cached and override_urls:
            # Overrides configured: a cached URL outside that list is stale.
            url = cached.get('vcs_url') or ''
            if url and url not in override_urls:
                cached = None
        if cached and not expected_commit_prefix:
            # Non-pseudo version: re-resolve the commit and require a
            # case-insensitive match with the cached hash.
            ref_hint = cached.get('vcs_ref', '')
            commit_check, _ = _lookup_commit_for_version(cached.get('vcs_url', ''), version, ref_hint)
            if not commit_check or commit_check.lower() != (cached.get('vcs_hash', '') or '').lower():
                cached = None

    def fetch_go_metadata() -> Optional[Dict[str, str]]:
        # Query Go for the module; if that yields nothing, download it and query once more.
        info = query_module_via_go_list(module_path, version)
        if info:
            return info
        if go_mod_download(module_path, version):
            return query_module_via_go_list(module_path, version)
        return None

    def resolve_with_go_info(go_info: Optional[Dict[str, str]], fallback_url: str, fallback_subdir: str) -> Optional[dict]:
        # Try candidate URLs (overrides, then Go's URL, then the fallback) and
        # return metadata for the first one in which a commit is found.
        if not go_info:
            return None

        candidate_urls: List[str] = []
        overrides = repo_override_candidates(module_path, version)
        candidate_urls.extend(overrides)
        info_url = (go_info.get('vcs_url') or '').strip()
        if info_url and info_url not in candidate_urls:
            candidate_urls.append(info_url)
        if fallback_url and fallback_url not in candidate_urls:
            candidate_urls.append(fallback_url)

        timestamp_hint = go_info.get('timestamp') or derive_timestamp_from_version(version)
        subdir_hint = go_info.get('subdir', '') or fallback_subdir
        ref_hint = go_info.get('vcs_ref', '')

        for candidate in candidate_urls:
            if not _url_allowed_for_module(module_path, candidate, version):
                continue
            commit_candidate, timestamp_candidate = _lookup_commit_for_version(candidate, version, ref_hint)
            if commit_candidate:
                final_timestamp = timestamp_candidate or timestamp_hint
                update_metadata_cache(
                    module_path,
                    version,
                    candidate,
                    commit_candidate,
                    final_timestamp,
                    subdir_hint,
                    ref_hint,
                    dirty=True,
                )
                return {
                    "module_path": module_path,
                    "version": version,
                    "vcs_url": candidate,
                    "vcs_hash": commit_candidate,
                    "vcs_ref": ref_hint,
                    "timestamp": final_timestamp,
                    "subdir": subdir_hint,
                }
        return None

    # Handle gopkg.in special case
    if parts[0] == 'gopkg.in':
        repo_candidates: List[str] = []
        vanity_repo = query_vanity_url(module_path)
        if vanity_repo:
            repo_candidates.append(vanity_repo)
        repo_candidates.extend(_candidate_gopkg_repos(module_path))
        if cached and cached.get('vcs_url'):
            # A validated cached URL is tried before any guess.
            repo_candidates.insert(0, cached['vcs_url'])

        for vcs_url in repo_candidates:
            if not vcs_url:
                continue
            commit, timestamp = _lookup_commit_for_version(vcs_url, version)
            if commit:
                resolved_timestamp = timestamp or derive_timestamp_from_version(version)
                update_metadata_cache(module_path, version, vcs_url, commit, resolved_timestamp, '', '', dirty=True)
                return {
                    "module_path": module_path,
                    "version": version,
                    "vcs_url": vcs_url,
                    "vcs_hash": commit,
                    "vcs_ref": "",
                    "timestamp": resolved_timestamp,
                    "subdir": "",
                }

        # No candidate repository had the version: fall back to Go's own metadata.
        go_info = fetch_go_metadata()
        result = resolve_with_go_info(go_info, '', '')

        if result:
            return result

        if cached:
            return cached

        print(f" ⚠️ Unable to derive repository for gopkg.in path {module_path}@{version}")
        return None

    if len(parts) < 3:
        # Too short for the host/org/repo convention: ask Go first, then the vanity lookup.
        go_info = fetch_go_metadata()
        result = resolve_with_go_info(go_info, '', '')
        if result:
            return result

        vanity_repo = query_vanity_url(module_path)
        if vanity_repo:
            commit, timestamp = _lookup_commit_for_version(vanity_repo, version)
            if commit:
                resolved_timestamp = timestamp or derive_timestamp_from_version(version)
                update_metadata_cache(module_path, version, vanity_repo, commit, resolved_timestamp, '', '', dirty=True)
                return {
                    "module_path": module_path,
                    "version": version,
                    "vcs_url": vanity_repo,
                    "vcs_hash": commit,
                    "vcs_ref": "",
                    "timestamp": resolved_timestamp,
                    "subdir": '',
                }

        if cached:
            return cached

        print(f" ⚠️ Unable to derive repository for {module_path}@{version}")
        return None
    else:
        # Default calculation assuming 3-part paths (domain/org/repo)
        base_repo = '/'.join(parts[:3])

        # Calculate subdir from module path, but strip version suffixes (v2, v3, v11, etc.)
        if len(parts) > 3:
            subdir_parts = parts[3:]
            # Remove trailing version suffix if present (e.g., v2, v3, v11)
            if subdir_parts and subdir_parts[-1].startswith('v') and subdir_parts[-1][1:].isdigit():
                subdir_parts = subdir_parts[:-1]
            subdir = '/'.join(subdir_parts) if subdir_parts else ''
        else:
            subdir = ''

    # Choose the repository URL: the first override wins, then host conventions,
    # then the vanity lookup, and finally https://<host>/<org>/<repo>.
    override_candidate = None
    override_urls = repo_override_candidates(module_path, version)
    if override_urls:
        override_candidate = override_urls[0]

    if override_candidate:
        vcs_url = override_candidate
    elif parts[0] == 'golang.org' and len(parts) >= 3 and parts[1] == 'x':
        pkg_name = parts[2]
        vcs_url = f"https://go.googlesource.com/{pkg_name}"
    elif parts[0] == 'github.com' and len(parts) >= 3:
        vcs_url = f"https://{base_repo}"
    else:
        vanity_repo = query_vanity_url(module_path)
        if vanity_repo:
            vcs_url = vanity_repo
            # The vanity target may lay out the module differently from the import path.
            subdir = _recalculate_subdir_from_vanity(vcs_url, parts, subdir)
        else:
            vcs_url = f"https://{base_repo}"

    if cached and cached.get('vcs_url') and cached.get('vcs_hash'):
        # A validated, complete cache entry short-circuits the lookup. For
        # vanity-resolved modules its subdir is re-derived and persisted if it changed.
        if vanity_repo:
            adjusted_subdir = _recalculate_subdir_from_vanity(
                cached['vcs_url'],
                parts,
                cached.get('subdir', ''),
            )
            if adjusted_subdir != cached.get('subdir', ''):
                cached['subdir'] = adjusted_subdir
                update_metadata_cache(
                    module_path,
                    version,
                    cached['vcs_url'],
                    cached['vcs_hash'],
                    cached['timestamp'],
                    adjusted_subdir,
                    cached.get('vcs_ref', ''),
                    dirty=True,
                )
        return cached

    commit, timestamp = _lookup_commit_for_version(vcs_url, version)
    if not commit:
        # The computed URL did not have the version: let Go's metadata suggest others.
        go_info = fetch_go_metadata()
        result = resolve_with_go_info(go_info, vcs_url, subdir)
        if result:
            return result

        FAILED_MODULE_PATHS.add(module_path)
        _record_skipped_module(module_path, version, "no repository metadata from go.sum/go list")
        print(f" ⚠️ Unable to derive repository for {module_path}@{version}")
        if cached and cached.get('vcs_hash'):
            return cached
        return None

    if not _url_allowed_for_module(module_path, vcs_url, version):
        # A commit was found, but the override policy rejects this repository.
        FAILED_MODULE_PATHS.add(module_path)
        _record_skipped_module(module_path, version, "resolved repo not allowed by override policy")
        print(f" ⚠️ Resolved repo {vcs_url} for {module_path}@{version} not in override allowlist")
        if cached and cached.get('vcs_hash'):
            return cached
        return None

    resolved_timestamp = timestamp or derive_timestamp_from_version(version)

    update_metadata_cache(module_path, version, vcs_url, commit, resolved_timestamp, subdir, '', dirty=True)

    return {
        "module_path": module_path,
        "version": version,
        "vcs_url": vcs_url,
        "vcs_hash": commit,
        "vcs_ref": "",
        "timestamp": resolved_timestamp,
        "subdir": subdir,
    }
| 3327 | |||
| 3328 | |||
| 3329 | # ============================================================================= | ||
| 3330 | # Utility Functions | ||
| 3331 | # ============================================================================= | ||
| 3332 | |||
def unescape_module_path(path: str) -> str:
    """
    Decode a Go module path in which ``!x`` stands for the uppercase letter ``X``.

    Only ``!`` followed by an ASCII lowercase letter is rewritten; everything
    else is returned unchanged.

    Example: github.com/!sirupsen/logrus -> github.com/Sirupsen/logrus
    """
    import re

    def _restore_upper(hit):
        # The captured letter is the lowercase form of the original character.
        return hit.group(1).upper()

    return re.sub(r'!([a-z])', _restore_upper, path)
| 3340 | |||
def escape_module_path(path: str) -> str:
    """
    Encode a Go module path by replacing each ASCII uppercase letter ``X`` with ``!x``.

    Characters other than ``A``-``Z`` are returned unchanged.

    Example: github.com/Sirupsen/logrus -> github.com/!sirupsen/logrus
    """
    import re

    def _bang_lower(hit):
        # Prefix the lowercase form of the matched letter with '!'.
        return '!' + hit.group(1).lower()

    return re.sub(r'([A-Z])', _bang_lower, path)
| 3348 | |||
| 3349 | # ============================================================================= | ||
| 3350 | # Phase 1: Discovery | ||
| 3351 | # ============================================================================= | ||
| 3352 | |||
def parse_go_mod_requires(go_mod_path: Path) -> List[tuple]:
    """
    Collect every ``require`` entry (direct and indirect) from a go.mod file.

    Both forms are recognised: single-line ``require <module> <version>``
    statements and entries inside a ``require ( ... )`` block. Comment-only
    lines inside a block are ignored, and trailing ``// indirect`` markers are
    dropped because only the first two fields of an entry are used.

    Args:
        go_mod_path: Location of the go.mod file.

    Returns:
        List of ``(module_path, version)`` tuples in file order. A missing file
        yields an empty list after printing a warning. If reading fails partway,
        the error is printed and the entries gathered so far are returned.
    """
    requirements: List[tuple] = []

    if not go_mod_path.exists():
        print(f"Warning: go.mod not found at {go_mod_path}")
        return requirements

    inside_block = False

    try:
        with open(go_mod_path, 'r', encoding='utf-8') as handle:
            for raw_line in handle:
                text = raw_line.strip()

                if text.startswith('require ('):
                    # Opening of a multi-line require block
                    inside_block = True
                elif inside_block and text == ')':
                    # Closing of the current require block
                    inside_block = False
                elif text.startswith('require ') and '(' not in text:
                    # Single-line form: require <module> <version>
                    fields = text.split()
                    if len(fields) >= 3:
                        requirements.append((fields[1], fields[2]))
                elif inside_block and text and not text.startswith('//'):
                    # Block entry: "<module> <version>" with optional trailing comment
                    fields = text.split()
                    if len(fields) >= 2:
                        requirements.append((fields[0], fields[1]))
    except Exception as e:
        print(f"Error parsing go.mod: {e}")

    return requirements
| 3411 | |||
| 3412 | |||
def download_all_required_modules(source_dir: Path, gomodcache: Path) -> None:
    """
    Run ``go mod download`` for each requirement listed in ``source_dir/go.mod``.

    The requirements come from ``parse_go_mod_requires``. A module is skipped
    when ``<gomodcache>/cache/download/<escaped module>/@v/<escaped version>.info``
    already exists. Otherwise ``go mod download <module>@<version>`` is run in
    ``source_dir`` with ``GOMODCACHE`` pointed at ``gomodcache`` and ``GOPROXY``
    set to ``https://proxy.golang.org``, with a 30 second timeout per module.

    Failures never abort the loop: they are counted and, unless stderr contains
    "no matching versions", reported. A summary is printed at the end.

    Args:
        source_dir: Directory containing go.mod; also the working directory for Go.
        gomodcache: Module cache directory to populate.
    """
    go_mod_path = source_dir / "go.mod"
    rule = "=" * 70

    print("\n" + rule)
    print("DISCOVERY ENHANCEMENT: Downloading all required modules")
    print(rule)
    print(f"Parsing {go_mod_path}...")

    required_modules = parse_go_mod_requires(go_mod_path)
    if not required_modules:
        print("Warning: No modules found in go.mod")
        return

    print(f"Found {len(required_modules)} total modules in go.mod (direct + indirect)")

    # Environment for the Go tool: the caller's environment plus cache/proxy settings
    go_env = dict(
        os.environ,
        GOMODCACHE=str(gomodcache),
        GOPROXY='https://proxy.golang.org',
    )

    download_root = gomodcache / "cache" / "download"
    downloaded = 0
    already_cached = 0
    failed = 0

    for module_path, version in required_modules:
        # The cache stores module and version with uppercase letters !-escaped.
        info_path = (
            download_root
            / escape_module_path(module_path)
            / "@v"
            / f"{escape_module_path(version)}.info"
        )
        if info_path.exists():
            already_cached += 1
            continue

        # Download so that the .info file with VCS metadata gets written
        try:
            outcome = subprocess.run(
                ['go', 'mod', 'download', f'{module_path}@{version}'],
                cwd=source_dir,
                env=go_env,
                capture_output=True,
                text=True,
                timeout=30,
            )

            if outcome.returncode != 0:
                failed += 1
                if "no matching versions" not in outcome.stderr:
                    print(f" Warning: Failed to download {module_path}@{version}: {outcome.stderr.strip()[:100]}")
            else:
                downloaded += 1

        except subprocess.TimeoutExpired:
            failed += 1
            print(f" Warning: Timeout downloading {module_path}@{version}")
        except Exception as e:
            failed += 1
            print(f" Warning: Error downloading {module_path}@{version}: {e}")

    print(f"\nDownload results:")
    print(f" ✓ {downloaded} modules downloaded")
    print(f" ⊙ {already_cached} modules already cached")
    print(f" ✗ {failed} modules failed")
    print(f" → Total: {len(required_modules)} modules")
| 3488 | |||
| 3489 | |||
| 3490 | def discover_modules(source_dir: Path, gomodcache: Optional[str] = None) -> List[Dict]: | ||
| 3491 | """ | ||
| 3492 | Phase 1: Discovery | ||
| 3493 | |||
| 3494 | Let Go download modules to discover correct paths and metadata. | ||
| 3495 | This is ONLY for discovery - we build from git sources. | ||
| 3496 | |||
| 3497 | Returns list of modules with: | ||
| 3498 | - module_path: CORRECT path from filesystem (no /v3 stripping!) | ||
| 3499 | - version: Module version | ||
| 3500 | - vcs_url: Git repository URL | ||
| 3501 | - vcs_hash: Git commit hash | ||
| 3502 | - vcs_ref: Git reference (tag/branch) | ||
| 3503 | - timestamp: Commit timestamp | ||
| 3504 | - subdir: Subdirectory within repo (for submodules) | ||
| 3505 | """ | ||
| 3506 | global CURRENT_GOMODCACHE | ||
| 3507 | print("\n" + "=" * 70) | ||
| 3508 | print("PHASE 1: DISCOVERY - Using Go to discover module metadata") | ||
| 3509 | print("=" * 70) | ||
| 3510 | |||
| 3511 | # Create temporary or use provided GOMODCACHE | ||
| 3512 | if gomodcache: | ||
| 3513 | temp_cache = Path(gomodcache) | ||
| 3514 | print(f"Using existing GOMODCACHE: {temp_cache}") | ||
| 3515 | cleanup_cache = False | ||
| 3516 | else: | ||
| 3517 | temp_cache = Path(tempfile.mkdtemp(prefix="go-discover-")) | ||
| 3518 | print(f"Created temporary cache: {temp_cache}") | ||
| 3519 | cleanup_cache = True | ||
| 3520 | CURRENT_GOMODCACHE = str(temp_cache) | ||
| 3521 | |||
| 3522 | try: | ||
| 3523 | ensure_path_is_writable(temp_cache) | ||
| 3524 | |||
| 3525 | # Set up environment for Go | ||
| 3526 | env = os.environ.copy() | ||
| 3527 | env['GOMODCACHE'] = str(temp_cache) | ||
| 3528 | env['GOPROXY'] = 'https://proxy.golang.org' | ||
| 3529 | |||
| 3530 | print(f"\nDownloading modules to discover metadata...") | ||
| 3531 | print(f"Source: {source_dir}") | ||
| 3532 | |||
| 3533 | # Let Go download everything (initial discovery) | ||
| 3534 | result = subprocess.run( | ||
| 3535 | ['go', 'mod', 'download'], | ||
| 3536 | cwd=source_dir, | ||
| 3537 | env=env, | ||
| 3538 | capture_output=True, | ||
| 3539 | text=True | ||
| 3540 | ) | ||
| 3541 | |||
| 3542 | if result.returncode != 0: | ||
| 3543 | print(f"Warning: go mod download had errors:\n{result.stderr}") | ||
| 3544 | # Continue anyway - some modules may have been downloaded | ||
| 3545 | |||
| 3546 | # PRIORITY #2 FIX: Download ALL modules from go.mod (direct + indirect) | ||
| 3547 | # This replaces the need for fast-fix-module.py by ensuring all | ||
| 3548 | # transitive dependencies have .info files for discovery | ||
| 3549 | download_all_required_modules(source_dir, temp_cache) | ||
| 3550 | |||
| 3551 | # Walk filesystem to discover what Go created | ||
| 3552 | modules = [] | ||
| 3553 | download_dir = temp_cache / "cache" / "download" | ||
| 3554 | |||
| 3555 | if not download_dir.exists(): | ||
| 3556 | print(f"Error: Download directory not found: {download_dir}") | ||
| 3557 | return [] | ||
| 3558 | |||
| 3559 | print(f"\nScanning {download_dir} for modules...") | ||
| 3560 | |||
| 3561 | for dirpath, _, filenames in os.walk(download_dir): | ||
| 3562 | path_parts = Path(dirpath).relative_to(download_dir).parts | ||
| 3563 | |||
| 3564 | # Look for @v directories | ||
| 3565 | if not path_parts or path_parts[-1] != '@v': | ||
| 3566 | continue | ||
| 3567 | |||
| 3568 | # Module path is everything before @v | ||
| 3569 | module_path = '/'.join(path_parts[:-1]) | ||
| 3570 | module_path = unescape_module_path(module_path) # Unescape !-encoding | ||
| 3571 | |||
| 3572 | # Process each .info file | ||
| 3573 | for filename in filenames: | ||
| 3574 | if not filename.endswith('.info'): | ||
| 3575 | continue | ||
| 3576 | |||
| 3577 | version = filename[:-5] # Strip .info extension | ||
| 3578 | info_path = Path(dirpath) / filename | ||
| 3579 | |||
| 3580 | try: | ||
| 3581 | # Read metadata from .info file | ||
| 3582 | with open(info_path) as f: | ||
| 3583 | info = json.load(f) | ||
| 3584 | |||
| 3585 | # Extract VCS information | ||
| 3586 | origin = info.get('Origin', {}) | ||
| 3587 | vcs_url = origin.get('URL') | ||
| 3588 | vcs_hash = origin.get('Hash') | ||
| 3589 | vcs_ref = origin.get('Ref', '') | ||
| 3590 | subdir = origin.get('Subdir', '') | ||
| 3591 | |||
| 3592 | if not vcs_url or not vcs_hash: | ||
| 3593 | # Try to refresh cache entry and ask Go directly for metadata. | ||
| 3594 | go_mod_download(module_path, version) | ||
| 3595 | |||
| 3596 | # Reload .info in case go mod download updated it. | ||
| 3597 | try: | ||
| 3598 | with open(info_path) as f: | ||
| 3599 | info = json.load(f) | ||
| 3600 | origin = info.get('Origin', {}) | ||
| 3601 | vcs_url = origin.get('URL') | ||
| 3602 | vcs_hash = origin.get('Hash') | ||
| 3603 | vcs_ref = origin.get('Ref', '') | ||
| 3604 | subdir = origin.get('Subdir', '') | ||
| 3605 | except Exception: | ||
| 3606 | pass | ||
| 3607 | |||
| 3608 | if not vcs_url or not vcs_hash: | ||
| 3609 | go_info = query_module_via_go_list(module_path, version) | ||
| 3610 | if go_info: | ||
| 3611 | vcs_url = go_info.get('vcs_url') | ||
| 3612 | vcs_hash = go_info.get('commit') | ||
| 3613 | subdir = go_info.get('subdir', subdir) | ||
| 3614 | origin_time = go_info.get('timestamp', '') | ||
| 3615 | if origin_time: | ||
| 3616 | info['Time'] = origin_time | ||
| 3617 | |||
| 3618 | if not vcs_url or not vcs_hash: | ||
| 3619 | print(f" ⚠️ Skipping {module_path}@{version}: No VCS info") | ||
| 3620 | continue | ||
| 3621 | |||
| 3622 | overrides = repo_override_candidates(module_path, version) | ||
| 3623 | if overrides: | ||
| 3624 | vcs_url = overrides[0] | ||
| 3625 | |||
| 3626 | # BitBake requires full 40-character commit hashes | ||
| 3627 | if len(vcs_hash) != 40: | ||
| 3628 | print(f" ⚠️ Skipping {module_path}@{version}: Short commit hash ({vcs_hash})") | ||
| 3629 | continue | ||
| 3630 | |||
| 3631 | # PROACTIVE dangling commit detection and correction | ||
| 3632 | # Check if commit is BitBake-fetchable BEFORE expensive verification | ||
| 3633 | # BitBake's nobranch=1 requires commits to be branch/tag HEADs, not dangling commits | ||
| 3634 | if VERIFY_ENABLED and vcs_ref and vcs_ref.startswith("refs/"): | ||
| 3635 | if not is_commit_bitbake_fetchable(vcs_url, vcs_hash, vcs_ref): | ||
| 3636 | print(f" ⚠️ DANGLING COMMIT: {module_path}@{version} commit {vcs_hash[:12]} not a branch/tag HEAD") | ||
| 3637 | |||
| 3638 | # Try to correct by dereferencing the ref | ||
| 3639 | corrected_hash = correct_commit_hash_from_ref(vcs_url, vcs_hash, vcs_ref) | ||
| 3640 | if corrected_hash: | ||
| 3641 | print(f" ✓ Corrected hash by dereferencing {vcs_ref}: {vcs_hash[:12]} → {corrected_hash[:12]}") | ||
| 3642 | vcs_hash = corrected_hash | ||
| 3643 | else: | ||
| 3644 | print(f" ❌ Could not auto-correct dangling commit") | ||
| 3645 | # Continue anyway - verification will catch if it's truly unfetchable | ||
| 3646 | |||
| 3647 | # Validate commit exists in repository (detect force-pushed tags) | ||
| 3648 | # If verification is enabled, check that the commit from .info file | ||
| 3649 | # actually exists in the repository. If not, refresh from Go proxy. | ||
| 3650 | commit_verified = VERIFY_ENABLED and verify_commit_accessible(vcs_url, vcs_hash, vcs_ref, version, origin_time) | ||
| 3651 | |||
| 3652 | # Apply fallback commit if verification used one (for orphaned commits) | ||
| 3653 | if commit_verified and VERIFY_ENABLED: | ||
| 3654 | vcs_hash = get_actual_commit(vcs_url, vcs_hash) | ||
| 3655 | |||
| 3656 | if VERIFY_ENABLED and not commit_verified: | ||
| 3657 | print(f" ⚠️ STALE CACHE: {module_path}@{version} commit {vcs_hash[:12]} not found in {vcs_url}") | ||
| 3658 | |||
| 3659 | # Last resort: Try proxy refresh (this shouldn't happen if dangling check worked) | ||
| 3660 | corrected_hash = correct_commit_hash_from_ref(vcs_url, vcs_hash, vcs_ref) | ||
| 3661 | if corrected_hash: | ||
| 3662 | print(f" ✓ Corrected hash by dereferencing {vcs_ref}: {vcs_hash[:12]} → {corrected_hash[:12]}") | ||
| 3663 | vcs_hash = corrected_hash | ||
| 3664 | # Verify the corrected hash is accessible | ||
| 3665 | if verify_commit_accessible(vcs_url, vcs_hash, vcs_ref, version, origin_time): | ||
| 3666 | # Successfully corrected! Continue with this module (skip proxy refresh) | ||
| 3667 | commit_verified = True | ||
| 3668 | else: | ||
| 3669 | print(f" ❌ Even corrected commit not accessible") | ||
| 3670 | |||
| 3671 | # If still not verified after correction attempt, try proxy refresh | ||
| 3672 | if not commit_verified: | ||
| 3673 | # Check if module is actually needed before attempting refresh | ||
| 3674 | if not is_module_actually_needed(module_path, CURRENT_SOURCE_DIR): | ||
| 3675 | print(f" ℹ️ Module not needed by main module (indirect-only), skipping") | ||
| 3676 | print(f" (Verified via 'go mod why {module_path}')") | ||
| 3677 | continue | ||
| 3678 | |||
| 3679 | print(f" Attempting to refresh from Go proxy...") | ||
| 3680 | |||
| 3681 | # Delete stale .info file to force re-download | ||
| 3682 | try: | ||
| 3683 | info_path.unlink() | ||
| 3684 | print(f" Deleted stale .info file") | ||
| 3685 | except Exception as e: | ||
| 3686 | print(f" Warning: Could not delete .info file: {e}") | ||
| 3687 | |||
| 3688 | # Re-download from Go proxy to get current commit | ||
| 3689 | try: | ||
| 3690 | go_mod_download(module_path, version) | ||
| 3691 | |||
| 3692 | # Reload .info file with fresh data | ||
| 3693 | if info_path.exists(): | ||
| 3694 | with open(info_path) as f: | ||
| 3695 | info = json.load(f) | ||
| 3696 | origin = info.get('Origin', {}) | ||
| 3697 | new_vcs_hash = origin.get('Hash') | ||
| 3698 | |||
| 3699 | if new_vcs_hash and new_vcs_hash != vcs_hash: | ||
| 3700 | print(f" ✓ Refreshed: {vcs_hash[:12]} → {new_vcs_hash[:12]}") | ||
| 3701 | vcs_hash = new_vcs_hash | ||
| 3702 | vcs_ref = origin.get('Ref', vcs_ref) | ||
| 3703 | |||
| 3704 | # Verify new commit exists | ||
| 3705 | if not verify_commit_accessible(vcs_url, vcs_hash, vcs_ref, version, origin.get('Time', '')): | ||
| 3706 | print(f" ❌ Even refreshed commit not accessible") | ||
| 3707 | # Last resort: check if it's actually needed | ||
| 3708 | if not is_module_actually_needed(module_path, CURRENT_SOURCE_DIR): | ||
| 3709 | print(f" ℹ️ Module not needed anyway, skipping") | ||
| 3710 | continue | ||
| 3711 | else: | ||
| 3712 | print(f" ❌ Module IS needed but commit unavailable") | ||
| 3713 | print(f" This module cannot be built from git sources") | ||
| 3714 | continue | ||
| 3715 | else: | ||
| 3716 | print(f" ⚠️ Go proxy returned same commit (permanently deleted)") | ||
| 3717 | # Check if it's actually needed | ||
| 3718 | if not is_module_actually_needed(module_path, CURRENT_SOURCE_DIR): | ||
| 3719 | print(f" ℹ️ Module not needed by main module, skipping") | ||
| 3720 | continue | ||
| 3721 | else: | ||
| 3722 | print(f" ❌ Module IS needed but commit permanently deleted") | ||
| 3723 | print(f" Consider using gomod:// fetcher for this module") | ||
| 3724 | continue | ||
| 3725 | else: | ||
| 3726 | print(f" ❌ Re-download failed, skipping module") | ||
| 3727 | continue | ||
| 3728 | except Exception as e: | ||
| 3729 | print(f" ❌ Refresh failed: {e}") | ||
| 3730 | continue | ||
| 3731 | |||
| 3732 | DOWNLOADED_MODULES.add((module_path, version)) | ||
| 3733 | modules.append({ | ||
| 3734 | 'module_path': module_path, | ||
| 3735 | 'version': version, | ||
| 3736 | 'vcs_url': vcs_url, | ||
| 3737 | 'vcs_hash': vcs_hash, | ||
| 3738 | 'vcs_ref': vcs_ref, | ||
| 3739 | 'timestamp': info.get('Time', ''), | ||
| 3740 | 'subdir': subdir or '', | ||
| 3741 | }) | ||
| 3742 | |||
| 3743 | print(f" ✓ {module_path}@{version}") | ||
| 3744 | |||
| 3745 | except Exception as e: | ||
| 3746 | print(f" ✗ Error processing {info_path}: {e}") | ||
| 3747 | continue | ||
| 3748 | |||
| 3749 | print(f"\nDiscovered {len(modules)} modules with VCS info") | ||
| 3750 | |||
| 3751 | # FIX: Synthesize entries for +incompatible versions that lack VCS data | ||
| 3752 | # These are pre-v2 versions of modules that later adopted semantic import versioning (/v2, /v3, etc.) | ||
| 3753 | # The GOMODCACHE has .info files for them but without Origin data (old proxy cache) | ||
| 3754 | # Strategy: For each versioned module path (e.g., foo/v3), check if a base path version | ||
| 3755 | # with +incompatible exists in GOMODCACHE and lacks VCS data. If so, synthesize an entry. | ||
| 3756 | # | ||
| 3757 | # NOTE (2025-11-28): This code overlaps with Fix #29 in extract-native-modules.py, which | ||
| 3758 | # now uses derive_vcs_info() to handle +incompatible modules at discovery time. Fix #29 | ||
| 3759 | # is more complete because it handles ALL +incompatible modules directly from their path, | ||
| 3760 | # not just those with a corresponding /vN version. This code is kept as a fallback for | ||
| 3761 | # cases where extract-native-modules.py wasn't used (e.g., legacy workflows). | ||
| 3762 | print("\nSynthesizing entries for +incompatible versions without VCS data...") | ||
| 3763 | synthesized_count = 0 | ||
| 3764 | |||
| 3765 | # Build a map of module_path -> vcs_url for discovered modules | ||
| 3766 | module_vcs_map: Dict[str, str] = {} | ||
| 3767 | for mod in modules: | ||
| 3768 | module_vcs_map[mod['module_path']] = mod['vcs_url'] | ||
| 3769 | |||
| 3770 | # For each module with a versioned path suffix (/v2, /v3, etc.), check for base path incompatible versions | ||
| 3771 | for mod in list(modules): # Iterate over copy since we'll append to modules | ||
| 3772 | module_path = mod['module_path'] | ||
| 3773 | vcs_url = mod['vcs_url'] | ||
| 3774 | |||
| 3775 | # Check if this module has a version suffix (/v2, /v3, etc.) | ||
| 3776 | version_match = re.search(r'/v(\d+)$', module_path) | ||
| 3777 | if not version_match: | ||
| 3778 | continue | ||
| 3779 | |||
| 3780 | # Extract base path (without /vN suffix) | ||
| 3781 | base_path = module_path[:module_path.rfind('/v')] | ||
| 3782 | |||
| 3783 | # Check if we already discovered the base path | ||
| 3784 | if base_path in module_vcs_map: | ||
| 3785 | continue # Base path already has VCS data, no synthesis needed | ||
| 3786 | |||
| 3787 | # Look for +incompatible versions of the base path in GOMODCACHE | ||
| 3788 | # Note: GOMODCACHE uses raw paths as directory names (not escaped) | ||
| 3789 | base_path_dir = download_dir / base_path / '@v' | ||
| 3790 | |||
| 3791 | if not base_path_dir.exists(): | ||
| 3792 | continue | ||
| 3793 | |||
| 3794 | # Scan for .info files with +incompatible versions | ||
| 3795 | for info_file in base_path_dir.glob('*.info'): | ||
| 3796 | version = info_file.stem | ||
| 3797 | |||
| 3798 | if not version.endswith('+incompatible'): | ||
| 3799 | continue | ||
| 3800 | |||
| 3801 | # Read the .info file to check if it lacks VCS data | ||
| 3802 | try: | ||
| 3803 | with open(info_file) as f: | ||
| 3804 | info = json.load(f) | ||
| 3805 | |||
| 3806 | # If it already has Origin data, skip it | ||
| 3807 | if 'Origin' in info and info['Origin'].get('URL') and info['Origin'].get('Hash'): | ||
| 3808 | continue | ||
| 3809 | |||
| 3810 | # This +incompatible version lacks VCS data - synthesize an entry | ||
| 3811 | # Extract the tag name from version (e.g., v2.16.0+incompatible -> v2.16.0) | ||
| 3812 | tag_version = version.replace('+incompatible', '') | ||
| 3813 | tag_ref = f"refs/tags/{tag_version}" | ||
| 3814 | |||
| 3815 | # Use git ls-remote to find the commit for this tag | ||
| 3816 | tag_commit = git_ls_remote(vcs_url, tag_ref) | ||
| 3817 | |||
| 3818 | if not tag_commit: | ||
| 3819 | print(f" ⚠️ Could not find tag {tag_ref} for {base_path}@{version}") | ||
| 3820 | continue | ||
| 3821 | |||
| 3822 | # Synthesize a module entry using data from the versioned path | ||
| 3823 | synthesized_module = { | ||
| 3824 | 'module_path': base_path, # Use BASE path (without /vN) | ||
| 3825 | 'version': version, | ||
| 3826 | 'vcs_url': vcs_url, | ||
| 3827 | 'vcs_hash': tag_commit, | ||
| 3828 | 'vcs_ref': tag_ref, | ||
| 3829 | 'timestamp': info.get('Time', ''), | ||
| 3830 | 'subdir': '', | ||
| 3831 | } | ||
| 3832 | |||
| 3833 | modules.append(synthesized_module) | ||
| 3834 | module_vcs_map[base_path] = vcs_url # Prevent duplicate synthesis | ||
| 3835 | synthesized_count += 1 | ||
| 3836 | |||
| 3837 | print(f" ✓ Synthesized {base_path}@{version} (from {module_path} VCS data)") | ||
| 3838 | print(f" VCS: {vcs_url}") | ||
| 3839 | print(f" Commit: {tag_commit[:12]} (tag {tag_version})") | ||
| 3840 | |||
| 3841 | except Exception as e: | ||
| 3842 | print(f" ⚠️ Error synthesizing {base_path}@{version}: {e}") | ||
| 3843 | continue | ||
| 3844 | |||
| 3845 | if synthesized_count > 0: | ||
| 3846 | print(f"\nSynthesized {synthesized_count} +incompatible module entries") | ||
| 3847 | else: | ||
| 3848 | print("No +incompatible versions needed synthesis") | ||
| 3849 | |||
| 3850 | print(f"\nTotal modules after synthesis: {len(modules)}") | ||
| 3851 | return modules | ||
| 3852 | |||
| 3853 | finally: | ||
| 3854 | # Defer cleanup of temporary caches until the end of execution | ||
| 3855 | if cleanup_cache and temp_cache.exists(): | ||
| 3856 | TEMP_GOMODCACHES.append(temp_cache) | ||
| 3857 | |||
| 3858 | # ============================================================================= | ||
| 3859 | # Phase 2: Recipe Generation | ||
| 3860 | # ============================================================================= | ||
| 3861 | |||
def generate_recipe(modules: List[Dict], source_dir: Path, output_dir: Optional[Path],
                    git_repo: str, git_ref: str, validate_only: bool = False,
                    debug_limit: Optional[int] = None, skip_verify: bool = False,
                    verify_jobs: int = 10) -> bool:
    """
    Phase 2: Recipe Generation

    Generate BitBake recipe with git:// SRC_URI entries.
    No file:// entries - we'll build cache from git during do_create_module_cache.

    Creates:
    - go-mod-git.inc: SRC_URI with git:// entries
    - go-mod-cache.inc: BitBake task to build module cache

    Args:
        modules: Module dicts; each must provide 'module_path', 'version',
            'vcs_url' and 'vcs_hash'. 'vcs_ref', 'timestamp' and 'subdir' are
            optional. The dicts are mutated in place: 'repo_key' and
            'commit_sha' are added, and 'vcs_hash' is rewritten when
            verification substitutes a different commit.
        source_dir: Not referenced by this function's body.
        output_dir: Directory that receives the two .inc files. May be None
            only when validate_only is set.
        git_repo: Not referenced by this function's body.
        git_ref: Not referenced by this function's body.
        validate_only: Stop after commit verification; write no files.
        debug_limit: Only reported in a log line here; this function does not
            truncate the module list itself.
        skip_verify: Skip the commit verification pass entirely.
        verify_jobs: Thread count for verification; values <= 0 select the
            sequential path.

    Returns:
        True when validation/generation succeeded; False when at least one
        commit could not be verified or output_dir is missing for generation.
    """
    global VERIFY_CORRECTIONS_APPLIED

    print("\n" + "=" * 70)
    phase_label = "VALIDATION" if validate_only else "RECIPE GENERATION"
    print(f"PHASE 2: {phase_label} - {('commit verification' if validate_only else 'Creating BitBake recipe files')}")
    print("=" * 70)

    src_uri_entries = []
    modules_data = []
    vcs_repos: Dict[str, Dict] = {}

    def repo_key_for_url(url: str) -> str:
        """Return the SHA-256 hex digest that identifies a repository URL."""
        return hashlib.sha256(f"git3:{url}".encode()).hexdigest()

    def commit_cache_key(repo_key: str, commit: str) -> str:
        """Return the SHA-256 hex digest that names a repo+commit checkout."""
        return hashlib.sha256(f"{repo_key}:{commit}".encode()).hexdigest()

    total_modules = len(modules)
    if debug_limit is not None:
        print(f"\n⚙️ Debug limit active: validating first {debug_limit} modules (total list size {total_modules})")

    if skip_verify:
        print(f"\n⚙️ Skipping verification (--skip-verify enabled)")

    # First pass: Build repo structure without verification.
    # vcs_repos maps repo_key -> {'url', 'commits': {commit hash -> metadata}}.
    for module in modules:
        vcs_url = module['vcs_url']
        commit_hash = module['vcs_hash']

        repo_key = repo_key_for_url(vcs_url)
        repo_info = vcs_repos.setdefault(
            repo_key,
            {
                'url': vcs_url,
                'commits': {},  # commit hash -> commit metadata
            },
        )

        if commit_hash not in repo_info['commits']:
            commit_sha = commit_cache_key(repo_key, commit_hash)
            repo_info['commits'][commit_hash] = {
                'commit_sha': commit_sha,
                'modules': [],
            }
        else:
            commit_sha = repo_info['commits'][commit_hash]['commit_sha']

        # Only keep the ref as a fetch hint when it really resolves to this commit.
        ref_hint = module.get('vcs_ref', '')
        if ref_hint and not _ref_points_to_commit(vcs_url, ref_hint, commit_hash):
            ref_hint = ''

        entry = repo_info['commits'][commit_hash]
        entry['modules'].append(module)
        if ref_hint:
            entry['ref_hint'] = ref_hint

        module['repo_key'] = repo_key
        module['commit_sha'] = commit_sha

    # Second pass: Verify commits (parallel or sequential) with auto-correction
    # PHASE MERGE: This now includes force-pushed tag detection and auto-correction
    if not skip_verify:
        print(f"\n⚙️ Verifying {total_modules} commits with {verify_jobs} parallel jobs")

        def verify_module(module_info):
            """Verify one (index, module) pair.

            Returns None when the commit verified unchanged, a 5-tuple tagged
            'corrected' when the hash was replaced, or a 6-tuple tagged
            'failed' when the commit could not be verified.
            """
            index, module = module_info
            vcs_url = module['vcs_url']
            commit_hash = module['vcs_hash']
            ref_hint = module.get('vcs_ref', '')

            print(f" • verifying [{index}/{total_modules}] {module['module_path']}@{module['version']} -> {commit_hash[:12]}")

            # Verify commit is accessible
            if not verify_commit_accessible(vcs_url, commit_hash, ref_hint, module.get('version', ''), module.get('timestamp', '')):
                # PHASE MERGE: If verification fails and we have a ref, try auto-correction
                if ref_hint and ref_hint.startswith("refs/"):
                    corrected_hash = correct_commit_hash_from_ref(vcs_url, commit_hash, ref_hint)
                    if corrected_hash and corrected_hash != commit_hash:
                        print(f" ✓ Auto-corrected: {commit_hash[:12]} → {corrected_hash[:12]} (force-pushed tag)")
                        module['vcs_hash'] = corrected_hash

                        # Update repo_info dict to use the new hash as key.
                        # NOTE(review): this runs on worker threads without a lock.
                        repo_key = module['repo_key']
                        if commit_hash in vcs_repos[repo_key]['commits']:
                            # Move the entry from old hash to new hash
                            vcs_repos[repo_key]['commits'][corrected_hash] = vcs_repos[repo_key]['commits'].pop(commit_hash)

                        return ('corrected', module['module_path'], module['version'], commit_hash, corrected_hash)
                    else:
                        # Could not correct - treat as failure
                        return ('failed', module['module_path'], module['version'], commit_hash, vcs_url, ref_hint)
                else:
                    # No ref to dereference - genuine failure
                    return ('failed', module['module_path'], module['version'], commit_hash, vcs_url, ref_hint)
            else:
                # Verification succeeded - apply fallback commit if one was used
                actual_hash = get_actual_commit(vcs_url, commit_hash)
                if actual_hash != commit_hash:
                    print(f" ✓ Applied fallback: {commit_hash[:12]} → {actual_hash[:12]} (orphaned commit)")
                    module['vcs_hash'] = actual_hash

                    # Update repo_info dict to use the new hash as key
                    repo_key = module['repo_key']
                    if commit_hash in vcs_repos[repo_key]['commits']:
                        # Move the entry from old hash to new hash
                        vcs_repos[repo_key]['commits'][actual_hash] = vcs_repos[repo_key]['commits'].pop(commit_hash)

                    return ('corrected', module['module_path'], module['version'], commit_hash, actual_hash)
            return None

        if verify_jobs > 0:
            # Parallel verification
            with concurrent.futures.ThreadPoolExecutor(max_workers=verify_jobs) as executor:
                results = list(executor.map(verify_module, enumerate(modules, start=1)))
        else:
            # Sequential verification (--verify-jobs=0)
            results = []
            for index, module in enumerate(modules, start=1):
                result = verify_module((index, module))
                if result is not None:
                    results.append(result)

                # Save verification cache every 50 modules
                if index % 50 == 0:
                    save_verify_commit_cache(force=True)
                    print(f" 💾 Saved verification cache at {index}/{total_modules}")

        # Separate corrected vs failed results (parallel results may contain None)
        corrected_results = [r for r in results if r and r[0] == 'corrected']
        failed_results = [r for r in results if r and r[0] == 'failed']

        # Apply corrections back to modules list (needed for parallel execution)
        if corrected_results:
            VERIFY_CORRECTIONS_APPLIED = True
            print(f"\n✓ Auto-corrected {len(corrected_results)} force-pushed tags:")
            for _, module_path, version, old_hash, new_hash in corrected_results:
                print(f" • {module_path}@{version}: {old_hash[:12]} → {new_hash[:12]}")

                # Find and update the module in the main list
                for module in modules:
                    if module['module_path'] == module_path and module['version'] == version:
                        module['vcs_hash'] = new_hash

                        # Also update the vcs_repos dict
                        repo_key = module['repo_key']
                        if old_hash in vcs_repos[repo_key]['commits']:
                            vcs_repos[repo_key]['commits'][new_hash] = vcs_repos[repo_key]['commits'].pop(old_hash)
                        break
    else:
        # Verification skipped - no failed results
        failed_results = []

    print(f"\nFound {len(vcs_repos)} unique git repositories")
    print(f"Supporting {len(modules)} modules")

    if failed_results:
        print("\n❌ Unable to verify the following module commits against their repositories:")
        for _, module_path, version, commit_hash, vcs_url, ref_hint in failed_results:
            print(f" - {module_path}@{version} ({commit_hash})")
            hint = f" {ref_hint}" if ref_hint else ""
            print(f" try: git fetch --depth=1 {vcs_url}{hint} {commit_hash}")
            print(f" cache: mark reachable via --inject-commit '{vcs_url} {commit_hash}'")
            print(f" repo : override via --set-repo {module_path}@{version} {vcs_url}")
        print("Aborting to prevent emitting invalid SRCREVs.")
        return False

    if validate_only:
        print("\n✅ Validation complete - all commits are reachable upstream")
        return True

    if output_dir is None:
        print("❌ Internal error: output directory missing for recipe generation")
        return False

    # Generate SRC_URI entries for each repo/commit combination
    for repo_key, repo_info in vcs_repos.items():
        git_url = repo_info['url']

        # The emitted URL always uses the git:// scheme; the real transport is
        # carried by the ;protocol= parameter.
        if git_url.startswith('https://'):
            git_url_bb = 'git://' + git_url[8:]
            protocol = 'https'
        elif git_url.startswith('http://'):
            git_url_bb = 'git://' + git_url[7:]
            protocol = 'http'
        else:
            git_url_bb = git_url
            protocol = 'https'

        # Sorting by commit hash keeps the fetch_name numbering deterministic.
        for idx, (commit_hash, commit_info) in enumerate(sorted(repo_info['commits'].items())):
            fetch_name = f"git_{repo_key[:8]}_{idx}"
            destsuffix = f"vcs_cache/{commit_info['commit_sha']}"

            # Use branch name from ref_hint when available (more reliable than nobranch=1)
            # ref_hint is like "refs/tags/v1.9.3" or "refs/heads/main"
            ref_hint = commit_info.get('ref_hint', '')
            if ref_hint:
                shallow_param = ';shallow=1'
                # For tags, use nobranch=1 since the commit may not be on a branch head
                # For branches, use the branch name directly
                if ref_hint.startswith('refs/tags/'):
                    # Tags: BitBake can fetch tagged commits with nobranch=1
                    branch_param = ';nobranch=1'
                elif ref_hint.startswith('refs/heads/'):
                    # Branches: use the actual branch name
                    branch_name = ref_hint[11:]  # Strip "refs/heads/"
                    branch_param = f';branch={branch_name}'
                else:
                    branch_param = ';nobranch=1'
            else:
                # For pseudo-versions (no ref_hint), check if we detected a branch
                detected_branch = VERIFY_DETECTED_BRANCHES.get((git_url, commit_hash))
                if detected_branch:
                    # Use the detected branch name instead of nobranch=1
                    shallow_param = ''
                    branch_param = f';branch={detected_branch}'
                    print(f" Using detected branch: {detected_branch} for {commit_hash[:12]}")
                else:
                    # No ref and no detected branch - use nobranch=1
                    # This should only happen for genuine orphaned commits that couldn't be fixed
                    shallow_param = ''
                    branch_param = ';nobranch=1'

            src_uri_entries.append(
                f'{git_url_bb};protocol={protocol}{branch_param}{shallow_param};'
                f'rev={commit_hash};'
                f'name={fetch_name};'
                f'destsuffix={destsuffix}'
            )

            commit_info['fetch_name'] = fetch_name
            commit_info['destsuffix'] = destsuffix

            if len(repo_info['commits']) == 1:
                print(f" {fetch_name}: {repo_info['url'][:60]}...")
            else:
                print(f" {fetch_name}: {repo_info['url'][:60]}... (commit {commit_hash[:12]})")

    # Prepare modules data for do_create_module_cache
    for module in modules:
        repo_key = module['repo_key']
        commit_hash = module['vcs_hash']
        commit_info = vcs_repos[repo_key]['commits'][commit_hash]

        # 'timestamp' is optional in module dicts (the rest of this function
        # already reads it with .get), so never index it directly.
        timestamp = module.get('timestamp', '')

        update_metadata_cache(
            module['module_path'],
            module['version'],
            module['vcs_url'],
            module['vcs_hash'],
            timestamp,
            module.get('subdir', ''),
            module.get('vcs_ref', ''),
            dirty=True,
        )

        # DEBUG: Track server/v3 module
        if 'server/v3' in module['module_path']:
            print(f"\n🔍 DEBUG server/v3: Adding to modules_data")
            print(f" module_path: {module['module_path']}")
            print(f" subdir: '{module.get('subdir', '')}' (from module dict)")
            print(f" timestamp: {timestamp}")
            print(f" vcs_hash: {module['vcs_hash']}")

        # Note: the emitted 'vcs_hash' is the commit_sha cache key (the
        # vcs_cache/ directory name), not the git commit hash.
        modules_data.append({
            'module': module['module_path'],
            'version': module['version'],
            'vcs_hash': commit_info['commit_sha'],
            'timestamp': timestamp,
            'subdir': module.get('subdir', ''),
            'vcs_ref': module.get('vcs_ref', ''),
        })

    # Write go-mod-git.inc
    git_inc_path = output_dir / "go-mod-git.inc"
    print(f"\nWriting {git_inc_path}")

    with open(git_inc_path, 'w') as f:
        f.write("# Generated by oe-go-mod-fetcher.py v" + VERSION + "\n")
        f.write("# Git repositories for Go module dependencies\n\n")
        for entry in src_uri_entries:
            f.write(f'SRC_URI += "{entry}"\n')
        f.write('\n')

        # Collect all tag references for shallow cloning
        # BB_GIT_SHALLOW_EXTRA_REFS ensures these refs are included in shallow clones
        tag_refs = set()
        for module in modules:
            vcs_ref = module.get('vcs_ref', '')
            if vcs_ref and 'refs/tags/' in vcs_ref:
                tag_refs.add(vcs_ref)

        if tag_refs:
            f.write("# Tag references for shallow cloning\n")
            f.write("# Ensures shallow clones include all necessary tags\n")
            f.write("BB_GIT_SHALLOW_EXTRA_REFS = \"\\\n")
            for tag_ref in sorted(tag_refs):
                f.write(f" {tag_ref} \\\n")
            f.write('"\n')

        # Note: SRCREV_* variables are not needed since rev= is embedded directly in SRC_URI

    # Write go-mod-cache.inc
    cache_inc_path = output_dir / "go-mod-cache.inc"
    print(f"Writing {cache_inc_path}")

    with open(cache_inc_path, 'w') as f:
        f.write("# Generated by oe-go-mod-fetcher.py v" + VERSION + "\n")
        f.write("# Module cache data for Go dependencies\n")
        f.write("#\n")
        f.write("# This file contains recipe-specific module metadata.\n")
        f.write("# The task implementations are in go-mod-vcs.bbclass.\n\n")

        # Inherit the bbclass that provides the task implementations
        f.write("inherit go-mod-vcs\n\n")

        # Write modules data as JSON - one module per line for readability
        f.write("# Module metadata for cache building (one module per line)\n")
        f.write("GO_MODULE_CACHE_DATA = '[\\\n")
        for i, mod in enumerate(modules_data):
            line = json.dumps(mod, separators=(',', ':'))
            if i < len(modules_data) - 1:
                f.write(f"{line},\\\n")
            else:
                f.write(f"{line}\\\n")
        f.write("]'\n")

    print(f"\n✅ Generated recipe files:")
    print(f" {git_inc_path}")
    print(f" {cache_inc_path}")
    print(f"\nTo use these files, add to your recipe:")
    print(f" require go-mod-git.inc")
    print(f" require go-mod-cache.inc")

    return True
| 4211 | |||
| 4212 | # ============================================================================= | ||
| 4213 | # Discovered Module Loading (Bootstrap Strategy) | ||
| 4214 | # ============================================================================= | ||
| 4215 | |||
def load_discovered_modules(discovered_modules_path: Path) -> Optional[List[Dict]]:
    """
    Read the module list produced by a BitBake discovery build.

    The file must hold a JSON list of dicts, each carrying at least
    'module_path', 'version', 'vcs_url' and 'vcs_hash'. Entries whose
    'vcs_hash' is exactly 12 characters long are treated as abbreviated
    pseudo-version hashes and are expanded in place to 40-character hashes
    where that is possible; entries that cannot be expanded keep their
    short hash and are only counted as failures.

    Args:
        discovered_modules_path: Path to JSON file with module metadata

    Returns:
        The list of module dicts, or None when the file is missing,
        unparsable, or does not have the expected structure.
    """
    if not discovered_modules_path.exists():
        print(f"❌ Discovered modules file not found: {discovered_modules_path}")
        return None

    try:
        with open(discovered_modules_path) as handle:
            modules = json.load(handle)

        if not isinstance(modules, list):
            print(f"❌ Invalid discovered modules file format (expected list, got {type(modules).__name__})")
            return None

        print(f"✓ Loaded {len(modules)} modules from discovery metadata")
        print(f" File: {discovered_modules_path}")

        # Structural validation: every entry is a dict with the mandatory keys.
        mandatory = ('module_path', 'version', 'vcs_url', 'vcs_hash')
        for position, entry in enumerate(modules):
            if not isinstance(entry, dict):
                print(f"❌ Module {position} is not a dict: {entry}")
                return None
            # Report the first mandatory key (in declaration order) that is absent.
            absent = next((name for name in mandatory if name not in entry), None)
            if absent is not None:
                print(f"❌ Module {position} missing required field '{absent}': {entry.get('module_path', '<unknown>')}")
                return None

        # Summary statistics
        repo_total = len({entry['vcs_url'] for entry in modules})
        subdir_total = len([entry for entry in modules if entry.get('subdir')])

        print(f"\nDiscovery metadata summary:")
        print(f" Modules: {len(modules)}")
        print(f" Unique repositories: {repo_total}")
        print(f" Multi-module repos: {subdir_total} modules have subdirs")

        # Pseudo-versions such as v0.0.0-20161002113705-648efa622239 embed only
        # the first 12 characters of the commit hash, while BitBake's git
        # fetcher needs the full 40-character form.
        pending = [entry for entry in modules if len(entry.get('vcs_hash', '')) == 12]
        if pending:
            pending_total = len(pending)
            print(f"\n⚙️ Expanding {pending_total} short hashes to full 40-char...")
            expanded = 0
            failed = 0
            for ordinal, entry in enumerate(pending, start=1):
                # Progress is shown for the first entry and every 20th after it.
                if ordinal % 20 == 0 or ordinal == 1:
                    print(f" Progress: {ordinal}/{pending_total}...", end='\r', flush=True)

                version = entry.get('version', '')
                vcs_url = entry['vcs_url']
                short_hash = entry['vcs_hash']

                # The pseudo-version (minus any +suffix) supplies the commit timestamp.
                pseudo_info = parse_pseudo_version_tag(version.split('+')[0])
                if not pseudo_info:
                    failed += 1
                    continue

                timestamp_str, _ = pseudo_info
                full_hash = resolve_pseudo_version_commit(
                    vcs_url, timestamp_str, short_hash,
                    clone_cache_dir=CLONE_CACHE_DIR
                )
                if full_hash and len(full_hash) == 40:
                    entry['vcs_hash'] = full_hash
                    expanded += 1
                    continue

                failed += 1
                if VERBOSE_MODE:
                    print(f"\n ⚠️ Could not expand: {entry['module_path']}@{version}")

            print(f" Expanded {expanded} short hashes, {failed} failed ")

        return modules

    except json.JSONDecodeError as e:
        print(f"❌ Failed to parse discovered modules JSON: {e}")
        return None
    except Exception as e:
        print(f"❌ Error loading discovered modules: {e}")
        return None
| 4309 | |||
| 4310 | # ============================================================================= | ||
| 4311 | # Main Entry Point | ||
| 4312 | # ============================================================================= | ||
| 4313 | |||
| 4314 | def main(): | ||
| 4315 | global LOG_PATH, CURRENT_GOMODCACHE | ||
| 4316 | parser = argparse.ArgumentParser( | ||
| 4317 | description=f"Generate BitBake recipes for Go modules using hybrid approach (v{VERSION})", | ||
| 4318 | epilog=""" | ||
| 4319 | This tool uses a 3-phase hybrid approach: | ||
| 4320 | 1. Discovery: Run 'go mod download' to get correct module paths | ||
| 4321 | 2. Recipe Generation: Create git:// SRC_URI entries for BitBake | ||
| 4322 | 3. Cache Building: Build module cache from git during do_create_module_cache | ||
| 4323 | |||
| 4324 | Persistent Caches: | ||
| 4325 | The generator maintains caches in the data/ subdirectory: | ||
| 4326 | - data/module-cache.json: Module metadata (VCS URL, timestamp, subdir, etc.) | ||
| 4327 | - data/ls-remote-cache.json: Git ls-remote results | ||
| 4328 | - data/vanity-url-cache.json: Vanity import path resolution | ||
| 4329 | - data/verify-cache.json: Commit verification status | ||
| 4330 | |||
| 4331 | These caches speed up regeneration but may need cleaning when: | ||
| 4332 | - Derivation logic changes (e.g., subdir calculation fixes) | ||
| 4333 | - Cached data becomes stale or incorrect | ||
| 4334 | |||
| 4335 | Use --clean-cache to remove metadata cache before regeneration. | ||
| 4336 | Use --clean-ls-remote-cache to remove both caches (slower, but fully fresh). | ||
| 4337 | |||
| 4338 | Examples: | ||
| 4339 | # Normal regeneration (fast, uses caches) | ||
| 4340 | %(prog)s --recipedir /path/to/recipe/output | ||
| 4341 | |||
| 4342 | # Clean metadata cache (e.g., after fixing subdir derivation) | ||
| 4343 | %(prog)s --recipedir /path/to/recipe/output --clean-cache | ||
| 4344 | |||
| 4345 | # Fully clean regeneration (slow, calls git ls-remote for everything) | ||
| 4346 | %(prog)s --recipedir /path/to/recipe/output --clean-ls-remote-cache | ||
| 4347 | """, | ||
| 4348 | formatter_class=argparse.RawDescriptionHelpFormatter | ||
| 4349 | ) | ||
| 4350 | |||
| 4351 | parser.add_argument( | ||
| 4352 | "--recipedir", | ||
| 4353 | help="Output directory for generated .inc files (required unless running with --validate/--dry-run/--clean-only)" | ||
| 4354 | ) | ||
| 4355 | |||
| 4356 | parser.add_argument( | ||
| 4357 | "--gomodcache", | ||
| 4358 | help="Directory to use for Go module cache (for discovery phase)" | ||
| 4359 | ) | ||
| 4360 | |||
| 4361 | parser.add_argument( | ||
| 4362 | "--cache-dir", | ||
| 4363 | help="Directory to store JSON metadata caches (default: scripts/data)" | ||
| 4364 | ) | ||
| 4365 | |||
| 4366 | parser.add_argument( | ||
| 4367 | "--clone-cache-dir", | ||
| 4368 | help="Directory to cache cloned git repositories (default: scripts/.cache/repos)" | ||
| 4369 | ) | ||
| 4370 | |||
| 4371 | parser.add_argument( | ||
| 4372 | "--source-dir", | ||
| 4373 | help="Source directory containing go.mod (default: current directory)" | ||
| 4374 | ) | ||
| 4375 | |||
| 4376 | parser.add_argument( | ||
| 4377 | "--git-repo", | ||
| 4378 | help="Git repository URL (for documentation purposes)" | ||
| 4379 | ) | ||
| 4380 | |||
| 4381 | parser.add_argument( | ||
| 4382 | "--git-ref", | ||
| 4383 | help="Git reference (for documentation purposes)" | ||
| 4384 | ) | ||
| 4385 | |||
| 4386 | parser.add_argument( | ||
| 4387 | "-v", "--verbose", | ||
| 4388 | action="store_true", | ||
| 4389 | help="Verbose output" | ||
| 4390 | ) | ||
| 4391 | |||
| 4392 | parser.add_argument( | ||
| 4393 | "--clean-cache", | ||
| 4394 | action="store_true", | ||
| 4395 | help="Clear metadata cache before regeneration (useful when derivation logic changes)" | ||
| 4396 | ) | ||
| 4397 | |||
| 4398 | parser.add_argument( | ||
| 4399 | "--clean-ls-remote-cache", | ||
| 4400 | action="store_true", | ||
| 4401 | help="Clear git ls-remote cache in addition to metadata cache (implies --clean-cache)" | ||
| 4402 | ) | ||
| 4403 | |||
| 4404 | parser.add_argument( | ||
| 4405 | "--skip-legacy-module-cache", | ||
| 4406 | action="store_true", | ||
| 4407 | help="Skip importing legacy module metadata from module_cache_task.inc" | ||
| 4408 | ) | ||
| 4409 | |||
| 4410 | parser.add_argument( | ||
| 4411 | "--dry-run", | ||
| 4412 | action="store_true", | ||
| 4413 | help="Execute cache mutations without discovery/generation" | ||
| 4414 | ) | ||
| 4415 | |||
| 4416 | parser.add_argument( | ||
| 4417 | "--clean-gomodcache", | ||
| 4418 | action="store_true", | ||
| 4419 | help="Clean stale .info files in GOMODCACHE that lack VCS metadata (fixes 'module lookup disabled' errors)" | ||
| 4420 | ) | ||
| 4421 | |||
| 4422 | parser.add_argument( | ||
| 4423 | "--validate", | ||
| 4424 | action="store_true", | ||
| 4425 | help="Validate module commits without emitting recipe files" | ||
| 4426 | ) | ||
| 4427 | |||
| 4428 | parser.add_argument( | ||
| 4429 | "--validate-only", | ||
| 4430 | action="store_true", | ||
| 4431 | help=argparse.SUPPRESS | ||
| 4432 | ) | ||
| 4433 | |||
| 4434 | parser.add_argument( | ||
| 4435 | "--skip-verify", | ||
| 4436 | action="store_true", | ||
| 4437 | help="Skip commit verification (trust cached verify results, much faster)" | ||
| 4438 | ) | ||
| 4439 | |||
| 4440 | parser.add_argument( | ||
| 4441 | "--verify-jobs", | ||
| 4442 | type=int, | ||
| 4443 | default=10, | ||
| 4444 | metavar="N", | ||
| 4445 | help="Number of parallel verification jobs (default: 10, 0=sequential)" | ||
| 4446 | ) | ||
| 4447 | |||
| 4448 | parser.add_argument( | ||
| 4449 | "--verify-cached", | ||
| 4450 | action="store_true", | ||
| 4451 | help="Verify commits in GOMODCACHE .info files still exist in repositories (detects force-pushed tags)" | ||
| 4452 | ) | ||
| 4453 | |||
| 4454 | parser.add_argument( | ||
| 4455 | "--verify-cache-max-age", | ||
| 4456 | type=int, | ||
| 4457 | default=30, | ||
| 4458 | metavar="DAYS", | ||
| 4459 | help="Re-verify cached commits older than this many days (default: 30, 0=always verify)" | ||
| 4460 | ) | ||
| 4461 | |||
| 4462 | parser.add_argument( | ||
| 4463 | "--debug-limit", | ||
| 4464 | type=int, | ||
| 4465 | help="Process at most N modules during validation/generation (debug only)" | ||
| 4466 | ) | ||
| 4467 | |||
| 4468 | parser.add_argument( | ||
| 4469 | "--inject-commit", | ||
| 4470 | metavar=("REPO", "COMMIT"), | ||
| 4471 | nargs=2, | ||
| 4472 | action="append", | ||
| 4473 | help="Mark a repo+commit pair as already verified (skips network check)" | ||
| 4474 | ) | ||
| 4475 | |||
| 4476 | parser.add_argument( | ||
| 4477 | "--clear-commit", | ||
| 4478 | metavar=("REPO", "COMMIT"), | ||
| 4479 | nargs=2, | ||
| 4480 | action="append", | ||
| 4481 | help="Remove a repo+commit pair from the verified cache" | ||
| 4482 | ) | ||
| 4483 | |||
| 4484 | parser.add_argument( | ||
| 4485 | "--set-repo", | ||
| 4486 | metavar=("MODULE", "REPO"), | ||
| 4487 | nargs=2, | ||
| 4488 | action="append", | ||
| 4489 | help="Pin a module (or module@version) to the specified repository URL" | ||
| 4490 | ) | ||
| 4491 | |||
| 4492 | parser.add_argument( | ||
| 4493 | "--clear-repo", | ||
| 4494 | metavar="MODULE", | ||
| 4495 | nargs=1, | ||
| 4496 | action="append", | ||
| 4497 | help="Remove a previously pinned repository override (module or module@version)" | ||
| 4498 | ) | ||
| 4499 | |||
| 4500 | parser.add_argument( | ||
| 4501 | "--version", | ||
| 4502 | action="version", | ||
| 4503 | version=f"%(prog)s {VERSION}" | ||
| 4504 | ) | ||
| 4505 | |||
| 4506 | parser.add_argument( | ||
| 4507 | "--discovered-modules", | ||
| 4508 | dest="discovered_modules", | ||
| 4509 | help="JSON file with pre-discovered module metadata (skips discovery phase)" | ||
| 4510 | ) | ||
| 4511 | # Backward compatibility alias for --discovered-modules | ||
| 4512 | parser.add_argument("--native-modules", dest="discovered_modules", help=argparse.SUPPRESS) | ||
| 4513 | |||
| 4514 | # Add compatibility args that we ignore (for backward compatibility) | ||
| 4515 | parser.add_argument("--use-hybrid", action="store_true", help=argparse.SUPPRESS) | ||
| 4516 | parser.add_argument("go_mod_file", nargs='?', help=argparse.SUPPRESS) | ||
| 4517 | |||
| 4518 | args = parser.parse_args() | ||
| 4519 | if args.validate_only: | ||
| 4520 | args.validate = True | ||
| 4521 | |||
| 4522 | # Set global verbose mode | ||
| 4523 | global VERBOSE_MODE | ||
| 4524 | VERBOSE_MODE = args.verbose | ||
| 4525 | |||
| 4526 | original_stdout = sys.stdout | ||
| 4527 | original_stderr = sys.stderr | ||
| 4528 | log_handle = None | ||
| 4529 | log_path = None | ||
| 4530 | try: | ||
| 4531 | timestamp = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S") | ||
| 4532 | log_path = Path(tempfile.gettempdir()) / f"oe-go-mod-fetcher-{timestamp}.log" | ||
| 4533 | LOG_PATH = log_path | ||
| 4534 | log_handle = log_path.open("w", encoding="utf-8", buffering=1) | ||
| 4535 | sys.stdout = Tee(original_stdout, log_handle) | ||
| 4536 | sys.stderr = Tee(original_stderr, log_handle) | ||
| 4537 | |||
| 4538 | print(f"Go Module Git Fetcher v{VERSION}") | ||
| 4539 | print("Hybrid Architecture: Discovery from Go + Build from Git") | ||
| 4540 | print("=" * 70) | ||
| 4541 | print(f"Logs: {log_path} (pass --dry-run to load caches only)") | ||
| 4542 | |||
| 4543 | exit_code = _execute(args) | ||
| 4544 | except KeyboardInterrupt: | ||
| 4545 | print("\n\nOperation cancelled by user") | ||
| 4546 | exit_code = 1 | ||
| 4547 | except Exception as e: | ||
| 4548 | print(f"\n❌ Unexpected error: {e}") | ||
| 4549 | if args.verbose: | ||
| 4550 | import traceback | ||
| 4551 | traceback.print_exc() | ||
| 4552 | exit_code = 1 | ||
| 4553 | finally: | ||
| 4554 | save_ls_remote_cache() | ||
| 4555 | save_metadata_cache() | ||
| 4556 | save_vanity_url_cache() | ||
| 4557 | save_verify_commit_cache() | ||
| 4558 | save_repo_overrides() | ||
| 4559 | for temp_cache in TEMP_GOMODCACHES: | ||
| 4560 | try: | ||
| 4561 | if temp_cache.exists(): | ||
| 4562 | shutil.rmtree(temp_cache) | ||
| 4563 | except Exception: | ||
| 4564 | pass | ||
| 4565 | TEMP_GOMODCACHES.clear() | ||
| 4566 | if CURRENT_GOMODCACHE and not Path(CURRENT_GOMODCACHE).exists(): | ||
| 4567 | CURRENT_GOMODCACHE = None | ||
| 4568 | if log_handle: | ||
| 4569 | log_handle.flush() | ||
| 4570 | log_handle.close() | ||
| 4571 | sys.stdout = original_stdout | ||
| 4572 | sys.stderr = original_stderr | ||
| 4573 | if LOG_PATH: | ||
| 4574 | print(f"Logs: {LOG_PATH}") | ||
| 4575 | |||
| 4576 | sys.exit(exit_code) | ||
| 4577 | |||
| 4578 | |||
# Script entry point: start the CLI only when this file is executed directly,
# so importing the module never triggers a run. main() ends the process itself
# through sys.exit(), so no statement is needed after the call.
if __name__ == "__main__":
    main()
