diff options
| author | Bruce Ashfield <bruce.ashfield@gmail.com> | 2025-12-04 22:36:12 +0000 |
|---|---|---|
| committer | Bruce Ashfield <bruce.ashfield@gmail.com> | 2025-12-08 20:57:44 -0500 |
| commit | a303bf16ffd747c50c95cbe385407ba8b0122cec (patch) | |
| tree | ddb26a7945e746ce8206fc65b0a971ed74dc812b /scripts/extract-discovered-modules.py | |
| parent | 9f40ce9b277a677ad3cddd8bf1c1d15fbd035251 (diff) | |
| download | meta-virtualization-a303bf16ffd747c50c95cbe385407ba8b0122cec.tar.gz | |
scripts: add oe-go-mod-fetcher for Go module VCS resolution
Add the oe-go-mod-fetcher.py tool and supporting files for resolving
Go module dependencies via git repositories instead of module proxies.
oe-go-mod-fetcher.py:
- Parses go.mod and go.sum to identify required modules
- Resolves module paths to git repositories (handles vanity URLs)
- Maps module versions to git commits
- Generates SRC_URI entries for bitbake fetcher
- Creates go-mod-git.inc and go-mod-cache.inc files
- Supports monorepo detection and nested module handling
- Caches resolution results for performance
extract-discovered-modules.py:
- Helper script to extract module information from discovery cache
- Used by go-mod-discovery.bbclass during build
Also adds .gitignore to exclude runtime caches from version control.
Signed-off-by: Bruce Ashfield <bruce.ashfield@gmail.com>
Diffstat (limited to 'scripts/extract-discovered-modules.py')
| -rwxr-xr-x | scripts/extract-discovered-modules.py | 491 |
1 files changed, 491 insertions, 0 deletions
diff --git a/scripts/extract-discovered-modules.py b/scripts/extract-discovered-modules.py new file mode 100755 index 00000000..1cfca6ad --- /dev/null +++ b/scripts/extract-discovered-modules.py | |||
| @@ -0,0 +1,491 @@ | |||
| 1 | #!/usr/bin/env python3 | ||
| 2 | # SPDX-License-Identifier: GPL-2.0-only | ||
| 3 | # | ||
| 4 | # go-dep processor | ||
| 5 | # | ||
| 6 | # Copyright (C) 2025 Bruce Ashfield | ||
| 7 | # | ||
| 8 | # This program is free software; you can redistribute it and/or modify | ||
| 9 | # it under the terms of the GNU General Public License version 2 as | ||
| 10 | # published by the Free Software Foundation. | ||
| 11 | # | ||
| 12 | # This program is distributed in the hope that it will be useful, | ||
| 13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 15 | # GNU General Public License for more details. | ||
| 16 | # | ||
| 17 | # You should have received a copy of the GNU General Public License along | ||
| 18 | # with this program; if not, write to the Free Software Foundation, Inc., | ||
| 19 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||
| 20 | |||
| 21 | """ | ||
| 22 | Extract complete module metadata from BitBake Go discovery build cache. | ||
| 23 | |||
| 24 | This script walks a GOMODCACHE directory (from BitBake discovery build) and | ||
| 25 | extracts all module metadata from .info files, including VCS information. | ||
| 26 | |||
| 27 | Usage: | ||
| 28 | extract-discovered-modules.py --gomodcache /path/to/cache --output modules.json | ||
| 29 | |||
| 30 | The script creates: | ||
| 31 | - modules.json: Complete metadata with VCS URLs, commits, subdirs, timestamps | ||
| 32 | - modules.txt: Simple module@version list | ||
| 33 | |||
| 34 | This provides 100% accurate module discovery for BitBake recipe generation. | ||
| 35 | """ | ||
| 36 | |||
| 37 | import argparse | ||
| 38 | import json | ||
| 39 | import os | ||
| 40 | import re | ||
| 41 | import shutil | ||
| 42 | import subprocess | ||
| 43 | import sys | ||
| 44 | import tempfile | ||
| 45 | import urllib.parse | ||
| 46 | from pathlib import Path | ||
| 47 | |||
| 48 | |||
def git_ls_remote(url: str, ref: str) -> str:
    """
    Query a git repository for a ref and return the commit hash.

    For refs under refs/tags/ the peeled form (<ref>^{}) is queried first so
    that an annotated tag yields the commit it points at rather than the tag
    object itself; the plain ref is then tried as a fallback.

    Args:
        url: Repository URL handed to "git ls-remote".
        ref: Ref to look up, e.g. "refs/tags/v1.2.3".

    Returns:
        The 40-character hex hash from the first line git reports, or ''
        when nothing matched, git failed or timed out, or git could not be
        executed at all.
    """
    # Try dereferenced form first (handles annotated tags)
    refs_to_try = [f"{ref}^{{}}", ref] if ref.startswith("refs/tags/") else [ref]

    # Never block on an interactive credential prompt (private or missing
    # repositories); same setting the clone step in this file uses.
    env = {**os.environ, 'GIT_TERMINAL_PROMPT': '0'}

    try:
        for query_ref in refs_to_try:
            result = subprocess.run(
                ['git', 'ls-remote', url, query_ref],
                capture_output=True,
                text=True,
                timeout=30,
                env=env,
            )
            if result.returncode != 0:
                continue
            lines = result.stdout.strip().splitlines()
            if not lines:
                continue
            # Each line is "hash<tab>ref"; only accept a real SHA-1 hex id.
            candidate = lines[0].split('\t')[0]
            if re.fullmatch(r'[0-9a-f]{40}', candidate):
                return candidate
    except (OSError, ValueError, subprocess.SubprocessError):
        # Best effort: git missing, timeout, undecodable output or bad
        # arguments all mean "could not resolve".
        pass
    return ''
| 75 | |||
| 76 | |||
def _github_repo_path(url: str) -> str:
    """Return "owner/repo" for a github.com repository URL, or '' otherwise."""
    from urllib.parse import urlsplit

    scp_prefix = 'git@github.com:'
    if url.startswith(scp_prefix):
        path = url[len(scp_prefix):]
    else:
        try:
            parsed = urlsplit(url)
        except ValueError:
            return ''
        if (parsed.hostname or '').lower() not in ('github.com', 'www.github.com'):
            return ''
        path = parsed.path

    path = path.strip('/')
    # Only strip a trailing ".git"; a blanket replace() would corrupt names
    # such as "user.github.io".
    if path.endswith('.git'):
        path = path[:-len('.git')]
    parts = path.split('/')
    if len(parts) < 2 or not parts[0] or not parts[1]:
        return ''
    return f"{parts[0]}/{parts[1]}"


def resolve_short_hash(url: str, short_hash: str) -> str:
    """
    Resolve a 12-char short hash to full 40-char hash.

    Go pseudo-versions only contain 12 characters of the commit hash.
    BitBake's git fetcher needs the full 40-char hash.

    Strategy: Try GitHub API first (fast), then git ls-remote (only finds
    commits that are a branch head or tag), then a blobless bare clone plus
    rev-parse (slow, but works for any reachable commit).

    Args:
        url: Repository URL.
        short_hash: Abbreviated commit hash taken from a pseudo-version.

    Returns:
        The full 40-character hash when it could be resolved. The input is
        returned unchanged when it is not exactly 12 hex characters or when
        every resolution method fails.
    """
    if len(short_hash) != 12:
        return short_hash  # Already full or invalid

    # Anything that is not hex cannot be a commit prefix; bail out before
    # spending network round-trips (and before interpolating it into a URL
    # or a git command line).
    if not re.fullmatch(r'[0-9a-fA-F]{12}', short_hash):
        return short_hash

    git_env = {**os.environ, 'GIT_TERMINAL_PROMPT': '0'}
    git_errors = (OSError, ValueError, subprocess.SubprocessError)

    # First try: GitHub API (fast - single HTTP request)
    # Note: Rate limited to 60/hour without auth token
    repo_path = _github_repo_path(url)
    if repo_path:
        import http.client
        import urllib.request
        try:
            api_url = f"https://api.github.com/repos/{repo_path}/commits/{short_hash}"
            req = urllib.request.Request(api_url, headers={'User-Agent': 'oe-go-mod-fetcher'})
            with urllib.request.urlopen(req, timeout=10) as response:
                data = json.loads(response.read().decode())
            sha = data.get('sha') if isinstance(data, dict) else None
            if isinstance(sha, str) and len(sha) == 40:
                return sha
        except (OSError, ValueError, http.client.HTTPException):
            pass  # Rate limited or other error - try next method

    # Second try: git ls-remote (downloads all refs, checks if any match)
    # This works if the commit is a branch head or tag
    try:
        result = subprocess.run(
            ['git', 'ls-remote', url],
            capture_output=True,
            text=True,
            timeout=30,
            env=git_env,
        )
        if result.returncode == 0:
            for line in result.stdout.splitlines():
                full_hash = line.split('\t')[0]
                if len(full_hash) == 40 and full_hash.startswith(short_hash):
                    return full_hash
    except git_errors:
        pass

    # Third try: bare clone without blobs, then rev-parse (slower but works
    # for any commit reachable from the cloned refs)
    try:
        with tempfile.TemporaryDirectory(prefix='hash-resolve-') as tmpdir:
            repo_dir = os.path.join(tmpdir, 'repo')
            clone_result = subprocess.run(
                ['git', 'clone', '--bare', '--filter=blob:none', url, repo_dir],
                capture_output=True,
                timeout=120,
                env=git_env,
            )
            if clone_result.returncode == 0:
                # ^{commit} makes git resolve the prefix as a commit only, so
                # a tag/tree/blob sharing the prefix cannot be returned.
                parse_result = subprocess.run(
                    ['git', 'rev-parse', '--verify', '--quiet', f'{short_hash}^{{commit}}'],
                    cwd=repo_dir,
                    capture_output=True,
                    text=True,
                    timeout=10,
                )
                if parse_result.returncode == 0:
                    full_hash = parse_result.stdout.strip()
                    if len(full_hash) == 40:
                        return full_hash
    except git_errors:
        pass

    # Could not resolve - return original short hash
    return short_hash
| 150 | |||
| 151 | |||
def derive_vcs_info(module_path, version):
    """
    Derive VCS URL and commit info from module path and version.

    This is used for modules where the Go proxy doesn't provide Origin metadata
    (older modules cached before Go 1.18).

    Args:
        module_path: Unescaped Go module path, e.g. "github.com/owner/repo/sub".
        version: Module version; may be a tagged version or a pseudo-version
            and may carry a "+incompatible" suffix.

    Returns:
        dict with keys:
          vcs_url  - repository URL derived from the module path
          vcs_hash - 12-char hash for pseudo-versions; for tagged versions
                     whatever git_ls_remote() resolved ('' if nothing did)
          vcs_ref  - '' for pseudo-versions, otherwise the refs/tags/... ref
                     that resolved (or the expected one if none resolved)
          subdir   - module path remainder below the repository root
        or None if no repository URL can be derived for the module path.
    """
    vcs_url = None
    vcs_hash = ''
    vcs_ref = ''
    subpath = ''  # FIX #32: Track subpath for multi-module repos (tag prefix)

    parts = module_path.split('/')

    # Derive URL from module path
    if module_path.startswith('github.com/'):
        # github.com/owner/repo or github.com/owner/repo/subpkg
        if len(parts) >= 3:
            vcs_url = f"https://github.com/{parts[1]}/{parts[2]}"
            # FIX #32: Track subpath for multi-module repos (e.g., github.com/owner/repo/cmd/tool)
            if len(parts) > 3:
                subpath = '/'.join(parts[3:])

    elif module_path.startswith('gitlab.com/'):
        if len(parts) >= 3:
            vcs_url = f"https://gitlab.com/{parts[1]}/{parts[2]}"

    elif module_path.startswith('bitbucket.org/'):
        if len(parts) >= 3:
            vcs_url = f"https://bitbucket.org/{parts[1]}/{parts[2]}"

    elif module_path.startswith('gopkg.in/'):
        # gopkg.in/yaml.v2 -> github.com/go-yaml/yaml
        # gopkg.in/check.v1 -> github.com/go-check/check
        # gopkg.in/pkg.v3 -> github.com/go-pkg/pkg (convention)
        # gopkg.in/fsnotify.v1 -> github.com/fsnotify/fsnotify (no go- prefix)
        # gopkg.in/user/pkg.v3 -> github.com/user/pkg
        match = re.match(r'gopkg\.in/(?:([^/]+)/)?([^/]+)\.v\d+', module_path)
        if match:
            user, pkg_name = match.group(1), match.group(2)
            if user:
                vcs_url = f"https://github.com/{user}/{pkg_name}"
            else:
                # Common mappings - some use go-* prefix, others don't
                mappings = {
                    'yaml': 'https://github.com/go-yaml/yaml',
                    'check': 'https://github.com/go-check/check',
                    'inf': 'https://github.com/go-inf/inf',
                    'tomb': 'https://github.com/go-tomb/tomb',
                    'fsnotify': 'https://github.com/fsnotify/fsnotify',  # No go- prefix
                }
                vcs_url = mappings.get(pkg_name, f"https://github.com/go-{pkg_name}/{pkg_name}")

    elif module_path.startswith('google.golang.org/'):
        # google.golang.org vanity imports -> github.com/golang/*
        #   google.golang.org/appengine -> github.com/golang/appengine
        #   google.golang.org/protobuf -> github.com/protocolbuffers/protobuf-go (special case)
        #   google.golang.org/grpc -> github.com/grpc/grpc-go (special case)
        #   google.golang.org/genproto -> github.com/googleapis/go-genproto (special case)
        #
        # FIX #32: Handle submodules in multi-module repos
        #   google.golang.org/grpc/cmd/protoc-gen-go-grpc has tags like:
        #     cmd/protoc-gen-go-grpc/v1.1.0 (NOT v1.1.0)
        #   We need to track the subpath for tag prefix construction
        if len(parts) >= 2:
            pkg_name = parts[1]  # First component after google.golang.org/
            mappings = {
                'protobuf': 'https://github.com/protocolbuffers/protobuf-go',
                'grpc': 'https://github.com/grpc/grpc-go',
                'genproto': 'https://github.com/googleapis/go-genproto',
                'api': 'https://github.com/googleapis/google-api-go-client',
            }
            vcs_url = mappings.get(pkg_name, f"https://github.com/golang/{pkg_name}")
            # Track subpath for submodule tag construction (e.g., cmd/protoc-gen-go-grpc)
            if len(parts) > 2:
                subpath = '/'.join(parts[2:])  # Everything after google.golang.org/grpc/

    if not vcs_url:
        return None

    # Parse version for commit hash (pseudo-versions)
    # Go pseudo-version formats:
    #   v0.0.0-20200815063812-42c35b437635       (no base version)
    #   v1.2.3-pre.0.20200815063812-42c35b437635 (base version is a pre-release)
    #   v1.2.4-0.20200815063812-42c35b437635     (base version is release v1.2.3)
    # The key pattern: "-", "-0." or ".0." then YYYYMMDDHHMMSS (14 digits)
    # then 12-char commit hash. Also handle +incompatible suffix.
    clean_version = version.replace('+incompatible', '')

    pseudo_match = re.search(r'(?:-|-0\.|\.0\.)(\d{14})-([0-9a-f]{12})$', clean_version)
    if pseudo_match:
        vcs_hash = pseudo_match.group(2)  # 12-char short hash
        # Note: Short hashes are expanded to full 40-char by oe-go-mod-fetcher.py
        # in load_native_modules() using resolve_pseudo_version_commit()
    else:
        # Tagged version - resolve tag to commit hash
        # FIX #32: For multi-module repos, the tag includes the subpath prefix
        #   e.g., google.golang.org/grpc/cmd/protoc-gen-go-grpc@v1.1.0
        #   has tag: cmd/protoc-gen-go-grpc/v1.1.0 (not v1.1.0)
        # Go leaves the major version suffix (/v2, /v3, ...) out of that
        # prefix: module repo/sub/v2 at v2.1.0 is tagged sub/v2.1.0.
        tag_prefix = re.sub(r'(?:^|/)v(?:[2-9]|[1-9]\d+)$', '', subpath)

        # Candidate prefixes in order: Go convention, the full subpath
        # (previous behaviour), then no prefix at all (some repos don't use
        # prefixed tags for submodules).
        prefixes = []
        for prefix in (tag_prefix, subpath, ''):
            if prefix not in prefixes:
                prefixes.append(prefix)

        candidate_refs = [
            f"refs/tags/{prefix}/{clean_version}" if prefix else f"refs/tags/{clean_version}"
            for prefix in prefixes
        ]

        # Report the conventional ref even if nothing resolves below
        vcs_ref = candidate_refs[0]
        for candidate_ref in candidate_refs:
            # Query the repository to get the actual commit hash for this tag
            resolved = git_ls_remote(vcs_url, candidate_ref)
            if resolved:
                vcs_hash = resolved
                vcs_ref = candidate_ref  # Use the working ref
                break

    return {
        'vcs_url': vcs_url,
        'vcs_hash': vcs_hash,
        'vcs_ref': vcs_ref,
        # FIX #32: Return subdir for submodules.
        # NOTE(review): this still includes any major version suffix (e.g.
        # "v2"), exactly as before - confirm what the consumer expects.
        'subdir': subpath,
    }
| 277 | |||
| 278 | |||
def extract_modules(gomodcache_path):
    """
    Walk GOMODCACHE and extract all module metadata from .info files.

    Only .info files located in a "<module path>/@v" directory below
    <gomodcache_path>/cache/download are considered. Files are visited in
    sorted path order so the result is deterministic.

    Args:
        gomodcache_path: Path (str or Path) to the GOMODCACHE directory.

    Returns list of dicts with complete metadata:
        - module_path: Unescaped module path
        - version: Module version (unescaped)
        - vcs_url: Git repository URL
        - vcs_hash: Commit hash from Origin metadata, or whatever
          derive_vcs_info() produced when Origin is missing
        - vcs_ref: Tag/branch reference
        - subdir: Subdirectory in mono-repos
        - timestamp: Commit timestamp

    Raises:
        FileNotFoundError: if <gomodcache_path>/cache/download does not exist.
    """
    cache_dir = Path(gomodcache_path) / "cache" / "download"

    if not cache_dir.exists():
        raise FileNotFoundError(f"Cache directory not found: {cache_dir}")

    def unescape(text):
        # Unescape !x -> X (Go's case-insensitive encoding)
        # Example: github.com/!microsoft/go-winio -> github.com/Microsoft/go-winio
        return re.sub(r'!([a-z])', lambda m: m.group(1).upper(), text)

    modules = []
    skipped = 0
    derived = 0
    total_info_files = 0

    print(f"Scanning GOMODCACHE: {cache_dir}")

    for info_file in sorted(cache_dir.rglob("*.info")):
        total_info_files += 1

        # Extract module path from directory structure; a usable entry looks
        # like <module path components...>/@v/<version>.info
        parts = info_file.parent.relative_to(cache_dir).parts
        if len(parts) < 2 or parts[-1] != '@v':
            continue

        module_path = unescape('/'.join(parts[:-1]))

        # Version (file names use the same !-escaping as module paths)
        version = unescape(info_file.stem)

        # Read .info file for VCS metadata
        try:
            with open(info_file, encoding='utf-8') as f:
                info = json.load(f)

            if not isinstance(info, dict):
                raise ValueError("top-level JSON value is not an object")

            # "Origin" may be absent or null; both mean "no metadata"
            origin = info.get('Origin')
            if not isinstance(origin, dict):
                origin = {}

            # Check if we have complete VCS info from Origin
            if origin.get('URL') and origin.get('Hash'):
                modules.append({
                    'module_path': module_path,
                    'version': version,
                    'vcs_url': origin.get('URL', ''),
                    'vcs_hash': origin.get('Hash', ''),
                    'vcs_ref': origin.get('Ref', ''),
                    'subdir': origin.get('Subdir', ''),
                    'timestamp': info.get('Time', ''),
                })
                continue

            # FIX #29: Module lacks Origin metadata (common for +incompatible modules)
            # Use derive_vcs_info() to infer VCS URL and ref from module path/version
            # Progress output for derived modules (these require network calls)
            if (derived + 1) % 10 == 1:
                print(f" Deriving VCS info... ({derived + 1} modules)", end='\r', flush=True)
            derived_info = derive_vcs_info(module_path, version)
            if not derived_info:
                # Cannot derive VCS info - skip this module
                skipped += 1
                continue

            modules.append({
                'module_path': module_path,
                'version': version,
                'vcs_url': derived_info.get('vcs_url', ''),
                'vcs_hash': derived_info.get('vcs_hash', ''),
                'vcs_ref': derived_info.get('vcs_ref', ''),
                'subdir': derived_info.get('subdir', ''),  # FIX #32: Use derived subdir
                'timestamp': info.get('Time', ''),
            })
            # Count only after the module was really added, so the summary
            # below stays correct even if derivation fails part-way.
            derived += 1

        except json.JSONDecodeError as e:
            print(f" ⚠️ Failed to parse {info_file}: {e}")
            skipped += 1
            continue
        except Exception as e:
            # Per-file boundary: report and carry on with the next file
            print(f" ⚠️ Error processing {info_file}: {e}")
            skipped += 1
            continue

    print(f"\nProcessed {total_info_files} .info files")
    print(f"Extracted {len(modules)} modules total:")
    print(f" - {len(modules) - derived} with Origin metadata from proxy")
    print(f" - {derived} with derived VCS info (Fix #29)")
    print(f"Skipped {skipped} modules (cannot derive VCS info)")

    return modules
| 384 | |||
| 385 | |||
def main():
    """
    Command-line entry point.

    Parses --gomodcache and --output, runs extract_modules() on the cache,
    writes the result as JSON to --output and a sorted "module@version" list
    next to it, then prints summary statistics.

    Exits with status 1 when the cache directory does not exist, extraction
    raises, no modules were found, or an output file cannot be written.
    """
    # Local import: only the summary at the end needs it
    from collections import Counter

    parser = argparse.ArgumentParser(
        description='Extract module metadata from Go module cache',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
    # Extract from native Go build cache
    %(prog)s --gomodcache /tmp/k3s-discovery-cache --output /tmp/k3s-modules.json

    # Extract from BitBake discovery build
    %(prog)s --gomodcache /path/to/build/tmp/work/.../discovery-cache --output /tmp/k3s-modules.json

    # Extract from system GOMODCACHE
    %(prog)s --gomodcache ~/go/pkg/mod --output /tmp/modules.json

Output:
    - <output>.json: Complete module metadata (VCS URLs, commits, subdirs)
    - <output>.txt: Simple module@version list (sorted)
"""
    )
    parser.add_argument(
        '--gomodcache',
        required=True,
        help='Path to GOMODCACHE directory'
    )
    parser.add_argument(
        '--output',
        required=True,
        help='Output JSON file path (e.g., /tmp/k3s-modules.json)'
    )

    args = parser.parse_args()

    # Validate GOMODCACHE path
    gomodcache = Path(args.gomodcache)
    if not gomodcache.exists():
        print(f"Error: GOMODCACHE directory does not exist: {gomodcache}", file=sys.stderr)
        sys.exit(1)

    # Extract modules
    try:
        modules = extract_modules(gomodcache)
    except Exception as e:
        print(f"Error during extraction: {e}", file=sys.stderr)
        sys.exit(1)

    if not modules:
        print("Warning: No modules with VCS metadata found!", file=sys.stderr)
        print("This may indicate:", file=sys.stderr)
        print(" - GOMODCACHE is from BitBake (synthetic .info files)", file=sys.stderr)
        print(" - GOMODCACHE is empty or incomplete", file=sys.stderr)
        print(" - Need to run 'go mod download' first", file=sys.stderr)
        sys.exit(1)

    # Save as JSON
    output_path = Path(args.output)
    try:
        output_path.parent.mkdir(parents=True, exist_ok=True)
        output_path.write_text(json.dumps(modules, indent=2, sort_keys=True), encoding='utf-8')
        print(f"\n✓ Saved {len(modules)} modules to {output_path}")
    except Exception as e:
        print(f"Error writing JSON output: {e}", file=sys.stderr)
        sys.exit(1)

    # Also save simple list
    try:
        list_path = output_path.with_suffix('.txt')
        if list_path == output_path:
            # --output already ends in .txt: do not clobber the JSON that was
            # just written; append a second suffix instead.
            list_path = output_path.with_name(output_path.name + '.txt')
        simple_list = sorted(f"{m['module_path']}@{m['version']}" for m in modules)
        list_path.write_text('\n'.join(simple_list) + '\n', encoding='utf-8')
        print(f"✓ Saved module list to {list_path}")
    except Exception as e:
        print(f"Error writing module list: {e}", file=sys.stderr)
        sys.exit(1)

    # Print summary statistics
    print("\n" + "="*60)
    print("EXTRACTION SUMMARY")
    print("="*60)

    # Count modules per repository (insertion-ordered, so ties keep the
    # order in which repositories were first seen)
    repo_counts = Counter(m['vcs_url'] for m in modules)
    print(f"Total modules: {len(modules)}")
    print(f"Unique repositories: {len(repo_counts)}")

    # Count modules with subdirs (multi-module repos)
    with_subdirs = sum(1 for m in modules if m['subdir'])
    print(f"Multi-module repos: {with_subdirs} modules have subdirs")

    # Show top repositories by module count
    print("\nTop 5 repositories by module count:")
    for repo_url, count in repo_counts.most_common(5):
        print(f" {count:3d} modules: {repo_url}")

    print("\n" + "="*60)
    print("Use this JSON file with:")
    print(f" oe-go-mod-fetcher.py --native-modules {output_path}")
    print("="*60)


if __name__ == '__main__':
    main()
