diff options
Diffstat (limited to 'scripts/oe-go-mod-fetcher-hybrid.py')
| -rwxr-xr-x | scripts/oe-go-mod-fetcher-hybrid.py | 696 |
1 files changed, 696 insertions, 0 deletions
diff --git a/scripts/oe-go-mod-fetcher-hybrid.py b/scripts/oe-go-mod-fetcher-hybrid.py new file mode 100755 index 00000000..46d86fc4 --- /dev/null +++ b/scripts/oe-go-mod-fetcher-hybrid.py | |||
| @@ -0,0 +1,696 @@ | |||
| 1 | #!/usr/bin/env python3 | ||
| 2 | |||
| 3 | # SPDX-License-Identifier: GPL-2.0-only | ||
| 4 | # | ||
| 5 | # go-dep processor | ||
| 6 | # | ||
| 7 | # Copyright (C) 2025 Bruce Ashfield | ||
| 8 | # | ||
| 9 | # This program is free software; you can redistribute it and/or modify | ||
| 10 | # it under the terms of the GNU General Public License version 2 as | ||
| 11 | # published by the Free Software Foundation. | ||
| 12 | # | ||
| 13 | # This program is distributed in the hope that it will be useful, | ||
| 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 16 | # GNU General Public License for more details. | ||
| 17 | # | ||
| 18 | # You should have received a copy of the GNU General Public License along | ||
| 19 | # with this program; if not, write to the Free Software Foundation, Inc., | ||
| 20 | # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||
| 21 | |||
| 22 | """ | ||
| 23 | oe-go-mod-fetcher-hybrid.py - Convert go-mod-vcs format to hybrid gomod:// + git:// format. | ||
| 24 | |||
| 25 | This script reads existing go-mod-git.inc and go-mod-cache.inc files and converts | ||
| 26 | them to a hybrid format that uses: | ||
| 27 | - gomod:// for modules fetched from proxy.golang.org (fast, but no VCS control) | ||
| 28 | - git:// for modules where you want SRCREV control (auditable, but slower) | ||
| 29 | |||
| 30 | Usage: | ||
| 31 | # List all modules and their sizes | ||
| 32 | oe-go-mod-fetcher-hybrid.py --recipedir ./recipes-containers/k3s --list | ||
| 33 | |||
| 34 | # Show size-based recommendations | ||
| 35 | oe-go-mod-fetcher-hybrid.py --recipedir ./recipes-containers/k3s --recommend | ||
| 36 | |||
| 37 | # Convert specific modules to gomod:// (rest stay as git://) | ||
| 38 | oe-go-mod-fetcher-hybrid.py --recipedir ./recipes-containers/k3s \\ | ||
| 39 | --gomod "github.com/spf13,golang.org/x,google.golang.org" | ||
| 40 | |||
| 41 | # Convert specific modules to git:// (rest become gomod://) | ||
| 42 | oe-go-mod-fetcher-hybrid.py --recipedir ./recipes-containers/k3s \\ | ||
| 43 | --git "github.com/containerd,github.com/rancher" | ||
| 44 | |||
| 45 | # Use a config file | ||
| 46 | oe-go-mod-fetcher-hybrid.py --recipedir ./recipes-containers/k3s \\ | ||
| 47 | --config hybrid-config.json | ||
| 48 | """ | ||
| 49 | |||
| 50 | import argparse | ||
| 51 | import json | ||
| 52 | import re | ||
| 53 | import sys | ||
| 54 | import subprocess | ||
| 55 | import os | ||
| 56 | import hashlib | ||
| 57 | import urllib.request | ||
| 58 | import urllib.error | ||
| 59 | import concurrent.futures | ||
| 60 | from pathlib import Path | ||
| 61 | from collections import defaultdict | ||
| 62 | from typing import Optional | ||
| 63 | |||
| 64 | |||
# Built-in fallback settings - used when data/hybrid-config.json is not found,
# and to fill in any of these keys that the file leaves out.
DEFAULT_CONFIG = {
    # Module path prefixes the recommender suggests keeping as git://
    "vcs_priority_prefixes": [
        "github.com/containerd",
        "github.com/rancher",
        "github.com/k3s-io",
        "k8s.io",
        "sigs.k8s.io",
    ],
    # Prefixes whose total size exceeds this are suggested for gomod://
    "size_threshold_bytes": 1024 * 1024,  # 1MB
    # Prefixes shown in the example command when no priority prefix matched
    "default_git_prefixes": [
        "github.com/containerd",
        "k8s.io",
        "sigs.k8s.io",
    ],
}
| 81 | |||
| 82 | |||
def load_hybrid_config() -> dict:
    """
    Load hybrid mode configuration from data/hybrid-config.json.

    The config file is looked for relative to this script's location. Keys
    missing from the file are filled in from DEFAULT_CONFIG.

    Falls back to DEFAULT_CONFIG if the file doesn't exist. If the file exists
    but cannot be read, is not valid JSON, or does not hold a JSON object, a
    warning is printed to stderr and the defaults are used instead.

    Returns:
        A configuration dict. Default values are deep-copied, so the caller
        can modify the result without altering DEFAULT_CONFIG.
    """
    import copy

    config_path = Path(__file__).parent / "data" / "hybrid-config.json"

    if config_path.exists():
        try:
            with open(config_path, encoding="utf-8") as f:
                config = json.load(f)
            if not isinstance(config, dict):
                # A list or scalar cannot be merged with the default keys
                raise ValueError("top-level JSON value must be an object")
        except (OSError, ValueError) as e:
            # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors
            print(f"Warning: Could not load {config_path}: {e}", file=sys.stderr)
            print("Using default configuration", file=sys.stderr)
        else:
            # Merge with defaults for any missing keys
            for key, value in DEFAULT_CONFIG.items():
                if key not in config:
                    config[key] = copy.deepcopy(value)
            return config

    return copy.deepcopy(DEFAULT_CONFIG)
| 107 | |||
| 108 | |||
def fetch_gomod_checksum(module: str, version: str) -> Optional[str]:
    """
    Fetch SHA256 checksum for a module from proxy.golang.org.

    The checksum is calculated by downloading the .zip file and hashing it.
    The download is hashed chunk by chunk so a large module archive is never
    held in memory as a whole.

    NOTE: this file defines fetch_gomod_checksum a second time further down;
    that later definition rebinds the name, so it is the one callers get at
    runtime.

    Args:
        module: Go module path, e.g. "github.com/Foo/bar".
        version: Module version string, e.g. "v1.2.3".

    Returns:
        The hex SHA256 digest of the zip, or None if the download failed
        (a warning is printed to stderr in that case).
    """
    def escape(text: str) -> str:
        # Escape capital letters (Go proxy convention): "A" -> "!a"
        return re.sub(r'[A-Z]', lambda m: '!' + m.group(0).lower(), text)

    url = f"https://proxy.golang.org/{escape(module)}/@v/{escape(version)}.zip"

    try:
        req = urllib.request.Request(url, headers={'User-Agent': 'oe-go-mod-fetcher-hybrid/1.0'})
        digest = hashlib.sha256()
        with urllib.request.urlopen(req, timeout=30) as response:
            while chunk := response.read(65536):
                digest.update(chunk)
        return digest.hexdigest()
    except urllib.error.HTTPError as e:
        print(f" WARNING: Failed to fetch {module}@{version}: HTTP {e.code}", file=sys.stderr)
        return None
    except urllib.error.URLError as e:
        print(f" WARNING: Failed to fetch {module}@{version}: {e.reason}", file=sys.stderr)
        return None
    except Exception as e:
        # Best effort: any other failure (e.g. a read timeout) also yields None
        print(f" WARNING: Failed to fetch {module}@{version}: {e}", file=sys.stderr)
        return None
| 135 | |||
| 136 | |||
def fetch_checksums_parallel(modules: list[dict], max_workers: int = 8) -> dict[str, str]:
    """
    Look up checksums for many modules concurrently using a thread pool.

    Each entry of ``modules`` must provide 'module' and 'version'. Modules
    whose checksum could not be fetched are left out of the result. Progress
    is printed after every 20 completed modules and once at the end.

    Returns dict mapping "module@version" -> "sha256sum"
    """
    total = len(modules)
    results = {}

    def _lookup(entry):
        label = f"{entry['module']}@{entry['version']}"
        return label, fetch_gomod_checksum(entry['module'], entry['version'])

    print(f"Fetching checksums for {total} modules from proxy.golang.org...")

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as pool:
        pending = [pool.submit(_lookup, entry) for entry in modules]
        for done, job in enumerate(concurrent.futures.as_completed(pending), start=1):
            label, digest = job.result()
            if digest:
                results[label] = digest
            # Progress indicator
            if done % 20 == 0 or done == total:
                print(f" Progress: {done}/{total} modules")

    return results
| 165 | |||
| 166 | |||
def parse_go_mod_cache_inc(cache_inc_path: Path) -> list[dict]:
    """Return the module list stored in GO_MODULE_CACHE_DATA of go-mod-cache.inc.

    Raises:
        ValueError: if the file has no GO_MODULE_CACHE_DATA = '[...]' assignment.
    """
    text = cache_inc_path.read_text()

    # The variable holds a single-quoted JSON array that may span many lines
    found = re.search(r"GO_MODULE_CACHE_DATA\s*=\s*'(\[.*\])'", text, re.DOTALL)
    if found is None:
        raise ValueError(f"Could not find GO_MODULE_CACHE_DATA in {cache_inc_path}")

    # Remove backslash-newline line continuations before decoding the JSON
    payload = ''.join(found.group(1).split('\\\n'))
    return json.loads(payload)
| 178 | |||
| 179 | |||
def parse_go_mod_git_inc(git_inc_path: Path) -> dict[str, dict]:
    """Collect repo/commit details from the SRC_URI lines of go-mod-git.inc.

    Only lines starting with 'SRC_URI +=' that look like
    git://host/path;...;rev=COMMIT;...;destsuffix=vcs_cache/VCS_HASH
    are considered.

    Returns:
        Mapping of vcs_hash -> {'repo', 'commit', 'full_line'}. 'commit' is ''
        when the entry has no rev= parameter. A later line with the same
        vcs_hash replaces an earlier one.
    """
    uri_re = re.compile(r'git://([^;]+);([^"]*);destsuffix=vcs_cache/([a-f0-9]+)')
    rev_re = re.compile(r'rev=([a-f0-9]+)')

    entries = {}
    for raw in git_inc_path.read_text().split('\n'):
        if not raw.startswith('SRC_URI +='):
            continue

        hit = uri_re.search(raw)
        if hit is None:
            continue

        repo_path, params, vcs_hash = hit.groups()
        rev = rev_re.search(params)
        entries[vcs_hash] = {
            'repo': f"https://{repo_path}",
            'commit': rev.group(1) if rev else '',
            'full_line': raw.strip()
        }

    return entries
| 210 | |||
| 211 | |||
def get_repo_sizes(vcs_info: dict, workdir: Optional[Path] = None) -> dict[str, int]:
    """Get sizes of VCS cache directories if they exist.

    Looks for a vcs_cache directory under ``workdir`` (first
    'sources/vcs_cache', then 'vcs_cache') and measures each
    ``<vcs_cache>/<vcs_hash>`` directory with ``du -sb``.

    Args:
        vcs_info: Mapping keyed by vcs_hash; only the keys are used.
        workdir: Directory to search, or None to skip size collection.

    Returns:
        Mapping of vcs_hash -> size in bytes as reported by ``du``. Hashes
        whose directory is missing or could not be measured are omitted, so
        the result is empty when no workdir or vcs_cache is available.
    """
    sizes = {}

    if workdir is None:
        return sizes

    # Try common locations for vcs_cache
    candidates = (workdir / subpath for subpath in ('sources/vcs_cache', 'vcs_cache'))
    vcs_cache_dir = next((path for path in candidates if path.exists()), None)
    if vcs_cache_dir is None:
        return sizes

    for vcs_hash in vcs_info:
        cache_path = vcs_cache_dir / vcs_hash
        if not cache_path.exists():
            continue

        try:
            result = subprocess.run(
                ['du', '-sb', str(cache_path)],
                capture_output=True, text=True, timeout=10
            )
        except subprocess.TimeoutExpired:
            # Sizes are informational only; skip directories that take too long
            continue
        except OSError as e:
            # 'du' is missing or not executable: no later call can succeed either
            print(f"Warning: could not run 'du' to measure sizes: {e}", file=sys.stderr)
            break

        fields = result.stdout.split()
        if result.returncode != 0 or not fields:
            continue
        try:
            sizes[vcs_hash] = int(fields[0])
        except ValueError:
            # Unexpected output format; leave this entry without a size
            continue

    return sizes
| 242 | |||
| 243 | |||
def format_size(size_bytes: int) -> str:
    """Render a byte count with one decimal place and a B/KB/MB/GB/TB unit.

    The value is divided by 1024 until it drops below 1024; anything that is
    still 1024 GB or more is reported in TB.
    """
    units = ('B', 'KB', 'MB', 'GB')
    amount = size_bytes
    step = 0
    while step < len(units) and not amount < 1024:
        amount /= 1024
        step += 1

    label = units[step] if step < len(units) else 'TB'
    return f"{amount:.1f} {label}"
| 251 | |||
| 252 | |||
def list_modules(modules: list[dict], vcs_info: dict, sizes: dict) -> None:
    """Print a table of every module, grouped by the first two path components.

    Each row shows module path, version and the size recorded in ``sizes``
    for the module's vcs_hash ('-' when no size is known). Groups with more
    than one module get a subtotal row, and a grand total is printed last.
    ``vcs_info`` is accepted but not used here.
    """
    # Group by module path prefix (a path with one component is its own prefix)
    groups = defaultdict(list)
    for entry in modules:
        group_name = '/'.join(entry['module'].split('/')[:2])
        groups[group_name].append(entry)

    print(f"\n{'Module':<60} {'Version':<25} {'Size':>12}")
    print("=" * 100)

    grand_total = 0
    for group_name in sorted(groups):
        members = groups[group_name]
        subtotal = 0
        for entry in sorted(members, key=lambda m: m['module']):
            nbytes = sizes.get(entry.get('vcs_hash', ''), 0)
            subtotal += nbytes
            grand_total += nbytes

            if nbytes > 0:
                shown = format_size(nbytes)
            else:
                shown = '-'
            print(f" {entry['module']:<58} {entry['version']:<25} {shown:>12}")

        if len(members) > 1:
            print(f" {'[subtotal]':<58} {'':<25} {format_size(subtotal):>12}")
        print()

    print("=" * 100)
    print(f"Total: {len(modules)} modules, {format_size(grand_total)}")
| 286 | |||
| 287 | |||
def recommend_conversion(modules: list[dict], vcs_info: dict, sizes: dict, recipedir: Optional[Path] = None) -> None:
    """Recommend modules to convert based on size.

    Configuration is loaded from data/hybrid-config.json if it exists,
    otherwise defaults are used. This allows easy customization of:
    - vcs_priority_prefixes: modules to suggest keeping as git://
    - size_threshold_bytes: threshold for suggesting gomod:// conversion
    - default_git_prefixes: fallback prefixes if no matches found

    Modules are grouped by the first two components of their path. The table
    shows the 25 largest groups. gomod:// candidates are taken from those
    rows, while --git suggestions are collected from ALL groups, so a
    priority prefix is never lost because it is small or because no sizes are
    available.

    Args:
        modules: Module dicts with at least 'module'; 'vcs_hash' is optional.
        vcs_info: Accepted but not used here.
        sizes: Mapping of vcs_hash -> size in bytes; missing entries count as 0.
        recipedir: Recipe directory to show in the example command; a
            placeholder is printed when omitted.
    """
    max_rows = 25     # rows shown in the size table
    max_listed = 10   # gomod:// candidates listed in the summary

    # Load configuration from external file (or use defaults)
    config = load_hybrid_config()
    vcs_priority_patterns = config.get('vcs_priority_prefixes', DEFAULT_CONFIG['vcs_priority_prefixes'])
    size_threshold = config.get('size_threshold_bytes', DEFAULT_CONFIG['size_threshold_bytes'])
    default_git_prefixes = config.get('default_git_prefixes', DEFAULT_CONFIG['default_git_prefixes'])

    # Calculate sizes per prefix
    prefix_sizes = defaultdict(lambda: {'size': 0, 'count': 0, 'modules': []})

    for mod in modules:
        # First two path components; a one-component path is its own prefix
        prefix = '/'.join(mod['module'].split('/')[:2])
        size = sizes.get(mod.get('vcs_hash', ''), 0)

        prefix_sizes[prefix]['size'] += size
        prefix_sizes[prefix]['count'] += 1
        prefix_sizes[prefix]['modules'].append(mod['module'])

    # Sort by size descending
    sorted_prefixes = sorted(prefix_sizes.items(), key=lambda x: x[1]['size'], reverse=True)

    total_size = sum(p['size'] for p in prefix_sizes.values())

    print("\n" + "=" * 80)
    print("GO MODULE HYBRID CONVERSION RECOMMENDATIONS")
    print("=" * 80)

    print(f"\n{'Prefix':<45} {'Count':>8} {'Size':>12} {'% Total':>10}")
    print("-" * 80)

    gomod_candidates = []
    git_candidates = []

    for rank, (prefix, info) in enumerate(sorted_prefixes):
        # Check if this is a VCS priority prefix (checked for every prefix,
        # not only the rows that are displayed)
        is_vcs_priority = any(prefix.startswith(p) for p in vcs_priority_patterns)
        if is_vcs_priority:
            git_candidates.append(prefix)

        if rank >= max_rows:
            continue

        pct = (info['size'] / total_size * 100) if total_size > 0 else 0
        print(f"{prefix:<45} {info['count']:>8} {format_size(info['size']):>12} {pct:>9.1f}%")

        if not is_vcs_priority and info['size'] > size_threshold:
            gomod_candidates.append(prefix)

    print("-" * 80)
    print(f"{'Total':<45} {len(modules):>8} {format_size(total_size):>12}")

    if gomod_candidates:
        listed = gomod_candidates[:max_listed]

        print("\n" + "=" * 80)
        print("LARGEST MODULE PREFIXES (top candidates for gomod:// proxy fetch):")
        print("=" * 80)
        print("\n " + ",".join(listed))

        # Calculate potential savings for exactly the prefixes listed above
        gomod_size = sum(prefix_sizes[p]['size'] for p in listed)
        if total_size > 0:
            print(f"\n These account for {format_size(gomod_size)} ({gomod_size/total_size*100:.0f}% of total)")

    print("\n" + "=" * 80)
    print("SUGGESTED --git PREFIXES (keep as git:// for VCS control):")
    print("=" * 80)

    if git_candidates:
        print("\n " + ",".join(git_candidates))
    else:
        print("\n (none identified)")

    print("\n NOTE: With --git, ALL other modules automatically become gomod://")
    print(" (not just the large ones listed above)")

    # Output conversion command
    print("\n" + "=" * 80)
    print("TO CONVERT TO HYBRID FORMAT:")
    print("=" * 80)
    print()

    # Absolute path of this script, so the command can be pasted from anywhere
    script_path = Path(__file__).resolve()

    # Use default_git_prefixes from config as fallback
    git_arg = ','.join(git_candidates) if git_candidates else ','.join(default_git_prefixes)
    target_dir = recipedir if recipedir else "<your-recipe-directory>"

    print(f" python3 {script_path} \\")
    print(f" --recipedir {target_dir} \\")
    print(f" --git \"{git_arg}\"")
| 400 | |||
| 401 | |||
def fetch_gomod_checksum(module: str, version: str) -> Optional[str]:
    """Fetch SHA256 checksum for a module from proxy.golang.org.

    This definition comes after an earlier fetch_gomod_checksum in this file
    and therefore replaces it: it is the function callers get at runtime. It
    is kept in line with that earlier definition by case-escaping both the
    module path and the version, and by sending the same User-Agent header.

    The zip is hashed chunk by chunk so a large archive is never held in
    memory as a whole.

    Args:
        module: Go module path, e.g. "github.com/Foo/bar".
        version: Module version string, e.g. "v1.2.3".

    Returns:
        The hex SHA256 digest of the module zip, or None if anything went
        wrong (a warning is printed to stderr in that case).
    """
    import urllib.request
    import hashlib

    def escape(text: str) -> str:
        # Go proxy convention for uppercase letters: "A" -> "!a"
        return re.sub(r'[A-Z]', lambda m: '!' + m.group(0).lower(), text)

    url = f"https://proxy.golang.org/{escape(module)}/@v/{escape(version)}.zip"

    try:
        request = urllib.request.Request(url, headers={'User-Agent': 'oe-go-mod-fetcher-hybrid/1.0'})
        digest = hashlib.sha256()
        with urllib.request.urlopen(request, timeout=30) as response:
            while chunk := response.read(65536):
                digest.update(chunk)
        return digest.hexdigest()
    except Exception as e:
        # Best effort: a missing checksum is reported by the caller, not fatal
        print(f" Warning: Could not fetch checksum for {module}@{version}: {e}", file=sys.stderr)
        return None
| 419 | |||
| 420 | |||
def generate_hybrid_files(
    modules: list[dict],
    vcs_info: dict,
    git_prefixes: list[str],
    gomod_prefixes: list[str],
    output_dir: Path,
    fetch_checksums: bool = False
) -> None:
    """Generate hybrid include files.

    Every module is classified as gomod:// (proxy) or git:// (VCS) and three
    files are written to ``output_dir``:

    - go-mod-hybrid-gomod.inc: one gomod:// SRC_URI entry per proxy module
    - go-mod-hybrid-git.inc: the original git:// SRC_URI lines of VCS modules
    - go-mod-hybrid-cache.inc: GO_MODULE_CACHE_DATA for the VCS modules

    Classification rules:
    - a module matching a git prefix is git:// (also when it matches a gomod
      prefix too; a warning is printed)
    - a module matching only a gomod prefix is gomod://
    - an unmatched module is gomod:// when git prefixes were given or when no
      prefixes were given at all, and git:// when only gomod prefixes were given

    Args:
        modules: Module dicts with 'module', 'version' and optionally 'vcs_hash'.
        vcs_info: Mapping of vcs_hash -> dict holding the 'full_line' to emit.
        git_prefixes: Module path prefixes to keep as git://.
        gomod_prefixes: Module path prefixes to convert to gomod://.
        output_dir: Directory for the generated files (created if missing).
        fetch_checksums: When true, checksums for the gomod:// modules are
            fetched and written inline as sha256sum= parameters.
    """

    # Ensure output directory exists
    output_dir.mkdir(parents=True, exist_ok=True)

    git_modules = []
    gomod_modules = []

    # Classify modules
    for mod in modules:
        mod_path = mod['module']

        is_git = any(mod_path.startswith(prefix) for prefix in git_prefixes)
        is_gomod = any(mod_path.startswith(prefix) for prefix in gomod_prefixes)

        if is_git and is_gomod:
            print(f"Warning: {mod_path} matches both git and gomod prefixes, using git://",
                  file=sys.stderr)

        if is_git:
            git_modules.append(mod)
        elif is_gomod:
            gomod_modules.append(mod)
        elif git_prefixes or not gomod_prefixes:
            # Unmatched: gomod when git prefixes were given, or no prefixes at all
            gomod_modules.append(mod)
        else:
            # Unmatched with only gomod prefixes given: everything else is git
            git_modules.append(mod)

    print(f"\nClassification:")
    print(f" gomod:// (proxy): {len(gomod_modules)} modules")
    print(f" git:// (VCS): {len(git_modules)} modules")

    # Fetch checksums in parallel (always, unless --no-checksums)
    checksum_map = {}
    if fetch_checksums and gomod_modules:
        checksum_map = fetch_checksums_parallel(gomod_modules)
        if len(checksum_map) < len(gomod_modules):
            missing = len(gomod_modules) - len(checksum_map)
            print(f" WARNING: Failed to fetch {missing} checksums", file=sys.stderr)

    # Generate gomod include file
    gomod_lines = [
        "# Generated by oe-go-mod-fetcher-hybrid.py",
        "# Go modules fetched from proxy.golang.org (fast path)",
        "#",
        "# These modules are fetched as pre-built zip files from the Go proxy.",
        "# They do not provide VCS commit-level provenance but are much faster.",
        "",
        "inherit go-mod",
        ""
    ]

    for mod in sorted(gomod_modules, key=lambda m: m['module']):
        key = f"{mod['module']}@{mod['version']}"
        if key in checksum_map:
            # Include checksum inline to avoid BitBake variable flag name issues
            # (e.g., ~ character in git.sr.ht/~sbinet/gg causes parse errors)
            gomod_lines.append(f'SRC_URI += "gomod://{mod["module"]};version={mod["version"]};sha256sum={checksum_map[key]}"')
        else:
            gomod_lines.append(f'SRC_URI += "gomod://{mod["module"]};version={mod["version"]}"')

    gomod_file = output_dir / 'go-mod-hybrid-gomod.inc'
    gomod_file.write_text('\n'.join(gomod_lines) + '\n')
    print(f"\nWrote {gomod_file}")

    if not fetch_checksums and gomod_modules:
        # The CLI fetches checksums by default; --no-checksums turns it off
        print(" WARNING: Checksums not fetched (re-run without --no-checksums to fetch them)")
        print(" BitBake will fail on first fetch and show required checksums")

    # Generate git include file
    git_lines = [
        "# Generated by oe-go-mod-fetcher-hybrid.py",
        "# Go modules fetched from git repositories (VCS path)",
        "#",
        "# These modules are fetched directly from their git repositories.",
        "# They provide full VCS provenance and allow easy SRCREV bumping.",
        ""
    ]

    sorted_git_modules = sorted(git_modules, key=lambda m: m['module'])

    emitted_hashes = set()
    modules_without_uri = []
    for mod in sorted_git_modules:
        vcs_hash = mod.get('vcs_hash', '')
        if vcs_hash not in vcs_info:
            modules_without_uri.append(mod['module'])
            continue
        # Modules sharing a vcs_hash share one SRC_URI line; emit it only once
        if vcs_hash in emitted_hashes:
            continue
        emitted_hashes.add(vcs_hash)
        git_lines.append(vcs_info[vcs_hash]['full_line'])

    git_file = output_dir / 'go-mod-hybrid-git.inc'
    git_file.write_text('\n'.join(git_lines) + '\n')
    print(f"Wrote {git_file}")

    # These modules are still listed in the cache metadata below but have no
    # fetch entry, so tell the user instead of dropping them silently
    for mod_path in modules_without_uri:
        print(f"Warning: no git:// SRC_URI entry found for {mod_path}; "
              f"it is not listed in {git_file.name}", file=sys.stderr)

    # Generate cache metadata file for git modules
    cache_lines = [
        "# Generated by oe-go-mod-fetcher-hybrid.py",
        "# Metadata for git-fetched modules (VCS path)",
        "# Used by go-mod-vcs.bbclass to build module cache from git checkouts",
        "",
        "inherit go-mod-vcs",
        "",
    ]

    # Format GO_MODULE_CACHE_DATA with one entry per line for readability
    # (matches go-mod-cache.inc format: '[\
    # {entry1},\
    # {entry2}]')
    cache_lines.append("# Module metadata for cache building (one module per line)")
    if sorted_git_modules:
        cache_lines.append("GO_MODULE_CACHE_DATA = '[\\")
        last_index = len(sorted_git_modules) - 1
        for i, mod in enumerate(sorted_git_modules):
            entry = json.dumps(mod, separators=(',', ':'))  # Compact single-line JSON per entry
            if i < last_index:
                cache_lines.append(f"{entry},\\")
            else:
                cache_lines.append(f"{entry}]'")
    else:
        cache_lines.append("GO_MODULE_CACHE_DATA = '[]'")

    cache_file = output_dir / 'go-mod-hybrid-cache.inc'
    cache_file.write_text('\n'.join(cache_lines) + '\n')
    print(f"Wrote {cache_file}")

    # Print usage instructions
    print("\n" + "=" * 70)
    print("NEXT STEPS:")
    print("=" * 70)
    print("""
1. Update your recipe to enable mode switching:

# GO_MOD_FETCH_MODE: "vcs" (all git://) or "hybrid" (gomod:// + git://)
GO_MOD_FETCH_MODE ?= "vcs"

# VCS mode: all modules via git://
include ${@ "go-mod-git.inc" if d.getVar("GO_MOD_FETCH_MODE") == "vcs" else ""}
include ${@ "go-mod-cache.inc" if d.getVar("GO_MOD_FETCH_MODE") == "vcs" else ""}

# Hybrid mode: gomod:// for most, git:// for selected
include ${@ "go-mod-hybrid-gomod.inc" if d.getVar("GO_MOD_FETCH_MODE") == "hybrid" else ""}
include ${@ "go-mod-hybrid-git.inc" if d.getVar("GO_MOD_FETCH_MODE") == "hybrid" else ""}
include ${@ "go-mod-hybrid-cache.inc" if d.getVar("GO_MOD_FETCH_MODE") == "hybrid" else ""}

2. Run bitbake once in hybrid mode to fetch gomod:// checksums:

GO_MOD_FETCH_MODE = "hybrid" # in local.conf
bitbake <recipe>

3. Copy the checksums from the error log into go-mod-hybrid-gomod.inc

4. Build again - or switch back to VCS mode anytime:

GO_MOD_FETCH_MODE = "vcs" # full VCS provenance
GO_MOD_FETCH_MODE = "hybrid" # faster proxy fetch
""")
| 588 | |||
| 589 | |||
def main():
    """Command-line entry point.

    Parses the arguments, loads go-mod-cache.inc and go-mod-git.inc from the
    recipe directory, then either lists the modules (--list), prints
    recommendations (--recommend), or writes the hybrid include files for the
    prefixes given via --git, --gomod and/or --config. Exits with status 1
    when an input file is missing or no action/prefix was requested.
    """
    parser = argparse.ArgumentParser(
        description='Convert go-mod-vcs format to hybrid gomod:// + git:// format',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__
    )
    add = parser.add_argument

    add('--recipedir', type=Path, required=True,
        help='Recipe directory containing go-mod-git.inc and go-mod-cache.inc')
    add('--workdir', type=Path, default=None,
        help='BitBake workdir containing vcs_cache (for size calculations)')

    # Actions
    add('--list', action='store_true',
        help='List all modules with sizes')
    add('--recommend', action='store_true',
        help='Show size-based recommendations for conversion')

    # Conversion options
    add('--git', type=str, default='',
        help='Comma-separated module prefixes to keep as git:// (rest become gomod://)')
    add('--gomod', type=str, default='',
        help='Comma-separated module prefixes to convert to gomod:// (rest stay git://)')
    add('--config', type=Path, default=None,
        help='JSON config file with git/gomod prefix lists')
    add('--no-checksums', action='store_true',
        help='Skip fetching SHA256 checksums (not recommended)')
    add('--output-dir', type=Path, default=None,
        help='Output directory for hybrid files (default: recipedir)')

    args = parser.parse_args()

    # Both input files must exist; the cache file is checked first
    cache_inc = args.recipedir / 'go-mod-cache.inc'
    git_inc = args.recipedir / 'go-mod-git.inc'
    for required in (cache_inc, git_inc):
        if not required.exists():
            print(f"Error: {required} not found", file=sys.stderr)
            sys.exit(1)

    # Parse existing files
    print(f"Loading {cache_inc}...")
    modules = parse_go_mod_cache_inc(cache_inc)
    print(f" Found {len(modules)} modules")

    print(f"Loading {git_inc}...")
    vcs_info = parse_go_mod_git_inc(git_inc)
    print(f" Found {len(vcs_info)} VCS entries")

    # Sizes are only available when a workdir was provided
    sizes = {}
    if args.workdir:
        print(f"Calculating sizes from {args.workdir}...")
        sizes = get_repo_sizes(vcs_info, args.workdir)
        print(f" Got sizes for {len(sizes)} repos")

    # Read-only actions return before any conversion happens
    if args.list:
        list_modules(modules, vcs_info, sizes)
        return

    if args.recommend:
        recommend_conversion(modules, vcs_info, sizes, args.recipedir)
        return

    def split_prefixes(raw):
        # "a, b,,c" -> ["a", "b", "c"]
        stripped = (item.strip() for item in raw.split(','))
        return [item for item in stripped if item]

    # Handle conversion
    git_prefixes = split_prefixes(args.git)
    gomod_prefixes = split_prefixes(args.gomod)

    if args.config:
        if not args.config.exists():
            print(f"Error: Config file {args.config} not found", file=sys.stderr)
            sys.exit(1)
        # Prefixes from the config file are added to those from the command line
        config = json.loads(args.config.read_text())
        git_prefixes.extend(config.get('git', []))
        gomod_prefixes.extend(config.get('gomod', []))

    if not (git_prefixes or gomod_prefixes):
        print("Error: Specify --git, --gomod, --list, or --recommend", file=sys.stderr)
        parser.print_help()
        sys.exit(1)

    output_dir = args.output_dir if args.output_dir else args.recipedir

    generate_hybrid_files(
        modules=modules,
        vcs_info=vcs_info,
        git_prefixes=git_prefixes,
        gomod_prefixes=gomod_prefixes,
        output_dir=output_dir,
        fetch_checksums=not args.no_checksums  # Default: fetch checksums
    )
| 693 | |||
| 694 | |||
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
