summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBruce Ashfield <bruce.ashfield@gmail.com>2025-12-09 01:50:29 +0000
committerBruce Ashfield <bruce.ashfield@gmail.com>2025-12-08 20:57:44 -0500
commit2bee001d6ddcafba7d384ac7ee241b16a1449681 (patch)
treeb5b189854614bc810240af43e66f3c85b48b8e2c
parent2385a741401386d129a0cd4dc4c332d3d54af4b3 (diff)
downloadmeta-virtualization-2bee001d6ddcafba7d384ac7ee241b16a1449681.tar.gz
go-mod: introduce hybrid mode
Introduce the ability to have hybrid gomod:// and git:// repositories. This allows SRCREV bumping when fixing bugs, and using the git archiver for some, all or none of the modules in a go mod project. Example: k3s Hybrid Conversion 1. Ensure VCS mode works first bitbake k3s 2. Get recommendations bitbake k3s -c go_mod_recommend 3. Convert with recommended prefixes (keep containerd, k8s.io as git://) python3 ./meta-virtualization/scripts/oe-go-mod-fetcher-hybrid.py \ --recipedir ./meta-virtualization/recipes-containers/k3s/ \ --git "github.com/containerd,k8s.io,sigs.k8s.io,github.com/rancher" Signed-off-by: Bruce Ashfield <bruce.ashfield@gmail.com>
-rw-r--r--classes/go-mod-vcs.bbclass224
-rw-r--r--scripts/QUICKSTART-oe-go-mod-vcs.md135
-rw-r--r--scripts/data/hybrid-config.json24
-rwxr-xr-xscripts/oe-go-mod-fetcher-hybrid.py696
4 files changed, 1079 insertions, 0 deletions
diff --git a/classes/go-mod-vcs.bbclass b/classes/go-mod-vcs.bbclass
index 65211445..549fa9f5 100644
--- a/classes/go-mod-vcs.bbclass
+++ b/classes/go-mod-vcs.bbclass
@@ -1138,3 +1138,227 @@ python do_sync_go_files() {
1138} 1138}
1139 1139
1140addtask sync_go_files after do_create_module_cache before do_compile 1140addtask sync_go_files after do_create_module_cache before do_compile
1141
1142
do_fix_go_mod_permissions() {
    # The Go module cache is intentionally read-only for integrity, but that
    # breaks BitBake's rm -rf cleanup (sstate_eventhandler_reachablestamps).
    # Add owner write permission so the workdir can be removed again.
    #
    # Known locations of the module cache below ${S}
    for modpath in "${S}/pkg/mod" "${S}/src/import/pkg/mod"; do
        if [ -d "$modpath" ]; then
            # Best effort: a failing chmod must not fail the task
            chmod -R u+w "$modpath" 2>/dev/null || true
            bbnote "Fixed permissions on Go module cache: $modpath"
        fi
    done
    # Also check the sources subdirectory (recipes with a WORKDIR/sources layout)
    if [ -d "${WORKDIR}/sources" ]; then
        # IFS= and -r keep each path exactly as find printed it (no backslash
        # interpretation, no trimming of leading/trailing whitespace).
        find "${WORKDIR}/sources" -type d -name "mod" -path "*/pkg/mod" 2>/dev/null | while IFS= read -r modpath; do
            chmod -R u+w "$modpath" 2>/dev/null || true
            bbnote "Fixed permissions on Go module cache: $modpath"
        done
    fi
}

# Run after sync_go_files (which is the last Go module setup task) and before compile
addtask fix_go_mod_permissions after do_sync_go_files before do_compile
1166
1167
python do_go_mod_recommend() {
    """
    Report which VCS-fetched Go modules are candidates for gomod:// conversion.

    The analysis itself is done by scripts/oe-go-mod-fetcher-hybrid.py, which
    this task runs with --recommend against the recipe directory. The task:
      - locates the script through the meta-virtualization entry in BBLAYERS
      - passes --workdir when WORKDIR/sources/vcs_cache exists (size data)
      - relays the script's stdout via bb.plain and its stderr via bb.warn
      - always finishes with a recipe snippet for switching fetch modes

    Run with: bitbake <recipe> -c go_mod_recommend
    """
    import subprocess
    from pathlib import Path

    # First BBLAYERS entry whose path mentions meta-virtualization
    layerdir = next(
        (entry for entry in d.getVar('BBLAYERS').split()
         if 'meta-virtualization' in entry),
        None,
    )
    if not layerdir:
        bb.error("Could not find meta-virtualization layer in BBLAYERS")
        return

    scriptpath = Path(layerdir) / "scripts" / "oe-go-mod-fetcher-hybrid.py"
    if not scriptpath.exists():
        bb.error(f"Hybrid script not found at {scriptpath}")
        return

    recipedir = d.getVar('FILE_DIRNAME')
    workdir = d.getVar('WORKDIR')

    cmd = ['python3', str(scriptpath), '--recipedir', recipedir, '--recommend']

    # Only hand over the workdir when there is a vcs_cache to measure
    if (Path(workdir) / "sources" / "vcs_cache").exists():
        cmd += ['--workdir', workdir]

    bb.note(f"Running: {' '.join(cmd)}")

    try:
        # 300 s == 5 minutes, matching the timeout message below
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=300)

        # stdout carries the recommendations
        for line in (result.stdout or "").splitlines():
            bb.plain(line)

        # anything on stderr is surfaced as a warning
        for line in (result.stderr or "").splitlines():
            bb.warn(line)

        if result.returncode != 0:
            bb.warn(f"Script exited with code {result.returncode}")

    except subprocess.TimeoutExpired:
        bb.error("Recommendation script timed out after 5 minutes")
    except Exception as e:
        bb.error(f"Failed to run recommendation script: {e}")

    # The configuration hint is printed regardless of how the script fared
    rule = "=" * 80
    hint_lines = [
        "",
        rule,
        "RECIPE CONFIGURATION FOR SWITCHING MODES:",
        rule,
        "",
        " Add this to your recipe to enable switching between VCS and hybrid modes:",
        "",
        ' # GO_MOD_FETCH_MODE: "vcs" (all git://) or "hybrid" (gomod:// + git://)',
        ' GO_MOD_FETCH_MODE ?= "vcs"',
        "",
        ' # VCS mode: all modules via git://',
        ' include ${@ "go-mod-git.inc" if d.getVar("GO_MOD_FETCH_MODE") == "vcs" else ""}',
        ' include ${@ "go-mod-cache.inc" if d.getVar("GO_MOD_FETCH_MODE") == "vcs" else ""}',
        "",
        ' # Hybrid mode: gomod:// for most, git:// for selected',
        ' include ${@ "go-mod-hybrid-gomod.inc" if d.getVar("GO_MOD_FETCH_MODE") == "hybrid" else ""}',
        ' include ${@ "go-mod-hybrid-git.inc" if d.getVar("GO_MOD_FETCH_MODE") == "hybrid" else ""}',
        ' include ${@ "go-mod-hybrid-cache.inc" if d.getVar("GO_MOD_FETCH_MODE") == "hybrid" else ""}',
        "",
        " Then switch modes with: GO_MOD_FETCH_MODE = \"hybrid\" in local.conf",
        "",
    ]
    for text in hint_lines:
        bb.plain(text)
}

addtask go_mod_recommend after do_fetch
do_go_mod_recommend[nostamp] = "1"
1270
1271# =============================================================================
1272# Go Module Cache Permission Fix
1273# =============================================================================
1274#
1275# Go's module cache creates read-only files by design. This prevents BitBake's
1276# do_unpack cleanup (cleandirs = ${S}) from removing the previous build's
1277# module cache, causing "Permission denied" errors.
1278#
1279# Solution: Add a prefunc to do_unpack that makes the module cache writable
1280# before BitBake tries to clean the directory.
1281#
1282
python go_mod_fix_permissions() {
    """
    Make Go module caches writable before do_unpack cleans its directories.

    Go creates read-only files in pkg/mod to prevent accidental modification.
    BitBake's cleandirs tries to rm -rf ${S} before unpacking, which fails on
    these read-only files, so this prefunc adds owner write permission first.

    Locations handled:
      - ${S}/pkg/mod and ${S}/src/import/pkg/mod (k3s-style layout)
      - the same two sub-paths below every directory in ${WORKDIR}/sources

    Everything is best effort: per-path failures are ignored and an unexpected
    error for a whole cache is reported with bb.warn, never raised. Nothing is
    done when S is unset.
    """
    import os
    import stat
    from pathlib import Path

    def add_user_write(path):
        """Add u+w to a single path, ignoring paths that cannot be changed."""
        try:
            os.chmod(path, os.stat(path).st_mode | stat.S_IWUSR)
        except OSError:
            # PermissionError is an OSError; skip and keep going
            pass

    def make_tree_writable(mod_cache):
        """Add u+w to mod_cache itself and to everything below it."""
        if not mod_cache.exists():
            return
        bb.note(f"Fixing Go module cache permissions: {mod_cache}")
        try:
            # os.walk never yields the top directory as an entry, so handle it
            # explicitly; this matches what `chmod -R u+w` does in the shell
            # task do_fix_go_mod_permissions.
            add_user_write(str(mod_cache))
            for root, dirs, files in os.walk(str(mod_cache)):
                # Directories first, then files, within each level
                for name in dirs + files:
                    add_user_write(os.path.join(root, name))
            bb.note(f"Fixed permissions on {mod_cache}")
        except Exception as e:
            bb.warn(f"Could not fix permissions on {mod_cache}: {e}")

    s_dir = d.getVar('S')
    if not s_dir:
        return

    # Sub-paths (relative to a source tree) where a module cache may live
    mod_subpaths = ('pkg/mod', 'src/import/pkg/mod')

    for mod_subpath in mod_subpaths:
        make_tree_writable(Path(s_dir) / mod_subpath)

    # Also check each source tree directly below WORKDIR/sources
    workdir = d.getVar('WORKDIR')
    if workdir:
        sources_dir = Path(workdir) / 'sources'
        if sources_dir.exists():
            for source_subdir in sources_dir.iterdir():
                if source_subdir.is_dir():
                    for mod_subpath in mod_subpaths:
                        make_tree_writable(source_subdir / mod_subpath)
}

# Run permission fix BEFORE do_unpack's cleandirs removes ${S}
do_unpack[prefuncs] += "go_mod_fix_permissions"
diff --git a/scripts/QUICKSTART-oe-go-mod-vcs.md b/scripts/QUICKSTART-oe-go-mod-vcs.md
index 40db7885..213867d3 100644
--- a/scripts/QUICKSTART-oe-go-mod-vcs.md
+++ b/scripts/QUICKSTART-oe-go-mod-vcs.md
@@ -379,6 +379,141 @@ GO_MODULE_CACHE_DATA = '[{"module":"github.com/spf13/cobra","version":"v1.8.1","
379 379
380--- 380---
381 381
382## Hybrid Mode: Mixing gomod:// and git:// Fetchers
383
384For large projects like k3s with hundreds of modules, the VCS-only approach (all `git://`) can be slow due to the large number of git clones. **Hybrid mode** provides a faster alternative by using:
385
386- `gomod://` - Fast proxy.golang.org downloads for most modules
387- `git://` - VCS provenance for selected important modules (e.g., containerd, k8s.io)
388
389### Benefits
390
391| Mode | Fetch Speed | VCS Provenance | Use Case |
392|------|-------------|----------------|----------|
393| **VCS** (`git://` only) | Slower | Full | Security-critical, audit requirements |
394| **Hybrid** (`gomod://` + `git://`) | Faster | Selective | Development, CI, most builds |
395
396### Step 1: Build in VCS Mode First
397
398Ensure your recipe works in VCS mode before converting:
399
400```bash
401bitbake myapp -c discover_and_generate
402bitbake myapp
403```
404
405### Step 2: Run Recommendations
406
407After a successful VCS build, analyze which modules to keep as git:// vs convert to gomod://:
408
409```bash
410bitbake myapp -c go_mod_recommend
411```
412
413This outputs size-based recommendations and suggests prefixes to keep as `git://`.
414
415### Step 3: Generate Hybrid Files
416
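417Use the hybrid conversion script with the suggested prefixes. Note that `${WORKDIR}` below is the recipe's BitBake work directory, not a shell variable: replace it with the real path (for example, the value shown by `bitbake -e myapp | grep ^WORKDIR=`):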
418
419```bash
420python3 ./meta-virtualization/scripts/oe-go-mod-fetcher-hybrid.py \
421 --recipedir ./meta-virtualization/recipes-containers/myapp/ \
422 --workdir ${WORKDIR} \
423 --git "github.com/containerd,k8s.io,sigs.k8s.io"
424```
425
426This generates three new files:
427- `go-mod-hybrid-gomod.inc` - SRC_URI entries for gomod:// fetcher (with inline checksums)
428- `go-mod-hybrid-git.inc` - SRC_URI entries for git:// fetcher (VCS provenance)
429- `go-mod-hybrid-cache.inc` - Module metadata for the git:// modules
430
431### Step 4: Configure Recipe for Mode Switching
432
433Update your recipe to allow switching between modes:
434
435```bitbake
436# GO_MOD_FETCH_MODE: "vcs" (all git://) or "hybrid" (gomod:// + git://)
437GO_MOD_FETCH_MODE ?= "vcs"
438
439# VCS mode: all modules via git://
440include ${@ "go-mod-git.inc" if d.getVar("GO_MOD_FETCH_MODE") == "vcs" else ""}
441include ${@ "go-mod-cache.inc" if d.getVar("GO_MOD_FETCH_MODE") == "vcs" else ""}
442
443# Hybrid mode: gomod:// for most, git:// for selected
444include ${@ "go-mod-hybrid-gomod.inc" if d.getVar("GO_MOD_FETCH_MODE") == "hybrid" else ""}
445include ${@ "go-mod-hybrid-git.inc" if d.getVar("GO_MOD_FETCH_MODE") == "hybrid" else ""}
446include ${@ "go-mod-hybrid-cache.inc" if d.getVar("GO_MOD_FETCH_MODE") == "hybrid" else ""}
447```
448
449### Step 5: Switch Modes
450
451Switch between modes in `local.conf`:
452
453```bash
454# Use hybrid mode (faster)
455GO_MOD_FETCH_MODE = "hybrid"
456
457# Or use VCS mode (full provenance)
458GO_MOD_FETCH_MODE = "vcs"
459```
460
461Then rebuild:
462
463```bash
464bitbake myapp
465```
466
467### Hybrid Script Options
468
469| Option | Description |
470|--------|-------------|
471| `--recipedir` | Recipe directory containing go-mod-git.inc and go-mod-cache.inc |
472| `--workdir` | BitBake workdir for size calculations (optional) |
473| `--git "prefixes"` | Comma-separated prefixes to keep as git:// (everything else becomes gomod://) |
474| `--gomod "prefixes"` | Comma-separated prefixes to convert to gomod:// (everything else stays git://) |
475| `--no-checksums` | Skip fetching SHA256 checksums (not recommended) |
476| `--list` | List all modules with sizes |
477| `--recommend` | Show size-based conversion recommendations |
478
479### Example: k3s Hybrid Conversion
480
481```bash
482# 1. Ensure VCS mode works first
483bitbake k3s
484
485# 2. Get recommendations
486bitbake k3s -c go_mod_recommend
487
488# 3. Convert with recommended prefixes (keep containerd, k8s.io as git://)
489python3 ./meta-virtualization/scripts/oe-go-mod-fetcher-hybrid.py \
490 --recipedir ./meta-virtualization/recipes-containers/k3s/ \
491 --git "github.com/containerd,k8s.io,sigs.k8s.io,github.com/rancher"
492
493# 4. Enable hybrid mode
494echo 'GO_MOD_FETCH_MODE = "hybrid"' >> conf/local.conf
495
496# 5. Build in hybrid mode
497bitbake k3s
498```
499
500### Troubleshooting Hybrid Builds
501
502#### "Permission denied" during do_unpack
503
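504Go's module cache creates read-only files. The `go-mod-vcs.bbclass` includes automatic permission fixes, but if you hit this on an existing build, make the sources writable by hand (replace `${WORKDIR}` with the recipe's actual work directory, e.g. from `bitbake -e myapp | grep ^WORKDIR=`):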
505
506```bash
507chmod -R u+w ${WORKDIR}/sources/
508bitbake myapp
509```
510
511#### BitBake parse errors with special characters
512
513Module paths like `git.sr.ht/~sbinet/gg` contain characters (`~`) that can cause BitBake parse errors in variable flag names. The hybrid script uses inline checksums (`sha256sum=...` in the SRC_URI) to avoid this issue.
514
515---
516
382## Advanced: Manual Script Invocation 517## Advanced: Manual Script Invocation
383 518
384For cases where BitBake isn't available or you need more control: 519For cases where BitBake isn't available or you need more control:
diff --git a/scripts/data/hybrid-config.json b/scripts/data/hybrid-config.json
new file mode 100644
index 00000000..751e566f
--- /dev/null
+++ b/scripts/data/hybrid-config.json
@@ -0,0 +1,24 @@
1{
2 "_comment": "Configuration for oe-go-mod-fetcher-hybrid.py recommendations",
3 "_comment2": "Prefixes listed here are suggested for git:// (VCS provenance) vs gomod:// (fast proxy)",
4
5 "vcs_priority_prefixes": [
6 "github.com/containerd",
7 "github.com/rancher",
8 "github.com/k3s-io",
9 "k8s.io",
10 "sigs.k8s.io"
11 ],
12
13 "_vcs_priority_comment": "These prefixes are suggested to keep as git:// for VCS provenance and auditability",
14
15 "size_threshold_bytes": 1048576,
16 "_size_threshold_comment": "Modules larger than this (1MB) are suggested for gomod:// conversion",
17
18 "default_git_prefixes": [
19 "github.com/containerd",
20 "k8s.io",
21 "sigs.k8s.io"
22 ],
23 "_default_git_comment": "Default --git prefixes suggested when no VCS priority matches are found"
24}
diff --git a/scripts/oe-go-mod-fetcher-hybrid.py b/scripts/oe-go-mod-fetcher-hybrid.py
new file mode 100755
index 00000000..46d86fc4
--- /dev/null
+++ b/scripts/oe-go-mod-fetcher-hybrid.py
@@ -0,0 +1,696 @@
1#!/usr/bin/env python3
2
3# SPDX-License-Identifier: GPL-2.0-only
4#
5# go-dep processor
6#
7# Copyright (C) 2025 Bruce Ashfield
8#
9# This program is free software; you can redistribute it and/or modify
10# it under the terms of the GNU General Public License version 2 as
11# published by the Free Software Foundation.
12#
13# This program is distributed in the hope that it will be useful,
14# but WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16# GNU General Public License for more details.
17#
18# You should have received a copy of the GNU General Public License along
19# with this program; if not, write to the Free Software Foundation, Inc.,
20# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21
22"""
23oe-go-mod-fetcher-hybrid.py - Convert go-mod-vcs format to hybrid gomod:// + git:// format.
24
25This script reads existing go-mod-git.inc and go-mod-cache.inc files and converts
26them to a hybrid format that uses:
27- gomod:// for modules fetched from proxy.golang.org (fast, but no VCS control)
28- git:// for modules where you want SRCREV control (auditable, but slower)
29
30Usage:
31 # List all modules and their sizes
32 oe-go-mod-fetcher-hybrid.py --recipedir ./recipes-containers/k3s --list
33
34 # Show size-based recommendations
35 oe-go-mod-fetcher-hybrid.py --recipedir ./recipes-containers/k3s --recommend
36
37 # Convert specific modules to gomod:// (rest stay as git://)
38 oe-go-mod-fetcher-hybrid.py --recipedir ./recipes-containers/k3s \\
39 --gomod "github.com/spf13,golang.org/x,google.golang.org"
40
41 # Convert specific modules to git:// (rest become gomod://)
42 oe-go-mod-fetcher-hybrid.py --recipedir ./recipes-containers/k3s \\
43 --git "github.com/containerd,github.com/rancher"
44
45 # Use a config file
46 oe-go-mod-fetcher-hybrid.py --recipedir ./recipes-containers/k3s \\
47 --config hybrid-config.json
48"""
49
50import argparse
51import json
52import re
53import sys
54import subprocess
55import os
56import hashlib
57import urllib.request
58import urllib.error
59import concurrent.futures
60from pathlib import Path
61from collections import defaultdict
62from typing import Optional
63
64
# Default configuration - used if data/hybrid-config.json is not found.
# load_hybrid_config() also takes values from here for any key that the
# JSON file leaves out.
DEFAULT_CONFIG = {
    # Module prefixes that recommend_conversion() suggests keeping as git://
    "vcs_priority_prefixes": [
        "github.com/containerd",
        "github.com/rancher",
        "github.com/k3s-io",
        "k8s.io",
        "sigs.k8s.io",
    ],
    # Prefixes whose combined size exceeds this many bytes are listed as
    # gomod:// candidates by recommend_conversion()
    "size_threshold_bytes": 1048576,  # 1MB
    # --git prefixes printed in the suggested command when no
    # vcs_priority_prefixes entry matched
    "default_git_prefixes": [
        "github.com/containerd",
        "k8s.io",
        "sigs.k8s.io",
    ],
}
81
82
def load_hybrid_config() -> dict:
    """
    Load hybrid mode configuration from data/hybrid-config.json.

    The file is looked up relative to this script's location. Keys missing
    from the file are filled in from DEFAULT_CONFIG. If the file does not
    exist, cannot be read or decoded, or does not contain a JSON object, a
    warning is printed to stderr (for the failure cases) and the defaults are
    returned instead.

    Returns:
        A configuration dict. Default values are deep-copied, so callers may
        modify the result without altering DEFAULT_CONFIG.
    """
    import copy

    config_path = Path(__file__).parent / "data" / "hybrid-config.json"

    if config_path.exists():
        try:
            with open(config_path, encoding="utf-8") as f:
                config = json.load(f)
        except (ValueError, OSError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError
            print(f"Warning: Could not load {config_path}: {e}", file=sys.stderr)
            print("Using default configuration", file=sys.stderr)
        else:
            if isinstance(config, dict):
                # Merge with defaults for any missing keys
                for key, value in DEFAULT_CONFIG.items():
                    if key not in config:
                        config[key] = copy.deepcopy(value)
                return config
            # Valid JSON, but e.g. a list or a string: unusable as a config
            print(f"Warning: Could not load {config_path}: "
                  "top-level JSON value is not an object", file=sys.stderr)
            print("Using default configuration", file=sys.stderr)

    # Deep copy so the nested prefix lists are not shared with DEFAULT_CONFIG
    return copy.deepcopy(DEFAULT_CONFIG)
107
108
def fetch_gomod_checksum(module: str, version: str) -> Optional[str]:
    """
    Return the SHA256 hex digest of a module's .zip from proxy.golang.org.

    The zip is downloaded in full and hashed locally. Any failure is reported
    as a warning on stderr and results in None.
    """
    def case_encode(text: str) -> str:
        # Go proxy convention: each ASCII capital becomes '!' + its lowercase
        return ''.join('!' + ch.lower() if 'A' <= ch <= 'Z' else ch for ch in text)

    url = f"https://proxy.golang.org/{case_encode(module)}/@v/{case_encode(version)}.zip"
    request = urllib.request.Request(url, headers={'User-Agent': 'oe-go-mod-fetcher-hybrid/1.0'})

    try:
        with urllib.request.urlopen(request, timeout=30) as response:
            payload = response.read()
            return hashlib.sha256(payload).hexdigest()
    except Exception as exc:
        # HTTPError must be tested first: it is a subclass of URLError
        if isinstance(exc, urllib.error.HTTPError):
            detail = f"HTTP {exc.code}"
        elif isinstance(exc, urllib.error.URLError):
            detail = exc.reason
        else:
            detail = exc
        print(f" WARNING: Failed to fetch {module}@{version}: {detail}", file=sys.stderr)
        return None
135
136
def fetch_checksums_parallel(modules: list[dict], max_workers: int = 8) -> dict[str, str]:
    """
    Look up checksums for many modules concurrently.

    Each entry of modules must provide 'module' and 'version'. The result maps
    "module@version" to the sha256 hex digest; modules whose checksum could
    not be fetched are simply absent. Progress is printed every 20 completed
    lookups and once more when the last one finishes.
    """
    total = len(modules)
    print(f"Fetching checksums for {total} modules from proxy.golang.org...")

    def lookup(entry):
        name, ver = entry['module'], entry['version']
        return f"{name}@{ver}", fetch_gomod_checksum(name, ver)

    found = {}
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as pool:
        jobs = [pool.submit(lookup, entry) for entry in modules]
        # Consume results in completion order, counting from 1
        for done, job in enumerate(concurrent.futures.as_completed(jobs), start=1):
            key, digest = job.result()
            if digest:
                found[key] = digest
            if done % 20 == 0 or done == total:
                print(f" Progress: {done}/{total} modules")

    return found
165
166
def parse_go_mod_cache_inc(cache_inc_path: Path) -> list[dict]:
    """
    Extract the module list stored in GO_MODULE_CACHE_DATA.

    The variable is expected to hold a single-quoted JSON array; backslash
    line continuations inside it are removed before decoding.

    Raises:
        ValueError: if no GO_MODULE_CACHE_DATA assignment is found (JSON
            decoding errors are ValueErrors as well).
    """
    assignment = re.compile(r"GO_MODULE_CACHE_DATA\s*=\s*'(\[.*\])'", re.DOTALL)
    found = assignment.search(cache_inc_path.read_text())
    if found is None:
        raise ValueError(f"Could not find GO_MODULE_CACHE_DATA in {cache_inc_path}")

    # Splice continued lines back together: drop every backslash-newline pair
    payload = ''.join(found.group(1).split('\\\n'))
    return json.loads(payload)
178
179
def parse_go_mod_git_inc(git_inc_path: Path) -> dict[str, dict]:
    """
    Collect repository and commit details from the git:// entries of go-mod-git.inc.

    Only lines that begin with 'SRC_URI +=' are considered. The result maps
    each vcs_cache hash (taken from destsuffix=vcs_cache/<hash>) to a dict
    with:
      'repo'      - "https://" followed by the host/path of the git:// URL
      'commit'    - value of the rev= parameter, or '' when there is none
      'full_line' - the stripped source line
    """
    # git://host/path;<params>;destsuffix=vcs_cache/<hash>
    uri_re = re.compile(r'git://([^;]+);([^"]*);destsuffix=vcs_cache/([a-f0-9]+)')
    rev_re = re.compile(r'rev=([a-f0-9]+)')

    entries = {}
    for raw in git_inc_path.read_text().split('\n'):
        hit = uri_re.search(raw) if raw.startswith('SRC_URI +=') else None
        if hit is None:
            continue

        repo_path, params, vcs_hash = hit.groups()
        rev = rev_re.search(params)
        entries[vcs_hash] = {
            'repo': f"https://{repo_path}",
            'commit': rev.group(1) if rev else '',
            'full_line': raw.strip(),
        }

    return entries
210
211
def get_repo_sizes(vcs_info: dict, workdir: Optional[Path] = None) -> dict[str, int]:
    """
    Measure the on-disk size of each module's VCS cache directory.

    Args:
        vcs_info: mapping keyed by vcs_cache hash; only the keys are used.
        workdir: BitBake workdir. The cache is looked for in
            <workdir>/sources/vcs_cache first, then <workdir>/vcs_cache.

    Returns:
        Mapping of vcs hash -> size in bytes as reported by `du -sb`. Hashes
        without a cache directory, or whose size could not be determined
        (du missing, failing, timing out or printing nothing usable), are
        left out. Empty when workdir is None or has no vcs_cache.
    """
    sizes: dict[str, int] = {}

    if workdir is None:
        return sizes

    # Try common locations for vcs_cache; the first existing one wins
    for subpath in ['sources/vcs_cache', 'vcs_cache']:
        vcs_cache_dir = workdir / subpath
        if vcs_cache_dir.exists():
            break
    else:
        return sizes

    for vcs_hash in vcs_info:
        cache_path = vcs_cache_dir / vcs_hash
        if not cache_path.exists():
            continue
        try:
            result = subprocess.run(
                ['du', '-sb', str(cache_path)],
                capture_output=True, text=True, timeout=10
            )
        except (OSError, subprocess.TimeoutExpired):
            # OSError: `du` is not installed or not executable. Sizes are
            # advisory, so skip this entry instead of aborting the scan.
            continue
        if result.returncode != 0:
            continue
        try:
            # du prints "<bytes>\t<path>"
            sizes[vcs_hash] = int(result.stdout.split()[0])
        except (IndexError, ValueError):
            # Empty or non-numeric output
            continue

    return sizes
242
243
def format_size(size_bytes: int) -> str:
    """
    Render a byte count with one decimal place and a B/KB/MB/GB/TB suffix.

    The value is divided by 1024 until it drops below 1024; anything that is
    still 1024 or more after the GB step is expressed in TB.
    """
    units = ('B', 'KB', 'MB', 'GB')
    value = size_bytes
    step = 0
    while step < len(units):
        if value < 1024:
            return f"{value:.1f} {units[step]}"
        value = value / 1024
        step += 1
    return f"{value:.1f} TB"
251
252
def list_modules(modules: list[dict], vcs_info: dict, sizes: dict) -> None:
    """
    Print a table of all modules, grouped by the first two path components.

    Each row shows module path, version and the size recorded in sizes for
    the module's 'vcs_hash' ('-' when unknown or zero). Groups with more than
    one module get a subtotal row, and a grand total closes the listing.
    vcs_info is accepted for signature compatibility and is not used here.
    """
    # Group by prefix; a path with fewer than two components is its own prefix
    groups = defaultdict(list)
    for entry in modules:
        groups['/'.join(entry['module'].split('/')[:2])].append(entry)

    rule = "=" * 100
    print(f"\n{'Module':<60} {'Version':<25} {'Size':>12}")
    print(rule)

    grand_total = 0
    for prefix in sorted(groups):
        members = groups[prefix]
        group_total = 0
        for entry in sorted(members, key=lambda m: m['module']):
            size = sizes.get(entry.get('vcs_hash', ''), 0)
            group_total += size
            grand_total += size

            shown = format_size(size) if size > 0 else '-'
            print(f" {entry['module']:<58} {entry['version']:<25} {shown:>12}")

        if len(members) > 1:
            print(f" {'[subtotal]':<58} {'':<25} {format_size(group_total):>12}")
        print()

    print(rule)
    print(f"Total: {len(modules)} modules, {format_size(grand_total)}")
286
287
def recommend_conversion(modules: list[dict], vcs_info: dict, sizes: dict, recipedir: Optional[Path] = None) -> None:
    """Print size-based recommendations for a hybrid gomod:// + git:// split.

    Configuration is loaded from data/hybrid-config.json if it exists,
    otherwise defaults are used. This allows easy customization of:
    - vcs_priority_prefixes: modules to suggest keeping as git://
    - size_threshold_bytes: threshold for suggesting gomod:// conversion
    - default_git_prefixes: fallback prefixes if no matches found

    Args:
        modules: module entries; each needs 'module', and 'vcs_hash' is used
            to look up its size.
        vcs_info: accepted for signature compatibility; not used here.
        sizes: mapping of vcs hash -> size in bytes (may be empty).
        recipedir: recipe directory to show in the suggested command; a
            placeholder is printed when it is not given.

    The summary table shows only the 25 largest prefixes, but every prefix is
    classified, so a VCS-priority prefix is suggested even when it is small
    or when no size information is available at all.
    """
    # Load configuration from external file (or use defaults)
    config = load_hybrid_config()
    vcs_priority_patterns = tuple(config.get('vcs_priority_prefixes', DEFAULT_CONFIG['vcs_priority_prefixes']))
    size_threshold = config.get('size_threshold_bytes', DEFAULT_CONFIG['size_threshold_bytes'])
    default_git_prefixes = config.get('default_git_prefixes', DEFAULT_CONFIG['default_git_prefixes'])

    # Number of table rows to display
    display_limit = 25

    # Accumulate size and module count per prefix (first two path components)
    prefix_sizes = defaultdict(lambda: {'size': 0, 'count': 0, 'modules': []})

    for mod in modules:
        prefix = '/'.join(mod['module'].split('/')[:2])
        entry = prefix_sizes[prefix]
        entry['size'] += sizes.get(mod.get('vcs_hash', ''), 0)
        entry['count'] += 1
        entry['modules'].append(mod['module'])

    # Sort by size descending
    sorted_prefixes = sorted(prefix_sizes.items(), key=lambda x: x[1]['size'], reverse=True)

    total_size = sum(p['size'] for p in prefix_sizes.values())

    print("\n" + "=" * 80)
    print("GO MODULE HYBRID CONVERSION RECOMMENDATIONS")
    print("=" * 80)

    print(f"\n{'Prefix':<45} {'Count':>8} {'Size':>12} {'% Total':>10}")
    print("-" * 80)

    gomod_candidates = []
    git_candidates = []

    for rank, (prefix, info) in enumerate(sorted_prefixes):
        if rank < display_limit:
            pct = (info['size'] / total_size * 100) if total_size > 0 else 0
            print(f"{prefix:<45} {info['count']:>8} {format_size(info['size']):>12} {pct:>9.1f}%")

        # Classification is independent of what is displayed: with all sizes
        # at 0 the sort order is arbitrary, and a priority prefix must not be
        # lost just because it landed beyond the displayed rows.
        if prefix.startswith(vcs_priority_patterns):
            git_candidates.append(prefix)
        elif info['size'] > size_threshold:
            gomod_candidates.append(prefix)

    print("-" * 80)
    print(f"{'Total':<45} {len(modules):>8} {format_size(total_size):>12}")

    if gomod_candidates:
        print("\n" + "=" * 80)
        print("LARGEST MODULE PREFIXES (top candidates for gomod:// proxy fetch):")
        print("=" * 80)
        print("\n " + ",".join(gomod_candidates[:10]))

        # Calculate potential savings
        gomod_size = sum(prefix_sizes[p]['size'] for p in gomod_candidates)
        if total_size > 0:
            print(f"\n These account for {format_size(gomod_size)} ({gomod_size/total_size*100:.0f}% of total)")

    print("\n" + "=" * 80)
    print("SUGGESTED --git PREFIXES (keep as git:// for VCS control):")
    print("=" * 80)

    if git_candidates:
        print("\n " + ",".join(git_candidates))
    else:
        print("\n (none identified)")

    print("\n NOTE: With --git, ALL other modules automatically become gomod://")
    print(" (not just the large ones listed above)")

    # Output conversion command
    print("\n" + "=" * 80)
    print("TO CONVERT TO HYBRID FORMAT:")
    print("=" * 80)
    print()

    # Absolute path of this script, so the printed command works from anywhere
    script_path = Path(__file__).resolve()

    # Fall back to default_git_prefixes from config when nothing matched
    git_arg = ','.join(git_candidates) if git_candidates else ','.join(default_git_prefixes)
    recipedir_arg = recipedir if recipedir else "<your-recipe-directory>"

    print(f" python3 {script_path} \\")
    print(f" --recipedir {recipedir_arg} \\")
    print(f" --git \"{git_arg}\"")
401
def fetch_gomod_checksum(module: str, version: str) -> Optional[str]:
    """Fetch SHA256 checksum for a module from proxy.golang.org.

    NOTE: this is the second definition of fetch_gomod_checksum in this file
    and therefore the one that is bound at import time, so it has to handle
    every case on its own.

    Both the module path and the version are case-encoded as the Go module
    proxy protocol requires (each uppercase letter becomes '!' followed by its
    lowercase form), e.g. a version such as "v1.0.0-RC1".

    Args:
        module: Go module path, e.g. "github.com/BurntSushi/toml".
        version: module version, e.g. "v1.3.2".

    Returns:
        Hex SHA256 digest of the module's .zip, or None if the download
        failed (a warning is printed to stderr in that case).
    """
    import urllib.request
    import hashlib

    def case_encode(text: str) -> str:
        return re.sub(r'([A-Z])', lambda m: '!' + m.group(1).lower(), text)

    url = f"https://proxy.golang.org/{case_encode(module)}/@v/{case_encode(version)}.zip"

    try:
        # Identify the tool instead of sending the default Python-urllib agent
        request = urllib.request.Request(url, headers={'User-Agent': 'oe-go-mod-fetcher-hybrid/1.0'})
        with urllib.request.urlopen(request, timeout=30) as response:
            content = response.read()
            return hashlib.sha256(content).hexdigest()
    except Exception as e:
        # Best effort: a missing checksum is reported by the caller, not fatal
        print(f" Warning: Could not fetch checksum for {module}@{version}: {e}", file=sys.stderr)
        return None
419
420
def _classify_hybrid_modules(
    modules: list[dict],
    git_prefixes: list[str],
    gomod_prefixes: list[str],
) -> tuple[list[dict], list[dict]]:
    """Split modules into ``(git_modules, gomod_modules)`` by path prefix.

    Decision table for a module path:
      * matches a git prefix (even if it also matches a gomod prefix) -> git
      * matches only a gomod prefix                                    -> gomod
      * matches nothing and git prefixes were given                    -> gomod
      * matches nothing and only gomod prefixes were given             -> git
      * no prefixes at all                                             -> gomod
    """
    # str.startswith accepts a tuple; an empty tuple never matches.
    git_match = tuple(git_prefixes)
    gomod_match = tuple(gomod_prefixes)

    git_modules = []
    gomod_modules = []

    for mod in modules:
        mod_path = mod['module']
        is_git = mod_path.startswith(git_match)
        is_gomod = mod_path.startswith(gomod_match)

        if is_git:
            if is_gomod:
                print(f"Warning: {mod_path} matches both git and gomod prefixes, using git://",
                      file=sys.stderr)
            git_modules.append(mod)
        elif is_gomod:
            gomod_modules.append(mod)
        elif gomod_prefixes and not git_prefixes:
            # Only a gomod allow-list was given: everything else stays git://
            git_modules.append(mod)
        else:
            # A git allow-list was given (rest becomes gomod://), or no
            # prefixes were given at all (default to gomod:// for everything).
            gomod_modules.append(mod)

    return git_modules, gomod_modules


def generate_hybrid_files(
    modules: list[dict],
    vcs_info: dict,
    git_prefixes: list[str],
    gomod_prefixes: list[str],
    output_dir: Path,
    fetch_checksums: bool = False
) -> None:
    """Generate the three hybrid include files in *output_dir*.

    Writes:
      * ``go-mod-hybrid-gomod.inc`` - ``gomod://`` SRC_URI entries
      * ``go-mod-hybrid-git.inc``   - ``git://`` SRC_URI lines copied from *vcs_info*
      * ``go-mod-hybrid-cache.inc`` - ``GO_MODULE_CACHE_DATA`` for the git modules

    Args:
        modules: Module dicts; each needs ``'module'`` and ``'version'`` keys
            and may carry a ``'vcs_hash'`` key used to look up *vcs_info*.
        vcs_info: Mapping of vcs_hash -> dict containing a ``'full_line'``
            entry (the SRC_URI line to emit for that repository).
        git_prefixes: Module path prefixes to keep as ``git://``.
        gomod_prefixes: Module path prefixes to convert to ``gomod://``.
        output_dir: Directory to write into; created if missing.
        fetch_checksums: When true, checksums for the gomod modules are
            fetched and embedded in the generated SRC_URI entries.
    """
    output_dir.mkdir(parents=True, exist_ok=True)

    def by_module(mod):
        return mod['module']

    git_modules, gomod_modules = _classify_hybrid_modules(
        modules, git_prefixes, gomod_prefixes)

    print("\nClassification:")
    print(f"  gomod:// (proxy): {len(gomod_modules)} modules")
    print(f"  git:// (VCS):     {len(git_modules)} modules")

    # Fetch checksums in parallel (always, unless --no-checksums)
    checksum_map = {}
    if fetch_checksums and gomod_modules:
        checksum_map = fetch_checksums_parallel(gomod_modules)
        if len(checksum_map) < len(gomod_modules):
            missing = len(gomod_modules) - len(checksum_map)
            print(f"  WARNING: Failed to fetch {missing} checksums", file=sys.stderr)

    # ------------------------------------------------------------------
    # gomod include file
    # ------------------------------------------------------------------
    gomod_lines = [
        "# Generated by oe-go-mod-fetcher-hybrid.py",
        "# Go modules fetched from proxy.golang.org (fast path)",
        "#",
        "# These modules are fetched as pre-built zip files from the Go proxy.",
        "# They do not provide VCS commit-level provenance but are much faster.",
        "",
        "inherit go-mod",
        ""
    ]

    for mod in sorted(gomod_modules, key=by_module):
        name = mod['module']
        version = mod['version']
        uri = f"gomod://{name};version={version}"
        checksum = checksum_map.get(f"{name}@{version}")
        if checksum is not None:
            # Include checksum inline to avoid BitBake variable flag name issues
            # (e.g., ~ character in git.sr.ht/~sbinet/gg causes parse errors)
            uri += f";sha256sum={checksum}"
        gomod_lines.append(f'SRC_URI += "{uri}"')

    gomod_file = output_dir / 'go-mod-hybrid-gomod.inc'
    gomod_file.write_text('\n'.join(gomod_lines) + '\n')
    print(f"\nWrote {gomod_file}")

    if not fetch_checksums and gomod_modules:
        # The only checksum-related CLI switch is --no-checksums; fetching is
        # the default, so point the user at dropping that switch.
        print("  WARNING: Checksums not fetched (re-run without --no-checksums to fetch them)")
        print("  BitBake will fail on first fetch and show required checksums")

    # ------------------------------------------------------------------
    # git include file
    # ------------------------------------------------------------------
    git_lines = [
        "# Generated by oe-go-mod-fetcher-hybrid.py",
        "# Go modules fetched from git repositories (VCS path)",
        "#",
        "# These modules are fetched directly from their git repositories.",
        "# They provide full VCS provenance and allow easy SRCREV bumping.",
        ""
    ]

    sorted_git = sorted(git_modules, key=by_module)

    # Several modules can share one vcs_hash; emit each SRC_URI line only
    # once so the include file does not contain duplicate fetch entries.
    emitted_hashes = set()
    for mod in sorted_git:
        vcs_hash = mod.get('vcs_hash', '')
        if vcs_hash not in vcs_info:
            # The module still lands in GO_MODULE_CACHE_DATA below, so make
            # the missing fetch entry visible instead of dropping it silently.
            print(f"  WARNING: No VCS entry for {mod['module']} "
                  f"(vcs_hash={vcs_hash!r}); no git:// SRC_URI written",
                  file=sys.stderr)
            continue
        if vcs_hash in emitted_hashes:
            continue
        emitted_hashes.add(vcs_hash)
        git_lines.append(vcs_info[vcs_hash]['full_line'])

    git_file = output_dir / 'go-mod-hybrid-git.inc'
    git_file.write_text('\n'.join(git_lines) + '\n')
    print(f"Wrote {git_file}")

    # ------------------------------------------------------------------
    # cache metadata file for git modules
    # ------------------------------------------------------------------
    cache_lines = [
        "# Generated by oe-go-mod-fetcher-hybrid.py",
        "# Metadata for git-fetched modules (VCS path)",
        "# Used by go-mod-vcs.bbclass to build module cache from git checkouts",
        "",
        "inherit go-mod-vcs",
        "",
    ]

    # Format GO_MODULE_CACHE_DATA with one entry per line for readability
    # (matches go-mod-cache.inc format: '[\
    #   {entry1},\
    #   {entry2}]')
    cache_lines.append("# Module metadata for cache building (one module per line)")
    if sorted_git:
        cache_lines.append("GO_MODULE_CACHE_DATA = '[\\")
        # Compact single-line JSON per entry
        entries = [json.dumps(mod, separators=(',', ':')) for mod in sorted_git]
        cache_lines.extend(f"{entry},\\" for entry in entries[:-1])
        cache_lines.append(f"{entries[-1]}]'")
    else:
        cache_lines.append("GO_MODULE_CACHE_DATA = '[]'")

    cache_file = output_dir / 'go-mod-hybrid-cache.inc'
    cache_file.write_text('\n'.join(cache_lines) + '\n')
    print(f"Wrote {cache_file}")

    # Print usage instructions
    print("\n" + "=" * 70)
    print("NEXT STEPS:")
    print("=" * 70)
    print("""
1. Update your recipe to enable mode switching:

   # GO_MOD_FETCH_MODE: "vcs" (all git://) or "hybrid" (gomod:// + git://)
   GO_MOD_FETCH_MODE ?= "vcs"

   # VCS mode: all modules via git://
   include ${@ "go-mod-git.inc" if d.getVar("GO_MOD_FETCH_MODE") == "vcs" else ""}
   include ${@ "go-mod-cache.inc" if d.getVar("GO_MOD_FETCH_MODE") == "vcs" else ""}

   # Hybrid mode: gomod:// for most, git:// for selected
   include ${@ "go-mod-hybrid-gomod.inc" if d.getVar("GO_MOD_FETCH_MODE") == "hybrid" else ""}
   include ${@ "go-mod-hybrid-git.inc" if d.getVar("GO_MOD_FETCH_MODE") == "hybrid" else ""}
   include ${@ "go-mod-hybrid-cache.inc" if d.getVar("GO_MOD_FETCH_MODE") == "hybrid" else ""}

2. Run bitbake once in hybrid mode to fetch gomod:// checksums:

   GO_MOD_FETCH_MODE = "hybrid"  # in local.conf
   bitbake <recipe>

3. Copy the checksums from the error log into go-mod-hybrid-gomod.inc

4. Build again - or switch back to VCS mode anytime:

   GO_MOD_FETCH_MODE = "vcs"     # full VCS provenance
   GO_MOD_FETCH_MODE = "hybrid"  # faster proxy fetch
""")
588
589
def _load_prefix_config(config_path: Path) -> tuple[list[str], list[str]]:
    """Read ``(git_prefixes, gomod_prefixes)`` from a JSON config file.

    The file must contain a JSON object; its optional ``"git"`` and
    ``"gomod"`` keys hold either a list of prefix strings or a single
    comma-separated string (same form as the ``--git`` / ``--gomod`` flags).
    Entries are stripped and empty ones discarded, so an empty string can
    never become a prefix that matches every module.

    Prints an error to stderr and exits with status 1 when the file is
    missing, unreadable, not valid JSON, or has the wrong shape.
    """
    if not config_path.exists():
        print(f"Error: Config file {config_path} not found", file=sys.stderr)
        sys.exit(1)

    try:
        config = json.loads(config_path.read_text())
    except (OSError, ValueError) as e:
        # json.JSONDecodeError and UnicodeDecodeError are ValueError subclasses
        print(f"Error: Could not read config file {config_path}: {e}", file=sys.stderr)
        sys.exit(1)

    if not isinstance(config, dict):
        print(f"Error: Config file {config_path} must contain a JSON object",
              file=sys.stderr)
        sys.exit(1)

    loaded = []
    for key in ('git', 'gomod'):
        value = config.get(key, [])
        if isinstance(value, str):
            # Extending a list with a bare string would add one prefix per
            # character; treat it like the comma-separated CLI form instead.
            value = value.split(',')
        if not isinstance(value, list) or not all(isinstance(p, str) for p in value):
            print(f"Error: '{key}' in {config_path} must be a list of strings",
                  file=sys.stderr)
            sys.exit(1)
        loaded.append([p.strip() for p in value if p.strip()])

    return loaded[0], loaded[1]


def main():
    """Command-line entry point.

    Loads ``go-mod-cache.inc`` and ``go-mod-git.inc`` from ``--recipedir``,
    then either lists modules (``--list``), prints recommendations
    (``--recommend``), or generates the hybrid include files using the
    prefixes given via ``--git``, ``--gomod`` and/or ``--config``.
    Exits with status 1 on invalid input.
    """
    parser = argparse.ArgumentParser(
        description='Convert go-mod-vcs format to hybrid gomod:// + git:// format',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__
    )

    parser.add_argument('--recipedir', type=Path, required=True,
                        help='Recipe directory containing go-mod-git.inc and go-mod-cache.inc')

    parser.add_argument('--workdir', type=Path, default=None,
                        help='BitBake workdir containing vcs_cache (for size calculations)')

    # Actions
    parser.add_argument('--list', action='store_true',
                        help='List all modules with sizes')

    parser.add_argument('--recommend', action='store_true',
                        help='Show size-based recommendations for conversion')

    # Conversion options
    parser.add_argument('--git', type=str, default='',
                        help='Comma-separated module prefixes to keep as git:// (rest become gomod://)')

    parser.add_argument('--gomod', type=str, default='',
                        help='Comma-separated module prefixes to convert to gomod:// (rest stay git://)')

    parser.add_argument('--config', type=Path, default=None,
                        help='JSON config file with git/gomod prefix lists')

    parser.add_argument('--no-checksums', action='store_true',
                        help='Skip fetching SHA256 checksums (not recommended)')

    parser.add_argument('--output-dir', type=Path, default=None,
                        help='Output directory for hybrid files (default: recipedir)')

    args = parser.parse_args()

    # Validate inputs
    cache_inc = args.recipedir / 'go-mod-cache.inc'
    git_inc = args.recipedir / 'go-mod-git.inc'

    for required in (cache_inc, git_inc):
        if not required.exists():
            print(f"Error: {required} not found", file=sys.stderr)
            sys.exit(1)

    # Parse existing files
    print(f"Loading {cache_inc}...")
    modules = parse_go_mod_cache_inc(cache_inc)
    print(f"  Found {len(modules)} modules")

    print(f"Loading {git_inc}...")
    vcs_info = parse_go_mod_git_inc(git_inc)
    print(f"  Found {len(vcs_info)} VCS entries")

    # Get sizes if workdir provided
    sizes = {}
    if args.workdir:
        print(f"Calculating sizes from {args.workdir}...")
        sizes = get_repo_sizes(vcs_info, args.workdir)
        print(f"  Got sizes for {len(sizes)} repos")

    # Handle actions
    if args.list:
        list_modules(modules, vcs_info, sizes)
        return

    if args.recommend:
        recommend_conversion(modules, vcs_info, sizes, args.recipedir)
        return

    # Handle conversion: CLI prefixes first, then any from the config file
    git_prefixes = [p.strip() for p in args.git.split(',') if p.strip()]
    gomod_prefixes = [p.strip() for p in args.gomod.split(',') if p.strip()]

    if args.config:
        config_git, config_gomod = _load_prefix_config(args.config)
        git_prefixes.extend(config_git)
        gomod_prefixes.extend(config_gomod)

    if not git_prefixes and not gomod_prefixes:
        print("Error: Specify --git, --gomod, --config, --list, or --recommend",
              file=sys.stderr)
        parser.print_help()
        sys.exit(1)

    output_dir = args.output_dir or args.recipedir

    generate_hybrid_files(
        modules=modules,
        vcs_info=vcs_info,
        git_prefixes=git_prefixes,
        gomod_prefixes=gomod_prefixes,
        output_dir=output_dir,
        fetch_checksums=not args.no_checksums  # Default: fetch checksums
    )
693
694
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()