# # Copyright OpenEmbedded Contributors # # SPDX-License-Identifier: MIT # # go-mod-vcs.bbclass # # Provides tasks for building Go module cache from VCS (git) sources. # This enables fully offline Go builds using modules fetched via BitBake's # git fetcher instead of the Go proxy. # # USAGE: # 1. Add to recipe: inherit go-mod-vcs # 2. Define GO_MODULE_CACHE_DATA as JSON array of module metadata # 3. Include go-mod-git.inc for SRC_URI git entries # 4. Include go-mod-cache.inc for GO_MODULE_CACHE_DATA # # DEPENDENCIES: # - Works with oe-core's go.bbclass and go-mod.bbclass # - h1: checksums calculated in pure Python (with go-dirhash-native fallback) # - Optional: go-dirhash-native for fallback checksum calculation # # TASKS PROVIDED: # - do_create_module_cache: Builds module cache from git repos # - do_sync_go_files: Synchronizes go.sum with cache checksums # # GENERATED FILES: # The oe-go-mod-fetcher.py script generates two .inc files per recipe: # - go-mod-git.inc: SRC_URI and SRCREV entries for git fetching # - go-mod-cache.inc: GO_MODULE_CACHE_DATA JSON + inherit go-mod-vcs # # This class extracts the reusable Python task code, so generated .inc files # only contain recipe-specific data (SRC_URI entries and module metadata). # # ARCHITECTURE NOTES: # - assemble_zip() must create zips INSIDE TemporaryDirectory context # - synthesize_go_mod() preserves go version directive from original go.mod # # CONFIGURATION: # GO_MOD_SKIP_ZIP_EXTRACTION - Set to "1" to skip extracting zips to pkg/mod # Go can extract on-demand from cache (experimental) # # Override go.bbclass defaults for VCS-based module caching # These ensure offline builds use our pre-built module cache export GOMODCACHE = "${S}/pkg/mod" export GOPROXY = "off" export GOSUMDB = "off" export GOTOOLCHAIN = "local" python do_create_module_cache() { """ Build Go module cache from downloaded git repositories. This creates the same cache structure as oe-core's gomod.bbclass. NOTE: h1: checksums are calculated in pure Python during zip creation. Falls back to go-dirhash-native if Python hash fails. """ import hashlib import json import os import shutil import subprocess import zipfile import stat import base64 from pathlib import Path from datetime import datetime # Check for optional go-dirhash-native fallback tool go_dirhash_helper = Path(d.getVar('STAGING_BINDIR_NATIVE') or '') / "dirhash" if not go_dirhash_helper.exists(): go_dirhash_helper = None bb.debug(1, "go-dirhash-native not available, using pure Python for h1: checksums") def calculate_h1_hash_python(zip_path): """Calculate Go module h1: hash in pure Python.""" lines = [] with zipfile.ZipFile(zip_path, 'r') as zf: for info in sorted(zf.infolist(), key=lambda x: x.filename): if info.is_dir(): continue file_data = zf.read(info.filename) file_hash = hashlib.sha256(file_data).hexdigest() lines.append(f"{file_hash} {info.filename}\n") summary = "".join(lines).encode('utf-8') final_hash = hashlib.sha256(summary).digest() return "h1:" + base64.b64encode(final_hash).decode('ascii') def calculate_h1_hash_native(zip_path): """Calculate Go module h1: hash using go-dirhash-native (fallback).""" if go_dirhash_helper is None: return None result = subprocess.run( [str(go_dirhash_helper), str(zip_path)], capture_output=True, text=True, check=False, timeout=60 ) if result.returncode != 0: return None hash_value = result.stdout.strip() if not hash_value.startswith("h1:"): return None return hash_value # Define helper functions BEFORE they are used def escape_module_path(path): """Escape capital letters using exclamation points (same as BitBake gomod.py)""" import re return re.sub(r'([A-Z])', lambda m: '!' + m.group(1).lower(), path) def sanitize_module_name(name): """Remove quotes from module names""" if not name: return name stripped = name.strip() if len(stripped) >= 2 and stripped[0] == '"' and stripped[-1] == '"': return stripped[1:-1] return stripped go_sum_hashes = {} go_sum_entries = {} go_sum_path = Path(d.getVar('S')) / "src" / "import" / "go.sum" if go_sum_path.exists(): with open(go_sum_path, 'r') as f: for line in f: parts = line.strip().split() if len(parts) != 3: continue mod, ver, hash_value = parts mod = sanitize_module_name(mod) go_sum_entries[(mod, ver)] = hash_value if mod.endswith('/go.mod') or not hash_value.startswith('h1:'): continue key = f"{mod}@{ver}" go_sum_hashes.setdefault(key, hash_value) def load_require_versions(go_mod_path): versions = {} if not go_mod_path.exists(): return versions in_block = False with go_mod_path.open('r', encoding='utf-8') as f: for raw_line in f: line = raw_line.strip() if line.startswith('require ('): in_block = True continue if in_block and line == ')': in_block = False continue if line.startswith('require ') and '(' not in line: parts = line.split() if len(parts) >= 3: versions[sanitize_module_name(parts[1])] = parts[2] continue if in_block and line and not line.startswith('//'): parts = line.split() if len(parts) >= 2: versions[sanitize_module_name(parts[0])] = parts[1] return versions def load_replacements(go_mod_path): replacements = {} if not go_mod_path.exists(): return replacements def parse_replace_line(content): if '//' in content: content = content.split('//', 1)[0].strip() if '=>' not in content: return left, right = [part.strip() for part in content.split('=>', 1)] left_parts = left.split() right_parts = right.split() if not left_parts or not right_parts: return old_module = sanitize_module_name(left_parts[0]) old_version = left_parts[1] if len(left_parts) > 1 else None new_module = sanitize_module_name(right_parts[0]) new_version = right_parts[1] if len(right_parts) > 1 else None replacements[old_module] = { "old_version": old_version, "new_module": new_module, "new_version": new_version, } in_block = False with go_mod_path.open('r', encoding='utf-8') as f: for raw_line in f: line = raw_line.strip() if line.startswith('replace ('): in_block = True continue if in_block and line == ')': in_block = False continue if line.startswith('replace ') and '(' not in line: parse_replace_line(line[len('replace '):]) continue if in_block and line and not line.startswith('//'): parse_replace_line(line) return replacements go_mod_path = Path(d.getVar('S')) / "src" / "import" / "go.mod" require_versions = load_require_versions(go_mod_path) replacements = load_replacements(go_mod_path) def duplicate_module_version(module_path, source_version, alias_version, timestamp): if alias_version == source_version: return escaped_module = escape_module_path(module_path) cache_dir = Path(d.getVar('S')) / "pkg" / "mod" / "cache" / "download" download_dir = cache_dir / escaped_module / "@v" download_dir.mkdir(parents=True, exist_ok=True) escaped_source_version = escape_module_path(source_version) escaped_alias_version = escape_module_path(alias_version) source_base = download_dir / escaped_source_version alias_base = download_dir / escaped_alias_version if not (source_base.with_suffix('.zip').exists() and source_base.with_suffix('.mod').exists()): return if alias_base.with_suffix('.zip').exists(): return import shutil shutil.copy2(source_base.with_suffix('.zip'), alias_base.with_suffix('.zip')) shutil.copy2(source_base.with_suffix('.mod'), alias_base.with_suffix('.mod')) ziphash_src = source_base.with_suffix('.ziphash') if ziphash_src.exists(): shutil.copy2(ziphash_src, alias_base.with_suffix('.ziphash')) info_path = alias_base.with_suffix('.info') info_data = { "Version": alias_version, "Time": timestamp } with open(info_path, 'w') as f: json.dump(info_data, f) bb.note(f"Duplicated module version {module_path}@{alias_version} from {source_version} for replace directive") def create_module_zip(module_path, version, vcs_path, subdir, timestamp): """Create module zip file from git repository""" module_path = sanitize_module_name(module_path) # Detect canonical module path FIRST from go.mod. # This prevents creating duplicate cache entries for replace directives. # For "github.com/google/cadvisor => github.com/k3s-io/cadvisor", the # k3s-io fork declares "module github.com/google/cadvisor" in its go.mod, # so we create the cache ONLY at github.com/google/cadvisor. def detect_canonical_module_path(vcs_path, subdir_hint, requested_module): """ Read go.mod file to determine the canonical module path. This is critical for replace directives - always use the path declared in the module's own go.mod, not the replacement path. """ path = Path(vcs_path) # Build list of candidate subdirs to check candidates = [] if subdir_hint: candidates.append(subdir_hint) # Also try deriving subdir from module path parts = requested_module.split('/') if len(parts) > 3: guess = '/'.join(parts[3:]) if guess and guess not in candidates: candidates.append(guess) # Always check root directory last if '' not in candidates: candidates.append('') # Search for go.mod file and read its module declaration for candidate in candidates: gomod_file = path / candidate / "go.mod" if candidate else path / "go.mod" if not gomod_file.exists(): continue try: with gomod_file.open('r', encoding='utf-8') as fh: first_line = fh.readline().strip() # Parse: "module github.com/example/repo" if first_line.startswith('module '): canonical = first_line[7:].strip() # Skip "module " # Remove any inline comments if '//' in canonical: canonical = canonical.split('//')[0].strip() # CRITICAL: Remove quotes from module names canonical = sanitize_module_name(canonical) return canonical, candidate except (UnicodeDecodeError, IOError): continue # Fallback: if no go.mod found, use requested path # This is normal for some modules (e.g., pure Go stdlib-like packages) bb.note(f"No go.mod found for {requested_module} in {vcs_path}, using requested path") return requested_module, '' canonical_module_path, detected_subdir = detect_canonical_module_path(vcs_path, subdir, module_path) # Keep track of the original (requested) module path for replaced modules # We'll need to create symlinks from requested -> canonical after cache creation requested_module_path = module_path # If canonical path differs from requested path, this is a replace directive if canonical_module_path != module_path: bb.note(f"Replace directive detected: {module_path} -> canonical {canonical_module_path}") bb.note(f"Creating cache at canonical path, will symlink from requested path") module_path = canonical_module_path escaped_module = escape_module_path(module_path) escaped_version = escape_module_path(version) # Create cache directory structure using CANONICAL module path workdir = Path(d.getVar('WORKDIR')) s = Path(d.getVar('S')) cache_dir = s / "pkg" / "mod" / "cache" / "download" download_dir = cache_dir / escaped_module / "@v" download_dir.mkdir(parents=True, exist_ok=True) bb.note(f"Creating cache for {module_path}@{version}") # Override subdir with detected subdir from canonical path detection if detected_subdir: subdir = detected_subdir def detect_subdir() -> str: hinted = subdir or "" path = Path(vcs_path) def path_exists(rel: str) -> bool: if not rel: return True return (path / rel).exists() candidate_order = [] if hinted and hinted not in candidate_order: candidate_order.append(hinted) module_parts = module_path.split('/') if len(module_parts) > 3: guess = '/'.join(module_parts[3:]) if guess and guess not in candidate_order: candidate_order.append(guess) target_header = f"module {module_path}\n" found = None try: for go_mod in path.rglob('go.mod'): rel = go_mod.relative_to(path) if any(part.startswith('.') and part != '.' for part in rel.parts): continue if 'vendor' in rel.parts: continue try: with go_mod.open('r', encoding='utf-8') as fh: first_line = fh.readline() except UnicodeDecodeError: continue if first_line.strip() == target_header.strip(): rel_dir = go_mod.parent.relative_to(path).as_posix() found = rel_dir break except Exception: pass if found is not None and found not in candidate_order: candidate_order.insert(0, found) candidate_order.append('') for candidate in candidate_order: if path_exists(candidate): return candidate return '' subdir_resolved = detect_subdir() # 1. Create .info file info_path = download_dir / f"{escaped_version}.info" info_data = { "Version": version, "Time": timestamp } with open(info_path, 'w') as f: json.dump(info_data, f) bb.debug(1, f"Created {info_path}") # 2. Create .mod file mod_path = download_dir / f"{escaped_version}.mod" effective_subdir = subdir_resolved def candidate_subdirs(): candidates = [] parts = module_path.split('/') if len(parts) >= 4: extra = '/'.join(parts[3:]) if extra: candidates.append(extra) if effective_subdir: candidates.insert(0, effective_subdir) else: candidates.append('') suffix = parts[-1] if suffix.startswith('v') and suffix[1:].isdigit(): suffix_path = f"{effective_subdir}/{suffix}" if effective_subdir else suffix if suffix_path not in candidates: candidates.insert(0, suffix_path) if '' not in candidates: candidates.append('') return candidates gomod_file = None for candidate in candidate_subdirs(): path_candidate = Path(vcs_path) / candidate / "go.mod" if candidate else Path(vcs_path) / "go.mod" if path_candidate.exists(): gomod_file = path_candidate if candidate != effective_subdir: effective_subdir = candidate break subdir_resolved = effective_subdir if gomod_file is None: gomod_file = Path(vcs_path) / effective_subdir / "go.mod" if effective_subdir else Path(vcs_path) / "go.mod" def synthesize_go_mod(modname, go_version=None): sanitized = sanitize_module_name(modname) if go_version: return f"module {sanitized}\n\ngo {go_version}\n".encode('utf-8') return f"module {sanitized}\n".encode('utf-8') mod_content = None def is_vendored_package(rel_path): if rel_path.startswith("vendor/"): prefix_len = len("vendor/") else: idx = rel_path.find("/vendor/") if idx < 0: return False prefix_len = len("/vendor/") return "/" in rel_path[prefix_len:] if '+incompatible' in version: mod_content = synthesize_go_mod(module_path) bb.debug(1, f"Synthesizing go.mod for +incompatible module {module_path}@{version}") elif gomod_file.exists(): # Read the existing go.mod and check if module declaration matches mod_content = gomod_file.read_bytes() # Parse the module declaration to check for mismatch import re match = re.search(rb'^\s*module\s+(\S+)', mod_content, re.MULTILINE) if match: declared_module = match.group(1).decode('utf-8', errors='ignore') if declared_module != module_path: # Extract go version directive from original go.mod before synthesizing go_version = None go_match = re.search(rb'^\s*go\s+(\d+\.\d+(?:\.\d+)?)', mod_content, re.MULTILINE) if go_match: go_version = go_match.group(1).decode('utf-8', errors='ignore') # Module declaration doesn't match import path - synthesize correct one bb.note(f"Module {module_path}@{version}: go.mod declares '{declared_module}' but should be '{module_path}', synthesizing correct go.mod (preserving go {go_version})") mod_content = synthesize_go_mod(module_path, go_version) else: bb.debug(1, f"go.mod not found at {gomod_file}") mod_content = synthesize_go_mod(module_path) with open(mod_path, 'wb') as f: f.write(mod_content) bb.debug(1, f"Created {mod_path}") license_blobs = [] if effective_subdir: license_candidates = [ "LICENSE", "LICENSE.txt", "LICENSE.md", "LICENCE", "COPYING", "COPYING.txt", "COPYING.md", ] for candidate in license_candidates: try: content = subprocess.check_output( ["git", "show", f"HEAD:{candidate}"], cwd=vcs_path, stderr=subprocess.DEVNULL, ) except subprocess.CalledProcessError: continue license_blobs.append((Path(candidate).name, content)) break # 3. Create .zip file using git archive + filtering zip_path = download_dir / f"{escaped_version}.zip" # IMPORTANT: For replaced modules, zip internal paths must use the REQUESTED module path, # not the canonical path. Go expects to unzip files to requested_module@version/ directory. zip_prefix = f"{requested_module_path}@{version}/" module_key = f"{module_path}@{version}" expected_hash = go_sum_hashes.get(module_key) import tarfile import tempfile # IMPORTANT: assemble_zip() must run INSIDE TemporaryDirectory context. # The add_zip_entry() and zipfile.ZipFile code MUST be indented inside the # 'with tempfile.TemporaryDirectory()' block. If placed outside, the temp # directory is deleted before files are added, resulting in empty zips. def assemble_zip(include_vendor_modules: bool) -> str: """ Create module zip and compute h1: hash in single pass. Returns h1: hash string on success, None on failure. This avoids re-reading the zip file after creation by tracking file hashes during the zip creation process. """ import base64 try: with tempfile.TemporaryDirectory(dir=str(download_dir)) as tmpdir: tar_path = Path(tmpdir) / "archive.tar" archive_cmd = ["git", "archive", "--format=tar", "-o", str(tar_path), "HEAD"] if subdir_resolved: archive_cmd.append(subdir_resolved) subprocess.run(archive_cmd, cwd=str(vcs_path), check=True, capture_output=True) with tarfile.open(tar_path, 'r') as tf: tf.extractall(tmpdir) tar_path.unlink(missing_ok=True) extract_root = Path(tmpdir) if subdir_resolved: extract_root = extract_root / subdir_resolved excluded_prefixes = [] for gomod_file in extract_root.rglob("go.mod"): rel_path = gomod_file.relative_to(extract_root).as_posix() if rel_path != "go.mod": prefix = gomod_file.parent.relative_to(extract_root).as_posix() if prefix and not prefix.endswith("/"): prefix += "/" excluded_prefixes.append(prefix) if zip_path.exists(): zip_path.unlink() # Track file hashes for h1: calculation during zip creation hash_entries = [] # List of (arcname, sha256_hex) def add_zip_entry(zf, arcname, data, mode=None): info = zipfile.ZipInfo(arcname) info.date_time = (1980, 1, 1, 0, 0, 0) info.compress_type = zipfile.ZIP_DEFLATED info.create_system = 3 # Unix if mode is None: mode = stat.S_IFREG | 0o644 info.external_attr = ((mode & 0xFFFF) << 16) zf.writestr(info, data) # Track hash for h1: calculation hash_entries.append((arcname, hashlib.sha256(data).hexdigest())) with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf: for file_path in sorted(extract_root.rglob("*")): if file_path.is_dir(): continue rel_path = file_path.relative_to(extract_root).as_posix() if file_path.is_symlink(): continue if is_vendored_package(rel_path): continue if rel_path == "vendor/modules.txt" and not include_vendor_modules: continue if any(rel_path.startswith(prefix) for prefix in excluded_prefixes): continue if rel_path.endswith("go.mod") and rel_path != "go.mod": continue if rel_path == "go.mod": data = mod_content mode = stat.S_IFREG | 0o644 else: data = file_path.read_bytes() try: mode = file_path.stat().st_mode except FileNotFoundError: mode = stat.S_IFREG | 0o644 add_zip_entry(zf, zip_prefix + rel_path, data, mode) for license_name, content in license_blobs: if (extract_root / license_name).exists(): continue add_zip_entry(zf, zip_prefix + license_name, content, stat.S_IFREG | 0o644) # Calculate h1: hash from tracked entries (sorted by filename) hash_entries.sort(key=lambda x: x[0]) lines = [f"{h} {name}\n" for name, h in hash_entries] summary = "".join(lines).encode('utf-8') final_hash = hashlib.sha256(summary).digest() inline_hash = "h1:" + base64.b64encode(final_hash).decode('ascii') return inline_hash except subprocess.CalledProcessError as e: bb.error(f"Failed to create zip for {module_path}@{version}: {e.stderr.decode()}") return None except Exception as e: bb.error(f"Failed to assemble zip for {module_path}@{version}: {e}") # Fallback: try native tool if zip was created but hash calculation failed if zip_path.exists(): fallback_hash = calculate_h1_hash_native(zip_path) if fallback_hash: bb.warn(f"Using go-dirhash-native fallback for {module_path}@{version}") return fallback_hash return None hash_value = assemble_zip(include_vendor_modules=True) if hash_value is None: return None if expected_hash and hash_value and hash_value != expected_hash: bb.debug(1, f"Hash mismatch for {module_key} ({hash_value} != {expected_hash}), retrying without vendor/modules.txt") retry_hash = assemble_zip(include_vendor_modules=False) if retry_hash is None: return None hash_value = retry_hash if hash_value and hash_value != expected_hash: bb.note(f"{module_key} still mismatches expected hash after retry ({hash_value} != {expected_hash})") if hash_value: ziphash_path = download_dir / f"{escaped_version}.ziphash" with open(ziphash_path, 'w') as f: f.write(f"{hash_value}\n") bb.debug(1, f"Created {ziphash_path}") else: bb.warn(f"Skipping ziphash for {module_key} due to calculation errors") # 5. Extract zip to pkg/mod for offline builds # This step can be skipped if Go extracts on-demand from cache (experimental) skip_extraction = d.getVar('GO_MOD_SKIP_ZIP_EXTRACTION') == "1" if not skip_extraction: extract_dir = s / "pkg" / "mod" try: with zipfile.ZipFile(zip_path, 'r') as zip_ref: zip_ref.extractall(extract_dir) bb.debug(1, f"Extracted {module_path}@{version} to {extract_dir}") except Exception as e: bb.error(f"Failed to extract {module_path}@{version}: {e}") return None # 6. If this was a replaced module, create symlinks from requested path to canonical path # This ensures Go can find the module by either name if requested_module_path != module_path: import os escaped_requested = escape_module_path(requested_module_path) requested_download_dir = cache_dir / escaped_requested / "@v" requested_download_dir.mkdir(parents=True, exist_ok=True) # Create symlinks for all cache files (.info, .mod, .zip, .ziphash) for suffix in ['.info', '.mod', '.zip', '.ziphash']: canonical_file = download_dir / f"{escaped_version}{suffix}" requested_file = requested_download_dir / f"{escaped_version}{suffix}" if canonical_file.exists() and not requested_file.exists(): try: # Calculate relative path from requested to canonical rel_path = os.path.relpath(canonical_file, requested_file.parent) os.symlink(rel_path, requested_file) bb.debug(1, f"Created symlink: {requested_file} -> {rel_path}") except Exception as e: bb.warn(f"Failed to create symlink for {requested_module_path}: {e}") bb.note(f"Created symlinks for replaced module: {requested_module_path} -> {module_path}") # Return the canonical module path for post-processing (e.g., duplicate version handling) return module_path def regenerate_go_sum(): s_path = Path(d.getVar('S')) cache_dir = s_path / "pkg" / "mod" / "cache" / "download" go_sum_path = s_path / "src" / "import" / "go.sum" if not cache_dir.exists(): bb.warn("Module cache directory not found - skipping go.sum regeneration") return def calculate_zip_checksum(zip_file): """Calculate h1: hash for a module zip file (pure Python with native fallback)""" try: result = calculate_h1_hash_python(zip_file) if result: return result except Exception as e: bb.debug(1, f"Python hash failed for {zip_file}: {e}") # Fallback to native tool fallback = calculate_h1_hash_native(zip_file) if fallback: return fallback bb.warn(f"Failed to calculate zip checksum for {zip_file}") return None def calculate_mod_checksum(mod_path): try: mod_bytes = mod_path.read_bytes() except FileNotFoundError: return None import base64 file_hash = hashlib.sha256(mod_bytes).hexdigest() summary = f"{file_hash} go.mod\n".encode('ascii') digest = hashlib.sha256(summary).digest() return "h1:" + base64.b64encode(digest).decode('ascii') def unescape(value): import re return re.sub(r'!([a-z])', lambda m: m.group(1).upper(), value) existing_entries = {} if go_sum_path.exists(): with open(go_sum_path, 'r') as f: for line in f: parts = line.strip().split() if len(parts) != 3: continue mod, ver, hash_value = parts mod = sanitize_module_name(mod) existing_entries[(mod, ver)] = hash_value new_entries = {} for zip_file in sorted(cache_dir.rglob("*.zip")): zip_hash = calculate_zip_checksum(zip_file) if not zip_hash: continue parts = zip_file.parts try: v_index = parts.index('@v') download_index = parts.index('download') except ValueError: bb.warn(f"Unexpected cache layout for {zip_file}") continue escaped_module_parts = parts[download_index + 1:v_index] escaped_module = '/'.join(escaped_module_parts) escaped_version = zip_file.stem module_path = unescape(escaped_module) version = unescape(escaped_version) new_entries[(module_path, version)] = zip_hash mod_checksum = calculate_mod_checksum(zip_file.with_suffix('.mod')) if mod_checksum: new_entries[(module_path, f"{version}/go.mod")] = mod_checksum if not new_entries and not existing_entries: bb.warn("No go.sum entries available - skipping regeneration") return final_entries = existing_entries.copy() final_entries.update(new_entries) go_sum_path.parent.mkdir(parents=True, exist_ok=True) with open(go_sum_path, 'w') as f: for (mod, ver) in sorted(final_entries.keys()): f.write(f"{mod} {ver} {final_entries[(mod, ver)]}\n") bb.debug(1, f"Regenerated go.sum with {len(final_entries)} entries") # Process modules sequentially - I/O bound workload, parallelization causes disk thrashing workdir = Path(d.getVar('WORKDIR')) modules_data = json.loads(d.getVar('GO_MODULE_CACHE_DATA')) # Remove go.sum files from git-fetched dependencies to prevent checksum conflicts # The module checksums from git sources differ from the proxy checksums, and stale # go.sum files in dependencies can cause "checksum mismatch" errors during build vcs_cache_dir = workdir / "sources" / "vcs_cache" if vcs_cache_dir.exists(): import subprocess result = subprocess.run( ['find', str(vcs_cache_dir), '-name', 'go.sum', '-delete'], capture_output=True, text=True ) bb.debug(1, "Removed go.sum files from vcs_cache to prevent checksum conflicts") bb.note(f"Building module cache for {len(modules_data)} modules") # Track results from processing results = [] # List of (module_info, success, actual_module_path) success_count = 0 fail_count = 0 for i, module in enumerate(modules_data, 1): vcs_hash = module['vcs_hash'] vcs_path = workdir / "sources" / "vcs_cache" / vcs_hash # Create module cache files actual_module_path = create_module_zip( module['module'], module['version'], vcs_path, module.get('subdir', ''), module['timestamp'], ) if actual_module_path is not None: success_count += 1 results.append((module, True, actual_module_path)) else: fail_count += 1 results.append((module, False, None)) # Progress update every 100 modules if i % 100 == 0: bb.note(f"Progress: {i}/{len(modules_data)} modules processed") bb.note(f"Module processing complete: {success_count} succeeded, {fail_count} failed") # Post-processing: handle duplicate versions for replace directives (must be sequential) for module_info, success, actual_module_path in results: if success and actual_module_path: alias_info = replacements.get(actual_module_path) if alias_info: alias_version = alias_info.get("old_version") or require_versions.get(actual_module_path) if alias_version is None: for (mod, ver), _hash in go_sum_entries.items(): if mod == actual_module_path and not ver.endswith('/go.mod'): alias_version = ver break if alias_version and alias_version != module_info['version']: duplicate_module_version(actual_module_path, module_info['version'], alias_version, module_info['timestamp']) if fail_count == 0: regenerate_go_sum() else: bb.warn("Skipping go.sum regeneration due to module cache failures") bb.note(f"Module cache complete: {success_count} succeeded, {fail_count} failed") if fail_count > 0: bb.fatal(f"Failed to create cache for {fail_count} modules") } addtask create_module_cache after do_unpack do_prepare_recipe_sysroot before do_configure python do_sync_go_files() { """ Synchronize go.mod and go.sum with the module cache we built from git sources. This task solves the "go: updates to go.mod needed" error by ensuring go.mod declares ALL modules present in our module cache, and go.sum has checksums matching our git-built modules. Architecture: Option 2 (Rewrite go.mod/go.sum approach) - Scans pkg/mod/cache/download/ for ALL modules we built - Regenerates go.mod with complete require block - Regenerates go.sum with our h1: checksums from .ziphash files """ import json import hashlib import re from pathlib import Path bb.note("Synchronizing go.mod and go.sum with module cache") s = Path(d.getVar('S')) cache_dir = s / "pkg" / "mod" / "cache" / "download" go_mod_path = s / "src" / "import" / "go.mod" go_sum_path = s / "src" / "import" / "go.sum" if not cache_dir.exists(): bb.fatal("Module cache directory not found - run do_create_module_cache first") def unescape(escaped): """Unescape capital letters (reverse of escape_module_path)""" import re return re.sub(r'!([a-z])', lambda m: m.group(1).upper(), escaped) def sanitize_module_name(name): """Remove surrounding quotes added by legacy tools""" if not name: return name stripped = name.strip() if len(stripped) >= 2 and stripped[0] == '"' and stripped[-1] == '"': return stripped[1:-1] return stripped def load_require_versions(go_mod_path): versions = {} if not go_mod_path.exists(): return versions in_block = False with go_mod_path.open('r', encoding='utf-8') as f: for raw_line in f: line = raw_line.strip() if line.startswith('require ('): in_block = True continue if in_block and line == ')': in_block = False continue if line.startswith('require ') and '(' not in line: parts = line.split() if len(parts) >= 3: versions[sanitize_module_name(parts[1])] = parts[2] continue if in_block and line and not line.startswith('//'): parts = line.split() if len(parts) >= 2: versions[sanitize_module_name(parts[0])] = parts[1] return versions def load_replacements(go_mod_path): replacements = {} if not go_mod_path.exists(): return replacements def parse_replace_line(line): if '//' in line: line = line.split('//', 1)[0].strip() if not line or '=>' not in line: return left, right = [part.strip() for part in line.split('=>', 1)] left_parts = left.split() right_parts = right.split() if not left_parts or not right_parts: return old_module = sanitize_module_name(left_parts[0]) old_version = left_parts[1] if len(left_parts) > 1 else None new_module = sanitize_module_name(right_parts[0]) new_version = right_parts[1] if len(right_parts) > 1 else None replacements[old_module] = { "old_version": old_version, "new_module": new_module, "new_version": new_version, } in_block = False with go_mod_path.open('r', encoding='utf-8') as f: for raw_line in f: line = raw_line.strip() if line.startswith('replace ('): in_block = True continue if in_block and line == ')': in_block = False continue if line.startswith('replace ') and '(' not in line: parse_replace_line(line[len('replace '):]) continue if in_block and line and not line.startswith('//'): parse_replace_line(line) return replacements require_versions = load_require_versions(go_mod_path) replacements = load_replacements(go_mod_path) # 1. Scan module cache to discover ALL modules we built # Map: (module_path, version) -> {"zip_checksum": str, "mod_path": Path} our_modules = {} bb.note("Scanning module cache...") for zip_file in sorted(cache_dir.rglob("*.zip")): parts = zip_file.parts try: v_index = parts.index('@v') download_index = parts.index('download') except ValueError: continue escaped_module_parts = parts[download_index + 1:v_index] escaped_module = '/'.join(escaped_module_parts) escaped_version = zip_file.stem module_path = unescape(escaped_module) module_path = sanitize_module_name(module_path) version = unescape(escaped_version) # Read checksum from .ziphash file ziphash_file = zip_file.with_suffix('.ziphash') if ziphash_file.exists(): checksum = ziphash_file.read_text().strip() # Some .ziphash files have literal \\n at the end - remove it if checksum.endswith('\\\\n'): checksum = checksum[:-2] our_modules[(module_path, version)] = { "zip_checksum": checksum, "mod_path": zip_file.with_suffix('.mod'), } if not our_modules: bb.fatal("No modules found in cache - cannot synchronize go.mod/go.sum") bb.note(f"Found {len(our_modules)} modules in cache") # 2. DO NOT modify go.mod - keep the original module declarations # The real problem is go.sum has wrong checksums (proxy vs git), not missing modules bb.note("Leaving go.mod unchanged - only updating go.sum with git-based checksums") # 3. Read original go.sum to preserve entries for modules not in our cache original_sum_entries = {} if go_sum_path.exists(): for line in go_sum_path.read_text().splitlines(): line = line.strip() if not line: continue parts = line.split() if len(parts) >= 3: module = sanitize_module_name(parts[0]) version = parts[1] checksum = parts[2] original_sum_entries[(module, version)] = checksum # 4. Build new go.sum by updating checksums for modules we built sum_entries_dict = original_sum_entries.copy() # Start with original for (module, version), entry in our_modules.items(): # Update .zip checksum sum_entries_dict[(module, version)] = entry["zip_checksum"] # Also update /go.mod entry if we have .mod file mod_file = entry["mod_path"] if mod_file.exists(): # Calculate h1: checksum for .mod file mod_bytes = mod_file.read_bytes() file_hash = hashlib.sha256(mod_bytes).hexdigest() summary = f"{file_hash} go.mod\n".encode('ascii') h1_bytes = hashlib.sha256(summary).digest() mod_checksum = "h1:" + __import__('base64').b64encode(h1_bytes).decode('ascii') sum_entries_dict[(module, f"{version}/go.mod")] = mod_checksum # 5. Duplicate checksums for modules that use replace directives so the original # module path (e.g., github.com/Mirantis/...) keeps matching go.sum entries. for alias_module, repl in replacements.items(): alias_module = sanitize_module_name(alias_module) alias_version = repl.get("old_version") if alias_version is None: alias_version = require_versions.get(alias_module) if alias_version is None: # If go.mod didn't pin a replacement version, derive from go.sum for (mod, version) in list(original_sum_entries.keys()): if mod == alias_module and not version.endswith('/go.mod'): alias_version = version break if not alias_version: continue target_module = repl.get("new_module") target_version = repl.get("new_version") if target_version is None: target_version = require_versions.get(target_module) if not target_module or not target_version: continue entry = our_modules.get((target_module, target_version)) if not entry and alias_module != target_module: entry = our_modules.get((alias_module, target_version)) if not entry: continue sum_entries_dict[(alias_module, alias_version)] = entry["zip_checksum"] mod_file = entry["mod_path"] if mod_file.exists(): mod_bytes = mod_file.read_bytes() file_hash = hashlib.sha256(mod_bytes).hexdigest() summary = f"{file_hash} go.mod\n".encode('ascii') h1_bytes = hashlib.sha256(summary).digest() mod_checksum = "h1:" + __import__('base64').b64encode(h1_bytes).decode('ascii') sum_entries_dict[(alias_module, f"{alias_version}/go.mod")] = mod_checksum # Write merged go.sum sum_lines = [] for (module, version), checksum in sorted(sum_entries_dict.items()): sum_lines.append(f"{module} {version} {checksum}") go_sum_path.write_text('\n'.join(sum_lines) + '\n') bb.note(f"Updated go.sum: {len(sum_entries_dict)} total entries, {len(our_modules)} updated from cache") bb.note("go.mod and go.sum synchronized successfully") } addtask sync_go_files after do_create_module_cache before do_compile