From dbf720ccb0519a4dbf143dbaed1633527b8d7b60 Mon Sep 17 00:00:00 2001
From: Bruce Ashfield
Date: Sun, 4 Jan 2026 11:40:15 -0500
Subject: go-mod-fetcher: fix shallow clone handling, duplicates, and discovery workflow

oe-go-mod-fetcher.py:
- Remove BB_GIT_SHALLOW_EXTRA_REFS generation - refs must be present in
  ALL repositories which isn't the case for module dependencies.
  Instead, use tag= parameter in individual SRC_URI entries.
- Add tag= to SRC_URI when ref is a tag, allowing BitBake's shallow
  clone to include the necessary tag (with BB_GIT_SHALLOW=1)
- Remove premature _ref_points_to_commit() check that was clearing
  ref_hints before repos were fetched, preventing tag= from being added
- Fix pseudo-version verification: only use shallow fetch for actual
  tags (refs/tags/...), not branch refs. Pseudo-versions with branch
  refs (refs/heads/...) now correctly use unshallow path to reach
  historical commits that aren't fetchable with depth=1

oe-go-mod-fetcher-hybrid.py:
- Fix duplicate SRC_URI entries when multiple modules share the same
  git repo/commit (e.g., errdefs and errdefs/pkg). Track added
  vcs_hashes to skip duplicates.
- Add --discovery-cache option to calculate module sizes from discovery cache .zip files, enabling size recommendations during discover_and_generate go-mod-discovery.bbclass: - Add automatic hybrid mode recommendations after generate_modules, showing module sizes and suggested --git prefixes for conversion - Add GO_MOD_DISCOVERY_SKIP_VERIFY variable to skip commit verification on retries (useful after fixing verification issues) - Pass --discovery-cache to hybrid script for accurate size calculations Signed-off-by: Bruce Ashfield --- classes/go-mod-discovery.bbclass | 81 ++++++++++++++++++++++++++++++++++++- scripts/oe-go-mod-fetcher-hybrid.py | 46 +++++++++++++++++++-- scripts/oe-go-mod-fetcher.py | 32 ++++++--------- 3 files changed, 135 insertions(+), 24 deletions(-) diff --git a/classes/go-mod-discovery.bbclass b/classes/go-mod-discovery.bbclass index fc7c2008..d41d70a5 100644 --- a/classes/go-mod-discovery.bbclass +++ b/classes/go-mod-discovery.bbclass @@ -117,6 +117,11 @@ GO_MOD_DISCOVERY_GIT_REF ?= "${SRCREV}" # Recipe directory for generated .inc files - defaults to recipe's directory GO_MOD_DISCOVERY_RECIPEDIR ?= "${FILE_DIRNAME}" +# Skip commit verification during generation (use cached results only) +# Set to "1" to skip verification on retries after initial discovery +# Usage: GO_MOD_DISCOVERY_SKIP_VERIFY = "1" in local.conf or recipe +GO_MOD_DISCOVERY_SKIP_VERIFY ?= "" + # Empty default for TAGS if not set by recipe (avoids undefined variable errors) TAGS ?= "" @@ -384,11 +389,19 @@ Or run 'bitbake ${PN} -c show_upgrade_commands' to see manual options." 
bbfatal "Could not find oe-go-mod-fetcher.py in any layer" fi + # Build fetcher command with optional flags + SKIP_VERIFY_FLAG="" + if [ "${GO_MOD_DISCOVERY_SKIP_VERIFY}" = "1" ]; then + echo "NOTE: Skipping commit verification (GO_MOD_DISCOVERY_SKIP_VERIFY=1)" + SKIP_VERIFY_FLAG="--skip-verify" + fi + python3 "${FETCHER_SCRIPT}" \ --discovered-modules "${GO_MOD_DISCOVERY_MODULES_JSON}" \ --git-repo "${GO_MOD_DISCOVERY_GIT_REPO}" \ --git-ref "${GO_MOD_DISCOVERY_GIT_REF}" \ - --recipedir "${GO_MOD_DISCOVERY_RECIPEDIR}" + --recipedir "${GO_MOD_DISCOVERY_RECIPEDIR}" \ + ${SKIP_VERIFY_FLAG} if [ $? -eq 0 ]; then echo "" @@ -411,6 +424,70 @@ addtask generate_modules do_generate_modules[nostamp] = "1" do_generate_modules[vardeps] += "GO_MOD_DISCOVERY_MODULES_JSON GO_MOD_DISCOVERY_GIT_REPO \ GO_MOD_DISCOVERY_GIT_REF GO_MOD_DISCOVERY_RECIPEDIR" +do_generate_modules[postfuncs] = "do_show_hybrid_recommendation" + +# Show hybrid conversion recommendations after VCS generation +python do_show_hybrid_recommendation() { + """ + Show recommendations for converting to hybrid gomod:// + git:// mode. + Runs automatically after generate_modules completes. 
+ """ + import subprocess + from pathlib import Path + + recipedir = d.getVar('GO_MOD_DISCOVERY_RECIPEDIR') + git_inc = Path(recipedir) / 'go-mod-git.inc' + + if not git_inc.exists(): + return + + # Find the hybrid script + layerdir = None + for layer in d.getVar('BBLAYERS').split(): + if 'meta-virtualization' in layer: + layerdir = layer + break + + if not layerdir: + return + + scriptpath = Path(layerdir) / "scripts" / "oe-go-mod-fetcher-hybrid.py" + if not scriptpath.exists(): + return + + bb.plain("") + bb.plain("=" * 70) + bb.plain("HYBRID MODE RECOMMENDATION") + bb.plain("=" * 70) + + cmd = ['python3', str(scriptpath), '--recipedir', recipedir, '--recommend'] + + # Try to find module sizes from discovery cache or vcs_cache + discovery_dir = d.getVar('GO_MOD_DISCOVERY_DIR') + workdir = d.getVar('WORKDIR') + + # Check discovery cache first (has .zip files with accurate sizes) + if discovery_dir: + discovery_cache = Path(discovery_dir) / 'cache' / 'cache' / 'download' + if discovery_cache.exists(): + cmd.extend(['--discovery-cache', str(discovery_cache)]) + + # Also check vcs_cache if it exists (from a previous build) + if workdir: + vcs_cache = Path(workdir) / 'sources' / 'vcs_cache' + if vcs_cache.exists(): + cmd.extend(['--workdir', workdir]) + + try: + result = subprocess.run(cmd, capture_output=True, text=True, timeout=120) + if result.stdout: + for line in result.stdout.splitlines(): + bb.plain(line) + bb.plain("") + bb.plain("") + except Exception as e: + bb.note(f"Could not run hybrid recommendation: {e}") +} # ============================================================================= # TASK 4: do_discover_and_generate - All-in-one convenience task @@ -443,7 +520,7 @@ addtask discover_and_generate after do_unpack do_discover_and_generate[depends] = "${PN}:do_prepare_recipe_sysroot" do_discover_and_generate[network] = "1" do_discover_and_generate[nostamp] = "1" -do_discover_and_generate[postfuncs] = "do_discover_modules do_extract_modules 
do_generate_modules" +do_discover_and_generate[postfuncs] = "do_discover_modules do_extract_modules do_generate_modules do_show_hybrid_recommendation" # ============================================================================= # TASK: do_clean_discovery - Clean the persistent cache diff --git a/scripts/oe-go-mod-fetcher-hybrid.py b/scripts/oe-go-mod-fetcher-hybrid.py index f5934ed2..9704cca0 100755 --- a/scripts/oe-go-mod-fetcher-hybrid.py +++ b/scripts/oe-go-mod-fetcher-hybrid.py @@ -241,6 +241,35 @@ def get_repo_sizes(vcs_info: dict, workdir: Optional[Path] = None) -> dict[str, return sizes +def get_discovery_sizes(modules: list[dict], discovery_cache: Optional[Path] = None) -> dict[str, int]: + """Get sizes of modules from discovery cache .zip files.""" + sizes = {} + + if discovery_cache is None or not discovery_cache.exists(): + return sizes + + for mod in modules: + module_path = mod.get('module', '') + version = mod.get('version', '') + vcs_hash = mod.get('vcs_hash', '') + + if not module_path or not version or not vcs_hash: + continue + + # Build path to .zip file: discovery_cache//@v/.zip + zip_path = discovery_cache / module_path / '@v' / f'{version}.zip' + + if zip_path.exists(): + try: + size = zip_path.stat().st_size + # Accumulate size by vcs_hash (same repo may have multiple modules) + sizes[vcs_hash] = sizes.get(vcs_hash, 0) + size + except OSError: + pass + + return sizes + + def format_size(size_bytes: int) -> str: """Format bytes as human readable.""" for unit in ['B', 'KB', 'MB', 'GB']: @@ -604,6 +633,9 @@ def main(): parser.add_argument('--workdir', type=Path, default=None, help='BitBake workdir containing vcs_cache (for size calculations)') + parser.add_argument('--discovery-cache', type=Path, default=None, + help='Discovery cache directory containing module .zip files (for size calculations)') + # Actions parser.add_argument('--list', action='store_true', help='List all modules with sizes') @@ -650,12 +682,20 @@ def main(): vcs_info = 
parse_go_mod_git_inc(git_inc) print(f" Found {len(vcs_info)} VCS entries") - # Get sizes if workdir provided + # Get sizes from discovery cache and/or workdir sizes = {} + if args.discovery_cache: + print(f"Calculating sizes from discovery cache {args.discovery_cache}...") + sizes = get_discovery_sizes(modules, args.discovery_cache) + print(f" Got sizes for {len(sizes)} modules from discovery cache") + if args.workdir: print(f"Calculating sizes from {args.workdir}...") - sizes = get_repo_sizes(vcs_info, args.workdir) - print(f" Got sizes for {len(sizes)} repos") + vcs_sizes = get_repo_sizes(vcs_info, args.workdir) + print(f" Got sizes for {len(vcs_sizes)} repos from vcs_cache") + # Merge vcs_sizes into sizes (vcs_cache sizes override discovery if both exist) + for k, v in vcs_sizes.items(): + sizes[k] = v # Handle actions if args.list: diff --git a/scripts/oe-go-mod-fetcher.py b/scripts/oe-go-mod-fetcher.py index 20ceee7c..13399a73 100755 --- a/scripts/oe-go-mod-fetcher.py +++ b/scripts/oe-go-mod-fetcher.py @@ -815,7 +815,10 @@ def verify_commit_accessible(vcs_url: str, commit: str, ref_hint: str = "", vers # Strategy depends on whether this is a tagged version or pseudo-version commit_fetched = commit_present # If already present, no need to fetch - if ref_hint and not commit_present: + # Only use shallow fetch for actual tags - pseudo-versions with branch refs need unshallow + is_tag_ref = ref_hint and ref_hint.startswith('refs/tags/') + + if is_tag_ref and not commit_present: # Tagged version: try shallow fetch of the specific commit (only if not already present) try: fetch_cmd = ["git", "fetch", "--depth=1", "origin", commit] @@ -3924,9 +3927,9 @@ def generate_recipe(modules: List[Dict], source_dir: Path, output_dir: Optional[ else: commit_sha = repo_info['commits'][commit_hash]['commit_sha'] + # Trust the ref_hint from discovery - it will be validated/corrected during + # the verification pass if needed (e.g., force-pushed tags are auto-corrected) ref_hint = 
module.get('vcs_ref', '') - if ref_hint and not _ref_points_to_commit(vcs_url, ref_hint, commit_hash): - ref_hint = '' entry = repo_info['commits'][commit_hash] entry['modules'].append(module) @@ -4080,7 +4083,9 @@ def generate_recipe(modules: List[Dict], source_dir: Path, output_dir: Optional[ # For branches, use the branch name directly if ref_hint.startswith('refs/tags/'): # Tags: BitBake can fetch tagged commits with nobranch=1 - branch_param = ';nobranch=1' + # Add tag= so shallow clones include this tag (with BB_GIT_SHALLOW=1 in recipe) + tag_name = ref_hint[10:] # Strip "refs/tags/" + branch_param = f';nobranch=1;tag={tag_name}' elif ref_hint.startswith('refs/heads/'): # Branches: use the actual branch name branch_name = ref_hint[11:] # Strip "refs/heads/" @@ -4161,21 +4166,10 @@ def generate_recipe(modules: List[Dict], source_dir: Path, output_dir: Optional[ f.write(f'SRC_URI += "{entry}"\n') f.write('\n') - # Collect all tag references for shallow cloning - # BB_GIT_SHALLOW_EXTRA_REFS ensures these refs are included in shallow clones - tag_refs = set() - for module in modules: - vcs_ref = module.get('vcs_ref', '') - if vcs_ref and 'refs/tags/' in vcs_ref: - tag_refs.add(vcs_ref) - - if tag_refs: - f.write("# Tag references for shallow cloning\n") - f.write("# Ensures shallow clones include all necessary tags\n") - f.write("BB_GIT_SHALLOW_EXTRA_REFS = \"\\\n") - for tag_ref in sorted(tag_refs): - f.write(f" {tag_ref} \\\n") - f.write('"\n') + # Note: BB_GIT_SHALLOW_EXTRA_REFS is NOT used here because those refs must be + # present in ALL repositories, which isn't the case for module dependencies. + # Instead, we use tag= in individual SRC_URI entries when the ref is a tag. + # The recipe should set BB_GIT_SHALLOW = "1" to enable shallow clones globally. # Note: SRCREV_* variables are not needed since rev= is embedded directly in SRC_URI -- cgit v1.2.3-54-g00ecf