diff options
| author | Bruce Ashfield <bruce.ashfield@gmail.com> | 2026-04-06 23:43:03 +0000 |
|---|---|---|
| committer | Bruce Ashfield <bruce.ashfield@gmail.com> | 2026-04-06 23:49:24 +0000 |
| commit | 55f63c8a12f86c1a5d5fe1ccf0e9ddb719e7747e (patch) | |
| tree | 46c3e90a212e69f943d3b817603f8adca9ceb645 | |
| parent | 545ecc18af39a48a0543c844aa1652c2ed29c5d0 (diff) | |
| download | meta-virtualization-55f63c8a12f86c1a5d5fe1ccf0e9ddb719e7747e.tar.gz | |
tests: fix k3s multi-node test suite

Fix several issues discovered during multi-node testing:
- Find native QEMU binary from build sysroots-components instead of
relying on PATH (qemu-system-native is not in OE build env PATH)
- Set LD_LIBRARY_PATH for native QEMU shared library dependencies
(libSDL2, etc. from native sysroots)
- Add if=virtio to drive parameter so root device appears as /dev/vda
- Add CNI bin dirs to PATH when starting k3s manually (systemd service
has the PATH fix but manual launch does not)
- Wipe server TLS/cred/db state and kubeconfig before restarting with
cluster IPs to avoid stale certificate errors (cert only valid for
DHCP IP, not 192.168.50.1)
- Add --tls-san for cluster IP to server start
- Wipe agent k3s state to avoid "not authorized" from stale tokens
- Remove server-only config.yaml on agent (disable-cloud-controller
flag crashes the agent)
- Set unique --node-name on agent to prevent hostname collision when
both VMs boot from the same image

Signed-off-by: Bruce Ashfield <bruce.ashfield@gmail.com>
| -rw-r--r-- | tests/test_k3s_runtime.py | 70 |
1 file changed, 62 insertions, 8 deletions
diff --git a/tests/test_k3s_runtime.py b/tests/test_k3s_runtime.py index 8b39bfd2..f3e16aa5 100644 --- a/tests/test_k3s_runtime.py +++ b/tests/test_k3s_runtime.py | |||
| @@ -111,6 +111,22 @@ class K3sRunner: | |||
| 111 | self.booted = False | 111 | self.booted = False |
| 112 | self._rootfs_copy = None | 112 | self._rootfs_copy = None |
| 113 | 113 | ||
| 114 | def _find_native_qemu(self, qemu_bin): | ||
| 115 | """Find the native QEMU binary from the build directory.""" | ||
| 116 | native_dir = ( | ||
| 117 | self.build_dir / "tmp" / "sysroots-components" / "x86_64" | ||
| 118 | / "qemu-system-native" / "usr" / "bin") | ||
| 119 | qemu_path = native_dir / qemu_bin | ||
| 120 | if qemu_path.exists(): | ||
| 121 | return str(qemu_path) | ||
| 122 | # Fallback: check PATH | ||
| 123 | import shutil | ||
| 124 | found = shutil.which(qemu_bin) | ||
| 125 | if found: | ||
| 126 | return found | ||
| 127 | raise RuntimeError( | ||
| 128 | f"QEMU binary '{qemu_bin}' not found in {native_dir} or PATH") | ||
| 129 | |||
| 114 | def _build_direct_qemu_cmd(self): | 130 | def _build_direct_qemu_cmd(self): |
| 115 | """Build a direct QEMU command line (not runqemu).""" | 131 | """Build a direct QEMU command line (not runqemu).""" |
| 116 | arch_cfg = _QEMU_ARCH_CONFIG.get(self.machine) | 132 | arch_cfg = _QEMU_ARCH_CONFIG.get(self.machine) |
| @@ -119,6 +135,8 @@ class K3sRunner: | |||
| 119 | f"Unsupported machine '{self.machine}' for direct QEMU. " | 135 | f"Unsupported machine '{self.machine}' for direct QEMU. " |
| 120 | f"Supported: {list(_QEMU_ARCH_CONFIG.keys())}") | 136 | f"Supported: {list(_QEMU_ARCH_CONFIG.keys())}") |
| 121 | 137 | ||
| 138 | qemu_bin = self._find_native_qemu(arch_cfg["qemu_bin"]) | ||
| 139 | |||
| 122 | deploy_dir = self.build_dir / "tmp" / "deploy" / "images" / self.machine | 140 | deploy_dir = self.build_dir / "tmp" / "deploy" / "images" / self.machine |
| 123 | kernel = deploy_dir / arch_cfg["kernel_name"] | 141 | kernel = deploy_dir / arch_cfg["kernel_name"] |
| 124 | if not kernel.exists(): | 142 | if not kernel.exists(): |
| @@ -143,18 +161,32 @@ class K3sRunner: | |||
| 143 | kvm_flag = "-enable-kvm" if self.use_kvm else "" | 161 | kvm_flag = "-enable-kvm" if self.use_kvm else "" |
| 144 | 162 | ||
| 145 | qemu_params = ( | 163 | qemu_params = ( |
| 146 | f"{arch_cfg['qemu_bin']} {arch_cfg['machine']} {cpu} " | 164 | f"{qemu_bin} {arch_cfg['machine']} {cpu} " |
| 147 | f"{kvm_flag} -m 4096 -smp 2 -nographic " | 165 | f"{kvm_flag} -m 4096 -smp 2 -nographic " |
| 148 | f"-kernel {kernel} " | 166 | f"-kernel {kernel} " |
| 149 | f"-drive file={rootfs},format=raw " | 167 | f"-drive file={rootfs},if=virtio,format=raw " |
| 150 | f"-append 'root={arch_cfg['rootdev']} rw console={arch_cfg['console']} ip=dhcp' " | 168 | f'-append "root={arch_cfg["rootdev"]} rw ' |
| 169 | f'console={arch_cfg["console"]} ip=dhcp" ' | ||
| 151 | f"-netdev user,id=net0 -device virtio-net-pci,netdev=net0" | 170 | f"-netdev user,id=net0 -device virtio-net-pci,netdev=net0" |
| 152 | ) | 171 | ) |
| 153 | 172 | ||
| 154 | if self.extra_qemu_params: | 173 | if self.extra_qemu_params: |
| 155 | qemu_params += f" {self.extra_qemu_params}" | 174 | qemu_params += f" {self.extra_qemu_params}" |
| 156 | 175 | ||
| 157 | return qemu_params | 176 | # Native QEMU needs its sysroot libraries. Use oe-init-build-env |
| 177 | # and add the native sysroot bin dir to PATH for library resolution. | ||
| 178 | native_bindir = str( | ||
| 179 | self.build_dir / "tmp" / "sysroots-components" / "x86_64" | ||
| 180 | / "qemu-system-native" / "usr" / "bin") | ||
| 181 | native_libdirs = ":".join(str(p) for p in sorted( | ||
| 182 | (self.build_dir / "tmp" / "sysroots-components" / "x86_64" | ||
| 183 | ).glob("*/usr/lib")) if p.is_dir()) | ||
| 184 | |||
| 185 | return ( | ||
| 186 | f"bash -c 'export PATH={native_bindir}:$PATH && " | ||
| 187 | f"export LD_LIBRARY_PATH={native_libdirs}:${{LD_LIBRARY_PATH:-}} && " | ||
| 188 | f"{qemu_params}'" | ||
| 189 | ) | ||
| 158 | 190 | ||
| 159 | def start(self): | 191 | def start(self): |
| 160 | """Start QEMU and wait for login prompt.""" | 192 | """Start QEMU and wait for login prompt.""" |
| @@ -608,22 +640,34 @@ class TestK3sMultiNode: | |||
| 608 | server = k3s_multinode["server"] | 640 | server = k3s_multinode["server"] |
| 609 | agent = k3s_multinode["agent"] | 641 | agent = k3s_multinode["agent"] |
| 610 | 642 | ||
| 611 | # Stop default k3s service (auto-started) and start with | 643 | # Stop default k3s service and wipe TLS state so the server |
| 612 | # multi-node flags binding to the socket network | 644 | # generates new certs that include the cluster IP (192.168.50.1). |
| 645 | # Without this, certs are only valid for the DHCP IP (10.0.2.15). | ||
| 613 | server.run_command('systemctl stop k3s 2>/dev/null') | 646 | server.run_command('systemctl stop k3s 2>/dev/null') |
| 614 | server.run_command( | 647 | server.run_command( |
| 648 | 'rm -rf /var/lib/rancher/k3s/server/tls ' | ||
| 649 | '/var/lib/rancher/k3s/server/cred ' | ||
| 650 | '/var/lib/rancher/k3s/server/token ' | ||
| 651 | '/var/lib/rancher/k3s/server/agent-token ' | ||
| 652 | '/var/lib/rancher/k3s/server/node-token ' | ||
| 653 | '/var/lib/rancher/k3s/server/db ' | ||
| 654 | '/etc/rancher/k3s/k3s.yaml') | ||
| 655 | server.run_command( | ||
| 656 | 'export PATH=$PATH:/opt/cni/bin:/usr/libexec/cni && ' | ||
| 615 | 'k3s server ' | 657 | 'k3s server ' |
| 616 | '--write-kubeconfig-mode 644 ' | 658 | '--write-kubeconfig-mode 644 ' |
| 617 | '--disable-cloud-controller ' | 659 | '--disable-cloud-controller ' |
| 618 | '--node-ip 192.168.50.1 ' | 660 | '--node-ip 192.168.50.1 ' |
| 619 | '--bind-address 192.168.50.1 ' | 661 | '--bind-address 192.168.50.1 ' |
| 620 | '--advertise-address 192.168.50.1 ' | 662 | '--advertise-address 192.168.50.1 ' |
| 663 | '--tls-san 192.168.50.1 ' | ||
| 621 | '--flannel-iface eth1 ' | 664 | '--flannel-iface eth1 ' |
| 622 | '&>/var/log/k3s-server.log &') | 665 | '&>/var/log/k3s-server.log &') |
| 623 | 666 | ||
| 624 | # Wait for server node Ready | 667 | # Wait for new kubeconfig to be written, then wait for Ready |
| 625 | try: | 668 | try: |
| 626 | server.wait_for_condition( | 669 | server.wait_for_condition( |
| 670 | 'test -f /etc/rancher/k3s/k3s.yaml && ' | ||
| 627 | f'{_KUBECTL} get nodes 2>/dev/null || echo WAITING', | 671 | f'{_KUBECTL} get nodes 2>/dev/null || echo WAITING', |
| 628 | r'\bReady\b', | 672 | r'\bReady\b', |
| 629 | timeout=k3s_timeout, | 673 | timeout=k3s_timeout, |
| @@ -642,12 +686,22 @@ class TestK3sMultiNode: | |||
| 642 | f"Failed to get node token:\n{token}" | 686 | f"Failed to get node token:\n{token}" |
| 643 | token = token.strip().splitlines()[-1].strip() | 687 | token = token.strip().splitlines()[-1].strip() |
| 644 | 688 | ||
| 645 | # Stop default k3s on agent and start agent mode | 689 | # Stop default k3s on agent, clear server-only config, and |
| 690 | # wipe agent state from the auto-started instance. | ||
| 691 | # Set a unique node name — both VMs boot the same image and | ||
| 692 | # have the same hostname, which causes k3s to treat the agent | ||
| 693 | # as the same node as the server. | ||
| 646 | agent.run_command('systemctl stop k3s 2>/dev/null') | 694 | agent.run_command('systemctl stop k3s 2>/dev/null') |
| 647 | agent.run_command( | 695 | agent.run_command( |
| 696 | 'rm -f /etc/rancher/k3s/config.yaml && ' | ||
| 697 | 'rm -rf /var/lib/rancher/k3s/agent ' | ||
| 698 | '/var/lib/rancher/k3s/server') | ||
| 699 | agent.run_command( | ||
| 700 | f'export PATH=$PATH:/opt/cni/bin:/usr/libexec/cni && ' | ||
| 648 | f'k3s agent ' | 701 | f'k3s agent ' |
| 649 | f'--server https://192.168.50.1:6443 ' | 702 | f'--server https://192.168.50.1:6443 ' |
| 650 | f'--token {token} ' | 703 | f'--token {token} ' |
| 704 | f'--node-name k3s-agent ' | ||
| 651 | f'--node-ip 192.168.50.2 ' | 705 | f'--node-ip 192.168.50.2 ' |
| 652 | f'--flannel-iface eth1 ' | 706 | f'--flannel-iface eth1 ' |
| 653 | f'&>/var/log/k3s-agent.log &') | 707 | f'&>/var/log/k3s-agent.log &') |
