diff options
| author | Bruce Ashfield <bruce.ashfield@gmail.com> | 2026-04-06 15:49:58 +0000 |
|---|---|---|
| committer | Bruce Ashfield <bruce.ashfield@gmail.com> | 2026-04-06 23:49:24 +0000 |
| commit | ac89a0c46589c79c6ff583a11e0284e54af6a2cb (patch) | |
| tree | ff2f5d91f2774ee034b03d0104a4d84022c35920 | |
| parent | 540f1aee43c9694912869ef6d32a570ffbf7c123 (diff) | |
| download | meta-virtualization-ac89a0c46589c79c6ff583a11e0284e54af6a2cb.tar.gz | |
tests: add k3s runtime test suite
Add test_k3s_runtime.py with 10 tests for k3s single-node and
multi-node verification:
Single-node (5 tests):
- Boot, verify k3s binary and service unit
- Start k3s server, wait for node Ready
- Verify 1 node in Ready state
- Deploy a busybox pod, verify Running
- Delete pod, verify cleanup
Multi-node (5 tests):
- Boot 2 VMs via QEMU socket networking
- Verify inter-VM ping on socket network
- Start k3s server on VM1, join agent on VM2
- Verify 2 nodes Ready
- Deploy 2-replica deployment, verify scheduling
Uses architecture-aware QEMU configuration (x86-64 and arm64
supported). Multi-node tests launch QEMU directly (not runqemu)
to support two concurrent VMs. kubectl commands use KUBECONFIG
instead of embedded 'k3s kubectl' which is not available in the
Yocto build.
Signed-off-by: Bruce Ashfield <bruce.ashfield@gmail.com>
| -rw-r--r-- | tests/test_k3s_runtime.py | 731 |
1 files changed, 731 insertions, 0 deletions
diff --git a/tests/test_k3s_runtime.py b/tests/test_k3s_runtime.py new file mode 100644 index 00000000..8b39bfd2 --- /dev/null +++ b/tests/test_k3s_runtime.py | |||
| @@ -0,0 +1,731 @@ | |||
| 1 | # SPDX-FileCopyrightText: Copyright (C) 2025 Bruce Ashfield | ||
| 2 | # | ||
| 3 | # SPDX-License-Identifier: MIT | ||
| 4 | """ | ||
| 5 | K3s runtime tests - boot container-image-host with k3s and verify Kubernetes. | ||
| 6 | |||
| 7 | Single-node tests verify k3s server start, node readiness, and basic pod | ||
| 8 | deployment. Multi-node tests use QEMU socket networking to connect two VMs | ||
| 9 | on a shared L2 segment and verify agent join + multi-node scheduling. | ||
| 10 | |||
| 11 | Build prerequisites (in local.conf): | ||
| 12 | require conf/distro/include/meta-virt-host.conf | ||
| 13 | require conf/distro/include/container-host-k3s.conf | ||
| 14 | MACHINE = "qemux86-64" # or qemuarm64 | ||
| 15 | |||
| 16 | bitbake container-image-host | ||
| 17 | |||
| 18 | Run: | ||
| 19 | # Single-node only | ||
| 20 | pytest tests/test_k3s_runtime.py -v -k "not multinode" --machine qemux86-64 | ||
| 21 | |||
| 22 | # Multi-node only | ||
| 23 | pytest tests/test_k3s_runtime.py -v -k "multinode" --machine qemux86-64 | ||
| 24 | |||
| 25 | # All tests | ||
| 26 | pytest tests/test_k3s_runtime.py -v --machine qemux86-64 | ||
| 27 | |||
| 28 | Options: | ||
| 29 | --k3s-timeout Overall k3s readiness timeout (default: 300s) | ||
| 30 | --boot-timeout QEMU boot timeout (default: 120s) | ||
| 31 | --no-kvm Disable KVM acceleration | ||
| 32 | |||
| 33 | Notes: | ||
| 34 | - k3s does not embed 'kubectl' as a subcommand in our build. | ||
| 35 | Use 'kubectl' with KUBECONFIG=/etc/rancher/k3s/k3s.yaml instead. | ||
| 36 | - System pods (coredns, traefik) are not auto-deployed because k3s | ||
| 37 | manifest extraction is not yet supported in the Yocto build. | ||
| 38 | - Multi-node tests launch QEMU directly (not via runqemu) to support | ||
| 39 | two concurrent VMs with socket networking. Architecture-specific | ||
| 40 | QEMU parameters are auto-detected from the machine setting. | ||
| 41 | """ | ||
| 42 | |||
| 43 | import os | ||
| 44 | import re | ||
| 45 | import shutil | ||
| 46 | import time | ||
| 47 | import pytest | ||
| 48 | from pathlib import Path | ||
| 49 | |||
| 50 | try: | ||
| 51 | import pexpect | ||
| 52 | PEXPECT_AVAILABLE = True | ||
| 53 | except ImportError: | ||
| 54 | PEXPECT_AVAILABLE = False | ||
| 55 | |||
| 56 | |||
# Port used for the QEMU socket netdev that links the two multi-node VMs.
# Derived from the PID so that concurrent test sessions on one host pick
# different ports (range 10000-59999).
_SOCKET_PORT_BASE = 10000 + os.getpid() % 50000

# Prefix for every kubectl invocation inside the guest: the Yocto k3s build
# has no embedded 'k3s kubectl', so KUBECONFIG is pointed at the k3s file.
_KUBECTL = 'KUBECONFIG=/etc/rancher/k3s/k3s.yaml kubectl'


# Per-machine QEMU settings used when launching QEMU directly (multi-node).
# Keys: QEMU binary, machine type flag, CPU flag with/without KVM, kernel
# image file name in the deploy directory, serial console and root device.
_QEMU_ARCH_CONFIG = {
    "qemux86-64": dict(
        qemu_bin="qemu-system-x86_64",
        machine="-M q35",
        cpu_kvm="-cpu host",
        cpu_tcg="-cpu Skylake-Client",
        kernel_name="bzImage",
        console="ttyS0",
        rootdev="/dev/vda",
    ),
    "qemuarm64": dict(
        qemu_bin="qemu-system-aarch64",
        machine="-M virt",
        cpu_kvm="-cpu host",
        cpu_tcg="-cpu cortex-a57",
        kernel_name="Image",
        console="ttyAMA0",
        rootdev="/dev/vda",
    ),
}
| 85 | |||
| 86 | |||
class K3sRunner:
    """
    Manages a QEMU session for K3s testing.

    Boots container-image-host with optional dual NIC (slirp + socket network)
    and provides command execution via serial console. Supports both runqemu
    (single-node) and direct QEMU launch (multi-node).
    """

    # Root shell prompt on the serial console; seeing it means the previous
    # command has finished.
    _PROMPT_RE = r'root@[^:]+:[^#]+#'

    def __init__(self, poky_dir, build_dir, machine, use_kvm=True,
                 timeout=120, image="container-image-host",
                 extra_qemu_params="", log_suffix="",
                 use_runqemu=True, rootfs_path=None):
        """
        Store the session configuration; nothing is launched here.

        Args:
            poky_dir: Poky checkout (used to source oe-init-build-env).
            build_dir: Yocto build directory containing tmp/deploy/images.
            machine: Target machine name, e.g. "qemux86-64".
            use_kvm: Enable KVM acceleration when True.
            timeout: Seconds to wait for the login prompt.
            image: Image recipe name to boot.
            extra_qemu_params: Extra QEMU arguments appended verbatim.
            log_suffix: Suffix for the console log file name and messages.
            use_runqemu: Launch through runqemu when True, else QEMU directly.
            rootfs_path: Explicit rootfs for direct launch (newest is
                auto-selected when None).
        """
        self.poky_dir = Path(poky_dir)
        self.build_dir = Path(build_dir)
        self.machine = machine
        self.use_kvm = use_kvm
        self.timeout = timeout
        self.image = image
        self.extra_qemu_params = extra_qemu_params
        self.log_suffix = log_suffix
        self.use_runqemu = use_runqemu
        self.rootfs_path = rootfs_path
        self.child = None
        self.booted = False
        # Path of a temporary rootfs copy to delete in stop(), if any.
        self._rootfs_copy = None

    def _build_direct_qemu_cmd(self):
        """
        Build a direct QEMU command line (not runqemu).

        Returns:
            The command line as a single string.

        Raises:
            RuntimeError: If the machine is unsupported or the kernel or
                rootfs cannot be found in the deploy directory.
        """
        arch_cfg = _QEMU_ARCH_CONFIG.get(self.machine)
        if not arch_cfg:
            raise RuntimeError(
                f"Unsupported machine '{self.machine}' for direct QEMU. "
                f"Supported: {list(_QEMU_ARCH_CONFIG.keys())}")

        deploy_dir = self.build_dir / "tmp" / "deploy" / "images" / self.machine
        kernel = deploy_dir / arch_cfg["kernel_name"]
        if not kernel.exists():
            # Fall back to a suffixed kernel name. Pick the newest match so
            # the choice is deterministic (glob order is not).
            candidates = sorted(
                deploy_dir.glob(f"{arch_cfg['kernel_name']}*"),
                key=os.path.getmtime)
            if not candidates:
                raise RuntimeError(f"Kernel not found: {kernel}")
            kernel = candidates[-1]

        # Use provided rootfs or find the newest one for this image
        rootfs = self.rootfs_path
        if not rootfs:
            ext4_files = sorted(deploy_dir.glob(
                f"{self.image}-*.rootfs.ext4"), key=os.path.getmtime)
            if not ext4_files:
                raise RuntimeError(
                    f"No ext4 rootfs found in {deploy_dir}")
            rootfs = ext4_files[-1]

        cpu = arch_cfg["cpu_kvm"] if self.use_kvm else arch_cfg["cpu_tcg"]
        kvm_flag = "-enable-kvm" if self.use_kvm else ""

        # if=virtio is explicit so the disk is a virtio-blk device (vda) on
        # every machine type; the default drive interface differs per
        # machine and the kernel command line hard-codes root=/dev/vda.
        qemu_params = (
            f"{arch_cfg['qemu_bin']} {arch_cfg['machine']} {cpu} "
            f"{kvm_flag} -m 4096 -smp 2 -nographic "
            f"-kernel {kernel} "
            f"-drive file={rootfs},if=virtio,format=raw "
            f"-append 'root={arch_cfg['rootdev']} rw console={arch_cfg['console']} ip=dhcp' "
            f"-netdev user,id=net0 -device virtio-net-pci,netdev=net0"
        )

        if self.extra_qemu_params:
            qemu_params += f" {self.extra_qemu_params}"

        return qemu_params

    def start(self):
        """
        Start QEMU and wait for login prompt.

        Logs in as root if a login prompt appears, then sets TERM and
        KUBECONFIG in the guest shell.

        Returns:
            self, to allow chaining.

        Raises:
            RuntimeError: If pexpect is missing, or if the boot times out,
                QEMU exits, or any other error occurs during boot (the
                session is stopped before raising).
        """
        if not PEXPECT_AVAILABLE:
            raise RuntimeError("pexpect not installed. Run: pip install pexpect")

        if self.use_runqemu:
            cmd = self._build_runqemu_cmd()
        else:
            cmd = self._build_direct_qemu_cmd()

        log_name = f"runqemu-k3s-test{self.log_suffix}.log"
        print(f"Starting QEMU (K3s{self.log_suffix}): {cmd}")
        self.child = pexpect.spawn(
            cmd, encoding='utf-8', timeout=self.timeout)

        try:
            # Opened inside the try so a failure here still tears down the
            # already-spawned QEMU process via stop().
            self.child.logfile_read = open(f'/tmp/{log_name}', 'w')

            index = self.child.expect([
                r'login:',
                r'root@',
                pexpect.TIMEOUT,
                pexpect.EOF,
            ], timeout=self.timeout)

            if index == 2:
                raise RuntimeError(
                    f"Timeout waiting for login (>{self.timeout}s)")
            if index == 3:
                raise RuntimeError("QEMU terminated unexpectedly")

            if index == 0:
                self.child.sendline('root')
                self.child.expect([r'root@', r'#', r'\$'], timeout=30)
            self.booted = True

            self.child.sendline('export TERM=dumb')
            self.child.expect(self._PROMPT_RE, timeout=10)
            # Set KUBECONFIG for all kubectl commands
            self.child.sendline(
                'export KUBECONFIG=/etc/rancher/k3s/k3s.yaml')
            self.child.expect(self._PROMPT_RE, timeout=10)

        except Exception as e:
            self.stop()
            raise RuntimeError(f"Failed to boot image: {e}") from e

        return self

    def _build_runqemu_cmd(self):
        """Build a runqemu command line (run through bash after sourcing
        oe-init-build-env)."""
        kvm_opt = "kvm" if self.use_kvm else ""
        qemu_params = "-m 4096"
        if self.extra_qemu_params:
            qemu_params += f" {self.extra_qemu_params}"

        return (
            f"bash -c 'cd {self.poky_dir} && "
            f"source oe-init-build-env {self.build_dir} >/dev/null 2>&1 && "
            f"runqemu {self.machine} {self.image} ext4 nographic slirp "
            f"{kvm_opt} "
            f"qemuparams=\"{qemu_params}\"'"
        )

    @staticmethod
    def _strip_escape_sequences(text):
        """Strip ANSI and OSC escape sequences from terminal output."""
        # OSC sequences: ESC ] ... terminated by ESC \ or BEL
        text = re.sub(r'\x1b\][^\x1b\x07]*(?:\x1b\\|\x07)', '', text)
        # CSI sequences: ESC [ params letter
        text = re.sub(r'\x1b\[[0-9;]*[A-Za-z]', '', text)
        # Remaining short ESC-prefixed sequences
        text = re.sub(r'\x1b[^[\]].?', '', text)
        return text

    def run_command(self, cmd, timeout=60):
        """
        Run a command and return the output.

        Args:
            cmd: Shell command line to send to the guest console.
            timeout: Seconds to wait for the prompt to return.

        Returns:
            The command output with escape sequences, blank lines and the
            echoed command removed; "" if the prompt did not return in time.

        Raises:
            RuntimeError: If the system has not booted.
        """
        if not self.booted:
            raise RuntimeError("System not booted")

        time.sleep(0.3)
        self.child.sendline(cmd)

        try:
            self.child.expect(self._PROMPT_RE, timeout=timeout)
        except pexpect.TIMEOUT:
            print(f"[TIMEOUT] Command '{cmd}' timed out after {timeout}s")
            return ""

        raw_output = self._strip_escape_sequences(self.child.before)

        output_lines = []
        for i, line in enumerate(raw_output.replace('\r', '').split('\n')):
            stripped = line.strip()
            if not stripped:
                continue
            # The first line is the echoed command; a wrapped echo may also
            # show up before any real output, so drop that too.
            if i == 0 or (not output_lines and cmd[:10] in line):
                continue
            output_lines.append(stripped)

        return '\n'.join(output_lines)

    def run_command_rc(self, cmd, timeout=60):
        """
        Run a command and return (output, return_code).

        The return code is read from an appended 'echo "RC=$?"'; it
        defaults to 1 when no RC line is found (e.g. on timeout).
        """
        output = self.run_command(f'{cmd}; echo "RC=$?"', timeout=timeout)
        rc = 1
        clean_lines = []
        for line in output.splitlines():
            m = re.match(r'^RC=(\d+)$', line.strip())
            if m:
                rc = int(m.group(1))
            else:
                clean_lines.append(line)
        return '\n'.join(clean_lines), rc

    def wait_for_condition(self, check_cmd, success_pattern, timeout=180,
                           interval=10, description="condition"):
        """
        Poll a command until output matches pattern or timeout.

        Args:
            check_cmd: Command to run on each poll.
            success_pattern: Regex searched for in the command output.
            timeout: Overall deadline in seconds.
            interval: Seconds to sleep between polls.
            description: Human-readable label used in progress messages.

        Returns:
            The first output that matched success_pattern.

        Raises:
            TimeoutError: If no output matched before the deadline.
        """
        # Monotonic clock: immune to wall-clock adjustments on the host.
        deadline = time.monotonic() + timeout
        last_output = ""
        while time.monotonic() < deadline:
            output = self.run_command(check_cmd, timeout=30)
            last_output = output
            if re.search(success_pattern, output):
                return output
            remaining = int(deadline - time.monotonic())
            print(f"  Waiting for {description}... ({remaining}s remaining)")
            time.sleep(interval)
        raise TimeoutError(
            f"Timeout waiting for {description} after {timeout}s. "
            f"Last output:\n{last_output}")

    def stop(self):
        """
        Shutdown the QEMU instance.

        Best effort: asks the guest to power off, force-terminates QEMU if
        it is still alive, closes the console log and removes the temporary
        rootfs copy if one was registered. Safe to call more than once.
        """
        if self.child:
            try:
                if self.booted:
                    self.child.sendline('poweroff')
                    time.sleep(2)
                if self.child.isalive():
                    self.child.terminate(force=True)
            except Exception:
                # Teardown is deliberately best effort; a failure here must
                # not mask the test result.
                pass
            finally:
                if self.child.logfile_read:
                    self.child.logfile_read.close()
            self.child = None
            self.booted = False
        # Clean up rootfs copy
        if self._rootfs_copy:
            try:
                os.unlink(self._rootfs_copy)
            except OSError:
                # Already gone or not removable; nothing more to do.
                pass
| 315 | |||
| 316 | |||
| 317 | # ============================================================================ | ||
| 318 | # Fixtures | ||
| 319 | # ============================================================================ | ||
| 320 | |||
@pytest.fixture(scope="module")
def poky_dir(request):
    """Return the --poky-dir option as a Path; skip if it does not exist."""
    location = Path(request.config.getoption("--poky-dir"))
    if location.exists():
        return location
    pytest.skip(f"Poky directory not found: {location}")
| 328 | |||
| 329 | |||
@pytest.fixture(scope="module")
def build_dir(request, poky_dir):
    """Return the build directory as a Path; skip if it does not exist.

    Uses --build-dir when given, otherwise falls back to <poky_dir>/build.
    """
    configured = request.config.getoption("--build-dir")
    location = Path(configured) if configured else poky_dir / "build"
    if location.exists():
        return location
    pytest.skip(f"Build directory not found: {location}")
| 341 | |||
| 342 | |||
@pytest.fixture(scope="module")
def machine(request):
    """Return the value of the --machine command-line option."""
    target = request.config.getoption("--machine")
    return target
| 347 | |||
| 348 | |||
@pytest.fixture(scope="module")
def k3s_timeout(request):
    """Return the value of the --k3s-timeout command-line option."""
    readiness_timeout = request.config.getoption("--k3s-timeout")
    return readiness_timeout
| 353 | |||
| 354 | |||
@pytest.fixture(scope="module")
def k3s_session(request, poky_dir, build_dir, machine):
    """
    Boot container-image-host once (via runqemu) and share the running
    session with every single-node k3s test in the module.

    Skips when pexpect is missing, when no ext4 image has been built, or
    when the image fails to boot. The VM is stopped at teardown.
    """
    if not PEXPECT_AVAILABLE:
        pytest.skip("pexpect not installed. Run: pip install pexpect")

    deploy_dir = build_dir / "tmp" / "deploy" / "images" / machine
    has_image = any(deploy_dir.glob("container-image-host-*.rootfs.ext4"))
    if not has_image:
        pytest.skip(
            f"container-image-host ext4 image not found in {deploy_dir}")

    options = request.config
    runner = K3sRunner(
        poky_dir,
        build_dir,
        machine,
        use_kvm=not options.getoption("--no-kvm"),
        timeout=options.getoption("--boot-timeout"),
        use_runqemu=True,
        log_suffix="-single",
    )

    try:
        runner.start()
        yield runner
    except RuntimeError as boot_error:
        pytest.skip(f"Failed to boot image: {boot_error}")
    finally:
        runner.stop()
| 384 | |||
| 385 | |||
@pytest.fixture(scope="module")
def k3s_multinode(request, poky_dir, build_dir, machine):
    """
    Module-scoped fixture that boots two VMs connected via QEMU socket
    networking for multi-node k3s testing.

    Uses direct QEMU launch (not runqemu) since runqemu can only run
    one VM at a time. Creates a copy of the rootfs for the agent VM.

    VM1 (server): listens on socket, IP 192.168.50.1/24
    VM2 (agent):  connects to socket, IP 192.168.50.2/24

    Yields:
        dict with keys "server" and "agent", each a booted K3sRunner.

    Skips when pexpect is missing, the machine is unsupported, no ext4
    image is found, or either VM fails to boot. Both VMs are stopped (and
    the agent rootfs copy removed) at teardown.
    """
    if not PEXPECT_AVAILABLE:
        pytest.skip("pexpect not installed. Run: pip install pexpect")

    if machine not in _QEMU_ARCH_CONFIG:
        pytest.skip(
            f"Machine '{machine}' not supported for multi-node tests. "
            f"Supported: {list(_QEMU_ARCH_CONFIG.keys())}")

    deploy_dir = build_dir / "tmp" / "deploy" / "images" / machine
    ext4_files = sorted(
        deploy_dir.glob("container-image-host-*.rootfs.ext4"),
        key=os.path.getmtime)
    if not ext4_files:
        pytest.skip(
            f"container-image-host ext4 image not found in {deploy_dir}")

    # Newest image by modification time
    rootfs_orig = ext4_files[-1]

    # Create a copy of the rootfs for the agent VM — two VMs can't
    # share the same ext4 file read-write
    rootfs_agent = Path(f"/tmp/k3s-agent-rootfs-{os.getpid()}.ext4")
    print(f"Copying rootfs for agent VM: {rootfs_orig} -> {rootfs_agent}")
    try:
        shutil.copy2(rootfs_orig, rootfs_agent)
    except OSError:
        # Don't leave a partial (potentially very large) image behind.
        try:
            os.unlink(rootfs_agent)
        except OSError:
            pass
        raise

    timeout = request.config.getoption("--boot-timeout")
    use_kvm = not request.config.getoption("--no-kvm")
    socket_port = _SOCKET_PORT_BASE

    # Server VM: socket listen on second NIC. Bound to loopback only — the
    # agent connects via 127.0.0.1, so there is no reason to expose the
    # guest L2 segment on the host's external interfaces.
    server_params = (
        f"-netdev socket,id=vlan0,listen=127.0.0.1:{socket_port} "
        f"-device virtio-net-pci,netdev=vlan0"
    )
    server = K3sRunner(poky_dir, build_dir, machine,
                       use_kvm=use_kvm, timeout=timeout,
                       extra_qemu_params=server_params,
                       use_runqemu=False,
                       rootfs_path=rootfs_orig,
                       log_suffix="-server")

    # Agent VM: socket connect on second NIC, uses rootfs copy
    agent_params = (
        f"-netdev socket,id=vlan0,connect=127.0.0.1:{socket_port} "
        f"-device virtio-net-pci,netdev=vlan0"
    )
    agent = K3sRunner(poky_dir, build_dir, machine,
                      use_kvm=use_kvm, timeout=timeout,
                      extra_qemu_params=agent_params,
                      use_runqemu=False,
                      rootfs_path=rootfs_agent,
                      log_suffix="-agent")
    # Registers the copy so agent.stop() deletes it.
    agent._rootfs_copy = str(rootfs_agent)

    try:
        # Start server first (it listens), then agent
        server.start()
        agent.start()

        # Configure static IPs on eth1 (the socket NIC)
        server.run_command('ip addr add 192.168.50.1/24 dev eth1')
        server.run_command('ip link set eth1 up')
        agent.run_command('ip addr add 192.168.50.2/24 dev eth1')
        agent.run_command('ip link set eth1 up')
        # Brief pause for link to come up
        time.sleep(2)

        yield {"server": server, "agent": agent}

    except RuntimeError as e:
        pytest.skip(f"Failed to boot multi-node VMs: {e}")
    finally:
        agent.stop()
        server.stop()
| 471 | |||
| 472 | |||
| 473 | # ============================================================================ | ||
| 474 | # Phase 1: Single-Node Tests | ||
| 475 | # ============================================================================ | ||
| 476 | |||
@pytest.mark.boot
@pytest.mark.k3s
class TestK3sSingleNode:
    """Single-node k3s tests on container-image-host.

    The tests share one booted VM and build on each other in order:
    boot -> server start -> node ready -> deploy pod -> cleanup.
    """

    def test_k3s_boot(self, k3s_session):
        """Boot image, verify k3s binary exists and service unit is present."""
        assert k3s_session.booted, "System failed to boot"

        version_out = k3s_session.run_command('k3s --version')
        assert 'k3s' in version_out.lower(), \
            f"k3s --version unexpected output:\n{version_out}"

        # grep exits non-zero when nothing matches, which triggers the marker
        units_out = k3s_session.run_command(
            'systemctl list-unit-files | grep k3s || echo NOT_FOUND')
        assert 'NOT_FOUND' not in units_out, \
            "k3s systemd unit not found"

    def test_k3s_server_start(self, k3s_session, k3s_timeout):
        """Start k3s server and wait for node to become Ready."""
        # k3s.service should auto-start; ensure it's running
        k3s_session.run_command('systemctl start k3s 2>&1')

        # Poll until kubectl reports a node whose status is Ready
        try:
            k3s_session.wait_for_condition(
                f'{_KUBECTL} get nodes 2>/dev/null || echo WAITING',
                r'\bReady\b',
                timeout=k3s_timeout,
                interval=15,
                description="k3s node Ready")
        except TimeoutError:
            journal = k3s_session.run_command(
                'journalctl -u k3s --no-pager -n 50 2>/dev/null || '
                'echo "no logs"')
            pytest.fail(
                f"k3s server did not become Ready within {k3s_timeout}s.\n"
                f"Logs:\n{journal}")

    def test_k3s_node_ready(self, k3s_session):
        """Verify exactly 1 node in Ready state."""
        output = k3s_session.run_command(f'{_KUBECTL} get nodes 2>&1')
        ready_count = sum(
            1 for row in output.splitlines()
            if 'Ready' in row and 'NotReady' not in row)
        assert ready_count == 1, \
            f"Expected 1 Ready node, got {ready_count}:\n{output}"

    def test_k3s_deploy_pod(self, k3s_session, k3s_timeout):
        """Deploy a busybox pod and verify it reaches Running state."""
        k3s_session.run_command(
            f'{_KUBECTL} run test-busybox --image=busybox '
            f'--restart=Never -- sleep 300 2>&1')

        try:
            pod_status = k3s_session.wait_for_condition(
                f'{_KUBECTL} get pod test-busybox 2>/dev/null '
                f'|| echo WAITING',
                r'Running',
                timeout=k3s_timeout,
                interval=10,
                description="test-busybox Running")
        except TimeoutError:
            # Collect diagnostics before failing
            events = k3s_session.run_command(
                f'{_KUBECTL} describe pod test-busybox 2>&1 | tail -20')
            pod_status = k3s_session.run_command(
                f'{_KUBECTL} get pod test-busybox 2>&1')
            pytest.fail(
                f"Pod test-busybox did not reach Running:\n{pod_status}\n"
                f"Events:\n{events}")

        assert 'Running' in pod_status, \
            f"Pod not Running:\n{pod_status}"

    def test_k3s_cleanup(self, k3s_session):
        """Delete the test pod and verify termination."""
        k3s_session.run_command(
            f'{_KUBECTL} delete pod test-busybox --grace-period=5 2>&1')

        try:
            k3s_session.wait_for_condition(
                f'{_KUBECTL} get pod test-busybox 2>&1',
                r'NotFound|not found|No resources',
                timeout=60,
                interval=5,
                description="pod deletion")
        except TimeoutError:
            # A pod still in Terminating is accepted; anything else fails
            leftover = k3s_session.run_command(
                f'{_KUBECTL} get pod test-busybox 2>&1')
            if 'Terminating' not in leftover:
                pytest.fail(f"Pod not cleaned up:\n{leftover}")
| 567 | |||
| 568 | |||
| 569 | # ============================================================================ | ||
| 570 | # Phase 2: Multi-Node Tests | ||
| 571 | # ============================================================================ | ||
| 572 | |||
@pytest.mark.boot
@pytest.mark.k3s
@pytest.mark.multinode
class TestK3sMultiNode:
    """Multi-node k3s tests using QEMU socket networking.

    The tests share the two VMs from the k3s_multinode fixture and build on
    each other in order: boot -> network -> agent join -> ready -> scheduling.
    """

    # Two separate "Ready" status words anywhere in the output. The word
    # boundaries matter: without them "NotReady" also matches, and the wait
    # would finish while the agent node is still NotReady.
    _TWO_NODES_READY = r'\bReady\b[\s\S]*\bReady\b'

    def test_k3s_multinode_boot(self, k3s_multinode):
        """Both VMs boot successfully."""
        server = k3s_multinode["server"]
        agent = k3s_multinode["agent"]
        assert server.booted, "Server VM failed to boot"
        assert agent.booted, "Agent VM failed to boot"

        output = server.run_command('k3s --version')
        assert 'k3s' in output.lower()
        output = agent.run_command('k3s --version')
        assert 'k3s' in output.lower()

    def test_k3s_multinode_network(self, k3s_multinode):
        """VMs can ping each other on the socket network (eth1)."""
        server = k3s_multinode["server"]
        agent = k3s_multinode["agent"]

        output, rc = server.run_command_rc(
            'ping -c 3 -W 5 192.168.50.2')
        assert rc == 0, \
            f"Server cannot ping agent:\n{output}"

        output, rc = agent.run_command_rc(
            'ping -c 3 -W 5 192.168.50.1')
        assert rc == 0, \
            f"Agent cannot ping server:\n{output}"

    def test_k3s_agent_join(self, k3s_multinode, k3s_timeout):
        """Start k3s server on VM1, join agent VM2."""
        server = k3s_multinode["server"]
        agent = k3s_multinode["agent"]

        # Stop default k3s service (auto-started) and start with
        # multi-node flags binding to the socket network
        server.run_command('systemctl stop k3s 2>/dev/null')
        server.run_command(
            'k3s server '
            '--write-kubeconfig-mode 644 '
            '--disable-cloud-controller '
            '--node-ip 192.168.50.1 '
            '--bind-address 192.168.50.1 '
            '--advertise-address 192.168.50.1 '
            '--flannel-iface eth1 '
            '&>/var/log/k3s-server.log &')

        # Wait for server node Ready
        try:
            server.wait_for_condition(
                f'{_KUBECTL} get nodes 2>/dev/null || echo WAITING',
                r'\bReady\b',
                timeout=k3s_timeout,
                interval=15,
                description="k3s server node Ready")
        except TimeoutError:
            logs = server.run_command(
                'tail -50 /var/log/k3s-server.log 2>/dev/null || '
                'echo "no logs"')
            pytest.fail(f"Server not Ready:\n{logs}")

        # Extract node token
        token = server.run_command(
            'cat /var/lib/rancher/k3s/server/node-token 2>&1')
        assert token and 'No such file' not in token, \
            f"Failed to get node token:\n{token}"
        # The token is the last non-empty line of the output
        token = token.strip().splitlines()[-1].strip()

        # Stop default k3s on agent and start agent mode
        agent.run_command('systemctl stop k3s 2>/dev/null')
        agent.run_command(
            f'k3s agent '
            f'--server https://192.168.50.1:6443 '
            f'--token {token} '
            f'--node-ip 192.168.50.2 '
            f'--flannel-iface eth1 '
            f'&>/var/log/k3s-agent.log &')

        # Wait for 2 nodes Ready on server
        try:
            server.wait_for_condition(
                f'{_KUBECTL} get nodes 2>/dev/null || echo WAITING',
                self._TWO_NODES_READY,
                timeout=k3s_timeout,
                interval=15,
                description="2 nodes Ready")
        except TimeoutError:
            nodes = server.run_command(
                f'{_KUBECTL} get nodes 2>&1')
            agent_logs = agent.run_command(
                'tail -30 /var/log/k3s-agent.log 2>/dev/null || '
                'echo "no logs"')
            pytest.fail(
                f"Agent did not join cluster:\n"
                f"Nodes:\n{nodes}\n"
                f"Agent logs:\n{agent_logs}")

    def test_k3s_multinode_ready(self, k3s_multinode):
        """Verify 2 nodes in Ready state."""
        server = k3s_multinode["server"]

        output = server.run_command(f'{_KUBECTL} get nodes 2>&1')
        ready_lines = [line for line in output.splitlines()
                       if 'Ready' in line and 'NotReady' not in line]
        assert len(ready_lines) == 2, \
            f"Expected 2 Ready nodes, got {len(ready_lines)}:\n{output}"

    def test_k3s_multinode_scheduling(self, k3s_multinode, k3s_timeout):
        """Deploy 2-replica deployment and verify pods on both nodes.

        Pod spread across nodes is reported but not enforced. If only some
        replicas reach Running before the timeout, the test prints a note
        and passes; it fails only when no replica is Running.
        """
        server = k3s_multinode["server"]

        server.run_command(
            f'{_KUBECTL} create deployment test-multi '
            f'--image=busybox --replicas=2 '
            f'-- sleep 300 2>&1')

        try:
            try:
                output = server.wait_for_condition(
                    f'{_KUBECTL} get pods -l app=test-multi -o wide '
                    f'2>/dev/null || echo WAITING',
                    r'Running.*\n.*Running',
                    timeout=k3s_timeout,
                    interval=10,
                    description="2 replicas Running")
            except TimeoutError:
                output = server.run_command(
                    f'{_KUBECTL} get pods -l app=test-multi -o wide 2>&1')
                events = server.run_command(
                    f'{_KUBECTL} describe pods -l app=test-multi 2>&1 '
                    f'| tail -30')
                if 'Running' in output:
                    print(f"Only partial scheduling achieved:\n{output}")
                    return
                pytest.fail(
                    f"Replicas not Running:\n{output}\nEvents:\n{events}")

            # Verify pods are on different nodes (best effort)
            pod_lines = [line for line in output.splitlines()
                         if 'Running' in line]
            if len(pod_lines) >= 2:
                nodes = set()
                for line in pod_lines:
                    parts = line.split()
                    # Takes the 7th whitespace-separated field as the node
                    # name — presumably the NODE column of '-o wide';
                    # verify against the kubectl version in the image.
                    if len(parts) >= 7:
                        nodes.add(parts[6])
                if len(nodes) >= 2:
                    print(f"Pods scheduled on {len(nodes)} different nodes")
                else:
                    print(
                        "Pods on same node "
                        "(acceptable with 2-replica deployment)")
        finally:
            # Cleanup runs on every exit path (success, partial scheduling
            # and failure) so the deployment never outlives the test.
            server.run_command(
                f'{_KUBECTL} delete deployment test-multi '
                f'--grace-period=5 2>&1')
