summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBruce Ashfield <bruce.ashfield@gmail.com>2026-04-06 15:49:58 +0000
committerBruce Ashfield <bruce.ashfield@gmail.com>2026-04-06 23:49:24 +0000
commitac89a0c46589c79c6ff583a11e0284e54af6a2cb (patch)
treeff2f5d91f2774ee034b03d0104a4d84022c35920
parent540f1aee43c9694912869ef6d32a570ffbf7c123 (diff)
downloadmeta-virtualization-ac89a0c46589c79c6ff583a11e0284e54af6a2cb.tar.gz
tests: add k3s runtime test suite
Add test_k3s_runtime.py with 10 tests for k3s single-node and multi-node verification: Single-node (5 tests): - Boot, verify k3s binary and service unit - Start k3s server, wait for node Ready - Verify 1 node in Ready state - Deploy a busybox pod, verify Running - Delete pod, verify cleanup Multi-node (5 tests): - Boot 2 VMs via QEMU socket networking - Verify inter-VM ping on socket network - Start k3s server on VM1, join agent on VM2 - Verify 2 nodes Ready - Deploy 2-replica deployment, verify scheduling Uses architecture-aware QEMU configuration (x86-64 and arm64 supported). Multi-node tests launch QEMU directly (not runqemu) to support two concurrent VMs. kubectl commands use KUBECONFIG instead of embedded 'k3s kubectl' which is not available in the Yocto build. Signed-off-by: Bruce Ashfield <bruce.ashfield@gmail.com>
-rw-r--r--tests/test_k3s_runtime.py731
1 files changed, 731 insertions, 0 deletions
diff --git a/tests/test_k3s_runtime.py b/tests/test_k3s_runtime.py
new file mode 100644
index 00000000..8b39bfd2
--- /dev/null
+++ b/tests/test_k3s_runtime.py
@@ -0,0 +1,731 @@
1# SPDX-FileCopyrightText: Copyright (C) 2025 Bruce Ashfield
2#
3# SPDX-License-Identifier: MIT
4"""
5K3s runtime tests - boot container-image-host with k3s and verify Kubernetes.
6
7Single-node tests verify k3s server start, node readiness, and basic pod
8deployment. Multi-node tests use QEMU socket networking to connect two VMs
9on a shared L2 segment and verify agent join + multi-node scheduling.
10
11Build prerequisites (in local.conf):
12 require conf/distro/include/meta-virt-host.conf
13 require conf/distro/include/container-host-k3s.conf
14 MACHINE = "qemux86-64" # or qemuarm64
15
16 bitbake container-image-host
17
18Run:
19 # Single-node only
20 pytest tests/test_k3s_runtime.py -v -k "not multinode" --machine qemux86-64
21
22 # Multi-node only
23 pytest tests/test_k3s_runtime.py -v -k "multinode" --machine qemux86-64
24
25 # All tests
26 pytest tests/test_k3s_runtime.py -v --machine qemux86-64
27
28Options:
29 --k3s-timeout Overall k3s readiness timeout (default: 300s)
30 --boot-timeout QEMU boot timeout (default: 120s)
31 --no-kvm Disable KVM acceleration
32
33Notes:
34 - k3s does not embed 'kubectl' as a subcommand in our build.
35 Use 'kubectl' with KUBECONFIG=/etc/rancher/k3s/k3s.yaml instead.
36 - System pods (coredns, traefik) are not auto-deployed because k3s
37 manifest extraction is not yet supported in the Yocto build.
38 - Multi-node tests launch QEMU directly (not via runqemu) to support
39 two concurrent VMs with socket networking. Architecture-specific
40 QEMU parameters are auto-detected from the machine setting.
41"""
42
43import os
44import re
45import shutil
46import time
47import pytest
48from pathlib import Path
49
50try:
51 import pexpect
52 PEXPECT_AVAILABLE = True
53except ImportError:
54 PEXPECT_AVAILABLE = False
55
56
# Base TCP port for the QEMU socket network, derived from the PID so that
# concurrently running test sessions are unlikely to collide.
_SOCKET_PORT_BASE = (os.getpid() % 50000) + 10000

# Prefix for every kubectl call in the guest; it points kubectl at the k3s
# kubeconfig through the KUBECONFIG environment variable.
_KUBECTL = 'KUBECONFIG=/etc/rancher/k3s/k3s.yaml kubectl'


# Per-machine QEMU settings used when QEMU is launched directly.
# Keys are the MACHINE names accepted by --machine.
_QEMU_ARCH_CONFIG = {
    "qemux86-64": dict(
        qemu_bin="qemu-system-x86_64",
        machine="-M q35",
        cpu_kvm="-cpu host",
        cpu_tcg="-cpu Skylake-Client",
        kernel_name="bzImage",
        console="ttyS0",
        rootdev="/dev/vda",
    ),
    "qemuarm64": dict(
        qemu_bin="qemu-system-aarch64",
        machine="-M virt",
        cpu_kvm="-cpu host",
        cpu_tcg="-cpu cortex-a57",
        kernel_name="Image",
        console="ttyAMA0",
        rootdev="/dev/vda",
    ),
}
85
86
class K3sRunner:
    """
    Manages a QEMU session for K3s testing.

    Boots container-image-host with optional dual NIC (slirp + socket network)
    and provides command execution via serial console. Supports both runqemu
    (single-node) and direct QEMU launch (multi-node).

    Typical use: construct, call start(), issue run_command() /
    run_command_rc() / wait_for_condition(), and always call stop().
    """

    # Shell prompt that marks the end of a command's output on the console.
    _PROMPT = r'root@[^:]+:[^#]+#'

    def __init__(self, poky_dir, build_dir, machine, use_kvm=True,
                 timeout=120, image="container-image-host",
                 extra_qemu_params="", log_suffix="",
                 use_runqemu=True, rootfs_path=None):
        """
        Store the session configuration; nothing is launched here.

        Args:
            poky_dir: poky checkout (used to source oe-init-build-env).
            build_dir: bitbake build directory holding tmp/deploy/images.
            machine: MACHINE name, e.g. "qemux86-64".
            use_kvm: enable KVM acceleration when True.
            timeout: seconds to wait for the login prompt.
            image: image recipe name to boot.
            extra_qemu_params: extra QEMU arguments appended verbatim.
            log_suffix: suffix for the console log file name.
            use_runqemu: launch via runqemu (True) or QEMU directly (False).
            rootfs_path: explicit rootfs for direct launch; when unset the
                newest matching ext4 image in the deploy dir is used.
        """
        self.poky_dir = Path(poky_dir)
        self.build_dir = Path(build_dir)
        self.machine = machine
        self.use_kvm = use_kvm
        self.timeout = timeout
        self.image = image
        self.extra_qemu_params = extra_qemu_params
        self.log_suffix = log_suffix
        self.use_runqemu = use_runqemu
        self.rootfs_path = rootfs_path
        self.child = None
        self.booted = False
        # Set by callers that hand us a temporary rootfs copy; stop()
        # deletes this file.
        self._rootfs_copy = None

    def _build_direct_qemu_cmd(self):
        """
        Build a direct QEMU command line (not runqemu).

        Returns:
            The command line as a single string.

        Raises:
            RuntimeError: unsupported machine, or kernel/rootfs not found.
        """
        arch_cfg = _QEMU_ARCH_CONFIG.get(self.machine)
        if not arch_cfg:
            raise RuntimeError(
                f"Unsupported machine '{self.machine}' for direct QEMU. "
                f"Supported: {list(_QEMU_ARCH_CONFIG.keys())}")

        deploy_dir = self.build_dir / "tmp" / "deploy" / "images" / self.machine
        kernel = deploy_dir / arch_cfg["kernel_name"]
        if not kernel.exists():
            # Try with machine suffix; sort so the choice is deterministic.
            candidates = sorted(
                deploy_dir.glob(f"{arch_cfg['kernel_name']}*"))
            if not candidates:
                raise RuntimeError(f"Kernel not found: {kernel}")
            kernel = candidates[0]

        # Use provided rootfs or fall back to the newest image in deploy.
        rootfs = self.rootfs_path
        if not rootfs:
            ext4_files = sorted(deploy_dir.glob(
                f"{self.image}-*.rootfs.ext4"), key=os.path.getmtime)
            if not ext4_files:
                raise RuntimeError(
                    f"No ext4 rootfs found in {deploy_dir}")
            rootfs = ext4_files[-1]

        cpu = arch_cfg["cpu_kvm"] if self.use_kvm else arch_cfg["cpu_tcg"]
        kvm_flag = "-enable-kvm" if self.use_kvm else ""

        # if=virtio is required so the disk shows up as the virtio block
        # device named by 'rootdev' (/dev/vda). Without it the drive uses
        # the machine's default interface, which on q35 is not virtio.
        qemu_params = (
            f"{arch_cfg['qemu_bin']} {arch_cfg['machine']} {cpu} "
            f"{kvm_flag} -m 4096 -smp 2 -nographic "
            f"-kernel {kernel} "
            f"-drive file={rootfs},if=virtio,format=raw "
            f"-append 'root={arch_cfg['rootdev']} rw "
            f"console={arch_cfg['console']} ip=dhcp' "
            f"-netdev user,id=net0 -device virtio-net-pci,netdev=net0"
        )

        if self.extra_qemu_params:
            qemu_params += f" {self.extra_qemu_params}"

        return qemu_params

    def start(self):
        """
        Start QEMU and wait for login prompt.

        Logs in as root when a login prompt appears, then sets TERM and
        KUBECONFIG in the guest shell. Console output is logged to
        /tmp/runqemu-k3s-test<log_suffix>.log.

        Returns:
            self, to allow chaining.

        Raises:
            RuntimeError: pexpect missing, boot timeout, or QEMU exit.
        """
        if not PEXPECT_AVAILABLE:
            raise RuntimeError("pexpect not installed. Run: pip install pexpect")

        if self.use_runqemu:
            cmd = self._build_runqemu_cmd()
        else:
            cmd = self._build_direct_qemu_cmd()

        log_name = f"runqemu-k3s-test{self.log_suffix}.log"
        print(f"Starting QEMU (K3s{self.log_suffix}): {cmd}")
        self.child = pexpect.spawn(
            cmd, encoding='utf-8', timeout=self.timeout)
        # Closed again in stop().
        self.child.logfile_read = open(f'/tmp/{log_name}', 'w')

        try:
            index = self.child.expect([
                r'login:',
                r'root@',
                pexpect.TIMEOUT,
                pexpect.EOF,
            ], timeout=self.timeout)

            if index == 0:
                self.child.sendline('root')
                self.child.expect([r'root@', r'#', r'\$'], timeout=30)
                self.booted = True
            elif index == 1:
                # Already at a root shell; no login needed.
                self.booted = True

            if self.booted:
                self.child.sendline('export TERM=dumb')
                self.child.expect(self._PROMPT, timeout=10)
                # Set KUBECONFIG for all kubectl commands
                self.child.sendline(
                    'export KUBECONFIG=/etc/rancher/k3s/k3s.yaml')
                self.child.expect(self._PROMPT, timeout=10)

            if index == 2:
                raise RuntimeError(
                    f"Timeout waiting for login (>{self.timeout}s)")
            elif index == 3:
                raise RuntimeError("QEMU terminated unexpectedly")

        except Exception as e:
            # Tear the VM down before reporting, and keep the original
            # exception attached as the cause.
            self.stop()
            raise RuntimeError(f"Failed to boot image: {e}") from e

        return self

    def _build_runqemu_cmd(self):
        """Build a runqemu command line, run from a sourced build env."""
        kvm_opt = "kvm" if self.use_kvm else ""
        qemu_params = "-m 4096"
        if self.extra_qemu_params:
            qemu_params += f" {self.extra_qemu_params}"

        return (
            f"bash -c 'cd {self.poky_dir} && "
            f"source oe-init-build-env {self.build_dir} >/dev/null 2>&1 && "
            f"runqemu {self.machine} {self.image} ext4 nographic slirp "
            f"{kvm_opt} "
            f"qemuparams=\"{qemu_params}\"'"
        )

    @staticmethod
    def _strip_escape_sequences(text):
        """Strip ANSI and OSC escape sequences from terminal output."""
        # OSC: ESC ] ... terminated by BEL or ESC \
        text = re.sub(r'\x1b\][^\x1b\x07]*(?:\x1b\\|\x07)', '', text)
        # CSI: ESC [ params final-letter
        text = re.sub(r'\x1b\[[0-9;]*[A-Za-z]', '', text)
        # Any remaining short ESC-prefixed sequence
        text = re.sub(r'\x1b[^[\]].?', '', text)
        return text

    def run_command(self, cmd, timeout=60):
        """
        Run a command and return the output.

        Args:
            cmd: shell command to send to the guest console.
            timeout: seconds to wait for the prompt to return.

        Returns:
            The command output with escape sequences, blank lines and the
            echoed command line removed; "" if the prompt did not return
            within the timeout.

        Raises:
            RuntimeError: if the system has not booted.
        """
        if not self.booted:
            raise RuntimeError("System not booted")

        time.sleep(0.3)
        self.child.sendline(cmd)

        try:
            self.child.expect(self._PROMPT, timeout=timeout)
        except pexpect.TIMEOUT:
            print(f"[TIMEOUT] Command '{cmd}' timed out after {timeout}s")
            return ""

        raw_output = self._strip_escape_sequences(self.child.before)
        lines = raw_output.replace('\r', '').split('\n')
        output_lines = []
        for i, line in enumerate(lines):
            stripped = line.strip()
            if not stripped:
                continue
            # Drop the echoed command: the first line, or a wrapped echo
            # seen before any real output.
            if i == 0 or (not output_lines and cmd[:10] in line):
                continue
            output_lines.append(stripped)

        return '\n'.join(output_lines)

    def run_command_rc(self, cmd, timeout=60):
        """
        Run a command and return (output, return_code).

        The exit status is recovered from an 'RC=<n>' marker appended to the
        command; if no marker is found the return code defaults to 1.
        """
        output = self.run_command(f'{cmd}; echo "RC=$?"', timeout=timeout)
        rc = 1
        clean_lines = []
        for line in output.splitlines():
            m = re.match(r'^RC=(\d+)$', line.strip())
            if m:
                rc = int(m.group(1))
            else:
                clean_lines.append(line)
        return '\n'.join(clean_lines), rc

    def wait_for_condition(self, check_cmd, success_pattern, timeout=180,
                           interval=10, description="condition"):
        """
        Poll a command until output matches pattern or timeout.

        Args:
            check_cmd: command to run on each poll.
            success_pattern: regex searched for in the command output.
            timeout: overall deadline in seconds.
            interval: seconds to sleep between polls.
            description: label used in progress and error messages.

        Returns:
            The first output that matched.

        Raises:
            TimeoutError: if no poll matched before the deadline.
        """
        deadline = time.time() + timeout
        last_output = ""
        while time.time() < deadline:
            output = self.run_command(check_cmd, timeout=30)
            last_output = output
            if re.search(success_pattern, output):
                return output
            remaining = int(deadline - time.time())
            print(f"  Waiting for {description}... ({remaining}s remaining)")
            time.sleep(interval)
        raise TimeoutError(
            f"Timeout waiting for {description} after {timeout}s. "
            f"Last output:\n{last_output}")

    def stop(self):
        """
        Shutdown the QEMU instance.

        Best effort: requests a poweroff if booted, force-terminates the
        process if it is still alive, closes the console log and removes
        the temporary rootfs copy, if any. Safe to call more than once.
        """
        if self.child:
            try:
                if self.booted:
                    self.child.sendline('poweroff')
                    time.sleep(2)
                if self.child.isalive():
                    self.child.terminate(force=True)
            except Exception:
                # Teardown must not mask the original test failure.
                pass
            finally:
                if self.child.logfile_read:
                    self.child.logfile_read.close()
            self.child = None
            self.booted = False
        # Clean up rootfs copy
        if self._rootfs_copy and Path(self._rootfs_copy).exists():
            try:
                os.unlink(self._rootfs_copy)
            except OSError:
                pass
315
316
317# ============================================================================
318# Fixtures
319# ============================================================================
320
@pytest.fixture(scope="module")
def poky_dir(request):
    """Return the --poky-dir path, skipping when it does not exist."""
    candidate = Path(request.config.getoption("--poky-dir"))
    if candidate.exists():
        return candidate
    pytest.skip(f"Poky directory not found: {candidate}")
328
329
@pytest.fixture(scope="module")
def build_dir(request, poky_dir):
    """Return the build directory, skipping when it does not exist.

    Uses --build-dir when given, otherwise <poky_dir>/build.
    """
    configured = request.config.getoption("--build-dir")
    location = Path(configured) if configured else poky_dir / "build"
    if location.exists():
        return location
    pytest.skip(f"Build directory not found: {location}")
341
342
@pytest.fixture(scope="module")
def machine(request):
    """Return the value of the --machine option."""
    selected = request.config.getoption("--machine")
    return selected
347
348
@pytest.fixture(scope="module")
def k3s_timeout(request):
    """Return the value of the --k3s-timeout option."""
    limit = request.config.getoption("--k3s-timeout")
    return limit
353
354
@pytest.fixture(scope="module")
def k3s_session(request, poky_dir, build_dir, machine):
    """
    Boot container-image-host once via runqemu and share the session with
    every single-node k3s test in the module.

    Skips when pexpect is missing, when no ext4 image has been built, or
    when the image fails to boot. The VM is always stopped on teardown.
    """
    if not PEXPECT_AVAILABLE:
        pytest.skip("pexpect not installed. Run: pip install pexpect")

    images_dir = build_dir / "tmp" / "deploy" / "images" / machine
    built_images = list(images_dir.glob("container-image-host-*.rootfs.ext4"))
    if len(built_images) == 0:
        pytest.skip(
            f"container-image-host ext4 image not found in {images_dir}")

    get_option = request.config.getoption
    boot_timeout = get_option("--boot-timeout")
    kvm_enabled = not get_option("--no-kvm")

    vm = K3sRunner(
        poky_dir,
        build_dir,
        machine,
        use_kvm=kvm_enabled,
        timeout=boot_timeout,
        use_runqemu=True,
        log_suffix="-single",
    )

    try:
        vm.start()
        yield vm
    except RuntimeError as err:
        pytest.skip(f"Failed to boot image: {err}")
    finally:
        vm.stop()
384
385
@pytest.fixture(scope="module")
def k3s_multinode(request, poky_dir, build_dir, machine):
    """
    Module-scoped fixture that boots two VMs connected via QEMU socket
    networking for multi-node k3s testing.

    Uses direct QEMU launch (not runqemu) since runqemu can only run
    one VM at a time. Creates a copy of the rootfs for the agent VM.

    VM1 (server): listens on socket, IP 192.168.50.1/24
    VM2 (agent):  connects to socket, IP 192.168.50.2/24

    Yields:
        dict with "server" and "agent" K3sRunner instances.

    Skips when pexpect is missing, the machine is unsupported, no image
    is built, or either VM fails to boot. Both VMs are stopped and the
    rootfs copy is removed on teardown.
    """
    if not PEXPECT_AVAILABLE:
        pytest.skip("pexpect not installed. Run: pip install pexpect")

    if machine not in _QEMU_ARCH_CONFIG:
        pytest.skip(
            f"Machine '{machine}' not supported for multi-node tests. "
            f"Supported: {list(_QEMU_ARCH_CONFIG.keys())}")

    deploy_dir = build_dir / "tmp" / "deploy" / "images" / machine
    ext4_files = sorted(
        deploy_dir.glob("container-image-host-*.rootfs.ext4"),
        key=os.path.getmtime)
    if not ext4_files:
        pytest.skip(
            f"container-image-host ext4 image not found in {deploy_dir}")

    # Newest image wins.
    rootfs_orig = ext4_files[-1]

    # Create a copy of the rootfs for the agent VM — two VMs can't
    # share the same ext4 file read-write
    rootfs_agent = Path(f"/tmp/k3s-agent-rootfs-{os.getpid()}.ext4")
    print(f"Copying rootfs for agent VM: {rootfs_orig} -> {rootfs_agent}")
    try:
        shutil.copy2(rootfs_orig, rootfs_agent)
    except OSError:
        # The runner that would normally delete the copy does not exist
        # yet, so remove a partial copy here (e.g. /tmp ran out of space).
        try:
            os.unlink(rootfs_agent)
        except OSError:
            pass
        raise

    timeout = request.config.getoption("--boot-timeout")
    use_kvm = not request.config.getoption("--no-kvm")
    socket_port = _SOCKET_PORT_BASE

    # Server VM: socket listen on second NIC
    server_params = (
        f"-netdev socket,id=vlan0,listen=:{socket_port} "
        f"-device virtio-net-pci,netdev=vlan0"
    )
    server = K3sRunner(poky_dir, build_dir, machine,
                       use_kvm=use_kvm, timeout=timeout,
                       extra_qemu_params=server_params,
                       use_runqemu=False,
                       rootfs_path=rootfs_orig,
                       log_suffix="-server")

    # Agent VM: socket connect on second NIC, uses rootfs copy
    agent_params = (
        f"-netdev socket,id=vlan0,connect=127.0.0.1:{socket_port} "
        f"-device virtio-net-pci,netdev=vlan0"
    )
    agent = K3sRunner(poky_dir, build_dir, machine,
                      use_kvm=use_kvm, timeout=timeout,
                      extra_qemu_params=agent_params,
                      use_runqemu=False,
                      rootfs_path=rootfs_agent,
                      log_suffix="-agent")
    # Hand ownership of the copy to the agent runner; agent.stop()
    # deletes it.
    agent._rootfs_copy = str(rootfs_agent)

    try:
        # Start server first (it listens), then agent
        server.start()
        agent.start()

        # Configure static IPs on eth1 (the socket NIC)
        server.run_command('ip addr add 192.168.50.1/24 dev eth1')
        server.run_command('ip link set eth1 up')
        agent.run_command('ip addr add 192.168.50.2/24 dev eth1')
        agent.run_command('ip link set eth1 up')
        # Brief pause for link to come up
        time.sleep(2)

        yield {"server": server, "agent": agent}

    except RuntimeError as e:
        pytest.skip(f"Failed to boot multi-node VMs: {e}")
    finally:
        agent.stop()
        server.stop()
471
472
473# ============================================================================
474# Phase 1: Single-Node Tests
475# ============================================================================
476
@pytest.mark.boot
@pytest.mark.k3s
class TestK3sSingleNode:
    """Single-node k3s tests on container-image-host.

    The tests share one booted VM (k3s_session) and build on each other in
    definition order: boot, server start, node ready, pod deploy, cleanup.
    """

    def test_k3s_boot(self, k3s_session):
        """Boot image, verify k3s binary exists and service unit is present."""
        assert k3s_session.booted, "System failed to boot"

        # Check the exit status as well as the text: a missing binary
        # yields "k3s: command not found", which also contains "k3s".
        output, rc = k3s_session.run_command_rc('k3s --version')
        assert rc == 0 and 'k3s' in output.lower(), \
            f"k3s --version unexpected output (rc={rc}):\n{output}"

        output = k3s_session.run_command(
            'systemctl list-unit-files | grep k3s || echo NOT_FOUND')
        assert 'NOT_FOUND' not in output, \
            "k3s systemd unit not found"

    def test_k3s_server_start(self, k3s_session, k3s_timeout):
        """Start k3s server and wait for node to become Ready."""
        # k3s.service should auto-start; ensure it's running
        k3s_session.run_command('systemctl start k3s 2>&1')

        # Wait for node Ready (\b keeps "NotReady" from matching)
        try:
            k3s_session.wait_for_condition(
                f'{_KUBECTL} get nodes 2>/dev/null || echo WAITING',
                r'\bReady\b',
                timeout=k3s_timeout,
                interval=15,
                description="k3s node Ready")
        except TimeoutError:
            logs = k3s_session.run_command(
                'journalctl -u k3s --no-pager -n 50 2>/dev/null || '
                'echo "no logs"')
            pytest.fail(
                f"k3s server did not become Ready within {k3s_timeout}s.\n"
                f"Logs:\n{logs}")

    def test_k3s_node_ready(self, k3s_session):
        """Verify exactly 1 node in Ready state."""
        output = k3s_session.run_command(f'{_KUBECTL} get nodes 2>&1')
        ready_lines = [line for line in output.splitlines()
                       if 'Ready' in line and 'NotReady' not in line]
        assert len(ready_lines) == 1, \
            f"Expected 1 Ready node, got {len(ready_lines)}:\n{output}"

    def test_k3s_deploy_pod(self, k3s_session, k3s_timeout):
        """Deploy a busybox pod and verify it reaches Running state."""
        k3s_session.run_command(
            f'{_KUBECTL} run test-busybox --image=busybox '
            f'--restart=Never -- sleep 300 2>&1')

        try:
            output = k3s_session.wait_for_condition(
                f'{_KUBECTL} get pod test-busybox 2>/dev/null '
                f'|| echo WAITING',
                r'Running',
                timeout=k3s_timeout,
                interval=10,
                description="test-busybox Running")
        except TimeoutError:
            # Collect diagnostics before failing.
            events = k3s_session.run_command(
                f'{_KUBECTL} describe pod test-busybox 2>&1 | tail -20')
            output = k3s_session.run_command(
                f'{_KUBECTL} get pod test-busybox 2>&1')
            pytest.fail(
                f"Pod test-busybox did not reach Running:\n{output}\n"
                f"Events:\n{events}")

        assert 'Running' in output, \
            f"Pod not Running:\n{output}"

    def test_k3s_cleanup(self, k3s_session):
        """Delete the test pod and verify termination."""
        k3s_session.run_command(
            f'{_KUBECTL} delete pod test-busybox --grace-period=5 2>&1')

        try:
            k3s_session.wait_for_condition(
                f'{_KUBECTL} get pod test-busybox 2>&1',
                r'NotFound|not found|No resources',
                timeout=60,
                interval=5,
                description="pod deletion")
        except TimeoutError:
            output = k3s_session.run_command(
                f'{_KUBECTL} get pod test-busybox 2>&1')
            # A pod that is still Terminating is accepted as cleaned up.
            if 'Terminating' not in output:
                pytest.fail(f"Pod not cleaned up:\n{output}")
567
568
569# ============================================================================
570# Phase 2: Multi-Node Tests
571# ============================================================================
572
@pytest.mark.boot
@pytest.mark.k3s
@pytest.mark.multinode
class TestK3sMultiNode:
    """Multi-node k3s tests using QEMU socket networking.

    The tests share the two VMs from k3s_multinode and build on each other
    in definition order: boot, network, agent join, ready, scheduling.
    """

    def test_k3s_multinode_boot(self, k3s_multinode):
        """Both VMs boot successfully."""
        server = k3s_multinode["server"]
        agent = k3s_multinode["agent"]
        assert server.booted, "Server VM failed to boot"
        assert agent.booted, "Agent VM failed to boot"

        # Check the exit status as well as the text: a missing binary
        # yields "k3s: command not found", which also contains "k3s".
        output, rc = server.run_command_rc('k3s --version')
        assert rc == 0 and 'k3s' in output.lower(), \
            f"k3s --version failed on server (rc={rc}):\n{output}"
        output, rc = agent.run_command_rc('k3s --version')
        assert rc == 0 and 'k3s' in output.lower(), \
            f"k3s --version failed on agent (rc={rc}):\n{output}"

    def test_k3s_multinode_network(self, k3s_multinode):
        """VMs can ping each other on the socket network (eth1)."""
        server = k3s_multinode["server"]
        agent = k3s_multinode["agent"]

        output, rc = server.run_command_rc(
            'ping -c 3 -W 5 192.168.50.2')
        assert rc == 0, \
            f"Server cannot ping agent:\n{output}"

        output, rc = agent.run_command_rc(
            'ping -c 3 -W 5 192.168.50.1')
        assert rc == 0, \
            f"Agent cannot ping server:\n{output}"

    def test_k3s_agent_join(self, k3s_multinode, k3s_timeout):
        """Start k3s server on VM1, join agent VM2."""
        server = k3s_multinode["server"]
        agent = k3s_multinode["agent"]

        # Stop default k3s service (auto-started) and start with
        # multi-node flags binding to the socket network
        server.run_command('systemctl stop k3s 2>/dev/null')
        server.run_command(
            'k3s server '
            '--write-kubeconfig-mode 644 '
            '--disable-cloud-controller '
            '--node-ip 192.168.50.1 '
            '--bind-address 192.168.50.1 '
            '--advertise-address 192.168.50.1 '
            '--flannel-iface eth1 '
            '&>/var/log/k3s-server.log &')

        # Wait for server node Ready
        try:
            server.wait_for_condition(
                f'{_KUBECTL} get nodes 2>/dev/null || echo WAITING',
                r'\bReady\b',
                timeout=k3s_timeout,
                interval=15,
                description="k3s server node Ready")
        except TimeoutError:
            logs = server.run_command(
                'tail -50 /var/log/k3s-server.log 2>/dev/null || '
                'echo "no logs"')
            pytest.fail(f"Server not Ready:\n{logs}")

        # Extract node token (last line, in case of leading console noise)
        token = server.run_command(
            'cat /var/lib/rancher/k3s/server/node-token 2>&1')
        assert token and 'No such file' not in token, \
            f"Failed to get node token:\n{token}"
        token = token.strip().splitlines()[-1].strip()

        # Stop default k3s on agent and start agent mode
        agent.run_command('systemctl stop k3s 2>/dev/null')
        agent.run_command(
            f'k3s agent '
            f'--server https://192.168.50.1:6443 '
            f'--token {token} '
            f'--node-ip 192.168.50.2 '
            f'--flannel-iface eth1 '
            f'&>/var/log/k3s-agent.log &')

        # Wait for 2 nodes Ready on server. The word boundaries matter:
        # without them "Ready ... NotReady" would match and the wait
        # would return before the agent is actually Ready.
        try:
            server.wait_for_condition(
                f'{_KUBECTL} get nodes 2>/dev/null || echo WAITING',
                r'\bReady\b[\s\S]*\bReady\b',
                timeout=k3s_timeout,
                interval=15,
                description="2 nodes Ready")
        except TimeoutError:
            nodes = server.run_command(
                f'{_KUBECTL} get nodes 2>&1')
            agent_logs = agent.run_command(
                'tail -30 /var/log/k3s-agent.log 2>/dev/null || '
                'echo "no logs"')
            pytest.fail(
                f"Agent did not join cluster:\n"
                f"Nodes:\n{nodes}\n"
                f"Agent logs:\n{agent_logs}")

    def test_k3s_multinode_ready(self, k3s_multinode):
        """Verify 2 nodes in Ready state."""
        server = k3s_multinode["server"]

        output = server.run_command(f'{_KUBECTL} get nodes 2>&1')
        ready_lines = [line for line in output.splitlines()
                       if 'Ready' in line and 'NotReady' not in line]
        assert len(ready_lines) == 2, \
            f"Expected 2 Ready nodes, got {len(ready_lines)}:\n{output}"

    def test_k3s_multinode_scheduling(self, k3s_multinode, k3s_timeout):
        """Deploy 2-replica deployment and verify pods on both nodes.

        Pod placement across nodes is reported but not enforced. The
        deployment is deleted on every exit path.
        """
        server = k3s_multinode["server"]

        server.run_command(
            f'{_KUBECTL} create deployment test-multi '
            f'--image=busybox --replicas=2 '
            f'-- sleep 300 2>&1')

        try:
            try:
                output = server.wait_for_condition(
                    f'{_KUBECTL} get pods -l app=test-multi -o wide '
                    f'2>/dev/null || echo WAITING',
                    r'Running.*\n.*Running',
                    timeout=k3s_timeout,
                    interval=10,
                    description="2 replicas Running")
            except TimeoutError:
                output = server.run_command(
                    f'{_KUBECTL} get pods -l app=test-multi -o wide 2>&1')
                events = server.run_command(
                    f'{_KUBECTL} describe pods -l app=test-multi 2>&1 '
                    f'| tail -30')
                # One replica Running is tolerated rather than failed.
                if 'Running' in output:
                    print(f"Only partial scheduling achieved:\n{output}")
                    return
                pytest.fail(
                    f"Replicas not Running:\n{output}\nEvents:\n{events}")

            # Verify pods are on different nodes (best effort)
            pod_lines = [line for line in output.splitlines()
                         if 'Running' in line]
            if len(pod_lines) >= 2:
                nodes = set()
                for line in pod_lines:
                    parts = line.split()
                    # Column 7 of 'get pods -o wide' is read as NODE.
                    if len(parts) >= 7:
                        nodes.add(parts[6])
                if len(nodes) >= 2:
                    print(f"Pods scheduled on {len(nodes)} different nodes")
                else:
                    print(
                        "Pods on same node "
                        "(acceptable with 2-replica deployment)")
        finally:
            # Cleanup
            server.run_command(
                f'{_KUBECTL} delete deployment test-multi '
                f'--grace-period=5 2>&1')