diff options
| author | Bruce Ashfield <bruce.ashfield@gmail.com> | 2026-01-23 22:22:57 -0500 |
|---|---|---|
| committer | Bruce Ashfield <bruce.ashfield@gmail.com> | 2026-02-09 03:34:12 +0000 |
| commit | 63c4d74172fc80919d8b93a65c575e485ed463ed (patch) | |
| tree | 3bc9025d29c1b825501d95bd57a7c72082403a02 | |
| parent | d242468eef981131f19d20a3c98130eaef84fe88 (diff) | |
| download | meta-virtualization-63c4d74172fc80919d8b93a65c575e485ed463ed.tar.gz | |
vcontainer: add host-side idle timeout with QMP shutdown
Implement a reliable idle timeout for vmemres daemon mode using
host-side monitoring with QMP-based shutdown, and container-aware
idle detection via a virtio-9p shared file.
Host-side changes (vrunner.sh):
- Add -no-reboot flag to QEMU for clean exit semantics
- Spawn background watchdog when daemon starts
- Watchdog monitors activity file timestamp
- Check interval scales to idle timeout (timeout/5, clamped 10-60s)
- Read container status from shared file (guest writes via virtio-9p)
- Only shut down if no containers are running
- Send QMP "quit" command for graceful shutdown
- Watchdog auto-exits if QEMU dies (no lingering watchdog processes)
- Touch activity file in daemon_send() for user activity tracking
Config changes (vcontainer-common.sh):
- Add idle-timeout to build_runner_args() so it's always passed
Guest-side changes (vcontainer-init-common.sh):
- Add watchdog that writes container status to /mnt/share/.containers_running
- Host reads this file instead of sending commands over the daemon socket (avoids output corruption)
- Close inherited virtio-serial fd 3 in watchdog subshell to prevent leaks
- Guest-side shutdown logic preserved but disabled (QMP more reliable)
- Handle Yocto read-only-rootfs volatile directories (/var/volatile)
The shared file approach avoids sending container check commands through
the daemon socket, which previously caused output corruption on the
single-stream virtio-serial channel.
The idle timeout is configurable via: vdkr vconfig idle-timeout <secs>
Default: 1800 seconds (30 minutes)
Signed-off-by: Bruce Ashfield <bruce.ashfield@gmail.com>
3 files changed, 154 insertions, 20 deletions
diff --git a/recipes-containers/vcontainer/files/vcontainer-common.sh b/recipes-containers/vcontainer/files/vcontainer-common.sh index 39e57b13..792bd7a2 100755 --- a/recipes-containers/vcontainer/files/vcontainer-common.sh +++ b/recipes-containers/vcontainer/files/vcontainer-common.sh | |||
| @@ -737,6 +737,10 @@ build_runner_args() { | |||
| 737 | [ -n "$INPUT_STORAGE" ] && args+=("--input-storage" "$INPUT_STORAGE") | 737 | [ -n "$INPUT_STORAGE" ] && args+=("--input-storage" "$INPUT_STORAGE") |
| 738 | [ "$DISABLE_KVM" = "true" ] && args+=("--no-kvm") | 738 | [ "$DISABLE_KVM" = "true" ] && args+=("--no-kvm") |
| 739 | 739 | ||
| 740 | # Add idle timeout from config | ||
| 741 | local idle_timeout=$(config_get "idle-timeout" "1800") | ||
| 742 | args+=("--idle-timeout" "$idle_timeout") | ||
| 743 | |||
| 740 | # Add port forwards (each -p adds a --port-forward) | 744 | # Add port forwards (each -p adds a --port-forward) |
| 741 | for pf in "${PORT_FORWARDS[@]}"; do | 745 | for pf in "${PORT_FORWARDS[@]}"; do |
| 742 | args+=("--port-forward" "$pf") | 746 | args+=("--port-forward" "$pf") |
| @@ -1088,10 +1092,9 @@ run_runtime_command() { | |||
| 1088 | # Check if auto-daemon is enabled | 1092 | # Check if auto-daemon is enabled |
| 1089 | local auto_daemon=$(config_get "auto-daemon" "true") | 1093 | local auto_daemon=$(config_get "auto-daemon" "true") |
| 1090 | if [ "$auto_daemon" = "true" ]; then | 1094 | if [ "$auto_daemon" = "true" ]; then |
| 1091 | # Auto-start daemon | 1095 | # Auto-start daemon (idle-timeout is included in runner_args) |
| 1092 | echo -e "${CYAN}[$VCONTAINER_RUNTIME_NAME]${NC} Starting daemon..." >&2 | 1096 | echo -e "${CYAN}[$VCONTAINER_RUNTIME_NAME]${NC} Starting daemon..." >&2 |
| 1093 | local idle_timeout=$(config_get "idle-timeout" "1800") | 1097 | "$RUNNER" $runner_args --daemon-start |
| 1094 | "$RUNNER" $runner_args --idle-timeout "$idle_timeout" --daemon-start | ||
| 1095 | 1098 | ||
| 1096 | if daemon_is_running; then | 1099 | if daemon_is_running; then |
| 1097 | # Fresh daemon has no port forwards - clear stale registry | 1100 | # Fresh daemon has no port forwards - clear stale registry |
| @@ -1136,10 +1139,9 @@ run_runtime_command_with_input() { | |||
| 1136 | # Check if auto-daemon is enabled | 1139 | # Check if auto-daemon is enabled |
| 1137 | local auto_daemon=$(config_get "auto-daemon" "true") | 1140 | local auto_daemon=$(config_get "auto-daemon" "true") |
| 1138 | if [ "$auto_daemon" = "true" ]; then | 1141 | if [ "$auto_daemon" = "true" ]; then |
| 1139 | # Auto-start daemon | 1142 | # Auto-start daemon (idle-timeout is included in runner_args) |
| 1140 | echo -e "${CYAN}[$VCONTAINER_RUNTIME_NAME]${NC} Starting daemon..." >&2 | 1143 | echo -e "${CYAN}[$VCONTAINER_RUNTIME_NAME]${NC} Starting daemon..." >&2 |
| 1141 | local idle_timeout=$(config_get "idle-timeout" "1800") | 1144 | "$RUNNER" $runner_args --daemon-start |
| 1142 | "$RUNNER" $runner_args --idle-timeout "$idle_timeout" --daemon-start | ||
| 1143 | 1145 | ||
| 1144 | if daemon_is_running; then | 1146 | if daemon_is_running; then |
| 1145 | # Fresh daemon has no port forwards - clear stale registry | 1147 | # Fresh daemon has no port forwards - clear stale registry |
diff --git a/recipes-containers/vcontainer/files/vcontainer-init-common.sh b/recipes-containers/vcontainer/files/vcontainer-init-common.sh index 619e334a..fe488ae2 100755 --- a/recipes-containers/vcontainer/files/vcontainer-init-common.sh +++ b/recipes-containers/vcontainer/files/vcontainer-init-common.sh | |||
| @@ -53,6 +53,15 @@ mount_tmpfs_dirs() { | |||
| 53 | mount -t tmpfs tmpfs /tmp | 53 | mount -t tmpfs tmpfs /tmp |
| 54 | mount -t tmpfs tmpfs /run | 54 | mount -t tmpfs tmpfs /run |
| 55 | mount -t tmpfs tmpfs /mnt | 55 | mount -t tmpfs tmpfs /mnt |
| 56 | |||
| 57 | # Handle Yocto read-only-rootfs volatile directories | ||
| 58 | # /var/log and /var/tmp are symlinks to volatile/log and volatile/tmp | ||
| 59 | if [ -d /var/volatile ]; then | ||
| 60 | mount -t tmpfs tmpfs /var/volatile | ||
| 61 | mkdir -p /var/volatile/log /var/volatile/tmp | ||
| 62 | fi | ||
| 63 | |||
| 64 | # Fallback for non-volatile layouts | ||
| 56 | mount -t tmpfs tmpfs /var/run 2>/dev/null || true | 65 | mount -t tmpfs tmpfs /var/run 2>/dev/null || true |
| 57 | mount -t tmpfs tmpfs /var/tmp 2>/dev/null || true | 66 | mount -t tmpfs tmpfs /var/tmp 2>/dev/null || true |
| 58 | 67 | ||
| @@ -330,13 +339,81 @@ run_daemon_mode() { | |||
| 330 | 339 | ||
| 331 | log "Daemon ready, waiting for commands..." | 340 | log "Daemon ready, waiting for commands..." |
| 332 | 341 | ||
| 333 | # Command loop with idle timeout | 342 | # Start idle timeout watchdog |
| 343 | # Note: 'read -t' doesn't work reliably on non-terminal fds (like virtio-serial), | ||
| 344 | # so we use a background watchdog that tracks activity via a timestamp file. | ||
| 345 | ACTIVITY_FILE="/tmp/.daemon_activity" | ||
| 346 | touch "$ACTIVITY_FILE" | ||
| 347 | DAEMON_PID=$$ | ||
| 348 | |||
| 349 | # Watchdog process - writes container status to shared directory for host-side | ||
| 350 | # Host-side handles shutdown via QMP; guest-side shutdown is disabled but preserved | ||
| 351 | CONTAINER_STATUS_FILE="/mnt/share/.containers_running" | ||
| 352 | |||
| 353 | # Scale check interval to idle timeout (check ~5 times before timeout) | ||
| 354 | CHECK_INTERVAL=$((RUNTIME_IDLE_TIMEOUT / 5)) | ||
| 355 | [ "$CHECK_INTERVAL" -lt 10 ] && CHECK_INTERVAL=10 | ||
| 356 | [ "$CHECK_INTERVAL" -gt 60 ] && CHECK_INTERVAL=60 | ||
| 357 | |||
| 358 | ( | ||
| 359 | # Close inherited virtio-serial fd to prevent output leaking to host | ||
| 360 | exec 3>&- | ||
| 361 | |||
| 362 | while true; do | ||
| 363 | sleep "$CHECK_INTERVAL" | ||
| 364 | if [ ! -f "$ACTIVITY_FILE" ]; then | ||
| 365 | # Activity file removed = clean shutdown in progress | ||
| 366 | rm -f "$CONTAINER_STATUS_FILE" 2>/dev/null | ||
| 367 | exit 0 | ||
| 368 | fi | ||
| 369 | |||
| 370 | # Check for running containers and write status to shared file | ||
| 371 | # Host-side reads this file instead of sending socket commands | ||
| 372 | RUNNING=$("$VCONTAINER_RUNTIME_CMD" ps -q 2>/dev/null) | ||
| 373 | if [ -n "$RUNNING" ]; then | ||
| 374 | echo "$RUNNING" > "$CONTAINER_STATUS_FILE" 2>/dev/null || true | ||
| 375 | else | ||
| 376 | rm -f "$CONTAINER_STATUS_FILE" 2>/dev/null || true | ||
| 377 | fi | ||
| 378 | |||
| 379 | # Guest-side shutdown logic - DISABLED, host-side QMP is more reliable | ||
| 380 | # Kept for potential future use if host-side becomes unavailable | ||
| 381 | : << 'DISABLED_GUEST_SHUTDOWN' | ||
| 382 | LAST_ACTIVITY=$(stat -c %Y "$ACTIVITY_FILE" 2>/dev/null || echo 0) | ||
| 383 | NOW=$(date +%s) | ||
| 384 | IDLE_SECONDS=$((NOW - LAST_ACTIVITY)) | ||
| 385 | if [ "$IDLE_SECONDS" -ge "$RUNTIME_IDLE_TIMEOUT" ]; then | ||
| 386 | if [ -n "$RUNNING" ]; then | ||
| 387 | # Containers are running - reset activity and skip shutdown | ||
| 388 | echo "[watchdog] Containers still running, resetting idle timer" >> /dev/kmsg 2>/dev/null || true | ||
| 389 | touch "$ACTIVITY_FILE" | ||
| 390 | continue | ||
| 391 | fi | ||
| 392 | echo "[watchdog] Idle timeout (${IDLE_SECONDS}s >= ${RUNTIME_IDLE_TIMEOUT}s), no containers running, shutting down..." >> /dev/kmsg 2>/dev/null || true | ||
| 393 | rm -f "$CONTAINER_STATUS_FILE" 2>/dev/null | ||
| 394 | kill -TERM "$DAEMON_PID" 2>/dev/null | ||
| 395 | exit 0 | ||
| 396 | fi | ||
| 397 | DISABLED_GUEST_SHUTDOWN | ||
| 398 | done | ||
| 399 | ) & | ||
| 400 | WATCHDOG_PID=$! | ||
| 401 | log "Started idle watchdog (PID: $WATCHDOG_PID, timeout: ${RUNTIME_IDLE_TIMEOUT}s)" | ||
| 402 | |||
| 403 | # Trap to clean up watchdog on exit and power off VM | ||
| 404 | # Use reboot -f which works with QEMU's -no-reboot flag to exit cleanly | ||
| 405 | trap 'log "Idle timeout triggered by watchdog"; log "Calling reboot -f"; sync; /usr/sbin/reboot -f' TERM | ||
| 406 | trap 'rm -f "$ACTIVITY_FILE"; kill $WATCHDOG_PID 2>/dev/null; exit' INT | ||
| 407 | |||
| 408 | # Command loop | ||
| 334 | while true; do | 409 | while true; do |
| 335 | CMD_B64="" | 410 | CMD_B64="" |
| 336 | read -t "$RUNTIME_IDLE_TIMEOUT" -r CMD_B64 <&3 | 411 | read -r CMD_B64 <&3 |
| 337 | READ_EXIT=$? | 412 | READ_EXIT=$? |
| 338 | 413 | ||
| 339 | if [ $READ_EXIT -eq 0 ]; then | 414 | if [ $READ_EXIT -eq 0 ] && [ -n "$CMD_B64" ]; then |
| 415 | # Update activity timestamp | ||
| 416 | touch "$ACTIVITY_FILE" | ||
| 340 | log "Received: '$CMD_B64'" | 417 | log "Received: '$CMD_B64'" |
| 341 | # Handle special commands | 418 | # Handle special commands |
| 342 | case "$CMD_B64" in | 419 | case "$CMD_B64" in |
| @@ -450,19 +527,15 @@ run_daemon_mode() { | |||
| 450 | 527 | ||
| 451 | log "Command completed (exit code: $EXEC_EXIT_CODE)" | 528 | log "Command completed (exit code: $EXEC_EXIT_CODE)" |
| 452 | else | 529 | else |
| 453 | # Read returned non-zero: either timeout or EOF | 530 | # Read returned non-zero or empty - host closed connection or EOF |
| 454 | # Timeout returns >128 (typically 142), EOF returns 1 | 531 | # Idle timeout is handled by the watchdog process |
| 455 | if [ $READ_EXIT -gt 128 ]; then | ||
| 456 | # Actual timeout - shut down | ||
| 457 | log "Idle timeout (${RUNTIME_IDLE_TIMEOUT}s), shutting down..." | ||
| 458 | echo "===IDLE_SHUTDOWN===" | cat >&3 | ||
| 459 | break | ||
| 460 | fi | ||
| 461 | # EOF or empty line - host closed connection, wait for reconnect | ||
| 462 | sleep 0.1 | 532 | sleep 0.1 |
| 463 | fi | 533 | fi |
| 464 | done | 534 | done |
| 465 | 535 | ||
| 536 | # Clean shutdown | ||
| 537 | rm -f "$ACTIVITY_FILE" | ||
| 538 | kill $WATCHDOG_PID 2>/dev/null | ||
| 466 | exec 3>&- | 539 | exec 3>&- |
| 467 | log "Daemon shutting down..." | 540 | log "Daemon shutting down..." |
| 468 | } | 541 | } |
| @@ -578,5 +651,6 @@ graceful_shutdown() { | |||
| 578 | sleep 2 | 651 | sleep 2 |
| 579 | 652 | ||
| 580 | log "=== ${VCONTAINER_RUNTIME_NAME} Complete ===" | 653 | log "=== ${VCONTAINER_RUNTIME_NAME} Complete ===" |
| 581 | poweroff -f | 654 | # Use reboot -f which works with QEMU's -no-reboot flag to exit cleanly |
| 655 | reboot -f | ||
| 582 | } | 656 | } |
diff --git a/recipes-containers/vcontainer/files/vrunner.sh b/recipes-containers/vcontainer/files/vrunner.sh index 5d824ba5..4e99cba7 100755 --- a/recipes-containers/vcontainer/files/vrunner.sh +++ b/recipes-containers/vcontainer/files/vrunner.sh | |||
| @@ -546,6 +546,9 @@ daemon_send() { | |||
| 546 | exit 1 | 546 | exit 1 |
| 547 | fi | 547 | fi |
| 548 | 548 | ||
| 549 | # Update activity timestamp for idle timeout tracking | ||
| 550 | touch "$DAEMON_SOCKET_DIR/activity" 2>/dev/null || true | ||
| 551 | |||
| 549 | # Encode command in base64 and send | 552 | # Encode command in base64 and send |
| 550 | local cmd_b64=$(echo -n "$cmd" | base64 -w0) | 553 | local cmd_b64=$(echo -n "$cmd" | base64 -w0) |
| 551 | 554 | ||
| @@ -603,6 +606,9 @@ daemon_send_with_input() { | |||
| 603 | exit 1 | 606 | exit 1 |
| 604 | fi | 607 | fi |
| 605 | 608 | ||
| 609 | # Update activity timestamp for idle timeout tracking | ||
| 610 | touch "$DAEMON_SOCKET_DIR/activity" 2>/dev/null || true | ||
| 611 | |||
| 606 | # Shared directory for virtio-9p | 612 | # Shared directory for virtio-9p |
| 607 | local share_dir="$DAEMON_SOCKET_DIR/share" | 613 | local share_dir="$DAEMON_SOCKET_DIR/share" |
| 608 | if [ ! -d "$share_dir" ]; then | 614 | if [ ! -d "$share_dir" ]; then |
| @@ -1159,7 +1165,7 @@ fi | |||
| 1159 | # /dev/vdc = state disk (if any) | 1165 | # /dev/vdc = state disk (if any) |
| 1160 | # The preinit script in initramfs mounts /dev/vda and does switch_root | 1166 | # The preinit script in initramfs mounts /dev/vda and does switch_root |
| 1161 | # Build QEMU options | 1167 | # Build QEMU options |
| 1162 | QEMU_OPTS="$QEMU_MACHINE -nographic -smp 2 -m 2048" | 1168 | QEMU_OPTS="$QEMU_MACHINE -nographic -smp 2 -m 2048 -no-reboot" |
| 1163 | if [ "$USE_KVM" = "true" ]; then | 1169 | if [ "$USE_KVM" = "true" ]; then |
| 1164 | QEMU_OPTS="$QEMU_OPTS -enable-kvm" | 1170 | QEMU_OPTS="$QEMU_OPTS -enable-kvm" |
| 1165 | fi | 1171 | fi |
| @@ -1336,6 +1342,58 @@ if [ "$DAEMON_MODE" = "start" ]; then | |||
| 1336 | 1342 | ||
| 1337 | if [ "$READY" = "true" ]; then | 1343 | if [ "$READY" = "true" ]; then |
| 1338 | log "INFO" "Daemon is ready!" | 1344 | log "INFO" "Daemon is ready!" |
| 1345 | |||
| 1346 | # Start host-side idle watchdog if timeout is set | ||
| 1347 | if [ "$IDLE_TIMEOUT" -gt 0 ] 2>/dev/null; then | ||
| 1348 | ACTIVITY_FILE="$DAEMON_SOCKET_DIR/activity" | ||
| 1349 | touch "$ACTIVITY_FILE" | ||
| 1350 | |||
| 1351 | # Spawn background watchdog | ||
| 1352 | ( | ||
| 1353 | # Container status file - guest writes this via virtio-9p share | ||
| 1354 | # This avoids sending commands through daemon socket which corrupts output | ||
| 1355 | CONTAINER_STATUS_FILE="$DAEMON_SHARE_DIR/.containers_running" | ||
| 1356 | |||
| 1357 | # Scale check interval to idle timeout (check ~5 times before timeout) | ||
| 1358 | CHECK_INTERVAL=$((IDLE_TIMEOUT / 5)) | ||
| 1359 | [ "$CHECK_INTERVAL" -lt 10 ] && CHECK_INTERVAL=10 | ||
| 1360 | [ "$CHECK_INTERVAL" -gt 60 ] && CHECK_INTERVAL=60 | ||
| 1361 | |||
| 1362 | while true; do | ||
| 1363 | sleep "$CHECK_INTERVAL" | ||
| 1364 | [ -f "$ACTIVITY_FILE" ] || exit 0 # Clean exit if file removed | ||
| 1365 | [ -f "$DAEMON_PID_FILE" ] || exit 0 # PID file gone | ||
| 1366 | |||
| 1367 | # Check if QEMU process is still running | ||
| 1368 | QEMU_PID=$(cat "$DAEMON_PID_FILE" 2>/dev/null) | ||
| 1369 | [ -n "$QEMU_PID" ] && kill -0 "$QEMU_PID" 2>/dev/null || exit 0 | ||
| 1370 | |||
| 1371 | LAST_ACTIVITY=$(stat -c %Y "$ACTIVITY_FILE" 2>/dev/null || echo 0) | ||
| 1372 | NOW=$(date +%s) | ||
| 1373 | IDLE_SECONDS=$((NOW - LAST_ACTIVITY)) | ||
| 1374 | |||
| 1375 | if [ "$IDLE_SECONDS" -ge "$IDLE_TIMEOUT" ]; then | ||
| 1376 | # Check if any containers are running via shared file | ||
| 1377 | # Guest-side watchdog writes container IDs to this file | ||
| 1378 | if [ -f "$CONTAINER_STATUS_FILE" ] && [ -s "$CONTAINER_STATUS_FILE" ]; then | ||
| 1379 | # Containers are running - reset activity and skip shutdown | ||
| 1380 | touch "$ACTIVITY_FILE" | ||
| 1381 | continue | ||
| 1382 | fi | ||
| 1383 | |||
| 1384 | # No containers running - send QMP quit to gracefully stop QEMU | ||
| 1385 | if [ -S "$QMP_SOCKET" ]; then | ||
| 1386 | echo '{"execute":"qmp_capabilities"}{"execute":"quit"}' | \ | ||
| 1387 | socat - "UNIX-CONNECT:$QMP_SOCKET" >/dev/null 2>&1 || true | ||
| 1388 | fi | ||
| 1389 | rm -f "$ACTIVITY_FILE" | ||
| 1390 | exit 0 | ||
| 1391 | fi | ||
| 1392 | done | ||
| 1393 | ) & | ||
| 1394 | log "DEBUG" "Started host-side idle watchdog (timeout: ${IDLE_TIMEOUT}s)" | ||
| 1395 | fi | ||
| 1396 | |||
| 1339 | echo "Daemon running (PID: $QEMU_PID)" | 1397 | echo "Daemon running (PID: $QEMU_PID)" |
| 1340 | echo "Socket: $DAEMON_SOCKET" | 1398 | echo "Socket: $DAEMON_SOCKET" |
| 1341 | exit 0 | 1399 | exit 0 |
