summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBruce Ashfield <bruce.ashfield@gmail.com>2026-02-15 04:35:55 +0000
committerBruce Ashfield <bruce.ashfield@gmail.com>2026-02-26 01:05:01 +0000
commit57d267db7878180d1ecd1936df5284550d0031c3 (patch)
treeb2205ccb2e6114fdda4384518d4b9047209756ef
parent0fe8c4444f3199b862a4ba52b2b62b5f9b2af85f (diff)
downloadmeta-virtualization-57d267db7878180d1ecd1936df5284550d0031c3.tar.gz
vxn: add Xen DomU container runtime with OCI image support
vxn runs OCI containers as Xen DomU guests — the VM IS the container. No Docker/containerd runs inside the guest; the init script directly mounts the container rootfs and execs the entrypoint via chroot. Host-side (Dom0): - vxn.sh: Docker-like CLI wrapper (sets HYPERVISOR=xen) - vrunner-backend-xen.sh: Xen xl backend for vrunner - hv_prepare_container(): pulls OCI images via skopeo, resolves entrypoint from OCI config using jq on host - xl create for VM lifecycle (PVH on aarch64, PV on x86_64) - Bridge networking with iptables DNAT for port forwards - Console capture via xl console for ephemeral mode Guest-side (DomU): - vxn-init.sh: mounts container rootfs from input disk, extracts OCI layers, execs entrypoint via chroot - Supports containers with or without /bin/sh - grep/sed fallback for OCI config parsing (no jq needed) - Daemon mode with command loop on hvc1 - vcontainer-init-common.sh: hypervisor detection, head -n fix - vcontainer-preinit.sh: init selection via vcontainer.init= Build system: - vxn-initramfs-create.inc: assembles boot blobs from vruntime multiconfig, injects vxn-init.sh into rootfs squashfs - vxn_1.0.bb: Dom0 package with scripts + blobs - nostamp on install/package chain (blobs from DEPLOY_DIR are untracked by sstate) - vxn.cfg: Xen PV kernel config fragment Tested: vxn -it --no-daemon run --rm hello-world Signed-off-by: Bruce Ashfield <bruce.ashfield@gmail.com>
-rwxr-xr-xrecipes-containers/vcontainer/files/vcontainer-init-common.sh121
-rw-r--r--recipes-containers/vcontainer/files/vrunner-backend-xen.sh564
-rwxr-xr-xrecipes-containers/vcontainer/files/vrunner.sh433
-rwxr-xr-xrecipes-containers/vcontainer/files/vxn-init.sh545
-rw-r--r--recipes-containers/vcontainer/files/vxn.sh51
-rw-r--r--recipes-core/vxn/vxn-initramfs-create.inc223
-rw-r--r--recipes-core/vxn/vxn-initramfs-create_1.0.bb43
-rw-r--r--recipes-core/vxn/vxn_1.0.bb167
-rw-r--r--recipes-kernel/linux/linux-yocto/vxn.cfg24
9 files changed, 1860 insertions, 311 deletions
diff --git a/recipes-containers/vcontainer/files/vcontainer-init-common.sh b/recipes-containers/vcontainer/files/vcontainer-init-common.sh
index fe488ae2..ab8762b2 100755
--- a/recipes-containers/vcontainer/files/vcontainer-init-common.sh
+++ b/recipes-containers/vcontainer/files/vcontainer-init-common.sh
@@ -27,6 +27,43 @@ setup_base_environment() {
27} 27}
28 28
29# ============================================================================ 29# ============================================================================
30# Hypervisor Detection
31# ============================================================================
32
# Detect which hypervisor we are running under and pick the matching
# device naming.  Must be called after /proc and /sys are mounted.
# Sets: HV_TYPE, BLK_PREFIX, NINE_P_TRANSPORT
detect_hypervisor() {
    # An explicit vcontainer.blk=<prefix> on the kernel cmdline (added by
    # the Xen backend) overrides any autodetection below.
    local explicit_prefix=""
    local kernel_param
    for kernel_param in $(cat /proc/cmdline 2>/dev/null); do
        case "$kernel_param" in
            vcontainer.blk=*) explicit_prefix="${kernel_param#vcontainer.blk=}" ;;
        esac
    done

    if [ -n "$explicit_prefix" ]; then
        BLK_PREFIX="$explicit_prefix"
        case "$explicit_prefix" in
            xvd)
                HV_TYPE="xen"
                NINE_P_TRANSPORT="xen"
                ;;
            *)
                HV_TYPE="qemu"
                NINE_P_TRANSPORT="virtio"
                ;;
        esac
    elif [ -d /proc/xen ] || grep -q "xen" /sys/hypervisor/type 2>/dev/null; then
        # Running as a Xen guest: xen-blkfront devices, xen 9p transport
        HV_TYPE="xen"
        BLK_PREFIX="xvd"
        NINE_P_TRANSPORT="xen"
    else
        # Default: QEMU/KVM with virtio devices
        HV_TYPE="qemu"
        BLK_PREFIX="vd"
        NINE_P_TRANSPORT="virtio"
    fi
}
65
66# ============================================================================
30# Filesystem Mounts 67# Filesystem Mounts
31# ============================================================================ 68# ============================================================================
32 69
@@ -40,6 +77,9 @@ mount_base_filesystems() {
40 mkdir -p /dev/pts 77 mkdir -p /dev/pts
41 mountpoint -q /dev/pts || mount -t devpts devpts /dev/pts 78 mountpoint -q /dev/pts || mount -t devpts devpts /dev/pts
42 79
80 # Detect hypervisor type now that /proc and /sys are available
81 detect_hypervisor
82
43 # Enable IP forwarding (container runtimes check this) 83 # Enable IP forwarding (container runtimes check this)
44 echo 1 > /proc/sys/net/ipv4/ip_forward 84 echo 1 > /proc/sys/net/ipv4/ip_forward
45 85
@@ -178,29 +218,24 @@ detect_disks() {
178 log "Waiting for block devices..." 218 log "Waiting for block devices..."
179 sleep 2 219 sleep 2
180 220
181 log "Block devices:" 221 log "Block devices (${HV_TYPE:-qemu}, /dev/${BLK_PREFIX}*):"
182 [ "$QUIET_BOOT" = "0" ] && ls -la /dev/vd* 2>/dev/null || log "No /dev/vd* devices" 222 [ "$QUIET_BOOT" = "0" ] && ls -la /dev/${BLK_PREFIX}* 2>/dev/null || log "No /dev/${BLK_PREFIX}* devices"
183 223
184 # Determine which disk is input and which is state 224 # Determine which disk is input and which is state
185 # Drive layout (rootfs.img is always /dev/vda, mounted by preinit as /): 225 # Drive layout (rootfs is always the first block device, mounted by preinit as /):
186 # /dev/vda = rootfs.img (already mounted as /) 226 # QEMU: /dev/vda, /dev/vdb, /dev/vdc
187 # /dev/vdb = input (if present) 227 # Xen: /dev/xvda, /dev/xvdb, /dev/xvdc
188 # /dev/vdc = state (if both input and state present)
189 # /dev/vdb = state (if only state, no input)
190 228
191 INPUT_DISK="" 229 INPUT_DISK=""
192 STATE_DISK="" 230 STATE_DISK=""
193 231
194 if [ "$RUNTIME_INPUT" != "none" ] && [ "$RUNTIME_STATE" = "disk" ]; then 232 if [ "$RUNTIME_INPUT" != "none" ] && [ "$RUNTIME_STATE" = "disk" ]; then
195 # Both present: rootfs=vda, input=vdb, state=vdc 233 INPUT_DISK="/dev/${BLK_PREFIX}b"
196 INPUT_DISK="/dev/vdb" 234 STATE_DISK="/dev/${BLK_PREFIX}c"
197 STATE_DISK="/dev/vdc"
198 elif [ "$RUNTIME_STATE" = "disk" ]; then 235 elif [ "$RUNTIME_STATE" = "disk" ]; then
199 # Only state: rootfs=vda, state=vdb 236 STATE_DISK="/dev/${BLK_PREFIX}b"
200 STATE_DISK="/dev/vdb"
201 elif [ "$RUNTIME_INPUT" != "none" ]; then 237 elif [ "$RUNTIME_INPUT" != "none" ]; then
202 # Only input: rootfs=vda, input=vdb 238 INPUT_DISK="/dev/${BLK_PREFIX}b"
203 INPUT_DISK="/dev/vdb"
204 fi 239 fi
205} 240}
206 241
@@ -250,30 +285,56 @@ configure_networking() {
250 # Bring up the interface 285 # Bring up the interface
251 ip link set "$NET_IFACE" up 286 ip link set "$NET_IFACE" up
252 287
253 # QEMU slirp provides: 288 if [ "$HV_TYPE" = "xen" ]; then
254 # Guest IP: 10.0.2.15/24 289 # Xen bridge networking: use DHCP or static config
255 # Gateway: 10.0.2.2 290 # Try DHCP first if udhcpc is available
256 # DNS: 10.0.2.3 291 if command -v udhcpc >/dev/null 2>&1; then
257 ip addr add 10.0.2.15/24 dev "$NET_IFACE" 292 log "Requesting IP via DHCP (Xen bridge)..."
258 ip route add default via 10.0.2.2 293 udhcpc -i "$NET_IFACE" -t 5 -T 3 -q 2>/dev/null || {
294 log "DHCP failed, using static fallback"
295 ip addr add 10.0.0.15/24 dev "$NET_IFACE"
296 ip route add default via 10.0.0.1
297 }
298 else
299 # Static fallback for Xen bridge
300 ip addr add 10.0.0.15/24 dev "$NET_IFACE"
301 ip route add default via 10.0.0.1
302 fi
303 else
304 # QEMU slirp provides:
305 # Guest IP: 10.0.2.15/24
306 # Gateway: 10.0.2.2
307 # DNS: 10.0.2.3
308 ip addr add 10.0.2.15/24 dev "$NET_IFACE"
309 ip route add default via 10.0.2.2
310 fi
259 311
260 # Configure DNS 312 # Configure DNS
261 mkdir -p /etc 313 mkdir -p /etc
262 rm -f /etc/resolv.conf 314 rm -f /etc/resolv.conf
263 cat > /etc/resolv.conf << 'DNSEOF' 315 if [ "$HV_TYPE" = "xen" ]; then
316 cat > /etc/resolv.conf << 'DNSEOF'
317nameserver 8.8.8.8
318nameserver 1.1.1.1
319DNSEOF
320 else
321 cat > /etc/resolv.conf << 'DNSEOF'
264nameserver 10.0.2.3 322nameserver 10.0.2.3
265nameserver 8.8.8.8 323nameserver 8.8.8.8
266nameserver 1.1.1.1 324nameserver 1.1.1.1
267DNSEOF 325DNSEOF
326 fi
268 327
269 sleep 1 328 sleep 1
270 329
271 # Verify connectivity 330 # Verify connectivity
331 local gw_ip
332 gw_ip=$(ip route | awk '/default/{print $3}' | head -n 1)
272 log "Testing network connectivity..." 333 log "Testing network connectivity..."
273 if ping -c 1 -W 3 10.0.2.2 >/dev/null 2>&1; then 334 if [ -n "$gw_ip" ] && ping -c 1 -W 3 "$gw_ip" >/dev/null 2>&1; then
274 log " Gateway (10.0.2.2): OK" 335 log " Gateway ($gw_ip): OK"
275 else 336 else
276 log " Gateway (10.0.2.2): FAILED" 337 log " Gateway: FAILED"
277 fi 338 fi
278 339
279 if ping -c 1 -W 3 8.8.8.8 >/dev/null 2>&1; then 340 if ping -c 1 -W 3 8.8.8.8 >/dev/null 2>&1; then
@@ -282,7 +343,9 @@ DNSEOF
282 log " External (8.8.8.8): FAILED (may be filtered)" 343 log " External (8.8.8.8): FAILED (may be filtered)"
283 fi 344 fi
284 345
285 log "Network configured: $NET_IFACE (10.0.2.15)" 346 local my_ip
347 my_ip=$(ip -4 addr show "$NET_IFACE" 2>/dev/null | awk '/inet /{print $2}' | head -n 1)
348 log "Network configured: $NET_IFACE ($my_ip)"
286 [ "$QUIET_BOOT" = "0" ] && ip addr show "$NET_IFACE" 349 [ "$QUIET_BOOT" = "0" ] && ip addr show "$NET_IFACE"
287 [ "$QUIET_BOOT" = "0" ] && ip route 350 [ "$QUIET_BOOT" = "0" ] && ip route
288 [ "$QUIET_BOOT" = "0" ] && cat /etc/resolv.conf 351 [ "$QUIET_BOOT" = "0" ] && cat /etc/resolv.conf
@@ -325,11 +388,11 @@ run_daemon_mode() {
325 388
326 # Mount virtio-9p shared directory for file I/O 389 # Mount virtio-9p shared directory for file I/O
327 mkdir -p /mnt/share 390 mkdir -p /mnt/share
328 MOUNT_ERR=$(mount -t 9p -o trans=virtio,version=9p2000.L,cache=none ${VCONTAINER_SHARE_NAME} /mnt/share 2>&1) 391 MOUNT_ERR=$(mount -t 9p -o trans=${NINE_P_TRANSPORT},version=9p2000.L,cache=none ${VCONTAINER_SHARE_NAME} /mnt/share 2>&1)
329 if [ $? -eq 0 ]; then 392 if [ $? -eq 0 ]; then
330 log "Mounted virtio-9p share at /mnt/share" 393 log "Mounted 9p share at /mnt/share (transport: ${NINE_P_TRANSPORT})"
331 else 394 else
332 log "WARNING: Could not mount virtio-9p share: $MOUNT_ERR" 395 log "WARNING: Could not mount 9p share: $MOUNT_ERR"
333 log "Available filesystems:" 396 log "Available filesystems:"
334 cat /proc/filesystems 2>/dev/null | head -20 397 cat /proc/filesystems 2>/dev/null | head -20
335 fi 398 fi
@@ -644,7 +707,7 @@ graceful_shutdown() {
644 707
645 # Final sync and flush 708 # Final sync and flush
646 sync 709 sync
647 for dev in /dev/vd*; do 710 for dev in /dev/${BLK_PREFIX}*; do
648 [ -b "$dev" ] && blockdev --flushbufs "$dev" 2>/dev/null || true 711 [ -b "$dev" ] && blockdev --flushbufs "$dev" 2>/dev/null || true
649 done 712 done
650 sync 713 sync
diff --git a/recipes-containers/vcontainer/files/vrunner-backend-xen.sh b/recipes-containers/vcontainer/files/vrunner-backend-xen.sh
new file mode 100644
index 00000000..89e26b6b
--- /dev/null
+++ b/recipes-containers/vcontainer/files/vrunner-backend-xen.sh
@@ -0,0 +1,564 @@
1#!/bin/bash
2# SPDX-FileCopyrightText: Copyright (C) 2025 Bruce Ashfield
3#
4# SPDX-License-Identifier: GPL-2.0-only
5#
6# vrunner-backend-xen.sh
7# Xen hypervisor backend for vrunner.sh
8#
9# This backend implements the hypervisor interface for Xen xl toolstack.
10# It is sourced by vrunner.sh when VCONTAINER_HYPERVISOR=xen.
11#
12# This backend runs on a Xen Dom0 host and creates DomU guests using xl.
13# Key differences from QEMU backend:
14# - Block devices appear as /dev/xvd* instead of /dev/vd*
15# - Network uses bridge + iptables NAT instead of QEMU slirp
16# - Console uses PV console (hvc0/hvc1) instead of virtio-serial
17# - 9p file sharing uses trans=xen instead of trans=virtio
18# - VM tracking uses domain name instead of PID
19
20# ============================================================================
21# Architecture Setup
22# ============================================================================
23
# Resolve per-architecture boot artifacts and Xen toolstack settings.
# Reads:  TARGET_ARCH, BLOB_DIR
# Sets:   KERNEL_IMAGE, INITRAMFS, ROOTFS_IMG, HV_CMD, HV_CONSOLE,
#         HV_DOMNAME, HV_VM_PID, HV_XEN_CFG, HV_BLK_PREFIX
# Exits non-zero on an unsupported architecture.
hv_setup_arch() {
    local kernel_name
    case "$TARGET_ARCH" in
        aarch64) kernel_name="Image" ;;
        x86_64)  kernel_name="bzImage" ;;
        *)
            log "ERROR" "Unsupported architecture: $TARGET_ARCH"
            exit 1
            ;;
    esac

    KERNEL_IMAGE="$BLOB_DIR/$TARGET_ARCH/$kernel_name"
    INITRAMFS="$BLOB_DIR/$TARGET_ARCH/initramfs.cpio.gz"
    ROOTFS_IMG="$BLOB_DIR/$TARGET_ARCH/rootfs.img"
    HV_CMD="xl"
    HV_CONSOLE="hvc0"

    # Xen domain name, unique per vrunner invocation
    HV_DOMNAME="vxn-$$"
    HV_VM_PID=""

    # Path of the generated xl domain config (filled in at start time)
    HV_XEN_CFG=""

    # Guest block devices come from xen-blkfront (/dev/xvd*); the prefix is
    # exported on the kernel cmdline so preinit can locate the rootfs
    # before /proc is mounted.
    HV_BLK_PREFIX="xvd"
}
57
# Xen itself is the hypervisor, so there is no KVM acceleration to probe.
hv_check_accel() {
    USE_KVM="false"
    log "DEBUG" "Xen hypervisor (no KVM check needed)"
}

# Xen DomU container storage lives in the guest's overlay filesystem and,
# in daemon mode, the domain keeps running so storage persists naturally.
# Returning 0 tells vrunner not to create a state disk image on Dom0.
hv_skip_state_disk() {
    return 0
}
70
71# ============================================================================
72# Container Image Preparation (OCI pull via skopeo)
73# ============================================================================
74
# Pull OCI image on the host so the input disk creation code can package it.
# Called from vrunner.sh before input disk creation. Modifies globals:
#   INPUT_PATH - set to the OCI layout directory
#   INPUT_TYPE - set to "oci"
#   DOCKER_CMD - rewritten to the resolved entrypoint/command
#
# The host resolves the OCI entrypoint using jq (available on Dom0)
# so the guest doesn't need jq to determine what to execute.
hv_prepare_container() {
    # Skip if user already provided --input
    [ -n "$INPUT_PATH" ] && return 0

    # Only act on "run" commands
    case "$DOCKER_CMD" in
        *" run "*) ;;
        *) return 0 ;;
    esac

    # Check for skopeo
    if ! command -v skopeo >/dev/null 2>&1; then
        log "ERROR" "skopeo not found. Install skopeo for OCI image pulling."
        exit 1
    fi

    # Parse image name and any trailing command from "docker run [opts] <image> [cmd...]"
    # NOTE(review): word-splitting on $args means quoted arguments containing
    # spaces are not preserved — acceptable for the simple run commands used here.
    local args
    args=$(echo "$DOCKER_CMD" | sed 's/^[a-z]* run //')

    local image=""
    local user_cmd=""
    local skip_next=false
    local found_image=false
    for arg in $args; do
        if [ "$skip_next" = "true" ]; then
            # Previous flag consumed this token as its value
            skip_next=false
            continue
        fi
        if [ "$found_image" = "true" ]; then
            # Everything after image name is the user command
            user_cmd="$user_cmd $arg"
            continue
        fi
        case "$arg" in
            --rm|--detach|-d|-i|--interactive|-t|--tty|--privileged|-it)
                # Boolean flags: no value follows
                ;;
            -p|--publish|-v|--volume|-e|--env|--name|--network|-w|--workdir|--entrypoint|-m|--memory|--cpus)
                # Flags whose value is the next argument
                skip_next=true
                ;;
            --publish=*|--volume=*|--env=*|--name=*|--network=*|--workdir=*|--entrypoint=*|--dns=*|--memory=*|--cpus=*)
                # --flag=value forms carry their value inline
                ;;
            -*)
                # Unknown flag: assume it is boolean and ignore it
                ;;
            *)
                # First non-flag token is the image reference
                image="$arg"
                found_image=true
                ;;
        esac
    done
    # Trim the leading space accumulated while collecting the user command
    user_cmd=$(echo "$user_cmd" | sed 's/^ *//')

    if [ -z "$image" ]; then
        log "DEBUG" "hv_prepare_container: no image found in DOCKER_CMD"
        return 0
    fi

    log "INFO" "Pulling OCI image: $image"

    local oci_dir="$TEMP_DIR/oci-image"
    local skopeo_log="$TEMP_DIR/skopeo.log"
    if skopeo copy "docker://$image" "oci:$oci_dir:latest" > "$skopeo_log" 2>&1; then
        INPUT_PATH="$oci_dir"
        INPUT_TYPE="oci"
        log "INFO" "OCI image pulled to $oci_dir"
    else
        # Surface skopeo's own diagnostics before aborting
        log "ERROR" "Failed to pull image: $image"
        [ -f "$skopeo_log" ] && while IFS= read -r line; do
            log "ERROR" "skopeo: $line"
        done < "$skopeo_log"
        exit 1
    fi

    # Resolve entrypoint from OCI config on the host (jq available here).
    # Rewrite DOCKER_CMD so the guest receives the actual command to exec,
    # avoiding any dependency on jq inside the minimal guest rootfs.
    local resolved_cmd="$user_cmd"
    if [ -z "$resolved_cmd" ] && command -v jq >/dev/null 2>&1; then
        local entrypoint="" oci_cmd=""
        local manifest_digest config_digest manifest_file config_file
        # index.json -> first manifest -> image config.  A digest like
        # "sha256:HEX" maps to blobs/sha256/HEX; the ${var/://} expansion
        # swaps the ':' for '/'.
        manifest_digest=$(jq -r '.manifests[0].digest' "$oci_dir/index.json" 2>/dev/null)
        manifest_file="$oci_dir/blobs/${manifest_digest/://}"
        if [ -f "$manifest_file" ]; then
            config_digest=$(jq -r '.config.digest' "$manifest_file" 2>/dev/null)
            config_file="$oci_dir/blobs/${config_digest/://}"
            if [ -f "$config_file" ]; then
                entrypoint=$(jq -r '(.config.Entrypoint // []) | join(" ")' "$config_file" 2>/dev/null)
                oci_cmd=$(jq -r '(.config.Cmd // []) | join(" ")' "$config_file" 2>/dev/null)
            fi
        fi
        # Docker semantics: Entrypoint and Cmd concatenate; Cmd alone is the
        # command when no Entrypoint is set.
        if [ -n "$entrypoint" ]; then
            resolved_cmd="$entrypoint"
            [ -n "$oci_cmd" ] && resolved_cmd="$resolved_cmd $oci_cmd"
        elif [ -n "$oci_cmd" ]; then
            resolved_cmd="$oci_cmd"
        fi
        log "INFO" "Resolved OCI entrypoint: $resolved_cmd"
    fi

    if [ -n "$resolved_cmd" ]; then
        DOCKER_CMD="$resolved_cmd"
        log "INFO" "DOCKER_CMD rewritten to: $DOCKER_CMD"
    fi
}
187
# Verify the xl toolstack is available on Dom0; abort if it is missing.
hv_find_command() {
    command -v xl >/dev/null 2>&1 || {
        log "ERROR" "xl (Xen toolstack) not found. Install xen-tools-xl."
        exit 1
    }
    log "DEBUG" "Using Xen xl toolstack"
}

# Print the guest console device name (PV console).
hv_get_console_device() {
    echo "$HV_CONSOLE"
}
199
# ============================================================================
# VM Configuration Building
# ============================================================================

# Accumulators consumed by _write_xen_config
_XEN_DISKS=()
_XEN_VIF=""
_XEN_9P=()

# Translate the generic disk settings into xl disk stanzas.
# Reads: ROOTFS_IMG, DISK_OPTS, STATE_DISK_OPTS (QEMU-style "-drive" strings)
# Fills: _XEN_DISKS (rootfs=xvda ro, input=xvdb rw, state=xvdc rw)
hv_build_disk_opts() {
    # Root filesystem is always the first, read-only disk
    _XEN_DISKS=("'format=raw,vdev=xvda,access=ro,target=$ROOTFS_IMG'")

    # Input disk: present when vrunner created one (DISK_OPTS non-empty).
    # The backing path is embedded as "file=<path>" in the QEMU-style string.
    local backing
    if [ -n "$DISK_OPTS" ]; then
        backing=$(echo "$DISK_OPTS" | sed -n 's/.*file=\([^,]*\).*/\1/p')
        if [ -n "$backing" ]; then
            _XEN_DISKS+=("'format=raw,vdev=xvdb,access=rw,target=$backing'")
        fi
    fi

    # State disk: same extraction, for persistent container storage
    if [ -n "$STATE_DISK_OPTS" ]; then
        backing=$(echo "$STATE_DISK_OPTS" | sed -n 's/.*file=\([^,]*\).*/\1/p')
        if [ -n "$backing" ]; then
            _XEN_DISKS+=("'format=raw,vdev=xvdc,access=rw,target=$backing'")
        fi
    fi
}
234
# Configure the guest network interface.  With networking enabled, the vif
# is attached to the default xenbr0 bridge; otherwise _XEN_VIF stays empty
# and the domain config gets vif = [].
hv_build_network_opts() {
    _XEN_VIF=""
    if [ "$NETWORK" = "true" ]; then
        _XEN_VIF="'bridge=xenbr0'"
    fi
}

# Record a 9p share ($1 = host path, $2 = share tag) for the domain config.
# xl takes 9p shares in the config file rather than on a command line, so
# this accumulates into _XEN_9P and prints an empty string for the caller.
# NOTE(review): if the call site captures output via $(...), the array
# append happens in a subshell and is lost — verify against vrunner.sh.
hv_build_9p_opts() {
    local host_path="$1"
    local share_tag="$2"
    _XEN_9P+=("{ 'tag': '$share_tag', 'path': '$host_path', 'security_model': 'none' }")
    echo ""
}
254
# Daemon-mode extra options: none are needed for Xen.  The PV console hvc1
# already serves as the daemon command channel in the init scripts, and the
# host side bridges "xl console" to a unix socket instead of passing
# hypervisor options here.
hv_build_daemon_opts() {
    HV_DAEMON_OPTS=""
}

# Xen is driven by a generated domain config file (_write_xen_config), not a
# command line, so there are no hypervisor options to assemble.
hv_build_vm_cmd() {
    HV_OPTS=""
}
271
# Internal: write Xen domain config file
#   $1 - extra kernel command-line text to append
#   $2 - path where the xl config file is written
# Reads the _XEN_DISKS / _XEN_VIF / _XEN_9P accumulators plus KERNEL_IMAGE,
# INITRAMFS, HV_DOMNAME, TARGET_ARCH, and optional VXN_MEMORY / VXN_VCPUS.
_write_xen_config() {
    local kernel_append="$1"
    local config_path="$2"

    # Build disk array (join accumulated stanzas with ", ")
    local disk_array=""
    for d in "${_XEN_DISKS[@]}"; do
        if [ -n "$disk_array" ]; then
            disk_array="$disk_array, $d"
        else
            disk_array="$d"
        fi
    done

    # Build vif array (empty string yields vif = [] — no network)
    local vif_array=""
    if [ -n "$_XEN_VIF" ]; then
        vif_array="$_XEN_VIF"
    fi

    # Determine guest type per architecture:
    #   x86_64:  PV guests work (paravirtualized, no HVM needed)
    #   aarch64: ARM Xen only supports PVH-style guests (no PV)
    local xen_type="pv"
    case "$TARGET_ARCH" in
        aarch64) xen_type="pvh" ;;
    esac

    # Memory (MB) and vCPUs - configurable via environment
    local xen_memory="${VXN_MEMORY:-512}"
    local xen_vcpus="${VXN_VCPUS:-2}"

    # vcontainer.blk=xvd tells the guest preinit where the rootfs lives;
    # vcontainer.init selects the vxn init script inside the rootfs.
    cat > "$config_path" <<XENEOF
# Auto-generated Xen domain config for vxn
name = "$HV_DOMNAME"
type = "$xen_type"
memory = $xen_memory
vcpus = $xen_vcpus

kernel = "$KERNEL_IMAGE"
ramdisk = "$INITRAMFS"
extra = "console=hvc0 quiet loglevel=0 init=/init vcontainer.blk=xvd vcontainer.init=/vxn-init.sh $kernel_append"

disk = [ $disk_array ]
vif = [ $vif_array ]

on_poweroff = "destroy"
on_reboot = "destroy"
on_crash = "destroy"
XENEOF

    # Add 9p config if any shares were requested
    if [ ${#_XEN_9P[@]} -gt 0 ]; then
        local p9_array=""
        for p in "${_XEN_9P[@]}"; do
            if [ -n "$p9_array" ]; then
                p9_array="$p9_array, $p"
            else
                p9_array="$p"
            fi
        done
        echo "p9 = [ $p9_array ]" >> "$config_path"
    fi

    log "DEBUG" "Xen config written to $config_path"
}
339
340# ============================================================================
341# VM Lifecycle
342# ============================================================================
343
# Create the DomU in the background and wire up its console.
#   $1 - extra kernel command line, $2 - VM log file,
#   $3 - timeout (unused here; kept for interface parity with other backends)
# Daemon mode: a socat bridge exposes the hvc1 console as a unix socket.
# Ephemeral mode: hvc0 output is captured into the log file so the
# monitoring loop in vrunner.sh can see the output markers.
# Returns non-zero when xl create fails.
hv_start_vm_background() {
    local kernel_append="$1"
    local log_file="$2"
    local timeout_val="$3"

    # Write domain config
    HV_XEN_CFG="${TEMP_DIR:-/tmp}/vxn-$$.cfg"
    _write_xen_config "$kernel_append" "$HV_XEN_CFG"

    # Create the domain.  The original ignored the exit status, so a failed
    # create silently left the script waiting on a domain that never
    # existed; report it and let the caller abort.
    if ! xl create "$HV_XEN_CFG" >> "$log_file" 2>&1; then
        log "ERROR" "xl create failed for domain $HV_DOMNAME (see $log_file)"
        return 1
    fi

    # xl manages the domain out-of-process: there is no child PID to track,
    # so store our own PID as a placeholder for interface compatibility.
    HV_VM_PID="$$"

    if [ -n "$DAEMON_SOCKET" ]; then
        # Daemon mode: bridge xl console (hvc1) to the daemon unix socket
        # xl console -n 1 connects to the second PV console (hvc1)
        socat "UNIX-LISTEN:$DAEMON_SOCKET,fork" "EXEC:xl console -n 1 $HV_DOMNAME" &
        _XEN_SOCAT_PID=$!
        log "DEBUG" "Console-socket bridge started (PID: $_XEN_SOCAT_PID)"
    else
        # Ephemeral mode: capture guest console (hvc0) to log file
        # so the monitoring loop in vrunner.sh can see output markers
        xl console "$HV_DOMNAME" >> "$log_file" 2>&1 &
        _XEN_CONSOLE_PID=$!
        log "DEBUG" "Console capture started (PID: $_XEN_CONSOLE_PID)"
    fi
}
374
# Create the DomU with the console attached (foreground/interactive mode).
#   $1 - extra kernel command-line text to append
hv_start_vm_foreground() {
    local extra_cmdline="$1"

    HV_XEN_CFG="${TEMP_DIR:-/tmp}/vxn-$$.cfg"
    _write_xen_config "$extra_cmdline" "$HV_XEN_CFG"

    # -c attaches this terminal to the guest console immediately
    xl create -c "$HV_XEN_CFG"
}
384
# True (exit 0) while the domain is still known to xl.
hv_is_vm_running() {
    xl list "$HV_DOMNAME" >/dev/null 2>&1
}

# Poll once per second until the domain disappears or the timeout
# ($1, seconds, default 30) expires.  Returns 0 when the VM exited,
# 1 on timeout.
hv_wait_vm_exit() {
    local deadline="${1:-30}"
    local elapsed=0
    while [ "$elapsed" -lt "$deadline" ]; do
        hv_is_vm_running || return 0
        sleep 1
        elapsed=$((elapsed + 1))
    done
    return 1
}
397
# Ask the domain to shut down gracefully; destroy it if it lingers past 15s.
hv_stop_vm() {
    log "INFO" "Shutting down Xen domain $HV_DOMNAME..."
    xl shutdown "$HV_DOMNAME" 2>/dev/null || true

    if ! hv_wait_vm_exit 15; then
        log "WARN" "Domain didn't shut down gracefully, destroying..."
        hv_destroy_vm
    fi
}

# Hard-stop the domain and reap any console helper processes we started.
hv_destroy_vm() {
    xl destroy "$HV_DOMNAME" 2>/dev/null || true

    # Console capture helper (ephemeral mode)
    if [ -n "${_XEN_CONSOLE_PID:-}" ]; then
        kill "$_XEN_CONSOLE_PID" 2>/dev/null || true
    fi

    # Console-to-socket bridge (daemon mode)
    if [ -n "${_XEN_SOCAT_PID:-}" ]; then
        kill "$_XEN_SOCAT_PID" 2>/dev/null || true
    fi
}
422
# Print the identifier vrunner uses to track this VM (the Xen domain name).
hv_get_vm_id() {
    printf '%s\n' "$HV_DOMNAME"
}
426
# ============================================================================
# Port Forwarding (iptables NAT for Xen bridge networking)
# ============================================================================

# Rules installed by hv_setup_port_forwards, recorded as
# "proto:host_port:container_port:guest_ip" so cleanup deletes the exact
# rules that were added.
_XEN_IPTABLES_RULES=()

# Install DNAT/FORWARD rules for each "host:container[/proto]" entry in
# PORT_FORWARDS, forwarding Dom0 ports to the guest.  Waits up to 30s for
# the guest to obtain an IP (read from xl network-list).
hv_setup_port_forwards() {
    if [ ${#PORT_FORWARDS[@]} -eq 0 ]; then
        return
    fi

    # Get guest IP from Xen network config
    # Wait briefly for the guest to get an IP
    local guest_ip=""
    for attempt in $(seq 1 30); do
        guest_ip=$(xl network-list "$HV_DOMNAME" 2>/dev/null | awk 'NR>1{print $4}' | head -1)
        if [ -n "$guest_ip" ] && [ "$guest_ip" != "-" ]; then
            break
        fi
        sleep 1
    done

    if [ -z "$guest_ip" ] || [ "$guest_ip" = "-" ]; then
        log "WARN" "Could not determine guest IP for port forwarding"
        return
    fi

    log "INFO" "Guest IP: $guest_ip"

    for pf in "${PORT_FORWARDS[@]}"; do
        local host_port="${pf%%:*}"
        local rest="${pf#*:}"
        local container_port="${rest%%/*}"
        local proto="tcp"
        if [[ "$rest" == */* ]]; then
            proto="${rest##*/}"
        fi

        # DNAT to the *container* port (the original forwarded to the host
        # port on the guest, breaking any mapping where the two differ),
        # and accept forwarded traffic on that same post-DNAT port.
        iptables -t nat -A PREROUTING -p "$proto" --dport "$host_port" \
            -j DNAT --to-destination "$guest_ip:$container_port" 2>/dev/null || true
        iptables -A FORWARD -p "$proto" -d "$guest_ip" --dport "$container_port" \
            -j ACCEPT 2>/dev/null || true
        _XEN_IPTABLES_RULES+=("$proto:$host_port:$container_port:$guest_ip")
        log "INFO" "Port forward: host:$host_port -> $guest_ip:$container_port ($proto)"
    done
}

# Delete every rule recorded in _XEN_IPTABLES_RULES (mirror of the -A calls
# above) and reset the tracking array.
hv_cleanup_port_forwards() {
    for rule in "${_XEN_IPTABLES_RULES[@]}"; do
        local proto="${rule%%:*}"
        local rest="${rule#*:}"
        local host_port="${rest%%:*}"
        rest="${rest#*:}"
        local container_port="${rest%%:*}"
        local guest_ip="${rest#*:}"

        iptables -t nat -D PREROUTING -p "$proto" --dport "$host_port" \
            -j DNAT --to-destination "$guest_ip:$container_port" 2>/dev/null || true
        iptables -D FORWARD -p "$proto" -d "$guest_ip" --dport "$container_port" \
            -j ACCEPT 2>/dev/null || true
    done
    _XEN_IPTABLES_RULES=()
}
489
# ============================================================================
# Idle Timeout
# ============================================================================

# Graceful stop used by the idle-timeout machinery (xl shutdown, best-effort).
hv_idle_shutdown() {
    xl shutdown "$HV_DOMNAME" 2>/dev/null || true
}
498
# ============================================================================
# Daemon Lifecycle (Xen-specific overrides)
# ============================================================================
# Xen domains persist via xl, not as child processes. The PID saved by
# vrunner.sh is just a placeholder. These hooks let daemon_is_running()
# and daemon_stop() work correctly for Xen.

# Path of the file that records the daemon's domain name, so a later
# invocation can recover it on reconnect.
_xen_domname_file() {
    echo "${DAEMON_SOCKET_DIR:-/tmp}/daemon.domname"
}

# Remember the current domain name for later reconnects.
hv_daemon_save_state() {
    echo "$HV_DOMNAME" > "$(_xen_domname_file)"
}

# Restore HV_DOMNAME from the state file, if one exists.
hv_daemon_load_state() {
    local state_file
    state_file=$(_xen_domname_file)
    if [ -f "$state_file" ]; then
        HV_DOMNAME=$(cat "$state_file" 2>/dev/null)
    fi
}

# The daemon is running when the recorded domain is still known to xl.
hv_daemon_is_running() {
    hv_daemon_load_state
    [ -n "$HV_DOMNAME" ] && xl list "$HV_DOMNAME" >/dev/null 2>&1
}
526
# Stop the daemon domain in three escalating steps: ask the guest to shut
# down over the command socket, then request a graceful xl shutdown
# (waiting up to 15s), and finally destroy the domain if it is still alive.
# Also reaps the console bridge and removes the saved domain-name state.
hv_daemon_stop() {
    hv_daemon_load_state
    [ -n "$HV_DOMNAME" ] || return 0

    log "INFO" "Shutting down Xen domain $HV_DOMNAME..."

    # Step 1: graceful guest-side shutdown via the daemon command socket
    if [ -S "$DAEMON_SOCKET" ]; then
        echo "===SHUTDOWN===" | socat - "UNIX-CONNECT:$DAEMON_SOCKET" 2>/dev/null || true
        sleep 2
    fi

    # Step 2: graceful hypervisor-side shutdown, wait for the domain to go
    if xl list "$HV_DOMNAME" >/dev/null 2>&1; then
        xl shutdown "$HV_DOMNAME" 2>/dev/null || true
        local waited
        for waited in $(seq 1 15); do
            xl list "$HV_DOMNAME" >/dev/null 2>&1 || break
            sleep 1
        done
    fi

    # Step 3: last resort — destroy
    if xl list "$HV_DOMNAME" >/dev/null 2>&1; then
        log "WARN" "Domain didn't shut down gracefully, destroying..."
        xl destroy "$HV_DOMNAME" 2>/dev/null || true
    fi

    # Reap the console-to-socket bridge if we started one
    if [ -n "${_XEN_SOCAT_PID:-}" ]; then
        kill "$_XEN_SOCAT_PID" 2>/dev/null || true
    fi

    rm -f "$(_xen_domname_file)"
    log "INFO" "Xen domain stopped"
}
diff --git a/recipes-containers/vcontainer/files/vrunner.sh b/recipes-containers/vcontainer/files/vrunner.sh
index 22e9229a..8ad45afe 100755
--- a/recipes-containers/vcontainer/files/vrunner.sh
+++ b/recipes-containers/vcontainer/files/vrunner.sh
@@ -4,29 +4,29 @@
4# SPDX-License-Identifier: GPL-2.0-only 4# SPDX-License-Identifier: GPL-2.0-only
5# 5#
6# vrunner.sh 6# vrunner.sh
7# Core runner for vdkr/vpdmn: execute container commands in QEMU-emulated environment 7# Core runner for vdkr/vpdmn/vxn: execute container commands in a hypervisor VM
8# 8#
9# This script is runtime-agnostic and supports both Docker and Podman via --runtime. 9# This script is runtime-agnostic and supports both Docker and Podman via --runtime.
10# It is also hypervisor-agnostic via pluggable backends (QEMU, Xen).
10# 11#
11# Boot flow: 12# Boot flow:
12# 1. QEMU loads kernel + tiny initramfs (busybox + preinit) 13# 1. Hypervisor boots kernel + tiny initramfs (busybox + preinit)
13# 2. preinit mounts rootfs.img (/dev/vda) and does switch_root 14# 2. preinit mounts rootfs.img and does switch_root
14# 3. Real /init runs on actual ext4 filesystem 15# 3. Real /init runs on actual filesystem
15# 4. Container runtime starts, executes command, outputs results 16# 4. Container runtime starts, executes command, outputs results
16# 17#
17# This two-stage boot is required because runc needs pivot_root, 18# This two-stage boot is required because runc needs pivot_root,
18# which doesn't work from initramfs (rootfs isn't a mount point). 19# which doesn't work from initramfs (rootfs isn't a mount point).
19# 20#
20# Drive layout: 21# Drive layout (device names vary by hypervisor):
21# /dev/vda = rootfs.img (ro, ext4 with container tools) 22# QEMU: /dev/vda, /dev/vdb, /dev/vdc (virtio-blk)
22# /dev/vdb = input disk (optional, user data) 23# Xen: /dev/xvda, /dev/xvdb, /dev/xvdc (xen-blkfront)
23# /dev/vdc = state disk (optional, persistent container storage)
24# 24#
25# Version: 3.4.0 25# Version: 3.5.0
26 26
27set -e 27set -e
28 28
29VERSION="3.4.0" 29VERSION="3.5.0"
30SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" 30SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
31 31
32# Runtime selection: docker or podman 32# Runtime selection: docker or podman
@@ -42,19 +42,19 @@ VERBOSE="${VDKR_VERBOSE:-${VPDMN_VERBOSE:-false}}"
42set_runtime_config() { 42set_runtime_config() {
43 case "$RUNTIME" in 43 case "$RUNTIME" in
44 docker) 44 docker)
45 TOOL_NAME="vdkr" 45 TOOL_NAME="${VCONTAINER_RUNTIME_NAME:-vdkr}"
46 BLOB_SUBDIR="vdkr-blobs" 46 BLOB_SUBDIR="vdkr-blobs"
47 BLOB_SUBDIR_ALT="blobs" 47 BLOB_SUBDIR_ALT="blobs"
48 CMDLINE_PREFIX="docker" 48 CMDLINE_PREFIX="docker"
49 STATE_DIR_BASE="${VDKR_STATE_DIR:-$HOME/.vdkr}" 49 STATE_DIR_BASE="${VDKR_STATE_DIR:-$HOME/.${TOOL_NAME}}"
50 STATE_FILE="docker-state.img" 50 STATE_FILE="docker-state.img"
51 ;; 51 ;;
52 podman) 52 podman)
53 TOOL_NAME="vpdmn" 53 TOOL_NAME="${VCONTAINER_RUNTIME_NAME:-vpdmn}"
54 BLOB_SUBDIR="vpdmn-blobs" 54 BLOB_SUBDIR="vpdmn-blobs"
55 BLOB_SUBDIR_ALT="blobs/vpdmn" 55 BLOB_SUBDIR_ALT="blobs/vpdmn"
56 CMDLINE_PREFIX="podman" 56 CMDLINE_PREFIX="podman"
57 STATE_DIR_BASE="${VPDMN_STATE_DIR:-$HOME/.vpdmn}" 57 STATE_DIR_BASE="${VPDMN_STATE_DIR:-$HOME/.${TOOL_NAME}}"
58 STATE_FILE="podman-state.img" 58 STATE_FILE="podman-state.img"
59 ;; 59 ;;
60 *) 60 *)
@@ -417,6 +417,10 @@ while [ $# -gt 0 ]; do
417 DISABLE_KVM="true" 417 DISABLE_KVM="true"
418 shift 418 shift
419 ;; 419 ;;
420 --hypervisor)
421 VCONTAINER_HYPERVISOR="$2"
422 shift 2
423 ;;
420 --batch-import) 424 --batch-import)
421 BATCH_IMPORT="true" 425 BATCH_IMPORT="true"
422 # Force storage output type for batch import 426 # Force storage output type for batch import
@@ -485,6 +489,18 @@ done
485set_runtime_config 489set_runtime_config
486set_blob_dir 490set_blob_dir
487 491
492# Load hypervisor backend
493VCONTAINER_HYPERVISOR="${VCONTAINER_HYPERVISOR:-qemu}"
494VCONTAINER_LIBDIR="${VCONTAINER_LIBDIR:-$SCRIPT_DIR}"
495HV_BACKEND="$VCONTAINER_LIBDIR/vrunner-backend-${VCONTAINER_HYPERVISOR}.sh"
496if [ ! -f "$HV_BACKEND" ]; then
497 echo "ERROR: Hypervisor backend not found: $HV_BACKEND" >&2
498 echo "Available backends:" >&2
499 ls "$VCONTAINER_LIBDIR"/vrunner-backend-*.sh 2>/dev/null | sed 's/.*vrunner-backend-//;s/\.sh$//' >&2
500 exit 1
501fi
502source "$HV_BACKEND"
503
488# Daemon mode handling 504# Daemon mode handling
489# Set default socket directory based on architecture 505# Set default socket directory based on architecture
490# If --state-dir was provided, use it for daemon files too 506# If --state-dir was provided, use it for daemon files too
@@ -503,6 +519,12 @@ DAEMON_INPUT_SIZE_MB=2048 # 2GB input disk for daemon mode
503 519
504# Daemon helper functions 520# Daemon helper functions
505daemon_is_running() { 521daemon_is_running() {
522 # Use backend-specific check if available (e.g. Xen xl list)
523 if type hv_daemon_is_running >/dev/null 2>&1; then
524 hv_daemon_is_running
525 return $?
526 fi
527 # Default: check PID (works for QEMU)
506 if [ -f "$DAEMON_PID_FILE" ]; then 528 if [ -f "$DAEMON_PID_FILE" ]; then
507 local pid=$(cat "$DAEMON_PID_FILE" 2>/dev/null) 529 local pid=$(cat "$DAEMON_PID_FILE" 2>/dev/null)
508 if [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null; then 530 if [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null; then
@@ -514,8 +536,10 @@ daemon_is_running() {
514 536
515daemon_status() { 537daemon_status() {
516 if daemon_is_running; then 538 if daemon_is_running; then
517 local pid=$(cat "$DAEMON_PID_FILE") 539 local pid=$(cat "$DAEMON_PID_FILE" 2>/dev/null)
518 echo "Daemon running (PID: $pid)" 540 local vm_id
541 vm_id=$(hv_get_vm_id 2>/dev/null || echo "$pid")
542 echo "Daemon running (VM: $vm_id)"
519 echo "Socket: $DAEMON_SOCKET" 543 echo "Socket: $DAEMON_SOCKET"
520 echo "Architecture: $TARGET_ARCH" 544 echo "Architecture: $TARGET_ARCH"
521 return 0 545 return 0
@@ -531,6 +555,14 @@ daemon_stop() {
531 return 0 555 return 0
532 fi 556 fi
533 557
558 # Use backend-specific stop if available (e.g. Xen xl shutdown/destroy)
559 if type hv_daemon_stop >/dev/null 2>&1; then
560 hv_daemon_stop
561 rm -f "$DAEMON_PID_FILE" "$DAEMON_SOCKET"
562 return 0
563 fi
564
565 # Default: PID-based stop (works for QEMU)
534 local pid=$(cat "$DAEMON_PID_FILE") 566 local pid=$(cat "$DAEMON_PID_FILE")
535 log "INFO" "Stopping daemon (PID: $pid)..." 567 log "INFO" "Stopping daemon (PID: $pid)..."
536 568
@@ -933,36 +965,16 @@ log "INFO" "Output type: $OUTPUT_TYPE"
933[ "$NETWORK" = "true" ] && log "INFO" "Networking: enabled (slirp)" 965[ "$NETWORK" = "true" ] && log "INFO" "Networking: enabled (slirp)"
934[ "$INTERACTIVE" = "true" ] && log "INFO" "Interactive mode: enabled" 966[ "$INTERACTIVE" = "true" ] && log "INFO" "Interactive mode: enabled"
935 967
936# Find kernel, initramfs, and rootfs 968# Initialize hypervisor backend: set arch-specific paths and commands
937case "$TARGET_ARCH" in 969hv_setup_arch
938 aarch64) 970hv_check_accel
939 KERNEL_IMAGE="$BLOB_DIR/aarch64/Image" 971hv_find_command
940 INITRAMFS="$BLOB_DIR/aarch64/initramfs.cpio.gz"
941 ROOTFS_IMG="$BLOB_DIR/aarch64/rootfs.img"
942 QEMU_CMD="qemu-system-aarch64"
943 QEMU_MACHINE="-M virt -cpu cortex-a57"
944 CONSOLE="ttyAMA0"
945 ;;
946 x86_64)
947 KERNEL_IMAGE="$BLOB_DIR/x86_64/bzImage"
948 INITRAMFS="$BLOB_DIR/x86_64/initramfs.cpio.gz"
949 ROOTFS_IMG="$BLOB_DIR/x86_64/rootfs.img"
950 QEMU_CMD="qemu-system-x86_64"
951 # Use q35 + Skylake-Client to match oe-core qemux86-64 machine
952 QEMU_MACHINE="-M q35 -cpu Skylake-Client"
953 CONSOLE="ttyS0"
954 ;;
955 *)
956 log "ERROR" "Unsupported architecture: $TARGET_ARCH"
957 exit 1
958 ;;
959esac
960 972
961# Check for kernel 973# Check for kernel
962if [ ! -f "$KERNEL_IMAGE" ]; then 974if [ ! -f "$KERNEL_IMAGE" ]; then
963 log "ERROR" "Kernel not found: $KERNEL_IMAGE" 975 log "ERROR" "Kernel not found: $KERNEL_IMAGE"
964 log "ERROR" "Set VDKR_BLOB_DIR or --blob-dir to location of vdkr blobs" 976 log "ERROR" "Set --blob-dir to location of blobs"
965 log "ERROR" "Build with: MACHINE=qemuarm64 bitbake vdkr-initramfs-build" 977 log "ERROR" "Build with: bitbake ${TOOL_NAME}-initramfs-create"
966 exit 1 978 exit 1
967fi 979fi
968 980
@@ -973,61 +985,20 @@ if [ ! -f "$INITRAMFS" ]; then
973 exit 1 985 exit 1
974fi 986fi
975 987
976# Check for rootfs image (ext4 with Docker tools) 988# Check for rootfs image
977if [ ! -f "$ROOTFS_IMG" ]; then 989if [ ! -f "$ROOTFS_IMG" ]; then
978 log "ERROR" "Rootfs image not found: $ROOTFS_IMG" 990 log "ERROR" "Rootfs image not found: $ROOTFS_IMG"
979 log "ERROR" "Build with: MACHINE=qemuarm64 bitbake vdkr-initramfs-create" 991 log "ERROR" "Build with: MACHINE=qemuarm64 bitbake vdkr-initramfs-create"
980 exit 1 992 exit 1
981fi 993fi
982 994
983# Find QEMU - check PATH and common locations 995log "DEBUG" "Using initramfs: $INITRAMFS"
984if ! command -v "$QEMU_CMD" >/dev/null 2>&1; then
985 # Try common locations
986 for path in \
987 "${STAGING_BINDIR_NATIVE:-}" \
988 "/usr/bin"; do
989 if [ -n "$path" ] && [ -x "$path/$QEMU_CMD" ]; then
990 QEMU_CMD="$path/$QEMU_CMD"
991 break
992 fi
993 done
994fi
995
996if ! command -v "$QEMU_CMD" >/dev/null 2>&1 && [ ! -x "$QEMU_CMD" ]; then
997 log "ERROR" "QEMU not found: $QEMU_CMD"
998 exit 1
999fi
1000
1001log "DEBUG" "Using QEMU: $QEMU_CMD"
1002 996
1003# Check for KVM acceleration (when host matches target) 997# Let backend prepare container image if needed (e.g., Xen pulls OCI via skopeo)
1004USE_KVM="false" 998if type hv_prepare_container >/dev/null 2>&1; then
1005if [ "$DISABLE_KVM" = "true" ]; then 999 hv_prepare_container
1006 log "DEBUG" "KVM disabled by --no-kvm flag"
1007else
1008 HOST_ARCH=$(uname -m)
1009 if [ "$HOST_ARCH" = "$TARGET_ARCH" ] || \
1010 { [ "$HOST_ARCH" = "x86_64" ] && [ "$TARGET_ARCH" = "x86_64" ]; }; then
1011 if [ -w /dev/kvm ]; then
1012 USE_KVM="true"
1013 # Use host CPU for best performance with KVM
1014 case "$TARGET_ARCH" in
1015 x86_64)
1016 QEMU_MACHINE="-M q35 -cpu host"
1017 ;;
1018 aarch64)
1019 QEMU_MACHINE="-M virt -cpu host"
1020 ;;
1021 esac
1022 log "INFO" "KVM acceleration enabled"
1023 else
1024 log "DEBUG" "KVM not available (no write access to /dev/kvm)"
1025 fi
1026 fi
1027fi 1000fi
1028 1001
1029log "DEBUG" "Using initramfs: $INITRAMFS"
1030
1031# Create input disk image if needed 1002# Create input disk image if needed
1032DISK_OPTS="" 1003DISK_OPTS=""
1033if [ -n "$INPUT_PATH" ] && [ "$INPUT_TYPE" != "none" ]; then 1004if [ -n "$INPUT_PATH" ] && [ "$INPUT_TYPE" != "none" ]; then
@@ -1062,8 +1033,10 @@ if [ -n "$INPUT_PATH" ] && [ "$INPUT_TYPE" != "none" ]; then
1062fi 1033fi
1063 1034
1064# Create state disk for persistent storage (--state-dir) 1035# Create state disk for persistent storage (--state-dir)
1036# Xen backend skips this: DomU Docker storage lives in the guest's overlay
1037# filesystem and persists as long as the domain is running (daemon mode).
1065STATE_DISK_OPTS="" 1038STATE_DISK_OPTS=""
1066if [ -n "$STATE_DIR" ]; then 1039if [ -n "$STATE_DIR" ] && ! type hv_skip_state_disk >/dev/null 2>&1; then
1067 mkdir -p "$STATE_DIR" 1040 mkdir -p "$STATE_DIR"
1068 STATE_IMG="$STATE_DIR/$STATE_FILE" 1041 STATE_IMG="$STATE_DIR/$STATE_FILE"
1069 1042
@@ -1090,6 +1063,10 @@ if [ -n "$STATE_DIR" ]; then
1090 # Combined with graceful shutdown wait, this ensures data integrity 1063 # Combined with graceful shutdown wait, this ensures data integrity
1091 STATE_DISK_OPTS="-drive file=$STATE_IMG,if=virtio,format=raw,cache=directsync" 1064 STATE_DISK_OPTS="-drive file=$STATE_IMG,if=virtio,format=raw,cache=directsync"
1092 log "DEBUG" "State disk: $(ls -lh "$STATE_IMG" | awk '{print $5}')" 1065 log "DEBUG" "State disk: $(ls -lh "$STATE_IMG" | awk '{print $5}')"
1066elif [ -n "$STATE_DIR" ]; then
1067 # Backend skips state disk but we still need the directory for daemon files
1068 mkdir -p "$STATE_DIR"
1069 log "DEBUG" "State disk: skipped (${VCONTAINER_HYPERVISOR} backend manages guest storage)"
1093fi 1070fi
1094 1071
1095# Create state disk from input-storage tar (--input-storage) 1072# Create state disk from input-storage tar (--input-storage)
@@ -1147,9 +1124,9 @@ DOCKER_CMD_B64=$(echo -n "$DOCKER_CMD" | base64 -w0)
1147# In interactive mode, use 'quiet' to suppress kernel boot messages 1124# In interactive mode, use 'quiet' to suppress kernel boot messages
1148# Use CMDLINE_PREFIX for runtime-specific parameters (docker_ or podman_) 1125# Use CMDLINE_PREFIX for runtime-specific parameters (docker_ or podman_)
1149if [ "$INTERACTIVE" = "true" ]; then 1126if [ "$INTERACTIVE" = "true" ]; then
1150 KERNEL_APPEND="console=$CONSOLE,115200 quiet loglevel=0 init=/init" 1127 KERNEL_APPEND="console=$(hv_get_console_device),115200 quiet loglevel=0 init=/init"
1151else 1128else
1152 KERNEL_APPEND="console=$CONSOLE,115200 init=/init" 1129 KERNEL_APPEND="console=$(hv_get_console_device),115200 init=/init"
1153fi 1130fi
1154# Tell init script which runtime we're using 1131# Tell init script which runtime we're using
1155KERNEL_APPEND="$KERNEL_APPEND runtime=$RUNTIME" 1132KERNEL_APPEND="$KERNEL_APPEND runtime=$RUNTIME"
@@ -1198,66 +1175,24 @@ if [ "$INTERACTIVE" = "true" ]; then
1198 KERNEL_APPEND="$KERNEL_APPEND ${CMDLINE_PREFIX}_interactive=1" 1175 KERNEL_APPEND="$KERNEL_APPEND ${CMDLINE_PREFIX}_interactive=1"
1199fi 1176fi
1200 1177
1201# Build QEMU command 1178# Build VM configuration via hypervisor backend
1202# Drive ordering is important: 1179# Drive ordering is important:
1203# /dev/vda = rootfs.img (read-only, ext4 with Docker tools) 1180# rootfs.img (read-only), input disk (if any), state disk (if any)
1204# /dev/vdb = input disk (if any) 1181hv_build_disk_opts
1205# /dev/vdc = state disk (if any) 1182hv_build_network_opts
1206# The preinit script in initramfs mounts /dev/vda and does switch_root 1183hv_build_vm_cmd
1207# Build QEMU options
1208QEMU_OPTS="$QEMU_MACHINE -nographic -smp 2 -m 2048 -no-reboot"
1209if [ "$USE_KVM" = "true" ]; then
1210 QEMU_OPTS="$QEMU_OPTS -enable-kvm"
1211fi
1212QEMU_OPTS="$QEMU_OPTS -kernel $KERNEL_IMAGE"
1213QEMU_OPTS="$QEMU_OPTS -initrd $INITRAMFS"
1214QEMU_OPTS="$QEMU_OPTS -drive file=$ROOTFS_IMG,if=virtio,format=raw,readonly=on"
1215QEMU_OPTS="$QEMU_OPTS $DISK_OPTS"
1216QEMU_OPTS="$QEMU_OPTS $STATE_DISK_OPTS"
1217
1218# Add networking if enabled (slirp user-mode networking)
1219if [ "$NETWORK" = "true" ]; then
1220 # Slirp provides NAT'd outbound connectivity without root privileges
1221 # Guest gets 10.0.2.15, gateway is 10.0.2.2, DNS is 10.0.2.3
1222 NETDEV_OPTS="user,id=net0"
1223
1224 # Add port forwards - QEMU forwards host:port -> VM:port
1225 # Docker's iptables handles VM:port -> container:port
1226 for pf in "${PORT_FORWARDS[@]}"; do
1227 # Parse host_port:container_port or host_port:container_port/protocol
1228 HOST_PORT="${pf%%:*}"
1229 CONTAINER_PART="${pf#*:}"
1230 CONTAINER_PORT="${CONTAINER_PART%%/*}"
1231
1232 # Check for protocol suffix (default to tcp)
1233 if [[ "$CONTAINER_PART" == */* ]]; then
1234 PROTOCOL="${CONTAINER_PART##*/}"
1235 else
1236 PROTOCOL="tcp"
1237 fi
1238
1239 # Forward to HOST_PORT on VM; Docker -p handles container port mapping
1240 NETDEV_OPTS="$NETDEV_OPTS,hostfwd=$PROTOCOL::$HOST_PORT-:$HOST_PORT"
1241 log "INFO" "Port forward: host:$HOST_PORT -> VM:$HOST_PORT (Docker maps to container:$CONTAINER_PORT)"
1242 done
1243
1244 QEMU_OPTS="$QEMU_OPTS -netdev $NETDEV_OPTS -device virtio-net-pci,netdev=net0"
1245else
1246 # Explicitly disable networking
1247 QEMU_OPTS="$QEMU_OPTS -nic none"
1248fi
1249 1184
1250# Batch-import mode: add virtio-9p for fast output (instead of slow console base64) 1185# Batch-import mode: add 9p for fast output (instead of slow console base64)
1251if [ "$BATCH_IMPORT" = "true" ]; then 1186if [ "$BATCH_IMPORT" = "true" ]; then
1252 BATCH_SHARE_DIR="$TEMP_DIR/share" 1187 BATCH_SHARE_DIR="$TEMP_DIR/share"
1253 mkdir -p "$BATCH_SHARE_DIR" 1188 mkdir -p "$BATCH_SHARE_DIR"
1254 SHARE_TAG="${TOOL_NAME}_share" 1189 SHARE_TAG="${TOOL_NAME}_share"
1255 QEMU_OPTS="$QEMU_OPTS -virtfs local,path=$BATCH_SHARE_DIR,mount_tag=$SHARE_TAG,security_model=none,id=$SHARE_TAG" 1190 HV_OPTS="$HV_OPTS $(hv_build_9p_opts "$BATCH_SHARE_DIR" "$SHARE_TAG")"
1256 KERNEL_APPEND="$KERNEL_APPEND ${CMDLINE_PREFIX}_9p=1" 1191 KERNEL_APPEND="$KERNEL_APPEND ${CMDLINE_PREFIX}_9p=1"
1257 log "INFO" "Using virtio-9p for fast storage output" 1192 log "INFO" "Using 9p for fast storage output"
1258fi 1193fi
1259 1194
1260# Daemon mode: add virtio-serial for command channel 1195# Daemon mode: add serial channel for command I/O
1261if [ "$DAEMON_MODE" = "start" ]; then 1196if [ "$DAEMON_MODE" = "start" ]; then
1262 # Check for required tools 1197 # Check for required tools
1263 if ! command -v socat >/dev/null 2>&1; then 1198 if ! command -v socat >/dev/null 2>&1; then
@@ -1275,30 +1210,18 @@ if [ "$DAEMON_MODE" = "start" ]; then
1275 # Create socket directory 1210 # Create socket directory
1276 mkdir -p "$DAEMON_SOCKET_DIR" 1211 mkdir -p "$DAEMON_SOCKET_DIR"
1277 1212
1278 # Create shared directory for file I/O (virtio-9p) 1213 # Create shared directory for file I/O (9p)
1279 DAEMON_SHARE_DIR="$DAEMON_SOCKET_DIR/share" 1214 DAEMON_SHARE_DIR="$DAEMON_SOCKET_DIR/share"
1280 mkdir -p "$DAEMON_SHARE_DIR" 1215 mkdir -p "$DAEMON_SHARE_DIR"
1281 1216
1282 # Add virtio-9p for shared directory access 1217 # Add 9p for shared directory access
1283 # Host writes to $DAEMON_SHARE_DIR, guest mounts as /mnt/share
1284 # Use runtime-specific mount tag (vdkr_share or vpdmn_share)
1285 SHARE_TAG="${TOOL_NAME}_share" 1218 SHARE_TAG="${TOOL_NAME}_share"
1286 # Use security_model=none for simplest file sharing (no permission mapping) 1219 HV_OPTS="$HV_OPTS $(hv_build_9p_opts "$DAEMON_SHARE_DIR" "$SHARE_TAG")"
1287 # This allows writes from container (running as root) to propagate to host
1288 QEMU_OPTS="$QEMU_OPTS -virtfs local,path=$DAEMON_SHARE_DIR,mount_tag=$SHARE_TAG,security_model=none,id=$SHARE_TAG"
1289 # Tell init script to mount the share
1290 KERNEL_APPEND="$KERNEL_APPEND ${CMDLINE_PREFIX}_9p=1" 1220 KERNEL_APPEND="$KERNEL_APPEND ${CMDLINE_PREFIX}_9p=1"
1291 1221
1292 # Add virtio-serial device for command channel 1222 # Add daemon command channel (backend-specific: virtio-serial or PV console)
1293 # Using virtserialport creates /dev/vport0p1 in guest, host sees unix socket 1223 hv_build_daemon_opts
1294 # virtconsole would use hvc* but requires virtio_console kernel module 1224 HV_OPTS="$HV_OPTS $HV_DAEMON_OPTS"
1295 QEMU_OPTS="$QEMU_OPTS -chardev socket,id=vdkr,path=$DAEMON_SOCKET,server=on,wait=off"
1296 QEMU_OPTS="$QEMU_OPTS -device virtio-serial-pci"
1297 QEMU_OPTS="$QEMU_OPTS -device virtserialport,chardev=vdkr,name=vdkr"
1298
1299 # Add QMP socket for dynamic control (port forwarding, etc.)
1300 QMP_SOCKET="$DAEMON_SOCKET_DIR/qmp.sock"
1301 QEMU_OPTS="$QEMU_OPTS -qmp unix:$QMP_SOCKET,server,nowait"
1302 1225
1303 # Tell init script to run in daemon mode with idle timeout 1226 # Tell init script to run in daemon mode with idle timeout
1304 KERNEL_APPEND="$KERNEL_APPEND ${CMDLINE_PREFIX}_daemon=1" 1227 KERNEL_APPEND="$KERNEL_APPEND ${CMDLINE_PREFIX}_daemon=1"
@@ -1308,36 +1231,18 @@ if [ "$DAEMON_MODE" = "start" ]; then
1308 if [ "$NETWORK" != "true" ]; then 1231 if [ "$NETWORK" != "true" ]; then
1309 log "INFO" "Enabling networking for daemon mode" 1232 log "INFO" "Enabling networking for daemon mode"
1310 NETWORK="true" 1233 NETWORK="true"
1311 # Build netdev options with any port forwards 1234 hv_build_network_opts
1312 DAEMON_NETDEV="user,id=net0" 1235 # Re-add network opts (they were built without port forwards initially)
1236 # The rebuild includes port forwards since NETWORK is now true
1237 fi
1238 # Ensure port forwards are logged
1239 if [ ${#PORT_FORWARDS[@]} -gt 0 ]; then
1313 for pf in "${PORT_FORWARDS[@]}"; do 1240 for pf in "${PORT_FORWARDS[@]}"; do
1314 # Parse host_port:container_port or host_port:container_port/protocol
1315 HOST_PORT="${pf%%:*}" 1241 HOST_PORT="${pf%%:*}"
1316 CONTAINER_PART="${pf#*:}" 1242 CONTAINER_PART="${pf#*:}"
1317 CONTAINER_PORT="${CONTAINER_PART%%/*}" 1243 CONTAINER_PORT="${CONTAINER_PART%%/*}"
1318 if [[ "$CONTAINER_PART" == */* ]]; then 1244 log "INFO" "Port forward configured: $HOST_PORT -> $CONTAINER_PORT"
1319 PROTOCOL="${CONTAINER_PART##*/}"
1320 else
1321 PROTOCOL="tcp"
1322 fi
1323 # Forward to HOST_PORT on VM; Docker -p handles container port mapping
1324 DAEMON_NETDEV="$DAEMON_NETDEV,hostfwd=$PROTOCOL::$HOST_PORT-:$HOST_PORT"
1325 log "INFO" "Port forward: host:$HOST_PORT -> VM:$HOST_PORT (Docker maps to container:$CONTAINER_PORT)"
1326 done 1245 done
1327 QEMU_OPTS="$QEMU_OPTS -netdev $DAEMON_NETDEV -device virtio-net-pci,netdev=net0"
1328 else
1329 # NETWORK was already true, but check if we need to add port forwards
1330 # that weren't included in the earlier networking setup
1331 # (This happens when NETWORK was set to true before daemon mode was detected)
1332 if [ ${#PORT_FORWARDS[@]} -gt 0 ]; then
1333 # Port forwards should already be included from earlier networking setup
1334 for pf in "${PORT_FORWARDS[@]}"; do
1335 HOST_PORT="${pf%%:*}"
1336 CONTAINER_PART="${pf#*:}"
1337 CONTAINER_PORT="${CONTAINER_PART%%/*}"
1338 log "INFO" "Port forward configured: $HOST_PORT -> $CONTAINER_PORT"
1339 done
1340 fi
1341 fi 1246 fi
1342 1247
1343 # Copy CA certificate to shared folder (too large for kernel cmdline) 1248 # Copy CA certificate to shared folder (too large for kernel cmdline)
@@ -1349,23 +1254,23 @@ if [ "$DAEMON_MODE" = "start" ]; then
1349 log "INFO" "Starting daemon..." 1254 log "INFO" "Starting daemon..."
1350 log "DEBUG" "PID file: $DAEMON_PID_FILE" 1255 log "DEBUG" "PID file: $DAEMON_PID_FILE"
1351 log "DEBUG" "Socket: $DAEMON_SOCKET" 1256 log "DEBUG" "Socket: $DAEMON_SOCKET"
1352 log "DEBUG" "Command: $QEMU_CMD $QEMU_OPTS -append \"$KERNEL_APPEND\""
1353 1257
1354 # Start QEMU in background 1258 # Start VM in background via backend
1355 $QEMU_CMD $QEMU_OPTS -append "$KERNEL_APPEND" > "$DAEMON_QEMU_LOG" 2>&1 & 1259 hv_start_vm_background "$KERNEL_APPEND" "$DAEMON_QEMU_LOG" ""
1356 QEMU_PID=$! 1260 echo "$HV_VM_PID" > "$DAEMON_PID_FILE"
1357 echo "$QEMU_PID" > "$DAEMON_PID_FILE"
1358 1261
1359 log "INFO" "QEMU started (PID: $QEMU_PID)" 1262 # Let backend save any extra state (e.g. Xen domain name)
1263 if type hv_daemon_save_state >/dev/null 2>&1; then
1264 hv_daemon_save_state
1265 fi
1360 1266
1361 # Wait for socket to appear (Docker starting) 1267 log "INFO" "VM started (PID: $HV_VM_PID)"
1362 # Docker can take 60+ seconds to start, so wait up to 120 seconds 1268
1269 # Wait for socket to appear (container runtime starting)
1363 log "INFO" "Waiting for daemon to be ready..." 1270 log "INFO" "Waiting for daemon to be ready..."
1364 READY=false 1271 READY=false
1365 for i in $(seq 1 120); do 1272 for i in $(seq 1 120); do
1366 if [ -S "$DAEMON_SOCKET" ]; then 1273 if [ -S "$DAEMON_SOCKET" ]; then
1367 # Socket exists, try to connect
1368 # Keep stdin open for 3 seconds to allow response to arrive
1369 RESPONSE=$( { echo "===PING==="; sleep 3; } | timeout 10 socat - "UNIX-CONNECT:$DAEMON_SOCKET" 2>/dev/null || true) 1274 RESPONSE=$( { echo "===PING==="; sleep 3; } | timeout 10 socat - "UNIX-CONNECT:$DAEMON_SOCKET" 2>/dev/null || true)
1370 if echo "$RESPONSE" | grep -q "===PONG==="; then 1275 if echo "$RESPONSE" | grep -q "===PONG==="; then
1371 log "DEBUG" "Got PONG response" 1276 log "DEBUG" "Got PONG response"
@@ -1376,64 +1281,54 @@ if [ "$DAEMON_MODE" = "start" ]; then
1376 fi 1281 fi
1377 fi 1282 fi
1378 1283
1379 # Check if QEMU died 1284 # Check if VM died
1380 if ! kill -0 "$QEMU_PID" 2>/dev/null; then 1285 if ! hv_is_vm_running; then
1381 log "ERROR" "QEMU process died during startup" 1286 log "ERROR" "VM process died during startup"
1382 cat "$DAEMON_QEMU_LOG" >&2 1287 cat "$DAEMON_QEMU_LOG" >&2
1383 rm -f "$DAEMON_PID_FILE" 1288 rm -f "$DAEMON_PID_FILE"
1384 exit 1 1289 exit 1
1385 fi 1290 fi
1386 1291
1387 log "DEBUG" "Waiting... ($i/60)" 1292 log "DEBUG" "Waiting... ($i/120)"
1388 sleep 1 1293 sleep 1
1389 done 1294 done
1390 1295
1391 if [ "$READY" = "true" ]; then 1296 if [ "$READY" = "true" ]; then
1392 log "INFO" "Daemon is ready!" 1297 log "INFO" "Daemon is ready!"
1393 1298
1299 # Set up port forwards via backend (e.g., iptables for Xen)
1300 hv_setup_port_forwards
1301
1394 # Start host-side idle watchdog if timeout is set 1302 # Start host-side idle watchdog if timeout is set
1395 if [ "$IDLE_TIMEOUT" -gt 0 ] 2>/dev/null; then 1303 if [ "$IDLE_TIMEOUT" -gt 0 ] 2>/dev/null; then
1396 ACTIVITY_FILE="$DAEMON_SOCKET_DIR/activity" 1304 ACTIVITY_FILE="$DAEMON_SOCKET_DIR/activity"
1397 touch "$ACTIVITY_FILE" 1305 touch "$ACTIVITY_FILE"
1398 1306
1399 # Spawn background watchdog
1400 ( 1307 (
1401 # Container status file - guest writes this via virtio-9p share
1402 # This avoids sending commands through daemon socket which corrupts output
1403 CONTAINER_STATUS_FILE="$DAEMON_SHARE_DIR/.containers_running" 1308 CONTAINER_STATUS_FILE="$DAEMON_SHARE_DIR/.containers_running"
1404
1405 # Scale check interval to idle timeout (check ~5 times before timeout)
1406 CHECK_INTERVAL=$((IDLE_TIMEOUT / 5)) 1309 CHECK_INTERVAL=$((IDLE_TIMEOUT / 5))
1407 [ "$CHECK_INTERVAL" -lt 10 ] && CHECK_INTERVAL=10 1310 [ "$CHECK_INTERVAL" -lt 10 ] && CHECK_INTERVAL=10
1408 [ "$CHECK_INTERVAL" -gt 60 ] && CHECK_INTERVAL=60 1311 [ "$CHECK_INTERVAL" -gt 60 ] && CHECK_INTERVAL=60
1409 1312
1410 while true; do 1313 while true; do
1411 sleep "$CHECK_INTERVAL" 1314 sleep "$CHECK_INTERVAL"
1412 [ -f "$ACTIVITY_FILE" ] || exit 0 # Clean exit if file removed 1315 [ -f "$ACTIVITY_FILE" ] || exit 0
1413 [ -f "$DAEMON_PID_FILE" ] || exit 0 # PID file gone 1316 [ -f "$DAEMON_PID_FILE" ] || exit 0
1414 1317
1415 # Check if QEMU process is still running 1318 # Check if VM is still running (backend-aware)
1416 QEMU_PID=$(cat "$DAEMON_PID_FILE" 2>/dev/null) 1319 hv_is_vm_running || exit 0
1417 [ -n "$QEMU_PID" ] && kill -0 "$QEMU_PID" 2>/dev/null || exit 0
1418 1320
1419 LAST_ACTIVITY=$(stat -c %Y "$ACTIVITY_FILE" 2>/dev/null || echo 0) 1321 LAST_ACTIVITY=$(stat -c %Y "$ACTIVITY_FILE" 2>/dev/null || echo 0)
1420 NOW=$(date +%s) 1322 NOW=$(date +%s)
1421 IDLE_SECONDS=$((NOW - LAST_ACTIVITY)) 1323 IDLE_SECONDS=$((NOW - LAST_ACTIVITY))
1422 1324
1423 if [ "$IDLE_SECONDS" -ge "$IDLE_TIMEOUT" ]; then 1325 if [ "$IDLE_SECONDS" -ge "$IDLE_TIMEOUT" ]; then
1424 # Check if any containers are running via shared file
1425 # Guest-side watchdog writes container IDs to this file
1426 if [ -f "$CONTAINER_STATUS_FILE" ] && [ -s "$CONTAINER_STATUS_FILE" ]; then 1326 if [ -f "$CONTAINER_STATUS_FILE" ] && [ -s "$CONTAINER_STATUS_FILE" ]; then
1427 # Containers are running - reset activity and skip shutdown
1428 touch "$ACTIVITY_FILE" 1327 touch "$ACTIVITY_FILE"
1429 continue 1328 continue
1430 fi 1329 fi
1431 1330 # Use backend-specific idle shutdown
1432 # No containers running - send QMP quit to gracefully stop QEMU 1331 hv_idle_shutdown
1433 if [ -S "$QMP_SOCKET" ]; then
1434 echo '{"execute":"qmp_capabilities"}{"execute":"quit"}' | \
1435 socat - "UNIX-CONNECT:$QMP_SOCKET" >/dev/null 2>&1 || true
1436 fi
1437 rm -f "$ACTIVITY_FILE" 1332 rm -f "$ACTIVITY_FILE"
1438 exit 0 1333 exit 0
1439 fi 1334 fi
@@ -1442,119 +1337,103 @@ if [ "$DAEMON_MODE" = "start" ]; then
1442 log "DEBUG" "Started host-side idle watchdog (timeout: ${IDLE_TIMEOUT}s)" 1337 log "DEBUG" "Started host-side idle watchdog (timeout: ${IDLE_TIMEOUT}s)"
1443 fi 1338 fi
1444 1339
1445 echo "Daemon running (PID: $QEMU_PID)" 1340 echo "Daemon running (PID: $HV_VM_PID)"
1446 echo "Socket: $DAEMON_SOCKET" 1341 echo "Socket: $DAEMON_SOCKET"
1447 exit 0 1342 exit 0
1448 else 1343 else
1449 log "ERROR" "Daemon failed to become ready within 120 seconds" 1344 log "ERROR" "Daemon failed to become ready within 120 seconds"
1450 cat "$DAEMON_QEMU_LOG" >&2 1345 cat "$DAEMON_QEMU_LOG" >&2
1451 kill "$QEMU_PID" 2>/dev/null || true 1346 hv_destroy_vm
1452 rm -f "$DAEMON_PID_FILE" "$DAEMON_SOCKET" 1347 rm -f "$DAEMON_PID_FILE" "$DAEMON_SOCKET"
1453 exit 1 1348 exit 1
1454 fi 1349 fi
1455fi 1350fi
1456 1351
1457# For non-daemon mode with CA cert, we need virtio-9p to pass the cert 1352# For non-daemon mode with CA cert, we need 9p to pass the cert
1458# (kernel cmdline is too small for base64-encoded certs)
1459if [ -n "$CA_CERT" ] && [ -f "$CA_CERT" ]; then 1353if [ -n "$CA_CERT" ] && [ -f "$CA_CERT" ]; then
1460 # Create temp share dir for CA cert
1461 CA_SHARE_DIR="$TEMP_DIR/ca_share" 1354 CA_SHARE_DIR="$TEMP_DIR/ca_share"
1462 mkdir -p "$CA_SHARE_DIR" 1355 mkdir -p "$CA_SHARE_DIR"
1463 cp "$CA_CERT" "$CA_SHARE_DIR/ca.crt" 1356 cp "$CA_CERT" "$CA_SHARE_DIR/ca.crt"
1464 1357
1465 # Add virtio-9p mount for CA cert
1466 SHARE_TAG="${TOOL_NAME}_share" 1358 SHARE_TAG="${TOOL_NAME}_share"
1467 QEMU_OPTS="$QEMU_OPTS -virtfs local,path=$CA_SHARE_DIR,mount_tag=$SHARE_TAG,security_model=none,readonly=on,id=cashare" 1359 HV_OPTS="$HV_OPTS $(hv_build_9p_opts "$CA_SHARE_DIR" "$SHARE_TAG" "readonly=on")"
1468 KERNEL_APPEND="$KERNEL_APPEND ${CMDLINE_PREFIX}_9p=1" 1360 KERNEL_APPEND="$KERNEL_APPEND ${CMDLINE_PREFIX}_9p=1"
1469 log "DEBUG" "CA certificate available via virtio-9p" 1361 log "DEBUG" "CA certificate available via 9p"
1470fi 1362fi
1471 1363
1472log "INFO" "Starting QEMU..." 1364log "INFO" "Starting VM ($VCONTAINER_HYPERVISOR)..."
1473log "DEBUG" "Command: $QEMU_CMD $QEMU_OPTS -append \"$KERNEL_APPEND\""
1474 1365
1475# Interactive mode runs QEMU in foreground with stdio connected 1366# Interactive mode runs VM in foreground with stdio connected
1476if [ "$INTERACTIVE" = "true" ]; then 1367if [ "$INTERACTIVE" = "true" ]; then
1477 # Check if stdin is a terminal
1478 if [ ! -t 0 ]; then 1368 if [ ! -t 0 ]; then
1479 log "WARN" "Interactive mode requested but stdin is not a terminal" 1369 log "WARN" "Interactive mode requested but stdin is not a terminal"
1480 fi 1370 fi
1481 1371
1482 # Show a starting message
1483 # The init script will clear this line when the container is ready
1484 if [ -t 1 ]; then 1372 if [ -t 1 ]; then
1485 printf "\r\033[0;36m[vdkr]\033[0m Starting container... \r" 1373 printf "\r\033[0;36m[${TOOL_NAME}]\033[0m Starting container... \r"
1486 fi 1374 fi
1487 1375
1488 # Save terminal settings to restore later
1489 if [ -t 0 ]; then 1376 if [ -t 0 ]; then
1490 SAVED_STTY=$(stty -g) 1377 SAVED_STTY=$(stty -g)
1491 # Put terminal in raw mode so Ctrl+C etc go to guest
1492 stty raw -echo 1378 stty raw -echo
1493 fi 1379 fi
1494 1380
1495 # Run QEMU with stdio (not in background) 1381 hv_start_vm_foreground "$KERNEL_APPEND"
1496 # The -serial mon:stdio connects the serial console to our terminal 1382 VM_EXIT=$?
1497 $QEMU_CMD $QEMU_OPTS -append "$KERNEL_APPEND"
1498 QEMU_EXIT=$?
1499 1383
1500 # Restore terminal settings
1501 if [ -t 0 ]; then 1384 if [ -t 0 ]; then
1502 stty "$SAVED_STTY" 1385 stty "$SAVED_STTY"
1503 fi 1386 fi
1504 1387
1505 echo "" 1388 echo ""
1506 log "INFO" "Interactive session ended (exit code: $QEMU_EXIT)" 1389 log "INFO" "Interactive session ended (exit code: $VM_EXIT)"
1507 exit $QEMU_EXIT 1390 exit $VM_EXIT
1508fi 1391fi
1509 1392
1510# Non-interactive mode: run QEMU in background and capture output 1393# Non-interactive mode: run VM in background and capture output
1511QEMU_OUTPUT="$TEMP_DIR/qemu_output.txt" 1394VM_OUTPUT="$TEMP_DIR/vm_output.txt"
1512timeout $TIMEOUT $QEMU_CMD $QEMU_OPTS -append "$KERNEL_APPEND" > "$QEMU_OUTPUT" 2>&1 & 1395hv_start_vm_background "$KERNEL_APPEND" "$VM_OUTPUT" "$TIMEOUT"
1513QEMU_PID=$!
1514 1396
1515# Monitor for completion 1397# Monitor for completion
1516COMPLETE=false 1398COMPLETE=false
1517for i in $(seq 1 $TIMEOUT); do 1399for i in $(seq 1 $TIMEOUT); do
1518 if [ ! -d "/proc/$QEMU_PID" ]; then 1400 if ! hv_is_vm_running; then
1519 log "DEBUG" "QEMU ended after $i seconds" 1401 log "DEBUG" "VM ended after $i seconds"
1520 break 1402 break
1521 fi 1403 fi
1522 1404
1523 # Check for completion markers based on output type 1405 # Check for completion markers based on output type
1524 case "$OUTPUT_TYPE" in 1406 case "$OUTPUT_TYPE" in
1525 text) 1407 text)
1526 if grep -q "===OUTPUT_END===" "$QEMU_OUTPUT" 2>/dev/null; then 1408 if grep -q "===OUTPUT_END===" "$VM_OUTPUT" 2>/dev/null; then
1527 COMPLETE=true 1409 COMPLETE=true
1528 break 1410 break
1529 fi 1411 fi
1530 ;; 1412 ;;
1531 tar) 1413 tar)
1532 if grep -q "===TAR_END===" "$QEMU_OUTPUT" 2>/dev/null; then 1414 if grep -q "===TAR_END===" "$VM_OUTPUT" 2>/dev/null; then
1533 COMPLETE=true 1415 COMPLETE=true
1534 break 1416 break
1535 fi 1417 fi
1536 ;; 1418 ;;
1537 storage) 1419 storage)
1538 # Check for both console (STORAGE_END) and virtio-9p (9P_STORAGE_DONE) markers 1420 if grep -qE "===STORAGE_END===|===9P_STORAGE_DONE===" "$VM_OUTPUT" 2>/dev/null; then
1539 if grep -qE "===STORAGE_END===|===9P_STORAGE_DONE===" "$QEMU_OUTPUT" 2>/dev/null; then
1540 COMPLETE=true 1421 COMPLETE=true
1541 break 1422 break
1542 fi 1423 fi
1543 ;; 1424 ;;
1544 esac 1425 esac
1545 1426
1546 # Check for error 1427 if grep -q "===ERROR===" "$VM_OUTPUT" 2>/dev/null; then
1547 if grep -q "===ERROR===" "$QEMU_OUTPUT" 2>/dev/null; then 1428 log "ERROR" "Error in VM:"
1548 log "ERROR" "Error in QEMU:" 1429 grep -A10 "===ERROR===" "$VM_OUTPUT"
1549 grep -A10 "===ERROR===" "$QEMU_OUTPUT"
1550 break 1430 break
1551 fi 1431 fi
1552 1432
1553 # Progress indicator
1554 if [ $((i % 30)) -eq 0 ]; then 1433 if [ $((i % 30)) -eq 0 ]; then
1555 if grep -q "Docker daemon is ready" "$QEMU_OUTPUT" 2>/dev/null; then 1434 if grep -q "Docker daemon is ready" "$VM_OUTPUT" 2>/dev/null; then
1556 log "INFO" "Docker is running, executing command..." 1435 log "INFO" "Docker is running, executing command..."
1557 elif grep -q "Starting Docker" "$QEMU_OUTPUT" 2>/dev/null; then 1436 elif grep -q "Starting Docker" "$VM_OUTPUT" 2>/dev/null; then
1558 log "INFO" "Docker is starting..." 1437 log "INFO" "Docker is starting..."
1559 fi 1438 fi
1560 fi 1439 fi
@@ -1562,38 +1441,28 @@ for i in $(seq 1 $TIMEOUT); do
1562 sleep 1 1441 sleep 1
1563done 1442done
1564 1443
1565# Wait for QEMU to exit gracefully (poweroff from inside flushes disks properly) 1444# Wait for VM to exit gracefully (poweroff from inside flushes disks properly)
1566# Only kill if it hangs after seeing completion marker 1445if [ "$COMPLETE" = "true" ] && hv_is_vm_running; then
1567if [ "$COMPLETE" = "true" ] && [ -d "/proc/$QEMU_PID" ]; then 1446 log "DEBUG" "Waiting for VM to complete graceful shutdown..."
1568 log "DEBUG" "Waiting for QEMU to complete graceful shutdown..." 1447 hv_wait_vm_exit 30 && log "DEBUG" "VM shutdown complete"
1569 # Give QEMU up to 30 seconds to poweroff after command completes
1570 for wait_i in $(seq 1 30); do
1571 if [ ! -d "/proc/$QEMU_PID" ]; then
1572 log "DEBUG" "QEMU shutdown complete"
1573 break
1574 fi
1575 sleep 1
1576 done
1577fi 1448fi
1578 1449
1579# Force kill QEMU only if still running after grace period 1450# Force kill VM only if still running after grace period
1580if [ -d "/proc/$QEMU_PID" ]; then 1451if hv_is_vm_running; then
1581 log "WARN" "QEMU still running, forcing termination..." 1452 hv_stop_vm
1582 kill $QEMU_PID 2>/dev/null || true
1583 wait $QEMU_PID 2>/dev/null || true
1584fi 1453fi
1585 1454
1586# Extract results 1455# Extract results
1587if [ "$COMPLETE" = "true" ]; then 1456if [ "$COMPLETE" = "true" ]; then
1588 # Get exit code 1457 # Get exit code
1589 EXIT_CODE=$(grep -oP '===EXIT_CODE=\K[0-9]+' "$QEMU_OUTPUT" | head -1) 1458 EXIT_CODE=$(grep -oP '===EXIT_CODE=\K[0-9]+' "$VM_OUTPUT" | head -1)
1590 EXIT_CODE="${EXIT_CODE:-0}" 1459 EXIT_CODE="${EXIT_CODE:-0}"
1591 1460
1592 case "$OUTPUT_TYPE" in 1461 case "$OUTPUT_TYPE" in
1593 text) 1462 text)
1594 log "INFO" "=== Command Output ===" 1463 log "INFO" "=== Command Output ==="
1595 # Use awk for precise extraction between markers 1464 # Use awk for precise extraction between markers
1596 awk '/===OUTPUT_START===/{capture=1; next} /===OUTPUT_END===/{capture=0} capture' "$QEMU_OUTPUT" 1465 awk '/===OUTPUT_START===/{capture=1; next} /===OUTPUT_END===/{capture=0} capture' "$VM_OUTPUT"
1597 log "INFO" "=== Exit Code: $EXIT_CODE ===" 1466 log "INFO" "=== Exit Code: $EXIT_CODE ==="
1598 ;; 1467 ;;
1599 1468
@@ -1601,7 +1470,7 @@ if [ "$COMPLETE" = "true" ]; then
1601 log "INFO" "Extracting tar output..." 1470 log "INFO" "Extracting tar output..."
1602 # Use awk for precise extraction between markers 1471 # Use awk for precise extraction between markers
1603 # Strip ANSI escape codes and non-base64 characters from serial console output 1472 # Strip ANSI escape codes and non-base64 characters from serial console output
1604 awk '/===TAR_START===/{capture=1; next} /===TAR_END===/{capture=0} capture' "$QEMU_OUTPUT" | \ 1473 awk '/===TAR_START===/{capture=1; next} /===TAR_END===/{capture=0} capture' "$VM_OUTPUT" | \
1605 tr -d '\r' | sed 's/\x1b\[[0-9;]*m//g' | tr -cd 'A-Za-z0-9+/=\n' | base64 -d > "$OUTPUT_FILE" 2>"${TEMP_DIR}/b64_errors.txt" 1474 tr -d '\r' | sed 's/\x1b\[[0-9;]*m//g' | tr -cd 'A-Za-z0-9+/=\n' | base64 -d > "$OUTPUT_FILE" 2>"${TEMP_DIR}/b64_errors.txt"
1606 1475
1607 if [ -s "${TEMP_DIR}/b64_errors.txt" ]; then 1476 if [ -s "${TEMP_DIR}/b64_errors.txt" ]; then
@@ -1634,7 +1503,7 @@ if [ "$COMPLETE" = "true" ]; then
1634 # 3. sed: remove ANSI escape codes 1503 # 3. sed: remove ANSI escape codes
1635 # 4. grep -v: remove kernel log messages (lines starting with [ followed by timestamp) 1504 # 4. grep -v: remove kernel log messages (lines starting with [ followed by timestamp)
1636 # 5. tr -cd: keep only valid base64 characters 1505 # 5. tr -cd: keep only valid base64 characters
1637 awk '/===STORAGE_START===/{capture=1; next} /===STORAGE_END===/{capture=0} capture' "$QEMU_OUTPUT" | \ 1506 awk '/===STORAGE_START===/{capture=1; next} /===STORAGE_END===/{capture=0} capture' "$VM_OUTPUT" | \
1638 tr -d '\r' | \ 1507 tr -d '\r' | \
1639 sed 's/\x1b\[[0-9;]*m//g' | \ 1508 sed 's/\x1b\[[0-9;]*m//g' | \
1640 grep -v '^\[[[:space:]]*[0-9]' | \ 1509 grep -v '^\[[[:space:]]*[0-9]' | \
@@ -1675,11 +1544,11 @@ if [ "$COMPLETE" = "true" ]; then
1675 exit "${EXIT_CODE:-0}" 1544 exit "${EXIT_CODE:-0}"
1676else 1545else
1677 log "ERROR" "Command execution failed or timed out" 1546 log "ERROR" "Command execution failed or timed out"
1678 log "ERROR" "QEMU output saved to: $QEMU_OUTPUT" 1547 log "ERROR" "QEMU output saved to: $VM_OUTPUT"
1679 1548
1680 if [ "$VERBOSE" = "true" ]; then 1549 if [ "$VERBOSE" = "true" ]; then
1681 log "DEBUG" "=== Last 50 lines of QEMU output ===" 1550 log "DEBUG" "=== Last 50 lines of QEMU output ==="
1682 tail -50 "$QEMU_OUTPUT" 1551 tail -50 "$VM_OUTPUT"
1683 fi 1552 fi
1684 1553
1685 exit 1 1554 exit 1
diff --git a/recipes-containers/vcontainer/files/vxn-init.sh b/recipes-containers/vcontainer/files/vxn-init.sh
new file mode 100755
index 00000000..93e631e1
--- /dev/null
+++ b/recipes-containers/vcontainer/files/vxn-init.sh
@@ -0,0 +1,545 @@
1#!/bin/sh
2# SPDX-FileCopyrightText: Copyright (C) 2025 Bruce Ashfield
3#
4# SPDX-License-Identifier: GPL-2.0-only
5#
6# vxn-init.sh
7# Init script for vxn: execute container entrypoint directly in Xen DomU
8#
9# This script runs on a real filesystem after switch_root from initramfs.
10# Unlike vdkr-init.sh which starts Docker, this script directly mounts
11# the container's rootfs and executes the entrypoint via chroot.
12#
13# The VM IS the container — no container runtime runs inside the guest.
14#
15# Drive layout:
16# /dev/xvda = rootfs.img (this script runs from here, mounted as /)
17# /dev/xvdb = container rootfs (OCI image, passed from host)
18#
19# Kernel parameters (reuses docker_ prefix for frontend compatibility):
20# docker_cmd=<base64> Base64-encoded entrypoint command
21# docker_input=<type> Input type: none, oci, rootfs (default: none)
22# docker_output=<type> Output type: text (default: text)
23# docker_network=1 Enable networking
24# docker_interactive=1 Interactive mode (suppress boot messages)
25# docker_daemon=1 Daemon mode (command loop on hvc1)
26#
27# Version: 1.0.0
28
29# Set runtime-specific parameters before sourcing common code
30VCONTAINER_RUNTIME_NAME="vxn"
31VCONTAINER_RUNTIME_CMD="chroot"
32VCONTAINER_RUNTIME_PREFIX="docker"
33VCONTAINER_STATE_DIR="/var/lib/vxn"
34VCONTAINER_SHARE_NAME="vxn_share"
35VCONTAINER_VERSION="1.0.0"
36
37# Source common init functions
38. /vcontainer-init-common.sh
39
40# ============================================================================
41# Container Rootfs Handling
42# ============================================================================
43
# Find the container rootfs directory from the input disk (/mnt/input).
# Sets CONTAINER_ROOT to the path of the usable rootfs.
# Returns non-zero when no rootfs could be located or extracted.
find_container_rootfs() {
    CONTAINER_ROOT=""

    if [ ! -d /mnt/input ] || [ -z "$(ls -A /mnt/input 2>/dev/null)" ]; then
        log "WARNING: No container rootfs found on input disk"
        return 1
    fi

    # Case 1: the input disk IS the rootfs (has typical Linux dirs)
    if [ -d /mnt/input/bin ] || [ -d /mnt/input/usr ]; then
        CONTAINER_ROOT="/mnt/input"
        log "Container rootfs: direct mount (/mnt/input)"
        return 0
    fi

    # Case 2: OCI image layout (index.json + blobs/)
    if [ -f /mnt/input/index.json ] || [ -f /mnt/input/oci-layout ]; then
        log "Found OCI layout on input disk, extracting layers..."
        # Propagate extraction failure (e.g. an oci-layout marker file is
        # present but index.json is missing) instead of pointing
        # CONTAINER_ROOT at an empty directory and claiming success.
        if extract_oci_rootfs /mnt/input /mnt/container; then
            CONTAINER_ROOT="/mnt/container"
            return 0
        fi
        log "WARNING: OCI layer extraction failed"
        return 1
    fi

    # Case 3: rootfs/ subdirectory convention
    if [ -d /mnt/input/rootfs ]; then
        CONTAINER_ROOT="/mnt/input/rootfs"
        log "Container rootfs: /mnt/input/rootfs"
        return 0
    fi

    log "WARNING: Could not determine rootfs layout in /mnt/input"
    [ "$QUIET_BOOT" = "0" ] && ls -la /mnt/input/
    return 1
}
80
# Extract OCI image layers into a flat rootfs.
# Usage: extract_oci_rootfs <oci_dir> <target_dir>
# With jq: follows index.json -> manifest and applies layers in order
# (bottom to top). Without jq: best-effort untar of every blob.
# Returns 1 only when index.json is missing; an incomplete-looking
# extraction is logged but still returns 0 (best effort).
extract_oci_rootfs() {
    local oci_dir="$1"
    local target_dir="$2"
    local manifest_digest manifest_file layers layer_digest layer_file blob

    mkdir -p "$target_dir"

    if [ ! -f "$oci_dir/index.json" ]; then
        log "ERROR: No index.json in OCI layout"
        return 1
    fi

    if command -v jq >/dev/null 2>&1; then
        manifest_digest=$(jq -r '.manifests[0].digest' "$oci_dir/index.json")
        # Map "sha256:abc" -> "blobs/sha256/abc" with POSIX expansions;
        # this script runs under /bin/sh where ${var/:/\/} is a bashism.
        manifest_file="$oci_dir/blobs/${manifest_digest%%:*}/${manifest_digest##*:}"

        if [ -f "$manifest_file" ]; then
            # Extract layer digests from manifest (in order, bottom to top)
            layers=$(jq -r '.layers[].digest' "$manifest_file")
            for layer_digest in $layers; do
                layer_file="$oci_dir/blobs/${layer_digest%%:*}/${layer_digest##*:}"
                if [ -f "$layer_file" ]; then
                    log "Extracting layer: ${layer_digest#sha256:}"
                    # tar errors tolerated: whiteouts/device nodes may fail
                    tar -xf "$layer_file" -C "$target_dir" 2>/dev/null || true
                fi
            done
        fi
    else
        # Fallback: untar every blob; non-tar blobs (JSON manifests/configs)
        # simply fail and are skipped
        log "No jq available, extracting all blob layers..."
        for blob in "$oci_dir"/blobs/sha256/*; do
            if [ -f "$blob" ]; then
                tar -xf "$blob" -C "$target_dir" 2>/dev/null || true
            fi
        done
    fi

    # Sanity check: /hello covers single-binary images like hello-world
    if [ -d "$target_dir/bin" ] || [ -d "$target_dir/usr" ] || [ -f "$target_dir/hello" ]; then
        log "OCI rootfs extracted to $target_dir"
        return 0
    else
        log "WARNING: Extracted OCI rootfs may be incomplete"
        [ "$QUIET_BOOT" = "0" ] && ls -la "$target_dir/"
        return 0
    fi
}
128
# Parse OCI config for environment, entrypoint, cmd, workdir.
# Sets: OCI_ENTRYPOINT, OCI_CMD, OCI_ENV, OCI_WORKDIR
# All four are left empty when no config can be located; callers then
# fall back to the kernel-cmdline command (see determine_exec_command).
parse_oci_config() {
    OCI_ENTRYPOINT=""
    OCI_CMD=""
    OCI_ENV=""
    OCI_WORKDIR=""

    local config_file=""

    # Look for config in OCI layout on input disk
    if [ -f /mnt/input/index.json ]; then
        config_file=$(oci_find_config_blob /mnt/input)
    fi

    # Check for standalone config.json
    [ -z "$config_file" ] && [ -f /mnt/input/config.json ] && config_file="/mnt/input/config.json"

    if [ -z "$config_file" ] || [ ! -f "$config_file" ]; then
        log "No OCI config found (using command from kernel cmdline)"
        return
    fi

    log "Parsing OCI config: $config_file"

    if command -v jq >/dev/null 2>&1; then
        # NOTE(review): join(" ") flattens the Entrypoint/Cmd arrays, so an
        # argument containing spaces loses its word boundary — acceptable
        # for simple images; confirm before relying on complex CMDs.
        OCI_ENTRYPOINT=$(jq -r '(.config.Entrypoint // []) | join(" ")' "$config_file" 2>/dev/null)
        OCI_CMD=$(jq -r '(.config.Cmd // []) | join(" ")' "$config_file" 2>/dev/null)
        OCI_WORKDIR=$(jq -r '.config.WorkingDir // ""' "$config_file" 2>/dev/null)
        OCI_ENV=$(jq -r '(.config.Env // []) | .[]' "$config_file" 2>/dev/null)
    else
        # Fallback: parse OCI config JSON with grep/sed (no jq in minimal rootfs).
        # The patterns assume compact (non-pretty-printed) JSON, e.g. as
        # written by skopeo; values containing ',' or ']' will mis-parse.
        log "Using grep/sed fallback for OCI config parsing"
        OCI_ENTRYPOINT=$(oci_grep_json_array "Entrypoint" "$config_file")
        OCI_CMD=$(oci_grep_json_array "Cmd" "$config_file")
        OCI_WORKDIR=$(grep -o '"WorkingDir":"[^"]*"' "$config_file" 2>/dev/null | sed 's/"WorkingDir":"//;s/"$//')
        OCI_ENV=$(grep -o '"Env":\[[^]]*\]' "$config_file" 2>/dev/null | \
            sed 's/"Env":\[//;s/\]$//' | tr ',' '\n' | sed 's/^ *"//;s/"$//')
    fi

    log "OCI config: entrypoint='$OCI_ENTRYPOINT' cmd='$OCI_CMD' workdir='$OCI_WORKDIR'"
}
171
# Follow OCI index.json -> manifest -> config blob.
# Works with or without jq; prints the config blob path on stdout, or
# nothing (non-zero status) if the chain cannot be resolved.
# Usage: oci_find_config_blob <oci_dir>
oci_find_config_blob() {
    local oci_dir="$1"
    local digest=""
    local blob_file=""

    if command -v jq >/dev/null 2>&1; then
        digest=$(jq -r '.manifests[0].digest' "$oci_dir/index.json" 2>/dev/null)
        # "sha256:abc" -> "blobs/sha256/abc" via POSIX expansions
        # (${digest/://} is a bashism; this script runs under /bin/sh)
        blob_file="$oci_dir/blobs/${digest%%:*}/${digest##*:}"
        [ -f "$blob_file" ] && digest=$(jq -r '.config.digest' "$blob_file" 2>/dev/null)
        blob_file="$oci_dir/blobs/${digest%%:*}/${digest##*:}"
    else
        # grep fallback: extract first digest from index.json
        # (assumes compact JSON as written by skopeo)
        digest=$(grep -o '"digest":"sha256:[a-f0-9]*"' "$oci_dir/index.json" 2>/dev/null | \
            head -n 1 | sed 's/"digest":"//;s/"$//')
        blob_file="$oci_dir/blobs/${digest%%:*}/${digest##*:}"
        if [ -f "$blob_file" ]; then
            # Extract config digest from the manifest's "config" object
            digest=$(grep -o '"config":{[^}]*}' "$blob_file" 2>/dev/null | \
                grep -o '"digest":"sha256:[a-f0-9]*"' | sed 's/"digest":"//;s/"$//')
            blob_file="$oci_dir/blobs/${digest%%:*}/${digest##*:}"
        fi
    fi

    [ -f "$blob_file" ] && echo "$blob_file"
}
199
# Extract a JSON array value as a space-separated string using grep/sed.
# Usage: oci_grep_json_array "Entrypoint" config_file
# Handles compact JSON such as: "Cmd":["/bin/sh","-c","echo hi"]
# Prints nothing when the key is absent.
oci_grep_json_array() {
    local key="$1"
    local file="$2"
    local raw

    # Pull out the bracketed array for this key (compact JSON only)
    raw=$(grep -o "\"$key\":\\[[^]]*\\]" "$file" 2>/dev/null)

    # Drop the key and brackets, split on commas, strip the surrounding
    # quotes from each element, then rejoin everything with single spaces.
    printf '%s\n' "$raw" | \
        sed "s/\"$key\":\\[//;s/\\]$//" | \
        tr ',' '\n' | \
        sed 's/^ *"//;s/"$//' | \
        tr '\n' ' ' | \
        sed 's/ $//'
}
210
211# ============================================================================
212# Command Resolution
213# ============================================================================
214
# Parse a "docker run" command line and return everything after the
# image name (the in-container command).
#   "docker run --rm hello-world"          -> ""   (no cmd, use OCI defaults)
#   "docker run --rm hello-world /bin/sh"  -> "/bin/sh"
parse_docker_run_cmd() {
    local cmdline="$1"
    local image_seen=false
    local tail_cmd=""
    local eat_value=false
    local stripped

    # Drop the "docker run " / "podman run " wrapper prefix
    stripped=$(echo "$cmdline" | sed 's/^[a-z]* run //')

    for token in $stripped; do
        # Everything after the image name belongs to the container command
        if [ "$image_seen" = "true" ]; then
            tail_cmd="$tail_cmd $token"
            continue
        fi

        # Previous token was an option that consumes this one as its value
        if [ "$eat_value" = "true" ]; then
            eat_value=false
            continue
        fi

        case "$token" in
            --rm|--detach|-d|-i|--interactive|-t|--tty|--privileged)
                # Boolean flags: nothing else to consume
                ;;
            -p|--publish|-v|--volume|-e|--env|--name|--network|-w|--workdir|--entrypoint|-m|--memory|--cpus)
                # Options whose value is the next token
                eat_value=true
                ;;
            -p=*|--publish=*|-v=*|--volume=*|-e=*|--env=*|--name=*|--network=*|-w=*|--workdir=*|--entrypoint=*)
                # Inline "--opt=value" form: self-contained
                ;;
            -*)
                # Unknown flag: ignore
                ;;
            *)
                # First non-option argument is the image name — skip it
                image_seen=true
                ;;
        esac
    done

    echo "$tail_cmd" | sed 's/^ *//'
}
257
# Determine the command to execute inside the container.
# Priority: 1) explicit command from docker run args, 2) RUNTIME_CMD as a
# raw command, 3) OCI entrypoint + cmd, 4) /bin/sh fallback.
# Reads globals: RUNTIME_CMD, OCI_ENTRYPOINT, OCI_CMD. Prints the result.
determine_exec_command() {
    local resolved=""

    if [ -n "$RUNTIME_CMD" ]; then
        if echo "$RUNTIME_CMD" | grep -qE '^(docker|podman) run '; then
            # Wrapper invocation: keep only what follows the image name.
            # An empty result falls through to the OCI config below.
            resolved=$(parse_docker_run_cmd "$RUNTIME_CMD")
        else
            # Raw command — use verbatim
            resolved="$RUNTIME_CMD"
        fi
    fi

    # No explicit command: assemble one from the OCI image config
    if [ -z "$resolved" ]; then
        if [ -n "$OCI_ENTRYPOINT" ]; then
            resolved="$OCI_ENTRYPOINT"
            [ -n "$OCI_CMD" ] && resolved="$resolved $OCI_CMD"
        elif [ -n "$OCI_CMD" ]; then
            resolved="$OCI_CMD"
        fi
    fi

    # Last resort: an interactive shell
    if [ -z "$resolved" ]; then
        resolved="/bin/sh"
        log "No command specified, defaulting to /bin/sh"
    fi

    echo "$resolved"
}
293
294# ============================================================================
295# Container Execution
296# ============================================================================
297
# Set up environment variables for the container.
# Exports each KEY=VALUE line from OCI_ENV into the current shell and
# guarantees sane PATH/HOME/TERM defaults.
setup_container_env() {
    # Apply OCI environment variables.  A here-document feeds the loop
    # instead of `echo | while`: a pipeline runs the loop body in a
    # subshell, which silently discards every export (the original bug).
    if [ -n "$OCI_ENV" ]; then
        while IFS= read -r env_line; do
            # Malformed lines (not NAME=VALUE) are skipped, not fatal
            [ -n "$env_line" ] && export "$env_line" 2>/dev/null || true
        done <<VXN_ENV_EOF
$OCI_ENV
VXN_ENV_EOF
    fi

    # Ensure basic environment
    export PATH="${PATH:-/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin}"
    export HOME="${HOME:-/root}"
    export TERM="${TERM:-linux}"
}
312
# Execute a command inside the container rootfs via chroot.
# Mounts /proc, /sys, /dev inside the container and copies DNS config.
#   $1 - path to the container rootfs
#   $2 - command string to run
# Globals read: OCI_WORKDIR, RUNTIME_INTERACTIVE
# Sets: EXEC_EXIT_CODE (exit status of the container command)
exec_in_container() {
    local rootfs="$1"
    local cmd="$2"
    local workdir="${OCI_WORKDIR:-/}"

    # Mount essential filesystems inside the container rootfs.
    # Failures are tolerated: minimal images may lack mount points or the
    # mounts may already exist from a previous invocation.
    mkdir -p "$rootfs/proc" "$rootfs/sys" "$rootfs/dev" "$rootfs/tmp" 2>/dev/null || true
    mount -t proc proc "$rootfs/proc" 2>/dev/null || true
    mount -t sysfs sysfs "$rootfs/sys" 2>/dev/null || true
    mount --bind /dev "$rootfs/dev" 2>/dev/null || true

    # Copy resolv.conf for DNS
    if [ -f /etc/resolv.conf ]; then
        mkdir -p "$rootfs/etc" 2>/dev/null || true
        cp /etc/resolv.conf "$rootfs/etc/resolv.conf" 2>/dev/null || true
    fi

    log "Executing in container: $cmd"
    log "Working directory: $workdir"

    # Determine how to exec: use /bin/sh if available, otherwise direct exec.
    # Some images (e.g. hello-world) ship a single static binary and no shell.
    local use_sh=true
    if [ ! -x "$rootfs/bin/sh" ]; then
        use_sh=false
        log "No /bin/sh in container, using direct exec"
    fi

    if [ "$RUNTIME_INTERACTIVE" = "1" ]; then
        # Interactive mode: connect stdin/stdout directly
        export TERM=linux
        # Clear the current console line before handing over the terminal
        printf '\r\033[K'
        if [ "$use_sh" = "true" ]; then
            # cd errors are ignored so a bogus workdir doesn't block exec
            chroot "$rootfs" /bin/sh -c "cd '$workdir' 2>/dev/null; exec $cmd"
        else
            # $cmd intentionally unquoted: it must word-split into argv
            # since there is no shell inside the container to do it
            chroot "$rootfs" $cmd
        fi
        EXEC_EXIT_CODE=$?
    else
        # Non-interactive: capture output, then replay it between markers
        # that the host-side runner extracts from the console log
        EXEC_OUTPUT="/tmp/container_output.txt"
        EXEC_EXIT_CODE=0
        if [ "$use_sh" = "true" ]; then
            chroot "$rootfs" /bin/sh -c "cd '$workdir' 2>/dev/null; exec $cmd" \
                > "$EXEC_OUTPUT" 2>&1 || EXEC_EXIT_CODE=$?
        else
            chroot "$rootfs" $cmd \
                > "$EXEC_OUTPUT" 2>&1 || EXEC_EXIT_CODE=$?
        fi

        log "Exit code: $EXEC_EXIT_CODE"

        echo "===OUTPUT_START==="
        cat "$EXEC_OUTPUT"
        echo "===OUTPUT_END==="
        echo "===EXIT_CODE=$EXEC_EXIT_CODE==="
    fi

    # Cleanup mounts inside container
    umount "$rootfs/proc" 2>/dev/null || true
    umount "$rootfs/sys" 2>/dev/null || true
    umount "$rootfs/dev" 2>/dev/null || true
}
377
378# ============================================================================
379# Daemon Mode (vxn-specific)
380# ============================================================================
381
# In daemon mode, commands come via the hvc1 console channel
# and are executed in the container rootfs via chroot.
# Protocol on FD 3 (one line per message):
#   in:  base64-encoded command, or literal ===PING=== / ===SHUTDOWN===
#   out: ===OUTPUT_START===/.../===EXIT_CODE=N===/===END=== framing
run_vxn_daemon_mode() {
    log "=== vxn Daemon Mode ==="
    log "Container rootfs: ${CONTAINER_ROOT:-(none)}"
    # NOTE(review): RUNTIME_IDLE_TIMEOUT is only logged here; the idle
    # watchdog presumably lives in common code keyed off ACTIVITY_FILE —
    # confirm against vcontainer-init-common.sh
    log "Idle timeout: ${RUNTIME_IDLE_TIMEOUT}s"

    # Find the command channel (prefer hvc1 for Xen; virtio serial ports
    # are checked as fallbacks so the same script works under QEMU)
    DAEMON_PORT=""
    for port in /dev/hvc1 /dev/vport0p1 /dev/vport1p1 /dev/virtio-ports/vxn; do
        if [ -c "$port" ]; then
            DAEMON_PORT="$port"
            log "Found command channel: $port"
            break
        fi
    done

    if [ -z "$DAEMON_PORT" ]; then
        log "ERROR: No command channel for daemon mode"
        ls -la /dev/hvc* /dev/vport* /dev/virtio-ports/ 2>/dev/null || true
        sleep 5
        reboot -f
    fi

    # Open bidirectional FD
    exec 3<>"$DAEMON_PORT"

    log "Daemon ready, waiting for commands..."

    # Touched on every received command; serves as the activity marker
    ACTIVITY_FILE="/tmp/.daemon_activity"
    touch "$ACTIVITY_FILE"
    DAEMON_PID=$$

    trap 'log "Shutdown signal"; sync; reboot -f' TERM
    trap 'rm -f "$ACTIVITY_FILE"; exit' INT

    # Command loop: one message per line on FD 3
    while true; do
        CMD_B64=""
        read -r CMD_B64 <&3
        READ_EXIT=$?

        if [ $READ_EXIT -eq 0 ] && [ -n "$CMD_B64" ]; then
            touch "$ACTIVITY_FILE"

            # In-band control messages (sent literally, not base64)
            case "$CMD_B64" in
                "===PING===")
                    echo "===PONG===" | cat >&3
                    continue
                    ;;
                "===SHUTDOWN===")
                    log "Received shutdown command"
                    echo "===SHUTTING_DOWN===" | cat >&3
                    break
                    ;;
            esac

            # Decode command
            CMD=$(echo "$CMD_B64" | base64 -d 2>/dev/null)
            if [ -z "$CMD" ]; then
                printf "===ERROR===\nFailed to decode command\n===END===\n" | cat >&3
                continue
            fi

            log "Executing: $CMD"

            # Execute command in container rootfs (or host rootfs if no container)
            EXEC_OUTPUT="/tmp/daemon_output.txt"
            EXEC_EXIT_CODE=0
            if [ -n "$CONTAINER_ROOT" ]; then
                chroot "$CONTAINER_ROOT" /bin/sh -c "$CMD" \
                    > "$EXEC_OUTPUT" 2>&1 || EXEC_EXIT_CODE=$?
            else
                eval "$CMD" > "$EXEC_OUTPUT" 2>&1 || EXEC_EXIT_CODE=$?
            fi

            # Framed reply; piping through cat writes the whole response
            # to the console FD from a single process
            {
                echo "===OUTPUT_START==="
                cat "$EXEC_OUTPUT"
                echo "===OUTPUT_END==="
                echo "===EXIT_CODE=$EXEC_EXIT_CODE==="
                echo "===END==="
            } | cat >&3

            log "Command completed (exit code: $EXEC_EXIT_CODE)"
        else
            # Nothing readable yet: avoid a busy spin on the console FD
            sleep 0.1
        fi
    done

    exec 3>&-
    log "Daemon shutting down..."
}
475
476# ============================================================================
477# Main
478# ============================================================================
479
480# Initialize base environment
481setup_base_environment
482mount_base_filesystems
483
484# Check for quiet boot mode
485check_quiet_boot
486
487log "=== vxn Init ==="
488log "Version: $VCONTAINER_VERSION"
489
490# Mount tmpfs directories and cgroups
491mount_tmpfs_dirs
492setup_cgroups
493
494# Parse kernel command line
495parse_cmdline
496
497# Detect and configure disks
498detect_disks
499
500# Mount input disk (container rootfs from host)
501mount_input_disk
502
503# Configure networking
504configure_networking
505
506# Find the container rootfs on the input disk
507if ! find_container_rootfs; then
508 if [ "$RUNTIME_DAEMON" = "1" ]; then
509 log "No container rootfs, daemon mode will execute on host rootfs"
510 CONTAINER_ROOT=""
511 else
512 echo "===ERROR==="
513 echo "No container rootfs found on input disk"
514 echo "Contents of /mnt/input:"
515 ls -la /mnt/input/ 2>/dev/null || echo "(empty)"
516 sleep 2
517 reboot -f
518 fi
519fi
520
521# Parse OCI config for entrypoint/env/workdir
522parse_oci_config
523
524# Set up container environment
525setup_container_env
526
527if [ "$RUNTIME_DAEMON" = "1" ]; then
528 run_vxn_daemon_mode
529else
530 # Determine command to execute
531 EXEC_CMD=$(determine_exec_command)
532
533 if [ -z "$EXEC_CMD" ]; then
534 echo "===ERROR==="
535 echo "No command to execute"
536 sleep 2
537 reboot -f
538 fi
539
540 # Execute in container rootfs
541 exec_in_container "$CONTAINER_ROOT" "$EXEC_CMD"
542fi
543
544# Graceful shutdown
545graceful_shutdown
diff --git a/recipes-containers/vcontainer/files/vxn.sh b/recipes-containers/vcontainer/files/vxn.sh
new file mode 100644
index 00000000..43693438
--- /dev/null
+++ b/recipes-containers/vcontainer/files/vxn.sh
@@ -0,0 +1,51 @@
#!/bin/bash
# SPDX-FileCopyrightText: Copyright (C) 2025 Bruce Ashfield
#
# SPDX-License-Identifier: GPL-2.0-only
#
# vxn: Docker CLI for Xen-based container execution
#
# This provides a familiar docker-like CLI that executes commands inside
# a Xen DomU guest with the target architecture's Docker.
#
# This is the Xen equivalent of vdkr (QEMU). It uses the same rootfs
# images and init scripts, but boots as a Xen PV guest instead of QEMU.
#
# Requires: Xen Dom0 with xl toolstack
#
# This wrapper only sets vxn-specific configuration, then delegates all
# argument handling to vcontainer-common.sh.

# Set runtime-specific parameters before sourcing common code
VCONTAINER_RUNTIME_NAME="vxn"
VCONTAINER_RUNTIME_CMD="docker"
VCONTAINER_RUNTIME_PREFIX="VXN"          # env vars are read as VXN_*
VCONTAINER_IMPORT_TARGET="docker-daemon:"
VCONTAINER_STATE_FILE="docker-state.img"
VCONTAINER_OTHER_PREFIX="VDKR"           # sibling runtime, for shared settings
VCONTAINER_VERSION="1.0.0"

# Select Xen hypervisor backend
export VCONTAINER_HYPERVISOR="xen"

# Set blob directory for target install (/usr/share/vxn has kernel, initramfs, rootfs)
# Use VXN_BLOB_DIR which _get_env_var() in vcontainer-common.sh will find
# via ${VCONTAINER_RUNTIME_PREFIX}_BLOB_DIR
if [ -z "${VXN_BLOB_DIR:-}" ]; then
    if [ -d "/usr/share/vxn" ]; then
        export VXN_BLOB_DIR="/usr/share/vxn"
    fi
fi

# Export runtime name so vrunner.sh (separate process) sees it
export VCONTAINER_RUNTIME_NAME

# Locate shared scripts - check script directory first, then /usr/lib/vxn.
# "$@" is forwarded so the common code can process the CLI arguments.
SCRIPT_DIR="$(dirname "${BASH_SOURCE[0]}")"
if [ -f "${SCRIPT_DIR}/vcontainer-common.sh" ]; then
    export VCONTAINER_LIBDIR="${SCRIPT_DIR}"
    source "${SCRIPT_DIR}/vcontainer-common.sh" "$@"
elif [ -f "/usr/lib/vxn/vcontainer-common.sh" ]; then
    export VCONTAINER_LIBDIR="/usr/lib/vxn"
    source "/usr/lib/vxn/vcontainer-common.sh" "$@"
else
    echo "Error: vcontainer-common.sh not found" >&2
    exit 1
fi
diff --git a/recipes-core/vxn/vxn-initramfs-create.inc b/recipes-core/vxn/vxn-initramfs-create.inc
new file mode 100644
index 00000000..bde8bba9
--- /dev/null
+++ b/recipes-core/vxn/vxn-initramfs-create.inc
@@ -0,0 +1,223 @@
1# SPDX-FileCopyrightText: Copyright (C) 2025 Bruce Ashfield
2#
3# SPDX-License-Identifier: MIT
4#
5# vxn-initramfs-create.inc
6# ===========================================================================
7# Shared code for building Xen DomU boot blobs for vxn
8# ===========================================================================
9#
10# This .inc file packages boot blobs for vxn (vcontainer on Xen).
11# It reuses the same initramfs and rootfs images built by vruntime
12# multiconfig (same images as vdkr/vpdmn), since the init scripts
13# detect the hypervisor at boot time.
14#
15# The kernel from vruntime already includes Xen PV support via vxn.cfg
16# fragment (added to DISTRO_FEATURES in vruntime.conf).
17#
18# Required variables from including recipe:
19# VXN_RUNTIME - runtime to source blobs from ("vdkr" or "vpdmn")
20#
21# Boot flow on Xen Dom0:
22# xl create <domain.cfg>
23# -> Xen boots kernel + tiny initramfs in DomU
24# -> preinit mounts rootfs.img from /dev/xvda
25# -> switch_root into rootfs.img
26# -> init detects Xen, uses /dev/xvd* and trans=xen
27#
28# ===========================================================================
29
30HOMEPAGE = "https://git.yoctoproject.org/meta-virtualization/"
31LICENSE = "MIT"
32LIC_FILES_CHKSUM = "file://${COMMON_LICENSE_DIR}/MIT;md5=0835ade698e0bcf8506ecda2f7b4f302"
33
34inherit deploy
35
36EXCLUDE_FROM_WORLD = "1"
37DEPENDS = "squashfs-tools-native"
38
39# Default to vdkr (Docker) as the source runtime
40VXN_RUNTIME ?= "vdkr"
41
42# Always rebuild - init script injection must not be sstate-cached
43# because the rootfs.img content comes from the deploy dir (untracked)
44SSTATE_SKIP_CREATION = "1"
45do_compile[nostamp] = "1"
46do_deploy[nostamp] = "1"
47
# Anonymous python: runs at recipe parse time. When the vruntime
# multiconfig is enabled in BBMULTICONFIG, wire do_compile to the
# multiconfig image/kernel tasks so the blobs exist before we copy them.
# If the multiconfig is absent, no mcdepends are added and do_compile
# will bbfatal at build time with a hint on what to build first.
python () {

    mc = d.getVar('VXN_MULTICONFIG')
    runtime = d.getVar('VXN_RUNTIME')
    bbmulticonfig = (d.getVar('BBMULTICONFIG') or "").split()
    if mc in bbmulticonfig:
        # All blobs come from the vruntime multiconfig - kernel, initramfs, rootfs
        mcdeps = ' '.join([
            'mc::%s:%s-tiny-initramfs-image:do_image_complete' % (mc, runtime),
            'mc::%s:%s-rootfs-image:do_image_complete' % (mc, runtime),
            'mc::%s:virtual/kernel:do_deploy' % mc,
        ])
        d.setVarFlag('do_compile', 'mcdepends', mcdeps)
}
62
63INHIBIT_DEFAULT_DEPS = "1"
64
65# Init scripts to inject into the rootfs squashfs
66FILESEXTRAPATHS:prepend := "${THISDIR}/../../recipes-containers/vcontainer/files:"
67SRC_URI = "\
68 file://vxn-init.sh \
69 file://vcontainer-init-common.sh \
70"
71
72S = "${UNPACKDIR}"
73B = "${WORKDIR}/build"
74
def vxn_get_kernel_image_name(d):
    # Kernel image file name deployed for the target architecture.
    arch = d.getVar('TARGET_ARCH')
    if arch in ('x86_64', 'i686', 'i586'):
        return 'bzImage'
    if arch == 'arm':
        return 'zImage'
    # aarch64 and any unrecognized arch use the generic 'Image'
    return 'Image'
84
def vxn_get_multiconfig_name(d):
    # vruntime multiconfig that produces the boot blobs for this arch.
    arch = d.getVar('TARGET_ARCH')
    if arch in ('x86_64', 'i686', 'i586'):
        return 'vruntime-x86-64'
    # aarch64 and anything unrecognized default to the aarch64 multiconfig
    return 'vruntime-aarch64'
92
def vxn_get_blob_arch(d):
    # Directory name under vxn/ that blobs for this arch are deployed to.
    arch = d.getVar('TARGET_ARCH')
    if arch in ('x86_64', 'i686', 'i586'):
        return 'x86_64'
    # aarch64 and anything unrecognized map to the aarch64 blob dir
    return 'aarch64'
100
101KERNEL_IMAGETYPE_INITRAMFS = "${@vxn_get_kernel_image_name(d)}"
102VXN_MULTICONFIG = "${@vxn_get_multiconfig_name(d)}"
103BLOB_ARCH = "${@vxn_get_blob_arch(d)}"
104
105VXN_MC_DEPLOY = "${TOPDIR}/tmp-${VXN_MULTICONFIG}/deploy/images/${MACHINE}"
106
# Assemble the DomU boot blobs in ${B}: tiny initramfs, rootfs squashfs
# (with vxn init scripts injected) and the kernel image. All inputs come
# from the vruntime multiconfig deploy dir; bbfatal tells the user what
# to build first when they are missing.
do_compile() {
    mkdir -p ${B}

    MC_TMPDIR="${TOPDIR}/tmp-${VXN_MULTICONFIG}"
    MC_DEPLOY="${MC_TMPDIR}/deploy/images/${MACHINE}"

    # =========================================================================
    # PART 1: COPY TINY INITRAMFS (same as vdkr/vpdmn)
    # =========================================================================
    bbnote "Copying tiny initramfs from image build..."

    INITRAMFS_SRC="${MC_DEPLOY}/${VXN_RUNTIME}-tiny-initramfs-image-${MACHINE}.cpio.gz"

    if [ ! -f "${INITRAMFS_SRC}" ]; then
        bbfatal "Initramfs not found at ${INITRAMFS_SRC}. Build it first with: bitbake mc:${VXN_MULTICONFIG}:${VXN_RUNTIME}-tiny-initramfs-image"
    fi

    cp "${INITRAMFS_SRC}" ${B}/initramfs.cpio.gz
    INITRAMFS_SIZE=$(stat -c%s ${B}/initramfs.cpio.gz)
    bbnote "Initramfs copied: ${INITRAMFS_SIZE} bytes ($(expr ${INITRAMFS_SIZE} / 1024)KB)"

    # =========================================================================
    # PART 2: COPY ROOTFS (same squashfs, works under both QEMU and Xen)
    # =========================================================================
    bbnote "Copying rootfs from image build..."

    ROOTFS_SRC="${MC_DEPLOY}/${VXN_RUNTIME}-rootfs-image-${MACHINE}.rootfs.squashfs"

    if [ ! -f "${ROOTFS_SRC}" ]; then
        bbfatal "Rootfs image not found at ${ROOTFS_SRC}. Build it first with: bitbake mc:${VXN_MULTICONFIG}:${VXN_RUNTIME}-rootfs-image"
    fi

    cp "${ROOTFS_SRC}" ${B}/rootfs.img
    ROOTFS_SIZE=$(stat -c%s ${B}/rootfs.img)
    bbnote "Rootfs image copied: ${ROOTFS_SIZE} bytes ($(expr ${ROOTFS_SIZE} / 1024 / 1024)MB)"

    # Inject vxn init scripts into the rootfs squashfs.
    # squashfs cannot be modified in place, so unsquash, add the scripts
    # at the image root, and re-squash with the same (xz) compression.
    bbnote "Injecting vxn init scripts into rootfs..."
    UNSQUASH_DIR="${B}/rootfs-unsquash"
    rm -rf "${UNSQUASH_DIR}"
    unsquashfs -d "${UNSQUASH_DIR}" ${B}/rootfs.img
    install -m 0755 ${S}/vxn-init.sh ${UNSQUASH_DIR}/vxn-init.sh
    install -m 0755 ${S}/vcontainer-init-common.sh ${UNSQUASH_DIR}/vcontainer-init-common.sh
    rm -f ${B}/rootfs.img
    mksquashfs "${UNSQUASH_DIR}" ${B}/rootfs.img -noappend -comp xz
    rm -rf "${UNSQUASH_DIR}"
    ROOTFS_SIZE=$(stat -c%s ${B}/rootfs.img)
    bbnote "Rootfs with vxn init scripts: ${ROOTFS_SIZE} bytes ($(expr ${ROOTFS_SIZE} / 1024 / 1024)MB)"

    # =========================================================================
    # PART 3: COPY KERNEL (Xen PV-capable via vxn.cfg fragment)
    # =========================================================================
    # A missing kernel is only a warning (not fatal) — do_deploy copies
    # the kernel conditionally as well.
    bbnote "Copying kernel image..."
    KERNEL_FILE="${DEPLOY_DIR_IMAGE}/${KERNEL_IMAGETYPE_INITRAMFS}"
    if [ -f "${KERNEL_FILE}" ]; then
        cp "${KERNEL_FILE}" ${B}/kernel
        KERNEL_SIZE=$(stat -c%s ${B}/kernel)
        bbnote "Kernel copied: ${KERNEL_SIZE} bytes ($(expr ${KERNEL_SIZE} / 1024 / 1024)MB)"
    else
        bbwarn "Kernel not found at ${KERNEL_FILE}"
    fi
}
169
170# This is a deploy-only recipe - no packages produced.
171# PACKAGES="" prevents the rootfs task from looking for package manifests.
172PACKAGES = ""
173do_install[noexec] = "1"
174do_package[noexec] = "1"
175do_packagedata[noexec] = "1"
176do_package_write_rpm[noexec] = "1"
177do_package_write_ipk[noexec] = "1"
178do_package_write_deb[noexec] = "1"
179do_populate_sysroot[noexec] = "1"
180
# Publish the assembled blobs to ${DEPLOYDIR}/vxn/${BLOB_ARCH}/ along
# with a README describing the boot flow. Each blob is copied only if
# do_compile produced it (the kernel in particular may be absent).
do_deploy() {
    install -d ${DEPLOYDIR}/vxn/${BLOB_ARCH}

    if [ -f ${B}/initramfs.cpio.gz ]; then
        install -m 0644 ${B}/initramfs.cpio.gz ${DEPLOYDIR}/vxn/${BLOB_ARCH}/
        bbnote "Deployed initramfs.cpio.gz to vxn/${BLOB_ARCH}/"
    fi

    if [ -f ${B}/rootfs.img ]; then
        install -m 0644 ${B}/rootfs.img ${DEPLOYDIR}/vxn/${BLOB_ARCH}/
        bbnote "Deployed rootfs.img to vxn/${BLOB_ARCH}/"
    fi

    if [ -f ${B}/kernel ]; then
        install -m 0644 ${B}/kernel ${DEPLOYDIR}/vxn/${BLOB_ARCH}/${KERNEL_IMAGETYPE_INITRAMFS}
        bbnote "Deployed kernel as vxn/${BLOB_ARCH}/${KERNEL_IMAGETYPE_INITRAMFS}"
    fi

    # Unquoted heredoc: ${...} expands at task run time (bitbake has
    # already substituted its own variables when the task was emitted)
    cat > ${DEPLOYDIR}/vxn/${BLOB_ARCH}/README << EOF
vxn Boot Blobs (Xen DomU)
==========================

Built for: ${TARGET_ARCH}
Machine: ${MACHINE}
Multiconfig: ${VXN_MULTICONFIG}
Source runtime: ${VXN_RUNTIME}
Date: $(date)

Files:
  ${KERNEL_IMAGETYPE_INITRAMFS} - Kernel image (Xen PV-capable)
  initramfs.cpio.gz - Tiny initramfs (busybox + preinit)
  rootfs.img - Root filesystem with container tools

Boot flow:
  xl create <domain.cfg>
    -> Xen boots kernel + initramfs in DomU
    -> preinit detects Xen, mounts rootfs.img from /dev/xvda
    -> switch_root into rootfs.img
    -> init script runs container commands
EOF
}
222
223addtask deploy after do_compile before do_build
diff --git a/recipes-core/vxn/vxn-initramfs-create_1.0.bb b/recipes-core/vxn/vxn-initramfs-create_1.0.bb
new file mode 100644
index 00000000..edbef12f
--- /dev/null
+++ b/recipes-core/vxn/vxn-initramfs-create_1.0.bb
@@ -0,0 +1,43 @@
1# SPDX-FileCopyrightText: Copyright (C) 2025 Bruce Ashfield
2#
3# SPDX-License-Identifier: MIT
4#
5# vxn-initramfs-create_1.0.bb
6# ===========================================================================
7# Builds Xen DomU boot blobs for vxn
8# ===========================================================================
9#
10# This recipe packages boot blobs for vxn (vcontainer on Xen):
11# - A tiny initramfs (reused from vdkr/vpdmn build)
12# - The rootfs.img squashfs (same as vdkr, with HV detection in init)
13# - The kernel (Xen PV-capable via vxn.cfg fragment in vruntime)
14#
15# Boot flow on Xen Dom0:
16# xl create domain.cfg
17# -> Xen boots kernel + tiny initramfs in DomU
18# -> preinit detects Xen block prefix, mounts rootfs.img from /dev/xvda
19# -> switch_root into rootfs.img
20# -> vdkr-init.sh detects Xen via /proc/xen, uses xvd* devices
21#
22# ===========================================================================
23# BUILD INSTRUCTIONS
24# ===========================================================================
25#
26# For aarch64:
27# MACHINE=qemuarm64 bitbake vxn-initramfs-create
28#
29# For x86_64:
30# MACHINE=qemux86-64 bitbake vxn-initramfs-create
31#
32# Blobs are deployed to: tmp/deploy/images/${MACHINE}/vxn/
33#
34# ===========================================================================
35
36SUMMARY = "Build Xen DomU boot blobs for vxn"
37DESCRIPTION = "Packages kernel, initramfs and rootfs for running \
38 vcontainer workloads as Xen DomU guests."
39
40# Source blobs from vdkr (Docker) build - same rootfs works under Xen
41VXN_RUNTIME = "vdkr"
42
43require vxn-initramfs-create.inc
diff --git a/recipes-core/vxn/vxn_1.0.bb b/recipes-core/vxn/vxn_1.0.bb
new file mode 100644
index 00000000..2a36274a
--- /dev/null
+++ b/recipes-core/vxn/vxn_1.0.bb
@@ -0,0 +1,167 @@
1# SPDX-FileCopyrightText: Copyright (C) 2025 Bruce Ashfield
2#
3# SPDX-License-Identifier: MIT
4#
5# vxn_1.0.bb
6# ===========================================================================
7# Target integration package for vxn (vcontainer on Xen)
8# ===========================================================================
9#
10# This recipe installs vxn onto a Xen Dom0 target. It provides:
11# - vxn CLI wrapper (docker-like interface for Xen DomU containers)
12# - vrunner.sh (hypervisor-agnostic VM runner)
13# - vrunner-backend-xen.sh (Xen xl backend)
14# - vcontainer-common.sh (shared CLI code)
15# - Kernel, initramfs, and rootfs blobs for booting DomU guests
16#
17# The blobs are sourced from the vxn-initramfs-create recipe which
18# reuses the same rootfs images built by vdkr/vpdmn (the init scripts
19# detect the hypervisor at boot time).
20#
21# ===========================================================================
22# BUILD INSTRUCTIONS
23# ===========================================================================
24#
25# For aarch64 Dom0:
26# MACHINE=qemuarm64 bitbake vxn
27#
28# For x86_64 Dom0:
29# MACHINE=qemux86-64 bitbake vxn
30#
31# Add to a Dom0 image:
32# IMAGE_INSTALL:append = " vxn"
33#
34# Usage on Dom0:
35# vxn run hello-world # Run OCI container as Xen DomU
36# vxn vmemres start # Start persistent DomU (daemon mode)
37# vxn vexpose # Expose Docker API on Dom0
38#
39# ===========================================================================
40
41SUMMARY = "Docker CLI for Xen-based container execution"
DESCRIPTION = "vxn provides a familiar docker-like CLI that runs OCI \
               containers as Xen DomU guests. It uses the vcontainer \
               infrastructure with a Xen hypervisor backend."
45HOMEPAGE = "https://git.yoctoproject.org/meta-virtualization/"
46LICENSE = "MIT"
47LIC_FILES_CHKSUM = "file://${COMMON_LICENSE_DIR}/MIT;md5=0835ade698e0bcf8506ecda2f7b4f302"
48
49inherit features_check
50REQUIRED_DISTRO_FEATURES = "xen"
51
52SRC_URI = "\
53 file://vxn.sh \
54 file://vrunner.sh \
55 file://vrunner-backend-xen.sh \
56 file://vrunner-backend-qemu.sh \
57 file://vcontainer-common.sh \
58"
59
60FILESEXTRAPATHS:prepend := "${THISDIR}/../../recipes-containers/vcontainer/files:"
61
62S = "${UNPACKDIR}"
63
64# Runtime dependencies on Dom0
65RDEPENDS:${PN} = "\
66 xen-tools-xl \
67 bash \
68 jq \
69 socat \
70 coreutils \
71 util-linux \
72 e2fsprogs \
73 skopeo \
74"
75
76# Blobs are sourced from vxn-initramfs-create deploy output.
77# Build blobs first: bitbake vxn-initramfs-create
78# No task dependency here - vxn-initramfs-create is deploy-only (no packages).
79# Adding any dependency from a packaged recipe to a deploy-only recipe
80# breaks do_rootfs (sstate manifest not found for package_write_rpm).
81
82# Blobs come from DEPLOY_DIR which is untracked by sstate hash.
83# nostamp on do_install alone is insufficient — do_package and
84# do_package_write_rpm have unchanged sstate hashes so they restore
85# the OLD RPM from cache, discarding the fresh do_install output.
86# Force the entire install→package→RPM chain to always re-run.
87do_install[nostamp] = "1"
88do_package[nostamp] = "1"
89do_packagedata[nostamp] = "1"
90do_package_write_rpm[nostamp] = "1"
91do_package_write_ipk[nostamp] = "1"
92do_package_write_deb[nostamp] = "1"
93
def vxn_get_blob_arch(d):
    # Map the Dom0 TARGET_ARCH onto the blob directory name produced by
    # vxn-initramfs-create. Only the x86 family maps to 'x86_64'; every
    # other value (including the explicit 'aarch64') falls back to the
    # aarch64 blob directory.
    target = d.getVar('TARGET_ARCH')
    if target in ('x86_64', 'i686', 'i586'):
        return 'x86_64'
    return 'aarch64'
101
def vxn_get_kernel_image_name(d):
    # Kernel image file name expected in the blob directory for each
    # target architecture; unknown architectures default to 'Image'
    # (the aarch64 convention).
    image_by_arch = {
        'aarch64': 'Image',
        'x86_64': 'bzImage',
        'i686': 'bzImage',
        'i586': 'bzImage',
        'arm': 'zImage',
    }
    return image_by_arch.get(d.getVar('TARGET_ARCH'), 'Image')
111
112BLOB_ARCH = "${@vxn_get_blob_arch(d)}"
113KERNEL_IMAGETYPE_VXN = "${@vxn_get_kernel_image_name(d)}"
114
115VXN_DEPLOY = "${DEPLOY_DIR_IMAGE}"
116
do_install() {
    # Docker-like CLI entry point, installed as plain "vxn".
    install -d ${D}${bindir}
    install -m 0755 ${S}/vxn.sh ${D}${bindir}/vxn

    # Runner, backends and shared library code under ${libdir}/vxn.
    # $script is a shell variable (no braces), so BitBake leaves it alone.
    install -d ${D}${libdir}/vxn
    for script in vrunner.sh vrunner-backend-xen.sh vrunner-backend-qemu.sh; do
        install -m 0755 ${S}/$script ${D}${libdir}/vxn/
    done
    install -m 0644 ${S}/vcontainer-common.sh ${D}${libdir}/vxn/

    # Boot blobs from the vxn-initramfs-create deployment. Layout must
    # match what the vrunner backends expect:
    #   $BLOB_DIR/<arch>/{Image,initramfs.cpio.gz,rootfs.img}
    install -d ${D}${datadir}/vxn/${BLOB_ARCH}

    VXN_BLOB_SRC="${VXN_DEPLOY}/vxn/${BLOB_ARCH}"
    if [ ! -d "${VXN_BLOB_SRC}" ]; then
        # Missing blobs are a warning, not an error: the package still
        # installs the scripts so the build does not hard-fail.
        bbwarn "VXN blob directory not found at ${VXN_BLOB_SRC}. Build with: bitbake vxn-initramfs-create"
        return 0
    fi

    # Kernel image; the file name differs per arch (Image/bzImage/zImage).
    if [ ! -f "${VXN_BLOB_SRC}/${KERNEL_IMAGETYPE_VXN}" ]; then
        bbwarn "Kernel not found at ${VXN_BLOB_SRC}/${KERNEL_IMAGETYPE_VXN}"
    else
        install -m 0644 "${VXN_BLOB_SRC}/${KERNEL_IMAGETYPE_VXN}" ${D}${datadir}/vxn/${BLOB_ARCH}/
        bbnote "Installed kernel ${KERNEL_IMAGETYPE_VXN}"
    fi

    # Tiny initramfs used before switch_root into rootfs.img.
    if [ ! -f "${VXN_BLOB_SRC}/initramfs.cpio.gz" ]; then
        bbwarn "Initramfs not found at ${VXN_BLOB_SRC}/initramfs.cpio.gz"
    else
        install -m 0644 "${VXN_BLOB_SRC}/initramfs.cpio.gz" ${D}${datadir}/vxn/${BLOB_ARCH}/
        bbnote "Installed initramfs"
    fi

    # Guest rootfs squashfs (shared with the vdkr/vpdmn builds).
    if [ ! -f "${VXN_BLOB_SRC}/rootfs.img" ]; then
        bbwarn "Rootfs not found at ${VXN_BLOB_SRC}/rootfs.img"
    else
        install -m 0644 "${VXN_BLOB_SRC}/rootfs.img" ${D}${datadir}/vxn/${BLOB_ARCH}/
        bbnote "Installed rootfs.img"
    fi
}
159
160FILES:${PN} = "\
161 ${bindir}/vxn \
162 ${libdir}/vxn/ \
163 ${datadir}/vxn/ \
164"
165
166# Blobs are large binary files
167INSANE_SKIP:${PN} += "already-stripped"
diff --git a/recipes-kernel/linux/linux-yocto/vxn.cfg b/recipes-kernel/linux/linux-yocto/vxn.cfg
new file mode 100644
index 00000000..932732dc
--- /dev/null
+++ b/recipes-kernel/linux/linux-yocto/vxn.cfg
@@ -0,0 +1,24 @@
1# Xen PV guest support for vxn (vcontainer on Xen)
2# These configs enable a kernel to run as a Xen PV or PVHVM guest
3# with virtio-equivalent functionality via Xen paravirtualized drivers.
4
5# Core Xen guest support
6CONFIG_XEN=y
7CONFIG_XEN_PV=y
8CONFIG_XEN_PVHVM=y
9
10# Block device frontend (guest sees /dev/xvd*)
11CONFIG_XEN_BLKDEV_FRONTEND=y
12
13# Network frontend (guest sees eth0 via xennet)
14CONFIG_XEN_NETDEV_FRONTEND=y
15
16# PV console (guest sees /dev/hvc0, /dev/hvc1)
17CONFIG_HVC_XEN=y
18CONFIG_HVC_XEN_FRONTEND=y
19
20# Framebuffer frontend (not strictly required but useful)
21CONFIG_XEN_FBDEV_FRONTEND=y
22
23# 9pfs frontend for file sharing (trans=xen)
24CONFIG_XEN_9PFS_FRONTEND=y