summaryrefslogtreecommitdiffstats
path: root/recipes-containers/vcontainer/files
diff options
context:
space:
mode:
authorBruce Ashfield <bruce.ashfield@gmail.com>2026-02-18 14:07:49 +0000
committerBruce Ashfield <bruce.ashfield@gmail.com>2026-02-26 01:05:01 +0000
commit9377aede3157a3e7b702dc389c15f27523b673e7 (patch)
tree9ea01493815cfb58e642b65b5b31472235b5a09a /recipes-containers/vcontainer/files
parentfa4b171a436559787cfcebd4046a1354a1f5cacf (diff)
downloadmeta-virtualization-9377aede3157a3e7b702dc389c15f27523b673e7.tar.gz
vxn: add containerd OCI runtime integration
Add shell-based OCI runtime (vxn-oci-runtime) that enables containerd to manage Xen DomU containers through the standard runc shim. Non-terminal container output flows back to ctr via the shim's pipe mechanism. New files: - vxn-oci-runtime: OCI runtime (create/start/state/kill/delete/features/logs) - vxn-sendtty.c: SCM_RIGHTS helper for terminal mode PTY passing - containerd-shim-vxn-v2: PATH trick wrapper for runc shim coexistence - containerd-config-vxn.toml: CRI config (vxn default, runc fallback) - vctr: convenience wrapper injecting --runtime io.containerd.vxn.v2 Key design: - Monitor subprocess uses wait on xl console (not sleep-polling) for instant reaction when domain dies, then extracts output markers and writes to stdout (shim pipe -> containerd FIFO -> ctr client) - cmd_state checks monitor PID liveness (not domain status) to prevent premature cleanup race that killed monitor before output - cmd_delete always destroys remnant domains (no --force needed) - Coexists with runc: /usr/libexec/vxn/shim/runc symlink + PATH trick Verified: vctr run --rm, vctr run -d, vxn standalone, vxn daemon mode. Signed-off-by: Bruce Ashfield <bruce.ashfield@gmail.com>
Diffstat (limited to 'recipes-containers/vcontainer/files')
-rw-r--r--recipes-containers/vcontainer/files/containerd-config-vxn.toml19
-rw-r--r--recipes-containers/vcontainer/files/containerd-shim-vxn-v27
-rw-r--r--recipes-containers/vcontainer/files/vctr16
-rw-r--r--recipes-containers/vcontainer/files/vxn-oci-runtime650
-rw-r--r--recipes-containers/vcontainer/files/vxn-sendtty.c90
5 files changed, 782 insertions, 0 deletions
diff --git a/recipes-containers/vcontainer/files/containerd-config-vxn.toml b/recipes-containers/vcontainer/files/containerd-config-vxn.toml
new file mode 100644
index 00000000..4dc84630
--- /dev/null
+++ b/recipes-containers/vcontainer/files/containerd-config-vxn.toml
@@ -0,0 +1,19 @@
1version = 2
2
3# Register vxn shim: containerd-shim-vxn-v2 (wrapper script that execs the runc shim)
4# with BinaryName pointing to vxn-oci-runtime.
5# This allows: ctr run --runtime io.containerd.vxn.v2 ...
6
7# CRI plugin: make vxn the default runtime for Kubernetes
8[plugins."io.containerd.grpc.v1.cri".containerd]
9 default_runtime_name = "vxn"
10
11[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.vxn]
12 runtime_type = "io.containerd.vxn.v2"
13 [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.vxn.options]
14 BinaryName = "/usr/bin/vxn-oci-runtime"
15
16[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
17 runtime_type = "io.containerd.runc.v2"
18 [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options]
19 BinaryName = "runc"
diff --git a/recipes-containers/vcontainer/files/containerd-shim-vxn-v2 b/recipes-containers/vcontainer/files/containerd-shim-vxn-v2
new file mode 100644
index 00000000..9a4669f9
--- /dev/null
+++ b/recipes-containers/vcontainer/files/containerd-shim-vxn-v2
@@ -0,0 +1,7 @@
#!/bin/sh
# containerd-shim-vxn-v2
# Wraps containerd-shim-runc-v2 so that when the shim execs "runc",
# it finds vxn-oci-runtime instead. PATH trick — no temp files,
# no symlink conflicts with the real runc package.
#
# The separator is only added when PATH is non-empty: a bare trailing ':'
# would put the current working directory on the search path.
PATH="/usr/libexec/vxn/shim${PATH:+:$PATH}"
export PATH
exec /usr/bin/containerd-shim-runc-v2 "$@"
diff --git a/recipes-containers/vcontainer/files/vctr b/recipes-containers/vcontainer/files/vctr
new file mode 100644
index 00000000..ca84644a
--- /dev/null
+++ b/recipes-containers/vcontainer/files/vctr
@@ -0,0 +1,16 @@
#!/bin/sh
# vctr - thin front-end for ctr that selects the vxn runtime
#
# "vctr run ..." becomes "ctr run --runtime io.containerd.vxn.v2 ...";
# every other invocation is handed to ctr untouched.

VXN_RUNTIME="io.containerd.vxn.v2"

if [ "$1" = "run" ]; then
    # Drop the sub-command so the runtime flag can be placed right after it.
    shift
    exec ctr run --runtime "$VXN_RUNTIME" "$@"
fi

exec ctr "$@"
diff --git a/recipes-containers/vcontainer/files/vxn-oci-runtime b/recipes-containers/vcontainer/files/vxn-oci-runtime
new file mode 100644
index 00000000..6158cddd
--- /dev/null
+++ b/recipes-containers/vcontainer/files/vxn-oci-runtime
@@ -0,0 +1,650 @@
1#!/bin/bash
2# SPDX-FileCopyrightText: Copyright (C) 2025 Bruce Ashfield
3#
4# SPDX-License-Identifier: GPL-2.0-only
5#
6# vxn-oci-runtime
7# OCI runtime for containerd integration via containerd-shim-runc-v2
8#
9# This implements the OCI runtime CLI spec so containerd can manage
10# Xen DomU containers through the built-in runc shim:
11#
12# containerd -> containerd-shim-runc-v2 -> vxn-oci-runtime create/start/state/kill/delete
13# |
14# v
15# xl create/unpause/list/shutdown/destroy
16# |
17# v
18# Xen DomU (vxn-init.sh)
19#
20# This is a standalone script — it does not source vrunner.sh or
21# vcontainer-common.sh. The OCI runtime lifecycle (separate create/start/
22# state invocations) is fundamentally different from the all-in-one
23# vrunner flow.
24#
25# State directory: /run/vxn-oci-runtime/containers/<container-id>/
26
# Stop at the first command failure that is not explicitly handled.
set -o errexit

# Script-wide defaults. RUNTIME_ROOT can be replaced by the global --root
# option; BLOB_DIR is where detect_arch looks for kernel/initramfs/rootfs.
RUNTIME_ROOT="/run/vxn-oci-runtime"
OCI_VERSION="1.0.2"
BLOB_DIR="/usr/share/vxn"
32
33# ============================================================================
34# Logging
35# ============================================================================
36
# LOG_FILE can be redirected with the global --log option; VXN_LOG is the
# runtime's own log and always receives every message.
LOG_FILE="/var/log/vxn-oci-runtime.log"
VXN_LOG="/var/log/vxn-oci-runtime.log"

# log <message...>
# Append a timestamped line to VXN_LOG and, when it names a different file,
# to LOG_FILE as well. Write failures are ignored so logging can never abort
# the runtime.
log() {
    local stamp entry
    stamp=$(date '+%Y-%m-%d %H:%M:%S' 2>/dev/null || echo "-")
    entry="[$stamp] $*"

    printf '%s\n' "$entry" >> "$VXN_LOG" 2>/dev/null || true

    # Same file: the line above already covered it.
    if [ "$LOG_FILE" = "$VXN_LOG" ]; then
        return 0
    fi
    printf '%s\n' "$entry" >> "$LOG_FILE" 2>/dev/null || true
}
49
# die <message...>
# Record a FATAL entry in the log, print the message on stderr and terminate
# the script with exit status 1.
die() {
    local reason="$*"
    log "FATAL: $reason"
    printf '%s\n' "vxn-oci-runtime: $reason" >&2
    exit 1
}
55
56# ============================================================================
57# Architecture Detection
58# ============================================================================
59
# detect_arch
# Set VXN_ARCH, VXN_KERNEL, VXN_INITRAMFS, VXN_ROOTFS and VXN_TYPE from the
# machine name reported by uname. Only aarch64 and x86_64 are recognised;
# anything else is fatal.
detect_arch() {
    local machine kernel_name
    machine=$(uname -m)

    # Per-architecture differences: kernel image file name and Xen guest type.
    case "$machine" in
        aarch64)
            kernel_name="Image"
            VXN_TYPE="pvh"
            ;;
        x86_64)
            kernel_name="bzImage"
            VXN_TYPE="pv"
            ;;
        *)
            die "Unsupported architecture: $machine"
            ;;
    esac

    VXN_ARCH="$machine"
    VXN_KERNEL="$BLOB_DIR/$machine/$kernel_name"
    VXN_INITRAMFS="$BLOB_DIR/$machine/initramfs.cpio.gz"
    VXN_ROOTFS="$BLOB_DIR/$machine/rootfs.img"
}
83
84# ============================================================================
85# State Management
86# ============================================================================
87
# state_dir <container-id>
# Print the per-container state directory under the current RUNTIME_ROOT.
state_dir() {
    printf '%s\n' "$RUNTIME_ROOT/containers/$1"
}
91
# load_state <container-id>
# Existence check only: fatal unless the container's state.json is present.
load_state() {
    local id="$1"
    local state_file
    state_file="$(state_dir "$id")/state.json"
    if [ ! -f "$state_file" ]; then
        die "container $id does not exist"
    fi
}
98
# read_state_field <container-id> <field>
# Print the value of a string-valued top-level field from state.json.
# Prints nothing when the field (or the file) is missing.
read_state_field() {
    local id="$1"
    local field="$2"
    local state_file
    state_file="$(state_dir "$id")/state.json"

    # grep/sed rather than jq — jq may not be available in all environments.
    local ws='[[:space:]]*'
    grep -o "\"$field\"${ws}:${ws}\"[^\"]*\"" "$state_file" 2>/dev/null | \
        sed "s/.*\"$field\"${ws}:${ws}\"//;s/\"\$//"
}
108
# read_state_pid <container-id>
# Print the numeric "pid" value stored in state.json.
read_state_pid() {
    local id="$1"
    local state_file
    state_file="$(state_dir "$id")/state.json"

    # First grep isolates the '"pid": N' fragment, the second keeps the digits.
    grep -o '"pid"[[:space:]]*:[[:space:]]*[0-9]*' "$state_file" 2>/dev/null | \
        grep -o '[0-9]*$'
}
116
# write_state <container-id> <status> <pid> <bundle> <created>
# Overwrite the container's state.json with the given OCI state.
#
# pid is written as a bare JSON number, so an empty value would produce
# invalid JSON ('"pid": ,') that read_state_pid can no longer parse. It
# falls back to 0, the same default cmd_state uses when printing state.
# The remaining values are inserted verbatim, without JSON escaping.
write_state() {
    local id="$1"
    local status="$2"
    local pid="${3:-0}"
    local bundle="$4"
    local created="$5"
    local dir
    dir=$(state_dir "$id")
    cat > "$dir/state.json" <<EOF
{
 "ociVersion": "$OCI_VERSION",
 "id": "$id",
 "status": "$status",
 "pid": $pid,
 "bundle": "$bundle",
 "created": "$created",
 "annotations": {}
}
EOF
}
137
138# ============================================================================
139# OCI Runtime Commands
140# ============================================================================
141
# cmd_create [--bundle DIR] [--pid-file FILE] [--console-socket SOCK] <container-id>
#
# OCI "create": build everything needed for the container and leave the Xen
# domain paused.
#   1. Read process.args / cwd / terminal from <bundle>/config.json
#      (jq when available, grep/sed otherwise).
#   2. Pack <bundle>/rootfs into an ext4 image in the state directory.
#   3. Write a Xen domain config and run "xl create -p".
#   4. With --console-socket, hand the domain console PTY to the shim through
#      vxn-sendtty.
#   5. Fork a monitor subshell whose PID goes to --pid-file, monitor.pid and
#      state.json; the shim treats that PID as the container's init process.
#
# Environment: VXN_OCI_MEMORY (default 512) and VXN_OCI_VCPUS (default 2)
# fill the memory= and vcpus= lines of the domain config.
# Any failed step is fatal via die().
cmd_create() {
    local container_id=""
    local bundle=""
    local pid_file=""
    local console_socket=""

    # Parse arguments ("--flag value" and "--flag=value" are both accepted)
    while [ $# -gt 0 ]; do
        case "$1" in
            --bundle)           bundle="$2"; shift 2 ;;
            --bundle=*)         bundle="${1#--bundle=}"; shift ;;
            --pid-file)         pid_file="$2"; shift 2 ;;
            --pid-file=*)       pid_file="${1#--pid-file=}"; shift ;;
            --console-socket)   console_socket="$2"; shift 2 ;;
            --console-socket=*) console_socket="${1#--console-socket=}"; shift ;;
            -*)                 log " DEBUG: unknown create flag: $1"; shift ;;
            *)
                # First bare word is the container ID; later ones are ignored.
                if [ -z "$container_id" ]; then
                    container_id="$1"
                fi
                shift
                ;;
        esac
    done

    [ -n "$container_id" ] || die "create: container ID required"
    [ -n "$bundle" ] || die "create: --bundle required"
    [ -f "$bundle/config.json" ] || die "create: $bundle/config.json not found"

    log "CREATE: id=$container_id bundle=$bundle console_socket=$console_socket"
    # Debug: log what the shim gives us (paths quoted so spaces survive)
    log " DEBUG: fd0=$(readlink /proc/$$/fd/0 2>/dev/null) fd1=$(readlink /proc/$$/fd/1 2>/dev/null) fd2=$(readlink /proc/$$/fd/2 2>/dev/null)"
    local _taskdir
    _taskdir=$(dirname "$bundle")
    log " DEBUG: bundle dir ($bundle): $(ls -F "$bundle/" 2>/dev/null | tr '\n' ' ')"
    log " DEBUG: parent dir ($_taskdir): $(ls -F "$_taskdir/" 2>/dev/null | tr '\n' ' ')"
    log " DEBUG: all pipes: $(find /run -type p 2>/dev/null | tr '\n' ' ')"

    detect_arch

    local dir
    dir=$(state_dir "$container_id")
    mkdir -p "$dir"

    # Read config.json — parse process.args, process.cwd, process.terminal
    local config="$bundle/config.json"
    local entrypoint="" cwd="/" terminal="false"

    if command -v jq >/dev/null 2>&1; then
        # Under "set -e" a failing jq would end the script with no message;
        # report the parse failure explicitly instead.
        entrypoint=$(jq -r '(.process.args // []) | join(" ")' "$config" 2>/dev/null) \
            || die "create: failed to parse process.args from $config"
        cwd=$(jq -r '.process.cwd // "/"' "$config" 2>/dev/null) \
            || die "create: failed to parse process.cwd from $config"
        terminal=$(jq -r '.process.terminal // false' "$config" 2>/dev/null) \
            || die "create: failed to parse process.terminal from $config"
    else
        # Fallback: grep/sed parsing
        entrypoint=$(grep -o '"args"[[:space:]]*:[[:space:]]*\[[^]]*\]' "$config" 2>/dev/null | \
            sed 's/"args"[[:space:]]*:[[:space:]]*\[//;s/\]$//' | \
            tr ',' '\n' | sed 's/^ *"//;s/"$//' | tr '\n' ' ' | sed 's/ $//')
        cwd=$(grep -o '"cwd"[[:space:]]*:[[:space:]]*"[^"]*"' "$config" 2>/dev/null | \
            sed 's/"cwd"[[:space:]]*:[[:space:]]*"//;s/"$//')
        [ -z "$cwd" ] && cwd="/"
        if grep -q '"terminal"[[:space:]]*:[[:space:]]*true' "$config" 2>/dev/null; then
            terminal="true"
        fi
    fi

    log " entrypoint='$entrypoint' cwd='$cwd' terminal=$terminal"

    # Create ext4 disk image from bundle/rootfs/
    local rootfs_dir="$bundle/rootfs"
    local input_img="$dir/input.img"

    if [ -d "$rootfs_dir" ] && [ -n "$(ls -A "$rootfs_dir" 2>/dev/null)" ]; then
        # Calculate size: rootfs size + 50% headroom, minimum 64MB
        local rootfs_size_kb
        rootfs_size_kb=$(du -sk "$rootfs_dir" 2>/dev/null | awk '{print $1}')
        local img_size_kb=$(( (rootfs_size_kb * 3 / 2) ))
        [ "$img_size_kb" -lt 65536 ] && img_size_kb=65536

        log " Creating ext4 image: ${img_size_kb}KB from $rootfs_dir"
        mke2fs -t ext4 -d "$rootfs_dir" -b 4096 "$input_img" "${img_size_kb}K" \
            >> "$LOG_FILE" 2>&1 || die "create: failed to create ext4 image"
    else
        die "create: $rootfs_dir is empty or does not exist"
    fi

    # Encode entrypoint as base64 for kernel cmdline
    local cmd_b64=""
    if [ -n "$entrypoint" ]; then
        cmd_b64=$(printf '%s' "$entrypoint" | base64 -w0)
    fi

    # Domain name: vxn-oci-<container-id>
    local domname="vxn-oci-${container_id}"
    # Xen domain names have a max length — truncate if needed
    if [ ${#domname} -gt 64 ]; then
        domname="vxn-oci-${container_id:0:55}"
    fi
    echo "$domname" > "$dir/domname"

    # Memory and vCPUs — configurable via environment
    local xen_memory="${VXN_OCI_MEMORY:-512}"
    local xen_vcpus="${VXN_OCI_VCPUS:-2}"

    # Generate Xen domain config
    local config_cfg="$dir/config.cfg"
    local kernel_extra="console=hvc0 quiet loglevel=0 init=/init vcontainer.blk=xvd vcontainer.init=/vxn-init.sh"
    [ -n "$cmd_b64" ] && kernel_extra="$kernel_extra docker_cmd=$cmd_b64"
    kernel_extra="$kernel_extra docker_input=oci"

    # Terminal mode: suppress boot messages for raw console I/O
    if [ "$terminal" = "true" ]; then
        kernel_extra="$kernel_extra docker_interactive=1"
    fi

    cat > "$config_cfg" <<XENEOF
# Auto-generated Xen domain config for vxn-oci-runtime
name = "$domname"
type = "$VXN_TYPE"
memory = $xen_memory
vcpus = $xen_vcpus

kernel = "$VXN_KERNEL"
ramdisk = "$VXN_INITRAMFS"
extra = "$kernel_extra"

disk = [ 'format=raw,vdev=xvda,access=ro,target=$VXN_ROOTFS', 'format=raw,vdev=xvdb,access=ro,target=$input_img' ]
vif = []

serial = 'pty'

on_poweroff = "destroy"
on_reboot = "destroy"
on_crash = "destroy"
XENEOF

    log " Xen config written to $config_cfg"

    # Create domain in paused state (OCI spec: create does not start)
    xl create -p "$config_cfg" >> "$LOG_FILE" 2>&1 || die "create: xl create -p failed"

    log " Domain $domname created (paused)"

    # Get domid and read Xen console PTY from xenstore
    local domid pty_path
    domid=$(xl domid "$domname" 2>/dev/null) || die "create: failed to get domid for $domname"
    pty_path=$(xenstore-read "/local/domain/$domid/console/tty" 2>/dev/null) || true
    log " domid=$domid pty=$pty_path"

    if [ -n "$pty_path" ]; then
        echo "$pty_path" > "$dir/pty"
    fi

    # Terminal mode: send PTY fd to shim via console-socket (SCM_RIGHTS).
    # Success is logged only when the helper actually succeeded.
    if [ -n "$console_socket" ] && [ -n "$pty_path" ]; then
        if ! command -v vxn-sendtty >/dev/null 2>&1; then
            log " WARNING: vxn-sendtty not found, cannot send PTY to shim"
        elif vxn-sendtty "$console_socket" "$pty_path"; then
            log " Sent PTY fd to console-socket"
        else
            log " WARNING: vxn-sendtty failed (socket=$console_socket pty=$pty_path)"
        fi
    elif [ -n "$console_socket" ]; then
        log " WARNING: no console PTY found for $domname, nothing sent to console-socket"
    fi

    # Persistent log dir — survives container deletion by shim
    local logdir="/var/log/vxn-oci-runtime/containers/$container_id"
    mkdir -p "$logdir"

    # Monitor process: tracks domain lifecycle and captures output.
    #
    # Non-terminal mode: xl console captures the domain's serial output.
    # When the domain dies, xl console exits (PTY closes). We immediately
    # extract content between OUTPUT_START/END markers and write to stdout.
    # stdout is the shim's pipe → containerd copies to client FIFO → ctr.
    #
    # CRITICAL: We use "wait" on xl console instead of polling xl list.
    # Polling with sleep 5 was too slow — the shim detected "stopped" and
    # killed the monitor before it had a chance to output. Using wait gives
    # us instant reaction when the domain dies.
    #
    # Terminal mode (console-socket): the shim owns the PTY exclusively.
    # We just wait for the domain to exit without capturing console.
    local _dn="$domname" _logdir="$logdir" _csock="$console_socket"
    (
        if [ -z "$_csock" ]; then
            # Non-terminal: capture console to persistent log dir
            xl console "$_dn" > "$_logdir/console.log" 2>&1 &
            _cpid=$!

            # Wait for xl console to exit — domain death closes the PTY,
            # which causes xl console to exit immediately. No polling delay.
            # The subshell inherits "set -e" and wait returns xl console's
            # status, so a non-zero exit there must not abort the monitor
            # before the captured output has been relayed.
            wait "$_cpid" 2>/dev/null || true

            # Extract output between markers and write to stdout.
            # stdout IS the shim's pipe (confirmed: fd1=pipe). The shim's
            # io.Copy goroutine reads from this pipe and writes to the
            # containerd client FIFO. ctr reads from the FIFO.
            if [ -f "$_logdir/console.log" ]; then
                _relay=false
                while IFS= read -r _line; do
                    # Console lines end in CRLF; drop the carriage return.
                    _line="${_line%%$'\r'}"
                    case "$_line" in
                        *===OUTPUT_START===*) _relay=true ;;
                        *===OUTPUT_END===*)   _relay=false ;;
                        *)
                            if [ "$_relay" = "true" ]; then
                                printf '%s\n' "$_line"
                            fi
                            ;;
                    esac
                done < "$_logdir/console.log"
            fi
        else
            # Terminal mode: shim owns PTY — just wait for domain death
            while xl list "$_dn" >/dev/null 2>&1; do sleep 2; done
        fi
    ) &
    local monitor_pid=$!

    # Write monitor PID to --pid-file (runc shim monitors /proc/<pid>)
    # Use printf — shim parses with strconv.Atoi which rejects trailing newlines
    if [ -n "$pid_file" ]; then
        printf '%s' "$monitor_pid" > "$pid_file"
    fi
    printf '%s' "$monitor_pid" > "$dir/monitor.pid"

    log " monitor PID=$monitor_pid"

    # Write OCI state
    local created
    created=$(date -u '+%Y-%m-%dT%H:%M:%SZ' 2>/dev/null || echo "1970-01-01T00:00:00Z")
    write_state "$container_id" "created" "$monitor_pid" "$bundle" "$created"

    log "CREATE: done"
}
374
# cmd_start <container-id>
# OCI "start": unpause the Xen domain made by create and record the
# container as "running". Fatal when the container or its domain is missing
# or when xl unpause fails.
cmd_start() {
    local container_id="$1"
    if [ -z "$container_id" ]; then
        die "start: container ID required"
    fi

    log "START: id=$container_id"
    load_state "$container_id"

    local domname
    domname=$(cat "$(state_dir "$container_id")/domname")

    # Only the domain's existence is checked here, not its paused state.
    if ! xl list "$domname" >/dev/null 2>&1; then
        die "start: domain $domname not found"
    fi

    if ! xl unpause "$domname" >> "$LOG_FILE" 2>&1; then
        die "start: xl unpause failed"
    fi

    # Carry the stored pid/bundle/created over; only the status changes.
    local pid bundle created
    pid=$(read_state_pid "$container_id")
    bundle=$(read_state_field "$container_id" "bundle")
    created=$(read_state_field "$container_id" "created")
    write_state "$container_id" "running" "$pid" "$bundle" "$created"

    log "START: done"
}
402
# cmd_state <container-id>
# OCI "state": print the container state as one line of JSON on stdout.
#
# Liveness comes from the monitor PID stored in state.json, not from the Xen
# domain: the monitor can still be relaying console output after the domain
# has gone, and reporting "stopped" too early lets the shim tear it down
# mid-output. A created/running container whose monitor is gone is persisted
# as "stopped" before being reported.
cmd_state() {
    local container_id="$1"
    if [ -z "$container_id" ]; then
        die "state: container ID required"
    fi

    local state_file
    state_file="$(state_dir "$container_id")/state.json"
    if [ ! -f "$state_file" ]; then
        die "container $container_id does not exist"
    fi

    # Stored values
    local status pid bundle created
    status=$(read_state_field "$container_id" "status")
    pid=$(read_state_pid "$container_id")
    bundle=$(read_state_field "$container_id" "bundle")
    created=$(read_state_field "$container_id" "created")

    case "$status" in
        running|created)
            # Alive means: a positive numeric PID that still accepts signal 0.
            if ! { [ -n "$pid" ] && [ "$pid" -gt 0 ] 2>/dev/null && kill -0 "$pid" 2>/dev/null; }; then
                status="stopped"
                write_state "$container_id" "stopped" "$pid" "$bundle" "$created"
            fi
            ;;
    esac

    # Output OCI state JSON to stdout
    printf '{"ociVersion":"%s","id":"%s","status":"%s","pid":%s,"bundle":"%s","created":"%s","annotations":{}}\n' \
        "$OCI_VERSION" "$container_id" "$status" "${pid:-0}" "$bundle" "$created"
}
442
# cmd_kill [flags] <container-id> [signal]
# OCI "kill": map a signal onto a Xen domain operation, then record the
# container as "stopped".
#
#   9/SIGKILL/KILL, 2/SIGINT/INT  -> xl destroy
#   15/SIGTERM/TERM (default)     -> xl shutdown, wait up to 10s, xl destroy
#   anything else                 -> xl shutdown only
#
# Option words (for example --all) are skipped instead of being taken as the
# container ID, so "kill --all <id> <signal>" resolves the right container.
# xl failures are logged to LOG_FILE and otherwise ignored.
cmd_kill() {
    local container_id=""
    local signal=""

    # Positional parse: first bare word is the ID, second the signal.
    while [ $# -gt 0 ]; do
        case "$1" in
            -*) shift ;;
            *)
                if [ -z "$container_id" ]; then
                    container_id="$1"
                elif [ -z "$signal" ]; then
                    signal="$1"
                fi
                shift
                ;;
        esac
    done
    signal="${signal:-SIGTERM}"

    [ -n "$container_id" ] || die "kill: container ID required"

    log "KILL: id=$container_id signal=$signal"
    load_state "$container_id"

    local dir
    dir=$(state_dir "$container_id")
    local domname
    domname=$(cat "$dir/domname")

    # Normalize signal: accept both numeric and symbolic forms
    case "$signal" in
        9|SIGKILL|KILL|2|SIGINT|INT)
            xl destroy "$domname" >> "$LOG_FILE" 2>&1 || true
            ;;
        15|SIGTERM|TERM)
            xl shutdown "$domname" >> "$LOG_FILE" 2>&1 || true
            # Wait briefly for graceful shutdown, then force destroy
            local i
            for i in 1 2 3 4 5 6 7 8 9 10; do
                xl list "$domname" >/dev/null 2>&1 || break
                sleep 1
            done
            xl destroy "$domname" >> "$LOG_FILE" 2>&1 || true
            ;;
        *)
            # Unknown signal — request a graceful shutdown only; unlike
            # SIGTERM there is no wait and no forced destroy afterwards.
            xl shutdown "$domname" >> "$LOG_FILE" 2>&1 || true
            ;;
    esac

    # Update state
    local pid bundle created
    pid=$(read_state_pid "$container_id")
    bundle=$(read_state_field "$container_id" "bundle")
    created=$(read_state_field "$container_id" "created")
    write_state "$container_id" "stopped" "$pid" "$bundle" "$created"

    log "KILL: done"
}
489
# cmd_delete [--force|-f] <container-id>
# OCI "delete": destroy any remaining Xen domain, stop the monitor process
# and remove the container's state directory (disk image included).
#
# Without --force a missing container is fatal. With --force it is logged
# and treated as success, so a repeated forced delete is harmless.
cmd_delete() {
    local container_id=""
    local force=false

    # Parse arguments
    while [ $# -gt 0 ]; do
        case "$1" in
            --force|-f) force=true; shift ;;
            -*) shift ;;
            *)
                if [ -z "$container_id" ]; then
                    container_id="$1"
                fi
                shift
                ;;
        esac
    done

    [ -n "$container_id" ] || die "delete: container ID required"

    log "DELETE: id=$container_id force=$force"

    local dir
    dir=$(state_dir "$container_id")
    if [ ! -d "$dir" ]; then
        if [ "$force" = "true" ]; then
            log "DELETE: container $container_id does not exist (ignored, --force)"
            return 0
        fi
        die "container $container_id does not exist"
    fi

    # Clean up Xen domain if still present.
    # The shim only calls delete after the init PID (monitor) has exited,
    # meaning the task is complete. The domain may still be shutting down —
    # always destroy it as part of cleanup.
    if [ -f "$dir/domname" ]; then
        local domname
        domname=$(cat "$dir/domname")
        if xl list "$domname" >/dev/null 2>&1; then
            xl destroy "$domname" >> "$LOG_FILE" 2>&1 || true
        fi
    fi

    # Stop the monitor subshell if it is still around. Only a plain PID
    # greater than 1 is signalled: "kill 0" would hit this script's own
    # process group. The console-capture child is not signalled here.
    if [ -f "$dir/monitor.pid" ]; then
        local mpid
        mpid=$(cat "$dir/monitor.pid")
        case "$mpid" in
            ''|*[!0-9]*)
                log " WARNING: ignoring invalid monitor PID '$mpid'"
                ;;
            *)
                if [ "$mpid" -gt 1 ]; then
                    kill "$mpid" 2>/dev/null || true
                fi
                ;;
        esac
    fi

    # Remove state directory (includes disk images)
    rm -rf "$dir"

    log "DELETE: done"
}
540
# cmd_features
# Print the runtime's feature document as JSON on stdout. Every Linux
# isolation feature is reported as empty or disabled, and the document
# carries a runtime_type annotation of "vm".
cmd_features() {
    # One argument per output line; only ociVersionMax is interpolated.
    printf '%s\n' \
        '{' \
        ' "ociVersionMin": "1.0.0",' \
        " \"ociVersionMax\": \"$OCI_VERSION\"," \
        ' "hooks": [],' \
        ' "mountOptions": [],' \
        ' "linux": {' \
        ' "namespaces": [],' \
        ' "capabilities": [],' \
        ' "cgroup": {' \
        ' "v1": false,' \
        ' "v2": false' \
        ' },' \
        ' "seccomp": {' \
        ' "enabled": false' \
        ' },' \
        ' "apparmor": {' \
        ' "enabled": false' \
        ' },' \
        ' "selinux": {' \
        ' "enabled": false' \
        ' }' \
        ' },' \
        ' "annotations": {' \
        ' "io.containerd.runc.v2.runtime_type": "vm"' \
        ' }' \
        '}'
}
571
# cmd_logs <container-id>
# Print the container's captured output: the lines of console.log that lie
# between the ===OUTPUT_START=== and ===OUTPUT_END=== markers. The
# persistent log directory is preferred over the state directory; it is
# fatal when neither holds a console.log.
cmd_logs() {
    local container_id="$1"
    if [ -z "$container_id" ]; then
        die "logs: container ID required"
    fi

    local persistent_log="/var/log/vxn-oci-runtime/containers/$container_id/console.log"
    local state_log
    state_log="$(state_dir "$container_id")/console.log"

    local source_log=""
    if [ -f "$persistent_log" ]; then
        source_log="$persistent_log"
    elif [ -f "$state_log" ]; then
        source_log="$state_log"
    else
        die "no logs for $container_id"
    fi

    # Marker lines toggle the relay and are never printed themselves.
    local emitting=false
    local current
    while IFS= read -r current; do
        # Strip the carriage return left by the serial console.
        current="${current%%$'\r'}"
        if [[ "$current" == *"===OUTPUT_START==="* ]]; then
            emitting=true
        elif [[ "$current" == *"===OUTPUT_END==="* ]]; then
            emitting=false
        elif [ "$emitting" = "true" ]; then
            printf '%s\n' "$current"
        fi
    done < "$source_log"
}
605
606# ============================================================================
607# Main
608# ============================================================================
609
# Parse global options before command.
# Value-taking options are checked for their value first, so a truncated
# command line gets an error message instead of a silent "shift 2" failure
# under set -e.
while [ $# -gt 0 ]; do
    case "$1" in
        --root|--log|--log-format)
            [ $# -ge 2 ] || die "$1 requires a value"
            case "$1" in
                --root) RUNTIME_ROOT="$2" ;;
                --log)  LOG_FILE="$2" ;;
                # --log-format: accepted but ignored
            esac
            shift 2
            ;;
        --root=*) RUNTIME_ROOT="${1#--root=}"; shift ;;
        --log=*) LOG_FILE="${1#--log=}"; shift ;;
        --log-format=*) shift ;;
        --systemd-cgroup) shift ;; # accepted but ignored
        # Must come before the generic "-*" rule, which would otherwise
        # discard it and make the version branch below unreachable.
        --version|-v) break ;;
        -*) shift ;; # skip other global flags
        *) break ;; # first non-flag is the command
    esac
done

# Create the container root only after --root has been applied, so a custom
# root is the one that gets prepared.
mkdir -p "$RUNTIME_ROOT/containers" 2>/dev/null || true

command="${1:-}"
shift || true

case "$command" in
    create) cmd_create "$@" ;;
    start) cmd_start "$@" ;;
    state) cmd_state "$@" ;;
    kill) cmd_kill "$@" ;;
    delete) cmd_delete "$@" ;;
    features) cmd_features "$@" ;;
    logs) cmd_logs "$@" ;;
    --version|-v|version)
        echo "vxn-oci-runtime version 1.0.0"
        echo "spec: $OCI_VERSION"
        ;;
    *)
        if [ -n "$command" ]; then
            log "Unknown command: $command (args: $*)"
        fi
        echo "Usage: vxn-oci-runtime <command> [args...]" >&2
        echo "Commands: create, start, state, kill, delete, features, logs, version" >&2
        exit 1
        ;;
esac
diff --git a/recipes-containers/vcontainer/files/vxn-sendtty.c b/recipes-containers/vcontainer/files/vxn-sendtty.c
new file mode 100644
index 00000000..a253b129
--- /dev/null
+++ b/recipes-containers/vcontainer/files/vxn-sendtty.c
@@ -0,0 +1,90 @@
1/*
2 * SPDX-FileCopyrightText: Copyright (C) 2025 Bruce Ashfield
3 * SPDX-License-Identifier: GPL-2.0-only
4 *
5 * vxn-sendtty - Send a PTY fd to a containerd shim via SCM_RIGHTS
6 *
7 * Usage: vxn-sendtty <console-socket-path> <pty-path>
8 *
9 * Opens pty-path, connects to console-socket (Unix socket), and sends
10 * the PTY fd via sendmsg() with SCM_RIGHTS. This is the OCI runtime
11 * protocol for terminal mode (--console-socket): the shim receives the
12 * PTY master and bridges it to the user's terminal.
13 *
14 * Shell can't do SCM_RIGHTS natively, hence this small C helper.
15 */
16
17#include <stdio.h>
18#include <stdlib.h>
19#include <string.h>
20#include <unistd.h>
21#include <fcntl.h>
22#include <sys/socket.h>
23#include <sys/un.h>
24
/*
 * main - pass an open PTY descriptor to a listening Unix socket.
 *
 * argv[1] is the console socket path and argv[2] the PTY device path. The
 * PTY is opened read/write without becoming the controlling terminal, and
 * its descriptor is sent as SCM_RIGHTS ancillary data alongside one dummy
 * payload byte.
 *
 * Returns 0 once the message has been sent; 1 on a usage error, an
 * over-long socket path, or any failing system call (a diagnostic is
 * written to stderr).
 */
int main(int argc, char *argv[])
{
	int pty_fd = -1, sock_fd = -1, status = 1;
	size_t path_len;
	ssize_t sent;
	struct sockaddr_un addr;
	struct msghdr msg;
	struct iovec iov;
	char payload[1] = {0};
	/* The union gives the control buffer the alignment of struct cmsghdr,
	 * which a bare char array does not guarantee. */
	union {
		char buf[CMSG_SPACE(sizeof(int))];
		struct cmsghdr align;
	} control;
	struct cmsghdr *cmsg;

	if (argc != 3) {
		fprintf(stderr, "Usage: %s <console-socket-path> <pty-path>\n",
			argv[0]);
		return 1;
	}

	/* Reject instead of truncating: a shortened path would make connect()
	 * target a different socket than the caller asked for. */
	path_len = strlen(argv[1]);
	if (path_len >= sizeof(addr.sun_path)) {
		fprintf(stderr, "%s: socket path too long (%zu bytes, max %zu)\n",
			argv[0], path_len, sizeof(addr.sun_path) - 1);
		return 1;
	}

	pty_fd = open(argv[2], O_RDWR | O_NOCTTY);
	if (pty_fd < 0) {
		perror("open pty");
		goto out;
	}

	sock_fd = socket(AF_UNIX, SOCK_STREAM, 0);
	if (sock_fd < 0) {
		perror("socket");
		goto out;
	}

	/* Zero-fill first so sun_path stays NUL-terminated after the copy. */
	memset(&addr, 0, sizeof(addr));
	addr.sun_family = AF_UNIX;
	memcpy(addr.sun_path, argv[1], path_len);

	if (connect(sock_fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
		perror("connect");
		goto out;
	}

	memset(&msg, 0, sizeof(msg));
	memset(&control, 0, sizeof(control));
	iov.iov_base = payload;
	iov.iov_len = sizeof(payload);
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	msg.msg_control = control.buf;
	msg.msg_controllen = sizeof(control.buf);

	cmsg = CMSG_FIRSTHDR(&msg);
	cmsg->cmsg_level = SOL_SOCKET;
	cmsg->cmsg_type = SCM_RIGHTS;
	cmsg->cmsg_len = CMSG_LEN(sizeof(int));
	memcpy(CMSG_DATA(cmsg), &pty_fd, sizeof(int));

	sent = sendmsg(sock_fd, &msg, 0);
	if (sent < 0) {
		perror("sendmsg");
		goto out;
	}
	if ((size_t)sent != sizeof(payload)) {
		fprintf(stderr, "sendmsg: short send\n");
		goto out;
	}

	status = 0;

out:
	/* Single exit path: release whatever was acquired. */
	if (sock_fd >= 0)
		close(sock_fd);
	if (pty_fd >= 0)
		close(pty_fd);
	return status;
}