diff --git a/src/cmd-build b/src/cmd-build
index 0d2cba60e624bfc8476b7694900fd4d139be8a4e..d9aa7ed937bb4f6fc3b19953c04a63b14515a27c 100755
--- a/src/cmd-build
+++ b/src/cmd-build
@@ -98,7 +98,7 @@ declare -A targets=( )
 for target in "$@"; do
     if [[ $target != ostree ]]; then
         case "$target" in
-            qemu|metal|metal4k) ;;
+            qemu|metal|metal4k|raw-rootfs) ;;
             *) fatal "Unrecognized target: $target" ;;
         esac
         targets[$target]=1
diff --git a/src/cmd-buildextend-raw-rootfs b/src/cmd-buildextend-raw-rootfs
new file mode 100755
index 0000000000000000000000000000000000000000..b88eee5597118cd56b1d8347bf1476abeefaa41e
--- /dev/null
+++ b/src/cmd-buildextend-raw-rootfs
@@ -0,0 +1,442 @@
+#!/usr/bin/env bash
+set -euox pipefail
+
+# Remove some files that ostree considers mandatory but that are useless for
+# the rootfs, then add the extras the image expects.
+#
+# Must be called with the sysroot as the current directory. Reads the globals
+# ${os_name} and ${commit}.
+function hijack_on_rootfs {
+    local deploy_dir="ostree/deploy/${os_name}/deploy/${commit}.0"
+
+    rm -rf "${deploy_dir}/usr/lib/modules"
+    rm -rf "${deploy_dir}/usr/share/rpm"
+    rm -rf "${deploy_dir}/usr/lib/sysimage"
+    rm -rf boot/ostree
+
+    # NOTE(review): the OS name is hard-coded here while every other path in
+    # this function uses ${os_name} -- confirm which one is intended.
+    pushd ostree/boot.1/LifseaOS/ > /dev/null
+    # Link "latest" to the first entry of this directory; the entry is
+    # expected to be named after a hash, for example
+    # d1458f46b19eb957b070a65cf58c5120122fdee0d966fc90c32159dcb77b4313
+    local files=(*)
+    local hashstr="${files[0]}"
+    ln -s "$hashstr" latest
+    popd > /dev/null
+
+    touch "${deploy_dir}/etc/machine-id"
+    mkdir -p "ostree/deploy/${os_name}/var/lib"/{eci,containerd,kubelet}
+}
+
+# Pack a rootfs tarball into a raw disk image holding one ext4 partition.
+#
+#   $1  tarball with the rootfs content
+#   $2  path of the image file to create
+#   $3  image size, passed to qemu-img as is
+#
+# When EXT4_NO_JOURNAL is set (to any value, even empty) the filesystem is
+# created without a journal. The function attaches a loop device and mounts
+# the partition, and reports failures through fatal.
+function tar_to_img {
+
+    WORKDIR=/tmp/workdir
+    IMAGE_NAME=$2
+    IMAGE_SIZE=$3
+
+    # Always start from an empty work directory.
+    rm -rf "$WORKDIR"
+    mkdir -p "$WORKDIR"
+
+    pushd "$WORKDIR" > /dev/null
+
+    #---------------------------------------------------------------------------
+    # Create initial image, format and mount it
+    # Note: the 4096-byte block size (-b 4096) is a MUST to suit PMEM
+    #---------------------------------------------------------------------------
+    qemu-img create -q -f raw "$IMAGE_NAME" "$IMAGE_SIZE" || fatal "qemu-img create failed"
+    parted -s -a optimal "$IMAGE_NAME" \
+        mklabel msdos -- \
+        mkpart primary ext4 1M -1M || fatal "parted failed"
+    device=$(losetup -P -f --show "$IMAGE_NAME") || fatal "losetup failed"
+    partprobe -s "$device" > /dev/null || fatal "partprobe failed"
+    # sleep for a while to wait for ${device}p1 to show up...
+    sleep 5
+    # if we're inside a docker, ${device}p1 will not appear automatically
+    # since udev rules will not be triggered. Now we have to mknod on our own.
+    partition=$(lsblk -rp -oNAME "${device}" | grep "${device}p" | tail -1) \
+        || fatal "fetch NAME from lsblk failed"
+    if [ ! -b "${partition}" ]; then
+        majmin=$(lsblk -r -oMAJ:MIN "${device}" | tail -1 | tr ':' ' ') \
+            || fatal "fetch MAJ:MIN from lsblk failed"
+        # ${majmin} holds "MAJOR MINOR" and has to split into two arguments.
+        # shellcheck disable=SC2086
+        mknod -m 0660 "${partition}" b ${majmin} || fatal "mknod failed"
+    fi
+
+    local mkfs_opts=(-F -b 4096 -O '^metadata_csum,uninit_bg' -E lazy_journal_init=0,lazy_itable_init=0)
+    if [ -z "${EXT4_NO_JOURNAL+x}" ]; then
+        echo "EXT4 WITH JOURNAL"
+    else
+        echo "EXT4 WITHOUT JOURNAL"
+        mkfs_opts+=(-O '^has_journal')
+    fi
+    mkfs.ext4 "${mkfs_opts[@]}" "${partition}" || fatal "mkfs.ext4 failed"
+
+    # Use the partition node resolved (or created) above for every step, so
+    # the node that was checked is the node that gets mounted.
+    rm -rf "${WORKDIR}/mntpnt"
+    mkdir "${WORKDIR}/mntpnt"
+    mount "${partition}" "${WORKDIR}/mntpnt" || fatal "mount failed"
+    tune2fs -m 0 "${partition}"
+
+    #---------------------------------------------------------------------------
+    # Copy files from rootfs into img
+    #---------------------------------------------------------------------------
+    tar -xaf "$1" -C "${WORKDIR}/mntpnt"
+    sync
+    umount "${WORKDIR}/mntpnt"
+    fsck.ext4 -D -y "${partition}"
+    losetup -d "$device"
+    rm -rf "${WORKDIR}"
+
+    popd > /dev/null
+
+}
+
+if [ "$EUID" -ne 0 ]; then
+    echo "This script should be run as root. Re-run as root"
+    # exec makes the exit status of the privileged run our own exit status,
+    # and the quoting keeps arguments with whitespace intact.
+    exec sudo "$0" "$@"
+fi
+
+dn=$(dirname "$0")
+# shellcheck source=src/cmdlib.sh
+. "${dn}"/cmdlib.sh
+
+# This script is used for creating a raw image file containing only rootfs.
+# This is useful if the target for the tree is some kind of container which is not +# required to have its own kernel. + +image_type=raw-rootfs + +print_help() { + cat 1>&2 </dev/null || :) +if [ "${rev_parsed}" != "${commit}" ]; then + # Probably an older commit or tmp/ was wiped. Let's extract it to a separate + # temporary repo (not to be confused with ${tmprepo}...) so we can feed it + # as a ref (if not temp) to create_disk. + echo "Cache for build ${build} is gone" + echo "Importing commit ${commit} into temporary OSTree repo" + mkdir -p tmp/repo + commit_tar_name=$(jq -r .images.ostree.path < "${builddir}/meta.json") + if [ "${commit_tar_name}" = null ]; then + commit_tar_name=ostree-commit.tar + fi + tar -C tmp/repo -xf "${builddir}/${commit_tar_name}" + ostree_repo=$PWD/tmp/repo +fi + +image_format=img + +tarname=${name}-${build}-${image_type}.${basearch}.tar +tarpath=${builddir}/${tarname} + +img=${name}-${build}-${image_type}.${basearch}.${image_format} +path=${PWD}/${img} + +yaml2json() { + python3 -c 'import sys, json, yaml; json.dump(yaml.safe_load(sys.stdin), sys.stdout)' < "$1" > "$2" +} + +# Convert the image.yaml to JSON so that it can be more easily parsed +# by the shell script in create_disk.sh. 
+yaml2json "$configdir/image.yaml" image-config.json
+yaml2json "/usr/lib/coreos-assembler/image-default.yaml" image-default.json
+# Combine with the defaults
+jq -s add image-default.json image-config.json > image-configured.json
+
+# Generate the JSON describing the disk we want to build. jq does the string
+# escaping, so values containing quotes or backslashes still yield valid JSON.
+jq -n \
+    --arg osname "${name}" \
+    --arg buildid "${build}" \
+    --arg imgid "${img}" \
+    --arg commit "${commit}" \
+    --arg ref "${ref}" \
+    --arg repo "${ostree_repo}" \
+    '{
+        "osname": $osname,
+        "buildid": $buildid,
+        "imgid": $imgid,
+        "ostree-commit": $commit,
+        "ostree-ref": $ref,
+        "ostree-repo": $repo
+    }' > image-dynamic.json
+jq -s add image-configured.json image-dynamic.json > image.json
+
+# The following commands used to be run inside a vm
+# ===================== taken from create_disk.sh
+config="$(pwd)"/image.json
+# Print the value stored under key $1 in ${config}.
+# jq -e makes the call fail when the value is null or false, which includes a
+# missing key.
+getconfig() {
+    local k=$1
+    jq -re --arg k "$k" '.[$k]' < "${config}"
+}
+# Return a configuration value, or default if not set
+#   $1  key to look up in ${config}
+#   $2  value printed when the key is missing, null or false
+getconfig_def() {
+    local k=$1
+    local default=$2
+    jq -re --arg k "$k" --arg default "$default" '.[$k] // $default' < "${config}"
+}
+rootfs_type=$(getconfig rootfs)
+case "${rootfs_type}" in
+    ext4) ;;
+    # we may support other file system xfs|ext4verity|btrfs in the future
+    *) echo "Invalid rootfs type: ${rootfs_type}" 1>&2; exit 1;;
+esac
+
+ostree=$(getconfig "ostree-repo")
+commit=$(getconfig "ostree-commit")
+ref=$(getconfig "ostree-ref")
+imgsize=$(getconfig "size")
+
+# if imgsize is a number (may contain decimal), assume its unit is Gigabyte
+[[ $imgsize =~ ^[0-9.]*$ ]] && imgsize="${imgsize}G"
+echo "Image size: $imgsize"
+
+# We support not setting a remote name (used by RHCOS)
+remote_name=$(getconfig_def "ostree-remote" "")
+os_name=$(getconfig "osname")
+buildid=$(getconfig "buildid")
+imgid=$(getconfig "imgid")
+
+rootfs=/tmp/rootfs
+mkdir -p "$rootfs"
+
+# ${rootfs:?} refuses to expand to an empty string, so this can never turn
+# into "rm -rf /*".
+rm -rf "${rootfs:?}"/*
+
+ostree admin init-fs --modern "$rootfs"
+# Not reachable while the case statement above only accepts ext4.
+if [ "${rootfs_type}" = "ext4verity" ]; then
+    ostree config --repo="$rootfs/ostree/repo" set ex-fsverity.required 'true'
+fi
+time ostree pull-local --repo "$rootfs/ostree/repo" "$ostree" "$commit"
+if test -n "${remote_name}"; then
+    deploy_ref="${remote_name}:${ref}"
+    ostree refs --repo "$rootfs/ostree/repo" --create "${deploy_ref}" "${commit}"
+else
+    deploy_ref=$commit
+fi
+ostree admin os-init "$os_name" --sysroot "$rootfs"
+kargsargs=""
+# Sleep a few seconds to wait for ostree repo to be ready
+# IMPORTANT: For original create_disk.sh which runs a vm, here a sleep5 is
+# mandatory, otherwise some weird errors would occur.
+# sync; sleep 5; sync
+# ${kargsargs} stays unquoted on purpose: when empty it must expand to no
+# argument at all.
+# shellcheck disable=SC2086
+ostree admin deploy "${deploy_ref}" --sysroot "$rootfs" --os "$os_name" $kargsargs
+
+deploy_root="$rootfs/ostree/deploy/${os_name}/deploy/${commit}.0"
+test -d "${deploy_root}"
+var="$rootfs/ostree/deploy/${os_name}/var"
+rootfs_var_overrides="${workdir}/overrides/rootfs/var"
+# populate var during build stage, it helps to remove dracut systemd
+# services:
+#   ignition-ostree-populate-var.service
+for subdir in home roothome opt srv usrlocal mnt media; do
+    mkdir -p "$var/$subdir"
+done
+if [ -d "$rootfs_var_overrides" ]; then
+    # "/." copies the content of the directory, dotfiles included, and keeps
+    # working when the directory is empty (an unmatched "/*" glob makes cp
+    # fail and, under set -e, aborts the build).
+    cp -r "$rootfs_var_overrides/." "$var/"
+fi
+
+# create empty directory
+for dir in proc dev run; do
+    mkdir -p "$rootfs/$dir"
+done
+
+# Opt-in to https://github.com/ostreedev/ostree/pull/1767 AKA
+# https://github.com/ostreedev/ostree/issues/1265
+ostree config --repo "$rootfs/ostree/repo" set sysroot.readonly true
+touch "$var/ignition.firstboot"
+
+# TODO: this failed
+# chattr: Inappropriate ioctl for device while reading flags on /tmp/rootfs
+# chattr +i $rootfs
+
+pushd "$rootfs" > /dev/null
+
+hijack_on_rootfs
+
+# Files under ostree/deploy and ostree/repo are hard linked.
+# Since we only remove files in ostree/deploy, the actual data is still
+# present. So here we remove those "orphans"
+
+find ./ostree/repo/objects -links 1 -delete
+tar -cf "$tarpath" *
+
+tar_to_img "$tarpath" "$path" "$imgsize"
+
+popd > /dev/null
+# NOTE(review): "fstrim -a" trims every mounted filesystem that supports it,
+# not only the image built above -- confirm this is still wanted now that the
+# commands no longer run inside a throwaway vm.
+fstrim -a -v
+rm -rf "$rootfs"
+
+# ===================== originally end of create_disk.sh
+
+sha256=$(sha256sum_str < "${img}")
+# there's probably a jq one-liner for this...
+python3 -c "
+import sys, json
+j = json.load(sys.stdin)
+j['images']['${image_type}'] = {
+    'path': '${img}',
+    'sha256': '${sha256}',
+    'size': $(stat -c '%s' "${img}")
+}
+json.dump(j, sys.stdout, indent=4)
+" < "${builddir}/meta.json" | jq -s add > "meta.json.new"
+
+# and now the crucial bits
+cosa meta --workdir "${workdir}" --build "${build}" --artifact "${image_type}" --artifact-json "$(readlink -f meta.json.new)"
+/usr/lib/coreos-assembler/finalize-artifact "${img}" "${builddir}/${img}"
+
+
+# clean up the temporary build directory
+rm -rf "${tmp_builddir}"
+
+# compiled from https://github.com/kata-containers/kata-containers/blob/main/tools/osbuilder/image-builder/nsdax.gpl.c
+SCRIPT=$(realpath "$0")
+SCRIPTPATH=$(dirname "$SCRIPT")
+# Build the helper once; the message names the file that is really compiled.
+if [ ! -f /usr/local/bin/nsdax ]; then
+    echo "Compiling $SCRIPTPATH/img_tools/nsdax.c to /usr/local/bin/nsdax"
+    gcc "$SCRIPTPATH/img_tools/nsdax.c" -o /usr/local/bin/nsdax
+fi
+
+# Layout parameters; create_disk and set_dax_header append an "M" unit suffix
+# to each of them.
+rootfs_start=1
+dax_header_sz=2
+rootfs_end=-1
+dax_alignment=0
+
+# Create a raw image file with an msdos label and one primary partition.
+#
+#   $1  path of the image file to create
+#   $2  image size, passed to qemu-img as is
+#   $3  filesystem type given to parted's mkpart
+#   $4  partition start; "M" is appended before it is handed to parted
+#
+# The partition ends at ${rootfs_end}M.
+create_disk() {
+    local image="$1"
+    local img_size="$2"
+    local fs_type="$3"
+    local part_start="$4"
+
+    echo "Creating raw disk with size ${img_size}"
+    qemu-img create -q -f raw "${image}" "${img_size}"
+    echo "Image file created"
+
+    # Kata runtime expect an image with just one partition
+    # The partition is the rootfs content
+    echo "Creating partitions"
+    parted -s -a optimal "${image}" -- \
+        mklabel msdos \
+        mkpart primary "${fs_type}" "${part_start}"M "${rootfs_end}"M
+
+    echo "Partitions created"
+}
+
+# Prepend a DAX header to an existing image, replacing the image in place.
+#
+#   $1  image to convert; overwritten with the result
+#   $2  size of the temporary header image, passed to qemu-img as is
+#   $3  filesystem type given to parted's mkpart
+#   $4  path of the nsdax binary that writes the DAX metadata
+#
+# The result is the first ${dax_header_sz}M of a freshly partitioned header
+# image (MBR #1 + DAX metadata) followed by the complete original image
+# (MBR #2 + rootfs).
+set_dax_header() {
+    local image="$1"
+    local img_size="$2"
+    local fs_type="$3"
+    local nsdax_bin="$4"
+
+    # rootfs start + DAX header size
+    local rootfs_offset=$((rootfs_start + dax_header_sz))
+    local header_image="${image}.header"
+    local dax_image="${image}.dax"
+    rm -f "${dax_image}" "${header_image}"
+
+    create_disk "${header_image}" "${img_size}" "${fs_type}" "${rootfs_offset}"
+
+    local dax_header_bytes=$((dax_header_sz * 1024 * 1024))
+    local dax_alignment_bytes=$((dax_alignment * 1024 * 1024))
+    echo "Set DAX metadata"
+    # Set metadata header
+    # Issue: https://github.com/kata-containers/osbuilder/issues/240
+    "${nsdax_bin}" "${header_image}" "${dax_header_bytes}" "${dax_alignment_bytes}"
+    sync
+
+    touch "${dax_image}"
+    # Copy MBR #1 + DAX metadata
+    dd if="${header_image}" of="${dax_image}" bs="${dax_header_sz}M" count=1
+    # Copy MBR #2 + Rootfs
+    dd if="${image}" of="${dax_image}" oflag=append conv=notrunc
+    # final image
+    mv "${dax_image}" "${image}"
+    sync
+
+    rm -f "${dax_image}" "${header_image}"
+}
+
+# calculate final image size, it should be {size of img file} + the DAX header
+# first calculate {size of img file in MB}
+IMG_FILE_SIZE=$(($(stat --printf="%s" "${builddir}/${img}")/1048576))
+set_dax_header "${builddir}/${img}" "$((IMG_FILE_SIZE + dax_header_sz))M" ext4 /usr/local/bin/nsdax
+
+echo "Successfully generated: ${img}"
diff --git a/src/cmd-prep b/src/cmd-prep
new file mode 100755
index 0000000000000000000000000000000000000000..a6431695f466df7e59de7efb6c4617534571b6b4
--- /dev/null
+++ b/src/cmd-prep
@@ -0,0 +1,146 @@
+#!/usr/bin/env bash
+# The shebang has to be the very first line of the file to take effect.
+#
+# This cmd is used for downloading packages and binaries which are
+# provided by the teams other than BaseOS
+#
+# Make use of ${workdir}/overrides to implement local overrides, see:
+# [Working with CoreOS Assembler](docs/working.md)
+
+set -euox pipefail
+
+dn=$(dirname "$0")
+# shellcheck source=src/cmdlib.sh
+. "${dn}"/cmdlib.sh
+
+# Print the usage text on stderr.
+print_help() {
+    cat 1>&2 <<'EOF'
+Usage: coreos-assembler prep --help
+       coreos-assembler prep [--mode=mode] [--image-size=size(G/M)] [--kata-agent-branch=]
+
+  Accepted mode arguments:
+
+  - rootfs (build for rootfs)
+
+  Use the --image-size to specify the size of the target image, e.g. 1G/200M/.., assume its unit
+  is Gigabyte if only a number is specified.
+
+  Default image-size is 10GB.
+EOF
+}
+
+DEFAULT_KATA_AGENT_BRANCH="3.0.0"
+
+KATA_AGENT_BRANCH=
+MODE="rootfs"
+IMG_SIZE=
+rc=0
+# "$@" is quoted so that arguments containing whitespace reach getopt intact.
+options=$(getopt --options h --longoptions help,mode:,image-size:,kata-agent-branch: -- "$@") || rc=$?
+# getopt rejected the command line: show the usage text and give up.
+[ "$rc" -eq 0 ] || {
+    print_help
+    exit 1
+}
+eval set -- "$options"
+while true; do
+    case $1 in
+        -h | --help)
+            print_help
+            exit 0
+            ;;
+        --kata-agent-branch)
+            shift
+            KATA_AGENT_BRANCH=$1
+            ;;
+        --mode)
+            shift
+            MODE=$1
+            ;;
+        --image-size)
+            shift
+            IMG_SIZE=$1
+            ;;
+        --)
+            shift
+            break
+            ;;
+        *)
+            fatal "$0: unrecognized option: $1"
+            exit 1
+            ;;
+    esac
+    shift
+done
+
+workdir="$(pwd)"
+manifest=${workdir}/src/config/manifest.yaml
+image_config=${workdir}/src/config/image.yaml
+
+rpm_overrides=${workdir}/overrides/rpm
+rootfs_overrides=${workdir}/overrides/rootfs
+
+# Start from an empty rootfs override tree on every run.
+sudo rm -rf "$rootfs_overrides"
+mkdir -p "$rpm_overrides"
+mkdir -p "$rootfs_overrides"
+
+# Point the "include:" line of the manifest at the manifest of the chosen mode.
+case $MODE in
+    rootfs)
+        sed -i '/^include: manifests*/c\include: manifests\/'"$MODE"'.json' "$manifest"
+        ;;
+    *)
+        fatal "$0: unrecognized mode: $MODE, supported mode is rootfs"
+        exit 1
+        ;;
+esac
+
+# Rewrite the "size:" line of image.yaml when --image-size was given; a bare
+# number is taken as Gigabytes.
+if [ -n "$IMG_SIZE" ]; then
+    [[ $IMG_SIZE =~ ^[0-9.]*$ ]] && IMG_SIZE="${IMG_SIZE}G"
+    sed -i '/^size:*/c\size: '"$IMG_SIZE"'' "$image_config"
+fi
+
+# Set up the Go/Rust toolchain environment used to build kata-agent.
+prepare_env() {
+    echo -e "\n[-] Preparing env ..."
+    export PATH=/usr/local/go/bin:/opt/rust/bin:$PATH
+    export RUSTUP_HOME=/opt/rust
+    export CARGO_HOME=/opt/rust
+
+    # NOTE(review): CARGO_HOME points at /opt/rust while the registry mirror
+    # is configured under ~/.cargo -- confirm cargo really picks this file up.
+    mkdir -p ~/.cargo
+    cat << ! > ~/.cargo/config
+[source.crates-io]
+replace-with = 'ustc'
+
+[source.ustc]
+registry = "git://mirrors.ustc.edu.cn/crates.io-index"
+!
+    rustup default nightly
+    # NOTE(review): the target is fixed to x86_64 while the binary is later
+    # copied from an ${arch}-specific directory -- confirm for other hosts.
+    rustup target add x86_64-unknown-linux-musl
+}
+
+# Clone kata-containers, build the agent and install the binary into the
+# rootfs overrides as usr/bin/kata-agent.
+#
+# Uses ${KATA_AGENT_BRANCH}, falling back to ${DEFAULT_KATA_AGENT_BRANCH}
+# when it is empty.
+build_and_install_kata_agent() {
+    tmpdir=$(mktemp -d)
+    # ${arch} is not set anywhere in this script; fall back to the machine
+    # name so that "set -u" does not abort when cmdlib.sh does not define it.
+    local agent_arch="${arch:-$(uname -m)}"
+    if [ -z "$KATA_AGENT_BRANCH" ]; then
+        RED='\033[0;31m'
+        NC='\033[0m'
+        echo -e "${RED}[!] Kata-agent branch not assigned, select ${DEFAULT_KATA_AGENT_BRANCH} ${NC}"
+        KATA_AGENT_BRANCH=$DEFAULT_KATA_AGENT_BRANCH
+    fi
+    echo -e "\n[-] Building kata-agent ..."
+    git clone -b "$KATA_AGENT_BRANCH" https://gitee.com/anolis/kata-containers.git "$tmpdir/kata-agent" --depth=1 || \
+        fatal "kata-agent repo clone failed"
+    pushd "$tmpdir/kata-agent/src/agent" > /dev/null
+    git fetch origin "$KATA_AGENT_BRANCH"
+    git checkout "$KATA_AGENT_BRANCH"
+    make || fatal "build kata-agent failed"
+    popd > /dev/null
+
+    mkdir -p "${rootfs_overrides}/usr/bin"
+    cp -f "$tmpdir/kata-agent/src/agent/target/${agent_arch}-unknown-linux-musl/release/kata-agent" "${rootfs_overrides}/usr/bin/kata-agent"
+    rm -rf "$tmpdir"
+}
+
+case $MODE in
+    rootfs)
+        prepare_env
+        build_and_install_kata_agent
+        ;;
+    *)
+        ;;
+esac
diff --git a/src/img_tools/nsdax.c b/src/img_tools/nsdax.c
new file mode 100644
index 0000000000000000000000000000000000000000..10da68918d7f597c8bbfa7259d7d8c09d1472960
--- /dev/null
+++ b/src/img_tools/nsdax.c
@@ -0,0 +1,171 @@
+/*
+ * Copyright(c) 2013-2019 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+/*
+ * NOTE(review): the header names in the include directives below were
+ * missing from the reviewed copy of this patch and were filled in from the
+ * symbols this file uses -- confirm them against upstream nsdax.gpl.c.
+ */
+#include <stdio.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#define __KERNEL__
+#include <linux/types.h>
+#include <linux/byteorder/little_endian.h>
+
+/*
+  Next types, definitions and functions were copied from kernel 4.19.24 source
+  code, specifically from nvdimm driver
+*/
+
+#define PFN_SIG_LEN 16
+#define PFN_SIG "NVDIMM_PFN_INFO"
+#define SZ_4K 0x00001000
+
+typedef __u16 u16;
+typedef __u8 u8;
+typedef __u64 u64;
+typedef __u32 u32;
+
+enum nd_pfn_mode {
+	PFN_MODE_NONE,
+	PFN_MODE_RAM,
+	PFN_MODE_PMEM,
+};
+
+/* The members add up to SZ_4K bytes; checksum is the last field. */
+struct nd_pfn_sb {
+	u8 signature[PFN_SIG_LEN];
+	u8 uuid[16];
+	u8 parent_uuid[16];
+	__le32 flags;
+	__le16 version_major;
+	__le16 version_minor;
+	__le64 dataoff; /* relative to namespace_base + start_pad */
+	__le64 npfns;
+	__le32 mode;
+	/* minor-version-1 additions for section alignment */
+	__le32 start_pad;
+	__le32 end_trunc;
+	/* minor-version-2 record the base alignment of the mapping */
+	__le32 align;
+	u8 padding[4000];
+	__le64 checksum;
+};
+
+/* Generic view of an SZ_4K info block: opaque payload plus trailing checksum. */
+struct nd_gen_sb {
+	char reserved[SZ_4K - 8];
+	__le64 checksum;
+};
+
+
+/*
+ * nd_fletcher64: compute a fletcher64 checksum over a buffer
+ *
+ * @addr: start of the buffer, read as an array of 32-bit words
+ * @len:  size of the buffer in bytes; trailing bytes that do not fill a
+ *        whole 32-bit word are ignored
+ * @le:   when true every word is converted from little-endian first,
+ *        otherwise the words are used as stored
+ *
+ * Returns the running sum of sums in the upper 32 bits and the plain sum of
+ * the words in the lower 32 bits.
+ */
+u64 nd_fletcher64(void *addr, size_t len, bool le)
+{
+	const u32 *buf = addr;
+	const size_t nwords = len / sizeof(u32);
+	u32 lo32 = 0;
+	u64 hi32 = 0;
+	size_t i;
+
+	/* size_t index: matches the type of the bound, no sign conversion */
+	for (i = 0; i < nwords; i++) {
+		lo32 += le ? __le32_to_cpu((__le32) buf[i]) : buf[i];
+		hi32 += lo32;
+	}
+
+	return hi32 << 32 | lo32;
+}
+
+
+/*
+ * nd_sb_checksum: compute checksum for a generic info block
+ *
+ * Returns a fletcher64 checksum of everything in the given info block
+ * except the last field (since that's where the checksum lives).
+ */
+u64 nd_sb_checksum(struct nd_gen_sb *nd_gen_sb)
+{
+	u64 sum;
+	__le64 sum_save;
+
+	/* The checksum field takes part in the sum as zero; restore it after. */
+	sum_save = nd_gen_sb->checksum;
+	nd_gen_sb->checksum = 0;
+	sum = nd_fletcher64(nd_gen_sb, sizeof(*nd_gen_sb), 1);
+	nd_gen_sb->checksum = sum_save;
+	return sum;
+}
+
+
+/* show_usage: print the command line synopsis; name is the program name. */
+void show_usage(const char* name) {
+	printf("Usage: %s IMAGE_FILE DATA_OFFSET ALIGNMENT\n", name);
+	printf("DATA_OFFSET and ALIGNMENT must be in bytes\n");
+}
+
+/*
+ * parse_bytes: convert a decimal, non-negative byte count
+ *
+ * Returns 0 and stores the value in *out when the whole string is a
+ * non-negative number; returns -1 when it is empty, negative or followed by
+ * anything that is not part of the number.
+ */
+static int parse_bytes(const char *text, long int *out)
+{
+	char *end = NULL;
+	const long int value = strtol(text, &end, 10);
+
+	if (end == text || *end != '\0' || value < 0)
+		return -1;
+
+	*out = value;
+	return 0;
+}
+
+/*
+ * Write an NVDIMM pfn info block into IMAGE_FILE at offset SZ_4K.
+ *
+ * argv[1] is the image file, argv[2] the data offset and argv[3] the
+ * alignment, both in bytes. Returns 0 once the block is completely written
+ * and the file closed, -1 on any error.
+ */
+int main(int argc, char *argv[]) {
+	if (argc != 4) {
+		show_usage(argv[0]);
+		return -1;
+	}
+
+	const char* img_path = argv[1];
+
+	long int data_offset = 0;
+	if (parse_bytes(argv[2], &data_offset) != 0) {
+		fprintf(stderr, "Couldn't convert string '%s' to int\n", argv[2]);
+		show_usage(argv[0]);
+		return -1;
+	}
+
+	long int alignment = 0;
+	if (parse_bytes(argv[3], &alignment) != 0) {
+		fprintf(stderr, "Couldn't convert string '%s' to int\n", argv[3]);
+		show_usage(argv[0]);
+		return -1;
+	}
+	/* The align field is 32 bits wide; refuse values that would be cut. */
+	if ((unsigned long int) alignment > 0xFFFFFFFFUL) {
+		fprintf(stderr, "ALIGNMENT '%s' does not fit in 32 bits\n", argv[3]);
+		return -1;
+	}
+
+	printf("Opening file '%s'\n", img_path);
+	int fd = open(img_path, O_WRONLY);
+	if (fd == -1) {
+		/* perror appends ": <reason>" itself */
+		perror("open");
+		return -1;
+	}
+
+	struct nd_pfn_sb sb = { 0 };
+
+	/* "%s" keeps the signature from being interpreted as a format string */
+	snprintf((char*)sb.signature, PFN_SIG_LEN, "%s", PFN_SIG);
+	/* The fields are little-endian on disk, whatever the host byte order. */
+	sb.mode = __cpu_to_le32(PFN_MODE_RAM);
+	sb.align = __cpu_to_le32((u32) alignment);
+	sb.dataoff = __cpu_to_le64((u64) data_offset);
+	sb.version_minor = __cpu_to_le16(2);
+
+	// checksum must be calculated at the end
+	sb.checksum = __cpu_to_le64(nd_sb_checksum((struct nd_gen_sb*) &sb));
+
+	// NVDIMM driver: SZ_4K is the namespace-relative starting offset
+	if (lseek(fd, SZ_4K, SEEK_SET) == (off_t) -1) {
+		perror("lseek");
+		close(fd);
+		return -1;
+	}
+
+	printf("Writing metadata\n");
+	const ssize_t written = write(fd, &sb, sizeof(sb));
+	if (written == -1) {
+		perror("write");
+		close(fd);
+		return -1;
+	}
+	/* A partial info block is as useless as none: report it as a failure. */
+	if ((size_t) written != sizeof(sb)) {
+		fprintf(stderr, "write: short write (%zd of %zu bytes)\n",
+			written, sizeof(sb));
+		close(fd);
+		return -1;
+	}
+
+	if (close(fd) == -1) {
+		perror("close");
+		return -1;
+	}
+	printf("OK!\n");
+
+	return 0;
+}
\ No newline at end of file