diff --git a/docs/en/docs/cluster_deployment/isulad+k8s/_toc.yaml b/docs/en/docs/cluster_deployment/isulad+k8s/_toc.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5e0257503d15412f633c002ceffc7c0654d8bb3c --- /dev/null +++ b/docs/en/docs/cluster_deployment/isulad+k8s/_toc.yaml @@ -0,0 +1,12 @@ +label: iSulad + Kubernetes Cluster Deployment Guide +isManual: true +description: Deploy a Kubernetes cluster using the iSulad container engine on openEuler. +sections: + - label: Overview + href: ./overview.md + - label: iSulad + Kubernetes Environment Deployment + href: ./isulad+k8s_environment_deploy.md + - label: GitLab Deployment + href: ./gitlab_deploy.md + - label: GitLab Runner Deployment + href: ./gitlab_runner_deploy.md diff --git a/docs/en/docs/cluster_deployment/isulad+k8s/figures/1.view-required-images.png b/docs/en/docs/cluster_deployment/isulad+k8s/figures/1.view-required-images.png new file mode 100644 index 0000000000000000000000000000000000000000..74cdae5726cec83d5d74b0b8bd01694fd388e342 Binary files /dev/null and b/docs/en/docs/cluster_deployment/isulad+k8s/figures/1.view-required-images.png differ diff --git "a/docs/en/docs/cluster_deployment/isulad+k8s/figures/10.\350\207\252\345\256\232\344\271\211\351\241\271\347\233\256\345\220\215\347\247\260.jpg" "b/docs/en/docs/cluster_deployment/isulad+k8s/figures/10.\350\207\252\345\256\232\344\271\211\351\241\271\347\233\256\345\220\215\347\247\260.jpg" new file mode 100644 index 0000000000000000000000000000000000000000..2062c62a6aae63e9700f6ceedf73daea0876eb44 Binary files /dev/null and "b/docs/en/docs/cluster_deployment/isulad+k8s/figures/10.\350\207\252\345\256\232\344\271\211\351\241\271\347\233\256\345\220\215\347\247\260.jpg" differ diff --git "a/docs/en/docs/cluster_deployment/isulad+k8s/figures/11.\350\256\276\347\275\256-cicd-runner.png" "b/docs/en/docs/cluster_deployment/isulad+k8s/figures/11.\350\256\276\347\275\256-cicd-runner.png" new file mode 100644 index 0000000000000000000000000000000000000000..838cf1b6625439e44c7d09162fb71edbe1eaf3d3 Binary files /dev/null and "b/docs/en/docs/cluster_deployment/isulad+k8s/figures/11.\350\256\276\347\275\256-cicd-runner.png" differ diff --git "a/docs/en/docs/cluster_deployment/isulad+k8s/figures/12.\350\256\260\344\270\213runner\345\234\260\345\235\200\344\270\216\344\273\244\347\211\214.jpg" "b/docs/en/docs/cluster_deployment/isulad+k8s/figures/12.\350\256\260\344\270\213runner\345\234\260\345\235\200\344\270\216\344\273\244\347\211\214.jpg" new file mode 100644 index 0000000000000000000000000000000000000000..d3aaaf9ff4b66e4b536bc0afaa33c121bc6b53f6 Binary files /dev/null and "b/docs/en/docs/cluster_deployment/isulad+k8s/figures/12.\350\256\260\344\270\213runner\345\234\260\345\235\200\344\270\216\344\273\244\347\211\214.jpg" differ diff --git a/docs/en/docs/cluster_deployment/isulad+k8s/figures/13.view-cert-config.png b/docs/en/docs/cluster_deployment/isulad+k8s/figures/13.view-cert-config.png new file mode 100644 index 0000000000000000000000000000000000000000..8e9ce44af5a01670add1b8b2f5a7223a8bd0f35d Binary files /dev/null and b/docs/en/docs/cluster_deployment/isulad+k8s/figures/13.view-cert-config.png differ diff --git a/docs/en/docs/cluster_deployment/isulad+k8s/figures/14.import-cert.png b/docs/en/docs/cluster_deployment/isulad+k8s/figures/14.import-cert.png new file mode 100644 index 0000000000000000000000000000000000000000..2a1fdb24d6f5c1c9d44cbce08276289adc5c876c Binary files /dev/null and 
b/docs/en/docs/cluster_deployment/isulad+k8s/figures/14.import-cert.png differ diff --git a/docs/en/docs/cluster_deployment/isulad+k8s/figures/15.register-gitlab-runner.jpg b/docs/en/docs/cluster_deployment/isulad+k8s/figures/15.register-gitlab-runner.jpg new file mode 100644 index 0000000000000000000000000000000000000000..896f13bdc6411b719283f30d9973973950f27a1c Binary files /dev/null and b/docs/en/docs/cluster_deployment/isulad+k8s/figures/15.register-gitlab-runner.jpg differ diff --git "a/docs/en/docs/cluster_deployment/isulad+k8s/figures/16.web\347\253\257\345\267\262\345\212\240\345\205\245_LI.jpg" "b/docs/en/docs/cluster_deployment/isulad+k8s/figures/16.web\347\253\257\345\267\262\345\212\240\345\205\245_LI.jpg" new file mode 100644 index 0000000000000000000000000000000000000000..fd8ae15ef7bce7120c7ba24b6240b33dbcf610b8 Binary files /dev/null and "b/docs/en/docs/cluster_deployment/isulad+k8s/figures/16.web\347\253\257\345\267\262\345\212\240\345\205\245_LI.jpg" differ diff --git a/docs/en/docs/cluster_deployment/isulad+k8s/figures/17.png b/docs/en/docs/cluster_deployment/isulad+k8s/figures/17.png new file mode 100644 index 0000000000000000000000000000000000000000..86f90a67185f532b362f4710ce8f7615cf40c9e1 Binary files /dev/null and b/docs/en/docs/cluster_deployment/isulad+k8s/figures/17.png differ diff --git a/docs/en/docs/cluster_deployment/isulad+k8s/figures/18.dns-config.png b/docs/en/docs/cluster_deployment/isulad+k8s/figures/18.dns-config.png new file mode 100644 index 0000000000000000000000000000000000000000..46b85396db34577b67679da759b6160ee707dec5 Binary files /dev/null and b/docs/en/docs/cluster_deployment/isulad+k8s/figures/18.dns-config.png differ diff --git "a/docs/en/docs/cluster_deployment/isulad+k8s/figures/19.CICD\347\225\214\351\235\242.png" "b/docs/en/docs/cluster_deployment/isulad+k8s/figures/19.CICD\347\225\214\351\235\242.png" new file mode 100644 index 0000000000000000000000000000000000000000..f8193e005253eb4a6bd036ec8afdf6566f9fd6d2 Binary files /dev/null and "b/docs/en/docs/cluster_deployment/isulad+k8s/figures/19.CICD\347\225\214\351\235\242.png" differ diff --git "a/docs/en/docs/cluster_deployment/isulad+k8s/figures/2.calico\351\205\215\347\275\256.png" "b/docs/en/docs/cluster_deployment/isulad+k8s/figures/2.calico\351\205\215\347\275\256.png" new file mode 100644 index 0000000000000000000000000000000000000000..d656f86d8ce5e110cf240a58e58b05b42aba8c15 Binary files /dev/null and "b/docs/en/docs/cluster_deployment/isulad+k8s/figures/2.calico\351\205\215\347\275\256.png" differ diff --git a/docs/en/docs/cluster_deployment/isulad+k8s/figures/20.yaml.png b/docs/en/docs/cluster_deployment/isulad+k8s/figures/20.yaml.png new file mode 100644 index 0000000000000000000000000000000000000000..9e7ec858abc2da72c7815a483f3011bf1225ec0f Binary files /dev/null and b/docs/en/docs/cluster_deployment/isulad+k8s/figures/20.yaml.png differ diff --git "a/docs/en/docs/cluster_deployment/isulad+k8s/figures/21.\346\265\201\346\260\264\347\272\277\347\212\266\346\200\201.png" "b/docs/en/docs/cluster_deployment/isulad+k8s/figures/21.\346\265\201\346\260\264\347\272\277\347\212\266\346\200\201.png" new file mode 100644 index 0000000000000000000000000000000000000000..fff7063f17fd203390302a6245a8ce4a16cf428e Binary files /dev/null and "b/docs/en/docs/cluster_deployment/isulad+k8s/figures/21.\346\265\201\346\260\264\347\272\277\347\212\266\346\200\201.png" differ diff --git a/docs/en/docs/cluster_deployment/isulad+k8s/figures/3.png b/docs/en/docs/cluster_deployment/isulad+k8s/figures/3.png 
new file mode 100644 index 0000000000000000000000000000000000000000..7394b5f21821ce8d352c2f935c3ea3e490dc0519 Binary files /dev/null and b/docs/en/docs/cluster_deployment/isulad+k8s/figures/3.png differ diff --git a/docs/en/docs/cluster_deployment/isulad+k8s/figures/4.gitlab-entrance.jpg b/docs/en/docs/cluster_deployment/isulad+k8s/figures/4.gitlab-entrance.jpg new file mode 100644 index 0000000000000000000000000000000000000000..d3eb0d59d6dee5051470621a4969651668687789 Binary files /dev/null and b/docs/en/docs/cluster_deployment/isulad+k8s/figures/4.gitlab-entrance.jpg differ diff --git a/docs/en/docs/cluster_deployment/isulad+k8s/figures/5.view-password.jpg b/docs/en/docs/cluster_deployment/isulad+k8s/figures/5.view-password.jpg new file mode 100644 index 0000000000000000000000000000000000000000..2e3902815108e9e91a07c382a4aae090b7cc6fe9 Binary files /dev/null and b/docs/en/docs/cluster_deployment/isulad+k8s/figures/5.view-password.jpg differ diff --git a/docs/en/docs/cluster_deployment/isulad+k8s/figures/6.logged-in.png b/docs/en/docs/cluster_deployment/isulad+k8s/figures/6.logged-in.png new file mode 100644 index 0000000000000000000000000000000000000000..5f4d2c2a9a8bf337263028e859e49499155920b0 Binary files /dev/null and b/docs/en/docs/cluster_deployment/isulad+k8s/figures/6.logged-in.png differ diff --git a/docs/en/docs/cluster_deployment/isulad+k8s/figures/7.image.png b/docs/en/docs/cluster_deployment/isulad+k8s/figures/7.image.png new file mode 100644 index 0000000000000000000000000000000000000000..26c811ae616d2fe86e7b8b75c78ef88aff83616b Binary files /dev/null and b/docs/en/docs/cluster_deployment/isulad+k8s/figures/7.image.png differ diff --git "a/docs/en/docs/cluster_deployment/isulad+k8s/figures/8.\346\226\260\345\273\272\351\241\271\347\233\256.png" "b/docs/en/docs/cluster_deployment/isulad+k8s/figures/8.\346\226\260\345\273\272\351\241\271\347\233\256.png" new file mode 100644 index 0000000000000000000000000000000000000000..0c56662e6bdedcc78dea32b6e2afada466193096 Binary files /dev/null and "b/docs/en/docs/cluster_deployment/isulad+k8s/figures/8.\346\226\260\345\273\272\351\241\271\347\233\256.png" differ diff --git "a/docs/en/docs/cluster_deployment/isulad+k8s/figures/9.\345\210\233\345\273\272\347\251\272\347\231\275\351\241\271\347\233\256.png" "b/docs/en/docs/cluster_deployment/isulad+k8s/figures/9.\345\210\233\345\273\272\347\251\272\347\231\275\351\241\271\347\233\256.png" new file mode 100644 index 0000000000000000000000000000000000000000..117da11ee289d88b51539e027606cc9da526936a Binary files /dev/null and "b/docs/en/docs/cluster_deployment/isulad+k8s/figures/9.\345\210\233\345\273\272\347\251\272\347\231\275\351\241\271\347\233\256.png" differ diff --git a/docs/en/docs/cluster_deployment/isulad+k8s/figures/public_sys-resources/icon-note.gif b/docs/en/docs/cluster_deployment/isulad+k8s/figures/public_sys-resources/icon-note.gif new file mode 100644 index 0000000000000000000000000000000000000000..6314297e45c1de184204098efd4814d6dc8b1cda Binary files /dev/null and b/docs/en/docs/cluster_deployment/isulad+k8s/figures/public_sys-resources/icon-note.gif differ diff --git a/docs/en/docs/cluster_deployment/isulad+k8s/gitlab_deploy.md b/docs/en/docs/cluster_deployment/isulad+k8s/gitlab_deploy.md new file mode 100644 index 0000000000000000000000000000000000000000..c284afc0df6316b573370144a33da9cd294cb32c --- /dev/null +++ b/docs/en/docs/cluster_deployment/isulad+k8s/gitlab_deploy.md @@ -0,0 +1,311 @@ +# GitLab Deployment + +## Description + +GitLab deployment is required in 
Scenario 1 (openEuler native deployment CI/CD based on GitLab CI/CD). In Scenario 2 (openEuler native development cluster managed by GitLab CI/CD), skip this step. + +## Preparing the Server + +Prepare a machine running openEuler 20.03 LTS or later versions. + +## Starting GitLab + +Copy the required YAML files to the **/home** directory and start the related pod. +> **Note**: The YAML files related to GitLab can be obtained from the GitLab official site. + +Example YAML files are as follows. Modify them as required. + +gitlab-redis.yaml + +```yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: redis + namespace: default + labels: + name: redis +spec: + selector: + matchLabels: + name: redis + template: + metadata: + name: redis + labels: + name: redis + spec: + containers: + - name: redis + image: 10.35.111.11:5000/redis:latest + imagePullPolicy: IfNotPresent + ports: + - name: redis + containerPort: 6379 + volumeMounts: + - mountPath: /var/lib/redis + name: data + livenessProbe: + exec: + command: + - redis-cli + - ping + initialDelaySeconds: 30 + timeoutSeconds: 5 + readinessProbe: + exec: + command: + - redis-cli + - ping + initialDelaySeconds: 5 + timeoutSeconds: 1 + volumes: + - name: data + emptyDir: {} + +--- +apiVersion: v1 +kind: Service +metadata: + name: redis + namespace: default + labels: + name: redis +spec: + ports: + - name: redis + port: 6379 + targetPort: redis + selector: + name: redis +``` + +gitlab-postgresql.yaml + +```yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: postgresql + namespace: default + labels: + name: postgresql +spec: + selector: + matchLabels: + name: postgresql + template: + metadata: + name: postgresql + labels: + name: postgresql + spec: + containers: + - name: postgresql + image: 10.35.111.11:5000/postgres:13.6 + imagePullPolicy: IfNotPresent + env: + - name: POSTGRES_HOST_AUTH_METHOD + value: trust + - name: DB_USER + value: gitlab + - name: DB_PASS + value: passw0rd + - name: DB_NAME + value: gitlab_production + - name: DB_EXTENSION + value: pg_trgm + ports: + - name: postgres + containerPort: 5432 + volumeMounts: + - mountPath: /var/lib/postgresql + name: data + livenessProbe: + exec: + command: + - pg_isready + - -h + - localhost + - -U + - postgres + initialDelaySeconds: 30 + timeoutSeconds: 5 + readinessProbe: + exec: + command: + - pg_isready + - -h + - localhost + - -U + - postgres + initialDelaySeconds: 5 + timeoutSeconds: 1 + volumes: + - name: data + emptyDir: {} + +--- +apiVersion: v1 +kind: Service +metadata: + name: postgresql + namespace: default + labels: + name: postgresql +spec: + ports: + - name: postgres + port: 5432 + targetPort: postgres + selector: + name: postgresql +``` + +gitlab.yaml + +```yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: gitlab + namespace: default + labels: + name: gitlab +spec: + selector: + matchLabels: + name: gitlab + template: + metadata: + name: gitlab + labels: + name: gitlab + spec: + containers: + - name: gitlab + image: 10.35.111.11:5000/yrzr/gitlab-ce-arm64v8:14.3.2-ce.0 + imagePullPolicy: IfNotPresent + env: + - name: TZ + value: Asia/Shanghai + - name: GITLAB_TIMEZONE + value: Beijing + - name: GITLAB_SECRETS_DB_KEY_BASE + value: long-and-random-alpha-numeric-string + - name: GITLAB_SECRETS_SECRET_KEY_BASE + value: long-and-random-alpha-numeric-string + - name: GITLAB_SECRETS_OTP_KEY_BASE + value: long-and-random-alpha-numeric-string + - name: GITLAB_ROOT_PASSWORD + value: admin321 + - name: GITLAB_ROOT_EMAIL + value: 517554016@qq.com + - name: 
GITLAB_HOST
+          value: git.qikqiak.com
+        - name: GITLAB_PORT
+          value: "80"
+        - name: GITLAB_SSH_PORT
+          value: "22"
+        - name: GITLAB_NOTIFY_ON_BROKEN_BUILDS
+          value: "true"
+        - name: GITLAB_NOTIFY_PUSHER
+          value: "false"
+        - name: GITLAB_BACKUP_SCHEDULE
+          value: daily
+        - name: GITLAB_BACKUP_TIME
+          value: "01:00"
+        - name: DB_TYPE
+          value: postgres
+        - name: DB_HOST
+          value: postgresql
+        - name: DB_PORT
+          value: "5432"
+        - name: DB_USER
+          value: gitlab
+        - name: DB_PASS
+          value: passw0rd
+        - name: DB_NAME
+          value: gitlab_production
+        - name: REDIS_HOST
+          value: redis
+        - name: REDIS_PORT
+          value: "6379"
+        ports:
+        - name: http
+          containerPort: 80
+        - name: ssh
+          containerPort: 22
+        volumeMounts:
+        - mountPath: /home/git/data
+          name: data
+        livenessProbe:
+          httpGet:
+            path: /
+            port: 80
+          initialDelaySeconds: 180
+          timeoutSeconds: 5
+        readinessProbe:
+          httpGet:
+            path: /
+            port: 80
+          initialDelaySeconds: 5
+          timeoutSeconds: 1
+      volumes:
+      - name: data
+        emptyDir: {}
+
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: gitlab
+  namespace: default
+  labels:
+    name: gitlab
+spec:
+  ports:
+    - name: http
+      port: 80
+      targetPort: http
+      nodePort: 30852
+    - name: ssh
+      port: 22
+      nodePort: 32353
+      targetPort: ssh
+  selector:
+    name: gitlab
+  type: NodePort
+```
+
+Start the containers.
+
+```shell
+kubectl apply -f gitlab-redis.yaml
+kubectl apply -f gitlab-postgresql.yaml
+kubectl apply -f gitlab.yaml
+```
+
+Check whether the GitLab pod is set up successfully.
+
+```shell
+kubectl get pod -A -owide
+```
+
+## Logging in to GitLab
+
+Log in to the GitLab Web UI. The address is the node IP address plus the configured NodePort (**30852** in the preceding example).
+
+![](figures/4.gitlab-entrance.jpg)
+The user name is **root**. The default password can be viewed in the password file in the container. Replace the pod name below with the actual name of the GitLab pod.
+
+```shell
+kubectl exec -it <gitlab-pod-name> -n default -- /bin/sh
+cat /etc/gitlab/initial_root_password
+```
+
+![](figures/5.view-password.jpg)
+
+- After you log in, this page is displayed:
+
+![](figures/6.logged-in.png)
diff --git a/docs/en/docs/cluster_deployment/isulad+k8s/gitlab_runner_deploy.md b/docs/en/docs/cluster_deployment/isulad+k8s/gitlab_runner_deploy.md
new file mode 100644
index 0000000000000000000000000000000000000000..eadbba9a497c4549ccaae713004b17f2aa2ab63d
--- /dev/null
+++ b/docs/en/docs/cluster_deployment/isulad+k8s/gitlab_runner_deploy.md
@@ -0,0 +1,178 @@
+# GitLab Runner Deployment and Testing
+
+## Images and Software
+
+The following table lists the images required during installation. The version numbers are for reference only.
+
+| Image | Version |
+|------------------------------------|----------|
+| gitlab/gitlab-runner | alpine-v14.4.0 |
+| gitlab/gitlab-runner-helper | x86_64-54944146 |
+
+> If the Internet is unavailable in the environment, download the required images in advance.
+> Download the images from the Docker Hub official website.
+
+## Using gitlab-runner.yaml to Start the Runner Container
+
+In the **gitlab-runner.yaml** file, change the image name. The following is an example of the **.yaml** file. Modify the file as required.
+
+```shell
+vim gitlab-runner.yaml
+```
+
+```yaml
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: gitlab-runner
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      name: gitlab-runner
+  template:
+    metadata:
+      labels:
+        name: gitlab-runner
+    spec:
+      containers:
+      - args:
+        - run
+        image: gitlab/gitlab-runner:alpine-v14.4.0
+        imagePullPolicy: IfNotPresent
+        name: gitlab-runner
+        volumeMounts:
+        - mountPath: /etc/gitlab-runner
+          name: config
+          readOnly: false
+        - mountPath: /etc/ssl/certs
+          name: cacerts
+          readOnly: true
+      restartPolicy: Always
+      volumes:
+      - hostPath:
+          path: /etc/gitlab-runner
+        name: config
+      - hostPath:
+          path: /etc/ssl/key
+        name: cacerts
+```
+
+Start the container.
+
+```shell
+# kubectl apply -f gitlab-runner.yaml
+# kubectl get pod -A -o wide
+```
+
+![image](figures/7.image.png)
+
+## Creating a Container Project That Uses User Certificates for Authentication in GitLab
+
+1. Click **New project**.
+
+2. Select **Create blank project**.
+
+3. Enter a name for the project.
+
+4. Choose **Settings** > **CI/CD** > **Runners** > **Expand**.
+
+5. Record the address and token for registering the runner.
+
+6. Import certificate files.
+
+   Check and generate the certificate files **admin.crt**, **admin.key**, and **ca.crt** on the master node.
+
+   - View certificate information.
+
+     ```shell
+     # cat /etc/kubernetes/admin.conf
+     ```
+
+     ![view-cert-config](figures/13.view-cert-config.png)
+
+   - Generate the encrypted **admin.crt**.
+
+     ```shell
+     # echo "${client-certificate-data}" | base64 -d > admin.crt
+     ```
+
+   - Generate the encrypted **admin.key**.
+
+     ```shell
+     # echo "${client-key-data}" | base64 -d > admin.key
+     ```
+
+   - Obtain the CA certificate on the master node.
+
+     ```shell
+     # cp /etc/kubernetes/pki/ca.crt ./
+     ```
+
+7. Import the three certificate files to the GitLab Runner container on the node where the runner is running.
+
+   > **Note**: To import the certificate files, check the node where the GitLab Runner is running, copy the certificate files to the node, and run the **isula cp** command to import the certificate files. The **isula cp** command can copy only one file at a time.
+
+   ```shell
+   # isula cp admin.crt [container ID]:[storage path]
+   # isula cp admin.key [container ID]:[storage path]
+   # isula cp ca.crt [container ID]:[storage path]
+   ```
+
+   ![import-cert](figures/14.import-cert.png)
+
+## Registering the GitLab Runner
+
+Perform the registration in the GitLab Runner container. Currently, interactive registration is used. Obtain the registration information from GitLab: choose **GitLab** > **Group runners** > **Settings** > **CI/CD** > **Runners**.
+
+![register-gitlab-runner](figures/15.register-gitlab-runner.jpg)
+
+Upload the prepared **gitlab-runner-helper** image to the private image repository in advance, go to the GitLab Runner container, and modify the configuration file.
+
+```shell
+# cd /etc/gitlab-runner
+# mkdir kubessl
+# cp /home/admin.crt /etc/gitlab-runner/kubessl
+# cp /home/ca.crt /etc/gitlab-runner/kubessl
+# cp /home/admin.key /etc/gitlab-runner/kubessl
+# vim /etc/gitlab-runner/config.toml
+```
+
+![](figures/17.png)
+
+## Adding the DNS Record of the GitLab Container to the Master Node
+
+1. View the IP address of the GitLab container.
+
+   ```shell
+   # kubectl get pods -Aowide
+   ```
+
+2. Add the IP address of the GitLab container to the Kubernetes DNS configuration file.
+
+   ```shell
+   # kubectl edit configmaps coredns -n kube-system
+   ```
+
+   ![dns](figures/18.dns-config.png)
+
+3. Restart the CoreDNS service.
+
+   ```shell
+   # kubectl scale deployment coredns -n kube-system --replicas=0
+   # kubectl scale deployment coredns -n kube-system --replicas=2
+   ```
+
+## GitLab Runner Testing
+
+Return to the GitLab web IDE and choose **CI/CD** > **Editor** > **Create new CI/CD pipeline**.
+
+- Write the YAML file as follows:
+
+![yaml](figures/20.yaml.png)
+
+- Choose **Pipelines** and view the status.
diff --git a/docs/en/docs/cluster_deployment/isulad+k8s/isulad+k8s_environment_deploy.md b/docs/en/docs/cluster_deployment/isulad+k8s/isulad+k8s_environment_deploy.md
new file mode 100644
index 0000000000000000000000000000000000000000..8a588eb0ef520ec459bc7d1a8b21eb0c2fe2b8f3
--- /dev/null
+++ b/docs/en/docs/cluster_deployment/isulad+k8s/isulad+k8s_environment_deploy.md
@@ -0,0 +1,382 @@
+# iSulad+Kubernetes Environment Deployment
+
+## Preparing Cluster Servers
+
+Prepare at least 3 machines running openEuler 20.03 LTS or later versions. The following table lists information about the machines.
+
+| Host Name | IP Address | OS | Role | Component |
+|-------|-------------|------------------------|----------|-----------|
+| lab1 | 197.xxx.xxx.xxx | openEuler 20.03 LTS SP3 | Control node | iSulad/Kubernetes |
+| lab2 | 197.xxx.xxx.xxx | openEuler 20.03 LTS SP3 | Worker node 1 | iSulad/Kubernetes |
+| lab3 | 197.xxx.xxx.xxx | openEuler 20.03 LTS SP3 | Worker node 2 | iSulad/Kubernetes |
+
+## Preparing Images and Software Packages
+
+The following table lists the software packages and images used in the example. The versions are for reference only.
+
+| Software | Version |
+|------------------------------------|----------|
+| iSulad | 2.0.17-2 |
+| kubernetes-client | 1.20.2-9 |
+| kubernetes-kubeadm | 1.20.2-9 |
+| kubernetes-kubelet | 1.20.2-9 |
+
+| Image | Version |
+|------------------------------------|----------|
+| k8s.gcr.io/kube-proxy | v1.20.2 |
+| k8s.gcr.io/kube-apiserver | v1.20.2 |
+| k8s.gcr.io/kube-controller-manager | v1.20.2 |
+| k8s.gcr.io/kube-scheduler | v1.20.2 |
+| k8s.gcr.io/etcd | 3.4.13-0 |
+| k8s.gcr.io/coredns | 1.7.0 |
+| k8s.gcr.io/pause | 3.2 |
+| calico/node | v3.14.2 |
+| calico/pod2daemon-flexvol | v3.14.2 |
+| calico/cni | v3.14.2 |
+| calico/kube-controllers | v3.14.2 |
+
+> If you perform the deployment in an environment without an Internet connection, download the software packages, dependencies, and images in advance.
+
+- Download the software packages and dependencies from the openEuler repositories.
+- Download the images from Docker Hub.
+
+## Modifying the hosts File
+
+1. Change the host name of the machine, for example, **lab1**.
+
+   ```shell
+   hostnamectl set-hostname lab1
+   sudo -i
+   ```
+
+2. Configure host name resolution by modifying the **/etc/hosts** file on each machine.
+
+   ```shell
+   vim /etc/hosts
+   ```
+
+3. Add the following content (IP address and host name) to the **hosts** file:
+
+   ```text
+   197.xxx.xxx.xxx lab1
+   197.xxx.xxx.xxx lab2
+   197.xxx.xxx.xxx lab3
+   ```
+
+## Preparing the Environment
+
+1. Disable the firewall.
+
+   ```shell
+   systemctl stop firewalld
+   systemctl disable firewalld
+   ```
+
+2. Disable SELinux.
+
+   ```shell
+   setenforce 0
+   ```
+
+3. Disable memory swapping.
+
+   ```shell
+   swapoff -a
+   sed -ri 's/.*swap.*/#&/' /etc/fstab
+   ```
+
+4. Configure the network and enable forwarding.
+
+   ```shell
+   $ cat > /etc/sysctl.d/kubernetes.conf <<EOF
+   net.bridge.bridge-nf-call-iptables = 1
+   net.bridge.bridge-nf-call-ip6tables = 1
+   net.ipv4.ip_forward = 1
+   EOF
+   $ modprobe br_netfilter
+   $ sysctl -p /etc/sysctl.d/kubernetes.conf
+   ```
+
+## Installing Kubernetes and iSulad
+
+1. Install the Kubernetes components and iSulad.
+
+   ```shell
+   yum install -y kubernetes-kubeadm kubernetes-kubelet kubernetes-client iSulad
+   ```
+
+2. Modify the iSulad configuration file **/etc/isulad/daemon.json** as required.
+
+   ```json
+   {
+       "registry-mirrors": [
+           "docker.io"
+       ],
+       "insecure-registries": [
+           "<registry address:port>"
+       ],
+       "pod-sandbox-image": "k8s.gcr.io/pause:3.2",
+       "native.umask": "normal",
+       "network-plugin": "cni",
+       "cni-bin-dir": "/opt/cni/bin",
+       "cni-conf-dir": "/etc/cni/net.d",
+       "image-layer-check": false,
+       "use-decrypted-key": true,
+       "insecure-skip-verify-enforce": false,
+       "cri-runtimes": {
+           "kata": "io.containerd.kata.v2"
+       }
+   }
+   ```
+
+3. Restart the isulad service.
+
+   ```shell
+   systemctl restart isulad
+   ```
+
+### Loading the iSulad Images
+
+1. Check the required system images.
+
+   ```shell
+   kubeadm config images list
+   ```
+
+   Pay attention to the versions in the output, as shown in the figure.
+   ![](figures/1.view-required-images.png)
+
+2. Pull the images using the `isula` command.
+
+   > [!NOTE]Note
+   >
+   > The versions in the following commands are for reference only. Use the versions in the preceding output.
+
+   ```shell
+   isula pull k8smx/kube-apiserver:v1.20.15
+   isula pull k8smx/kube-controller-manager:v1.20.15
+   isula pull k8smx/kube-scheduler:v1.20.15
+   isula pull k8smx/kube-proxy:v1.20.15
+   isula pull k8smx/pause:3.2
+   isula pull k8smx/coredns:1.7.0
+   isula pull k8smx/etcd:3.4.13-0
+   ```
+
+3. Modify the tags of the pulled images.
+
+   ```shell
+   isula tag k8smx/kube-apiserver:v1.20.15 k8s.gcr.io/kube-apiserver:v1.20.15
+   isula tag k8smx/kube-controller-manager:v1.20.15 k8s.gcr.io/kube-controller-manager:v1.20.15
+   isula tag k8smx/kube-scheduler:v1.20.15 k8s.gcr.io/kube-scheduler:v1.20.15
+   isula tag k8smx/kube-proxy:v1.20.15 k8s.gcr.io/kube-proxy:v1.20.15
+   isula tag k8smx/pause:3.2 k8s.gcr.io/pause:3.2
+   isula tag k8smx/coredns:1.7.0 k8s.gcr.io/coredns:1.7.0
+   isula tag k8smx/etcd:3.4.13-0 k8s.gcr.io/etcd:3.4.13-0
+   ```
+
+4. Remove the old images.
+
+   ```shell
+   isula rmi k8smx/kube-apiserver:v1.20.15
+   isula rmi k8smx/kube-controller-manager:v1.20.15
+   isula rmi k8smx/kube-scheduler:v1.20.15
+   isula rmi k8smx/kube-proxy:v1.20.15
+   isula rmi k8smx/pause:3.2
+   isula rmi k8smx/coredns:1.7.0
+   isula rmi k8smx/etcd:3.4.13-0
+   ```
+
+5. View the pulled images.
+
+   ```shell
+   isula images
+   ```
+
+### Installing crictl
+
+```shell
+yum install -y cri-tools
+```
+
+### Initializing the Master Node
+
+Initialize the master node.
+
+```shell
+kubeadm init --kubernetes-version v1.20.2 --cri-socket=/var/run/isulad.sock --pod-network-cidr=<pod network CIDR>
+```
+
+- `--kubernetes-version` indicates the current Kubernetes version.
+- `--cri-socket` specifies the container engine, that is, iSulad.
+- `--pod-network-cidr` specifies the IP address range of the pods.
+
+Enter the following commands as prompted:
+
+```shell
+mkdir -p $HOME/.kube
+sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
+sudo chown $(id -u):$(id -g) $HOME/.kube/config
+```
+
+After the initialization, copy the last two lines of the output and run the copied commands on the nodes to add them to the cluster. The commands can also be generated using the following command:
+
+```sh
+kubeadm token create --print-join-command
+```
+
+### Adding Nodes
+
+Paste the `kubeadm join` command generated on the master node to each node, adding `--cri-socket=/var/run/isulad.sock` before the `--discovery-token-ca-cert-hash` option.
+
+### Installing the Calico Network Plugin
+
+1. Pull the Calico images.
+
+   Configure the Calico network plugin on the master node and pull the required images on each node.
+
+   ```shell
+   isula pull calico/node:v3.14.2
+   isula pull calico/cni:v3.14.2
+   isula pull calico/kube-controllers:v3.14.2
+   isula pull calico/pod2daemon-flexvol:v3.14.2
+   ```
+
+2. Download the configuration file on the master node.
+
+   ```shell
+   wget https://docs.projectcalico.org/v3.14/manifests/calico.yaml
+   ```
+
+3. Create a pod.
+
+   ```shell
+   kubectl apply -f calico.yaml
+   ```
+
+   - If you want to remove the resources created from the configuration file, run the following command:
+
+     ```shell
+     kubectl delete -f calico.yaml
+     ```
+
+4. View pod information.
+
+   ```shell
+   kubectl get pod -A -o wide
+   ```
+
+### Checking the Master Node Information
+
+```shell
+kubectl get nodes -o wide
+```
+
+To reset a node, run the following command:
+
+```shell
+kubeadm reset
+```
diff --git a/docs/en/docs/cluster_deployment/isulad+k8s/overview.md b/docs/en/docs/cluster_deployment/isulad+k8s/overview.md
new file mode 100644
index 0000000000000000000000000000000000000000..d37bdb780473a170d33e24219c25a24fbc2d27ab
--- /dev/null
+++ b/docs/en/docs/cluster_deployment/isulad+k8s/overview.md
@@ -0,0 +1,23 @@
+# iSulad + Kubernetes Cluster Deployment Guide
+
+This document outlines the process of deploying a Kubernetes cluster with kubeadm on the openEuler OS, configuring a Kubernetes + iSulad environment, and setting up gitlab-runner. It serves as a comprehensive guide for creating a native openEuler development environment cluster.
+
+The guide addresses two primary scenarios:
+
+**Scenario 1**: A complete walkthrough for establishing a native openEuler development CI/CD pipeline from scratch using gitlab-ci.
+**Scenario 2**: Instructions for integrating an existing native openEuler development execution machine cluster into gitlab-ci.
+
+For Scenario 1, the following steps are required:
+
+1. Set up the Kubernetes + iSulad environment.
+2. Deploy GitLab.
+3. Install and test gitlab-runner.
+
+For Scenario 2, where a gitlab-ci platform is already available, the process involves:
+
+1. Configure the Kubernetes + iSulad environment.
+2. Install and test gitlab-runner.
+
+> [!NOTE] Note
+>
+> All operations described in this document must be executed with root privileges.
diff --git a/docs/en/docs/cluster_deployment/kubernetes/_toc.yaml b/docs/en/docs/cluster_deployment/kubernetes/_toc.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c0df73f29532891ee6e13368c9bf6bf6d27d6511
--- /dev/null
+++ b/docs/en/docs/cluster_deployment/kubernetes/_toc.yaml
@@ -0,0 +1,35 @@
+label: Kubernetes Cluster Deployment Guide
+isManual: true
+description: This guide offers essential instructions for deploying a reliable and
+  high-performance Kubernetes cluster on openEuler.
+sections: + - label: Overview + href: ./overview.md + - label: Preparing VMs + href: ./preparing_vms.md + - label: Manual Cluster Deployment + href: ./deploying_a_kubernetes_cluster_manually.md + sections: + - label: Installing the Kubernetes Software Package + href: ./installing_the_kubernetes_software_package.md + - label: Preparing Certificates + href: ./preparing_certificates.md + - label: Installing etcd + href: ./installing_etcd.md + - label: Deploying Components on the Control Plane + href: ./deploying_control_plane_components.md + - label: Deploying a Node Component + href: ./deploying_a_node_component.md + - label: Automatic Cluster Deployment + href: ./eggo_automatic_deployment.md + sections: + - label: Tool Introduction + href: ./eggo_tool_introduction.md + - label: Deploying a Cluster + href: ./eggo_deploying_a_cluster.md + - label: Dismantling a Cluster + href: ./eggo_dismantling_a_cluster.md + - label: Running the Test Pod + href: ./running_the_test_pod.md + - label: Kubernetes Cluster Deployment Guide Based on containerd + href: ./kubernetes_containerd.md diff --git a/docs/en/docs/cluster_deployment/kubernetes/deploying_a_kubernetes_cluster_manually.md b/docs/en/docs/cluster_deployment/kubernetes/deploying_a_kubernetes_cluster_manually.md new file mode 100644 index 0000000000000000000000000000000000000000..fed9fb7eb92405803c49a8c66e2d0088b1cf280c --- /dev/null +++ b/docs/en/docs/cluster_deployment/kubernetes/deploying_a_kubernetes_cluster_manually.md @@ -0,0 +1,18 @@ +# Deploying a Kubernetes Cluster Manually + +**Note: Manual deployment applies only to experimental and learning environments and is not intended for commercial environments.** + +This chapter describes how to deploy a Kubernetes cluster. + +## Environment + +Deploy based on the preceding [VM installation](./preparing_vms.md) and obtain the following VM list: + +| HostName | MAC | IPv4 | +| ---------- | ----------------- | -------------------| +| k8smaster0 | 52:54:00:00:00:80 | 192.168.122.154/24 | +| k8smaster1 | 52:54:00:00:00:81 | 192.168.122.155/24 | +| k8smaster2 | 52:54:00:00:00:82 | 192.168.122.156/24 | +| k8snode1 | 52:54:00:00:00:83 | 192.168.122.157/24 | +| k8snode2 | 52:54:00:00:00:84 | 192.168.122.158/24 | +| k8snode3 | 52:54:00:00:00:85 | 192.168.122.159/24 | diff --git a/docs/en/docs/cluster_deployment/kubernetes/deploying_a_node_component.md b/docs/en/docs/cluster_deployment/kubernetes/deploying_a_node_component.md new file mode 100644 index 0000000000000000000000000000000000000000..66cdf2c7f4a52fd3b9b4fa0546bd5497affa5511 --- /dev/null +++ b/docs/en/docs/cluster_deployment/kubernetes/deploying_a_node_component.md @@ -0,0 +1,381 @@ +# Deploying a Node Component + +This section uses the `k8snode1` node as an example. + +## Environment Preparation + +```bash +# A proxy needs to be configured for the intranet. +$ dnf install -y docker iSulad conntrack-tools socat containernetworking-plugins +$ swapoff -a +$ mkdir -p /etc/kubernetes/pki/ +$ mkdir -p /etc/cni/net.d +$ mkdir -p /opt/cni +# Delete the default kubeconfig file. +$ rm /etc/kubernetes/kubelet.kubeconfig + +## Use iSulad as the runtime ########. +# Configure the iSulad. +cat /etc/isulad/daemon.json +{ + "registry-mirrors": [ + "docker.io" + ], + "insecure-registries": [ + "k8s.gcr.io", + "quay.io" + ], + "pod-sandbox-image": "k8s.gcr.io/pause:3.2",# pause type + "network-plugin": "cni", # If this parameter is left blank, the CNI network plug-in is disabled. In this case, the following two paths become invalid. 
After the plug-in is installed, restart iSulad. + "cni-bin-dir": "/usr/libexec/cni/", + "cni-conf-dir": "/etc/cni/net.d", +} + +# Add the proxy to the iSulad environment variable and download the image. +cat /usr/lib/systemd/system/isulad.service +[Service] +Type=notify +Environment="HTTP_PROXY=http://name:password@proxy:8080" +Environment="HTTPS_PROXY=http://name:password@proxy:8080" + +# Restart the iSulad and set it to start automatically upon power-on. +systemctl daemon-reload +systemctl restart isulad + + + + +## If Docker is used as the runtime, run the following command: ######## +$ dnf install -y docker +# If a proxy environment is required, configure a proxy for Docker, add the configuration file http-proxy.conf, and edit the following content. Replace name, password, and proxy-addr with the actual values. +$ cat /etc/systemd/system/docker.service.d/http-proxy.conf +[Service] +Environment="HTTP_PROXY=http://name:password@proxy-addr:8080" +$ systemctl daemon-reload +$ systemctl restart docker +``` + +## Creating kubeconfig Configuration Files + +Perform the following operations on each node to create a configuration file: + +```bash +$ kubectl config set-cluster openeuler-k8s \ + --certificate-authority=/etc/kubernetes/pki/ca.pem \ + --embed-certs=true \ + --server=https://192.168.122.154:6443 \ + --kubeconfig=k8snode1.kubeconfig + +$ kubectl config set-credentials system:node:k8snode1 \ + --client-certificate=/etc/kubernetes/pki/k8snode1.pem \ + --client-key=/etc/kubernetes/pki/k8snode1-key.pem \ + --embed-certs=true \ + --kubeconfig=k8snode1.kubeconfig + +$ kubectl config set-context default \ + --cluster=openeuler-k8s \ + --user=system:node:k8snode1 \ + --kubeconfig=k8snode1.kubeconfig + +$ kubectl config use-context default --kubeconfig=k8snode1.kubeconfig +``` + +**Note: Change k8snode1 to the corresponding node name.** + +## Copying the Certificate + +Similar to the control plane, all certificates, keys, and related configurations are stored in the `/etc/kubernetes/pki/` directory. + +```bash +$ ls /etc/kubernetes/pki/ +ca.pem k8snode1.kubeconfig kubelet_config.yaml kube-proxy-key.pem kube-proxy.pem +k8snode1-key.pem k8snode1.pem kube_proxy_config.yaml kube-proxy.kubeconfig +``` + +## CNI Network Configuration + +containernetworking-plugins is used as the CNI plug-in used by kubelet. In the future, plug-ins such as calico and flannel can be introduced to enhance the network capability of the cluster. 
+
+```bash
+# Bridge Network Configuration
+$ cat /etc/cni/net.d/10-bridge.conf
+{
+    "cniVersion": "0.3.1",
+    "name": "bridge",
+    "type": "bridge",
+    "bridge": "cnio0",
+    "isGateway": true,
+    "ipMasq": true,
+    "ipam": {
+        "type": "host-local",
+        "subnet": "10.244.0.0/16",
+        "gateway": "10.244.0.1"
+    },
+    "dns": {
+        "nameservers": [
+            "10.244.0.1"
+        ]
+    }
+}
+
+# Loopback Network Configuration
+$ cat /etc/cni/net.d/99-loopback.conf
+{
+    "cniVersion": "0.3.1",
+    "name": "lo",
+    "type": "loopback"
+}
+```
+
+## Deploying the kubelet Service
+
+### Configuration File on Which kubelet Depends
+
+```bash
+$ cat /etc/kubernetes/pki/kubelet_config.yaml
+kind: KubeletConfiguration
+apiVersion: kubelet.config.k8s.io/v1beta1
+authentication:
+  anonymous:
+    enabled: false
+  webhook:
+    enabled: true
+  x509:
+    clientCAFile: /etc/kubernetes/pki/ca.pem
+authorization:
+  mode: Webhook
+clusterDNS:
+- 10.32.0.10
+clusterDomain: cluster.local
+runtimeRequestTimeout: "15m"
+tlsCertFile: "/etc/kubernetes/pki/k8snode1.pem"
+tlsPrivateKeyFile: "/etc/kubernetes/pki/k8snode1-key.pem"
+```
+
+**Note: The cluster DNS address is 10.32.0.10. It must fall within the service-cluster-ip-range segment configured for the control plane.**
+
+### Writing the systemd Configuration File
+
+```bash
+$ cat /usr/lib/systemd/system/kubelet.service
+[Unit]
+Description=kubelet: The Kubernetes Node Agent
+Documentation=https://kubernetes.io/docs/
+Wants=network-online.target
+After=network-online.target
+
+[Service]
+ExecStart=/usr/bin/kubelet \
+  --config=/etc/kubernetes/pki/kubelet_config.yaml \
+  --network-plugin=cni \
+  --pod-infra-container-image=k8s.gcr.io/pause:3.2 \
+  --kubeconfig=/etc/kubernetes/pki/k8snode1.kubeconfig \
+  --register-node=true \
+  --hostname-override=k8snode1 \
+  --cni-bin-dir="/usr/libexec/cni/" \
+  --v=2
+
+Restart=always
+StartLimitInterval=0
+RestartSec=10
+
+[Install]
+WantedBy=multi-user.target
+```
+
+**Note: If iSulad is used as the runtime, add the following configuration:**
+
+```bash
+--container-runtime=remote \
+--container-runtime-endpoint=unix:///var/run/isulad.sock \
+```
+
+## Deploying kube-proxy
+
+### Configuration File on Which kube-proxy Depends
+
+```bash
+cat /etc/kubernetes/pki/kube_proxy_config.yaml
+kind: KubeProxyConfiguration
+apiVersion: kubeproxy.config.k8s.io/v1alpha1
+clientConnection:
+  kubeconfig: /etc/kubernetes/pki/kube-proxy.kubeconfig
+clusterCIDR: 10.244.0.0/16
+mode: "iptables"
+```
+
+### Writing the systemd Configuration File
+
+```bash
+$ cat /usr/lib/systemd/system/kube-proxy.service
+[Unit]
+Description=Kubernetes Kube-Proxy Server
+Documentation=https://kubernetes.io/docs/reference/generated/kube-proxy/
+After=network.target
+
+[Service]
+EnvironmentFile=-/etc/kubernetes/config
+EnvironmentFile=-/etc/kubernetes/proxy
+ExecStart=/usr/bin/kube-proxy \
+    $KUBE_LOGTOSTDERR \
+    $KUBE_LOG_LEVEL \
+    --config=/etc/kubernetes/pki/kube_proxy_config.yaml \
+    --hostname-override=k8snode1 \
+    $KUBE_PROXY_ARGS
+Restart=on-failure
+LimitNOFILE=65536
+
+[Install]
+WantedBy=multi-user.target
+```
+
+## Starting the Component Services
+
+```bash
+systemctl enable kubelet kube-proxy
+systemctl start kubelet kube-proxy
+```
+
+Deploy the other nodes in sequence.
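+
+Before verifying the cluster as a whole, you can confirm on each node that both services came up. The following is a quick sanity check using standard systemd commands; it is not part of the original procedure:
+
+```bash
+# Both checks should print "active"; any other state means the service failed to start.
+systemctl is-active kubelet kube-proxy
+
+# If a service is not active, inspect its most recent log entries for the cause,
+# for example a wrong certificate path or an unreachable kube-apiserver.
+journalctl -u kubelet -n 20 --no-pager
+```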
+
+## Verifying the Cluster Status
+
+Wait for several minutes and run the following command to check the node status:
+
+```bash
+$ kubectl get nodes --kubeconfig /etc/kubernetes/pki/admin.kubeconfig
+NAME       STATUS   ROLES    AGE   VERSION
+k8snode1   Ready    <none>   17h   v1.20.2
+k8snode2   Ready    <none>   19m   v1.20.2
+k8snode3   Ready    <none>   12m   v1.20.2
+```
+
+## Deploying coredns
+
+coredns can be deployed on a worker node or the master node. In this document, coredns is deployed on the `k8snode1` node.
+
+### Writing the coredns Configuration File
+
+```bash
+$ cat /etc/kubernetes/pki/dns/Corefile
+.:53 {
+    errors
+    health {
+        lameduck 5s
+    }
+    ready
+    kubernetes cluster.local in-addr.arpa ip6.arpa {
+        pods insecure
+        endpoint https://192.168.122.154:6443
+        tls /etc/kubernetes/pki/ca.pem /etc/kubernetes/pki/admin-key.pem /etc/kubernetes/pki/admin.pem
+        kubeconfig /etc/kubernetes/pki/admin.kubeconfig default
+        fallthrough in-addr.arpa ip6.arpa
+    }
+    prometheus :9153
+    forward . /etc/resolv.conf {
+        max_concurrent 1000
+    }
+    cache 30
+    loop
+    reload
+    loadbalance
+}
+```
+
+Note:
+
+- coredns listens on port 53.
+- The kubernetes plugin is configured with the certificates and the URL of the kube-apiserver.
+
+### Preparing the systemd Service File
+
+```bash
+cat /usr/lib/systemd/system/coredns.service
+[Unit]
+Description=Kubernetes Core DNS server
+Documentation=https://github.com/coredns/coredns
+After=network.target
+
+[Service]
+ExecStart=bash -c "KUBE_DNS_SERVICE_HOST=10.32.0.10 coredns -conf /etc/kubernetes/pki/dns/Corefile"
+
+Restart=on-failure
+LimitNOFILE=65536
+
+[Install]
+WantedBy=multi-user.target
+```
+
+### Starting the Service
+
+```bash
+systemctl enable coredns
+systemctl start coredns
+```
+
+### Creating the Service Object of coredns
+
+```bash
+$ cat coredns_server.yaml
+apiVersion: v1
+kind: Service
+metadata:
+  name: kube-dns
+  namespace: kube-system
+  annotations:
+    prometheus.io/port: "9153"
+    prometheus.io/scrape: "true"
+  labels:
+    k8s-app: kube-dns
+    kubernetes.io/cluster-service: "true"
+    kubernetes.io/name: "CoreDNS"
+spec:
+  clusterIP: 10.32.0.10
+  ports:
+  - name: dns
+    port: 53
+    protocol: UDP
+  - name: dns-tcp
+    port: 53
+    protocol: TCP
+  - name: metrics
+    port: 9153
+    protocol: TCP
+```
+
+### Creating the Endpoint Object of coredns
+
+```bash
+$ cat coredns_ep.yaml
+apiVersion: v1
+kind: Endpoints
+metadata:
+  name: kube-dns
+  namespace: kube-system
+subsets:
+  - addresses:
+      - ip: 192.168.122.157
+    ports:
+      - name: dns-tcp
+        port: 53
+        protocol: TCP
+      - name: dns
+        port: 53
+        protocol: UDP
+      - name: metrics
+        port: 9153
+        protocol: TCP
+```
+
+### Confirming the coredns Service
+
+```bash
+# View the service object.
+$ kubectl get service -n kube-system kube-dns
+NAME       TYPE        CLUSTER-IP   EXTERNAL-IP   PORT(S)                  AGE
+kube-dns   ClusterIP   10.32.0.10   <none>        53/UDP,53/TCP,9153/TCP   51m
+# View the endpoint object.
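+# (The kube-dns service above is defined without a selector, so Kubernetes does
+# not populate its endpoints automatically; the manually created endpoints
+# object binds the service to the host where coredns runs.)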
+$ kubectl get endpoints -n kube-system kube-dns
+NAME       ENDPOINTS                                                     AGE
+kube-dns   192.168.122.157:53,192.168.122.157:53,192.168.122.157:9153   52m
+```
diff --git a/docs/en/docs/cluster_deployment/kubernetes/deploying_control_plane_components.md b/docs/en/docs/cluster_deployment/kubernetes/deploying_control_plane_components.md
new file mode 100644
index 0000000000000000000000000000000000000000..d2ba472203ba134d43257a1da112b8d7f79fe9ac
--- /dev/null
+++ b/docs/en/docs/cluster_deployment/kubernetes/deploying_control_plane_components.md
@@ -0,0 +1,357 @@
+# Deploying Components on the Control Plane
+
+## Preparing the kubeconfig File for All Components
+
+### kube-proxy
+
+```bash
+kubectl config set-cluster openeuler-k8s --certificate-authority=/etc/kubernetes/pki/ca.pem --embed-certs=true --server=https://192.168.122.154:6443 --kubeconfig=kube-proxy.kubeconfig
+kubectl config set-credentials system:kube-proxy --client-certificate=/etc/kubernetes/pki/kube-proxy.pem --client-key=/etc/kubernetes/pki/kube-proxy-key.pem --embed-certs=true --kubeconfig=kube-proxy.kubeconfig
+kubectl config set-context default --cluster=openeuler-k8s --user=system:kube-proxy --kubeconfig=kube-proxy.kubeconfig
+kubectl config use-context default --kubeconfig=kube-proxy.kubeconfig
+```
+
+### kube-controller-manager
+
+```bash
+kubectl config set-cluster openeuler-k8s --certificate-authority=/etc/kubernetes/pki/ca.pem --embed-certs=true --server=https://127.0.0.1:6443 --kubeconfig=kube-controller-manager.kubeconfig
+kubectl config set-credentials system:kube-controller-manager --client-certificate=/etc/kubernetes/pki/kube-controller-manager.pem --client-key=/etc/kubernetes/pki/kube-controller-manager-key.pem --embed-certs=true --kubeconfig=kube-controller-manager.kubeconfig
+kubectl config set-context default --cluster=openeuler-k8s --user=system:kube-controller-manager --kubeconfig=kube-controller-manager.kubeconfig
+kubectl config use-context default --kubeconfig=kube-controller-manager.kubeconfig
+```
+
+### kube-scheduler
+
+```bash
+kubectl config set-cluster openeuler-k8s --certificate-authority=/etc/kubernetes/pki/ca.pem --embed-certs=true --server=https://127.0.0.1:6443 --kubeconfig=kube-scheduler.kubeconfig
+kubectl config set-credentials system:kube-scheduler --client-certificate=/etc/kubernetes/pki/kube-scheduler.pem --client-key=/etc/kubernetes/pki/kube-scheduler-key.pem --embed-certs=true --kubeconfig=kube-scheduler.kubeconfig
+kubectl config set-context default --cluster=openeuler-k8s --user=system:kube-scheduler --kubeconfig=kube-scheduler.kubeconfig
+kubectl config use-context default --kubeconfig=kube-scheduler.kubeconfig
+```
+
+### admin
+
+```bash
+kubectl config set-cluster openeuler-k8s --certificate-authority=/etc/kubernetes/pki/ca.pem --embed-certs=true --server=https://127.0.0.1:6443 --kubeconfig=admin.kubeconfig
+kubectl config set-credentials admin --client-certificate=/etc/kubernetes/pki/admin.pem --client-key=/etc/kubernetes/pki/admin-key.pem --embed-certs=true --kubeconfig=admin.kubeconfig
+kubectl config set-context default --cluster=openeuler-k8s --user=admin --kubeconfig=admin.kubeconfig
+kubectl config use-context default --kubeconfig=admin.kubeconfig
+```
+
+### Obtaining the kubeconfig Configuration File
+
+```bash
+admin.kubeconfig kube-proxy.kubeconfig kube-controller-manager.kubeconfig kube-scheduler.kubeconfig
+```
+
+## Generating the Key Provider Configuration
+
+When kube-apiserver starts, an encryption configuration specified by `--encryption-provider-config=/etc/kubernetes/pki/encryption-config.yaml`
needs to be provided. In this document, the encryption key is generated using urandom:
+
+```bash
+$ cat generate.bash
+#!/bin/bash
+
+ENCRYPTION_KEY=$(head -c 32 /dev/urandom | base64)
+
+cat > encryption-config.yaml <<EOF
+kind: EncryptionConfig
+apiVersion: v1
+resources:
+  - resources:
+      - secrets
+    providers:
+      - aescbc:
+          keys:
+            - name: key1
+              secret: ${ENCRYPTION_KEY}
+      - identity: {}
+EOF
+```
+
+> [!NOTE]NOTE
+>
+> - When a cluster is deleted, all data in the cluster is deleted and cannot be restored. Exercise caution when performing this operation.
+> - Currently, dismantling a cluster does not delete the containers and the container images. However, if the Kubernetes cluster is configured to install a container engine during the deployment, the container engine will be deleted. As a result, the containers may run abnormally.
+> - Some error information may be displayed when dismantling the cluster. Generally, this is caused by the error results returned during the delete operations. The cluster can still be properly dismantled.
+
+You can use the command line to delete the entire cluster. For example, run the following command to delete the k8s-cluster:
+
+```shell
+eggo -d cleanup --id k8s-cluster
+```
diff --git a/docs/en/docs/cluster_deployment/kubernetes/eggo_tool_introduction.md b/docs/en/docs/cluster_deployment/kubernetes/eggo_tool_introduction.md
new file mode 100644
index 0000000000000000000000000000000000000000..544be1f424d3ae64bc9960449b03c46ac680df1f
--- /dev/null
+++ b/docs/en/docs/cluster_deployment/kubernetes/eggo_tool_introduction.md
@@ -0,0 +1,429 @@
+# Tool Introduction
+
+This chapter describes the automatic deployment tool. You are advised to read this chapter before deployment.
+
+## Deployment Modes
+
+The automatic Kubernetes cluster deployment tool provided by openEuler supports one-click deployment using the CLI. The tool provides the following deployment modes:
+
+- Offline deployment: Prepare all required RPM packages, binary files, plugins, and container images on the local host, pack them into a tar.gz file in the specified format, and write the corresponding YAML configuration file. Then, you can run commands to deploy the cluster with one click. This deployment mode can be used when the VM cannot access the external network.
+- Online deployment: Write the YAML configuration file. The required RPM packages, binary files, plugins, and container images are automatically downloaded from the Internet during installation and deployment. In this mode, the VM must be able to access the software sources and the image repository on which the cluster depends, for example, Docker Hub.
+
+## Configurations
+
+When you use the automatic Kubernetes cluster deployment tool, describe the cluster deployment information in a YAML configuration file. This section describes the configuration items and provides configuration examples.
+
+### Configuration Items
+
+- cluster-id: Cluster name, which must comply with the naming rules for DNS names. Example: k8s-cluster
+
+- username: User name used to log in through SSH to the hosts where the Kubernetes cluster is to be deployed. The user name must be identical on all hosts.
+
+- private-key-path: The path of the key for password-free SSH login. You only need to configure either private-key-path or password. If both are configured, private-key-path is used preferentially.
+
+- masters: The master node list. It is recommended that each master node is also set as a worker node.
Each master node must be configured with a group of sub-items:
+  - name: The name of the master node, which is the node name displayed to the Kubernetes cluster.
+  - ip: The IP address of the master node.
+  - port: The port for SSH login of the node. The default value is 22.
+  - arch: CPU architecture of the master node. For example, the value for x86_64 CPUs is amd64.
+
+- workers: The list of the worker nodes. Each worker node must be configured with a group of sub-items:
+  - name: The name of the worker node, which is the node name displayed to the Kubernetes cluster.
+  - ip: The IP address of the worker node.
+  - port: The port for SSH login of the node. The default value is 22.
+  - arch: CPU architecture of the worker node. For example, the value for x86_64 CPUs is amd64.
+
+- etcds: The list of etcd nodes. If this parameter is left empty, one etcd node is deployed for each master node. Otherwise, only the configured etcd nodes are deployed. Each etcd node must be configured with a group of sub-items:
+  - name: The name of the etcd node, which is the node name displayed to the Kubernetes cluster.
+  - ip: The IP address of the etcd node.
+  - port: The port for SSH login.
+  - arch: CPU architecture of the etcd node. For example, the value for x86_64 CPUs is amd64.
+
+- loadbalance: The loadbalance node list. Each loadbalance node must be configured with a group of sub-items:
+  - name: The name of the loadbalance node, which is the node name displayed to the Kubernetes cluster.
+  - ip: The IP address of the loadbalance node.
+  - port: The port for SSH login.
+  - arch: CPU architecture of the loadbalance node. For example, the value for x86_64 CPUs is amd64.
+  - bind-port: The listening port of the load balancing service.
+
+- external-ca: Whether to use an external CA certificate. If yes, set this parameter to true. Otherwise, set this parameter to false.
+
+- external-ca-path: The path of the external CA certificate file. This parameter takes effect only when external-ca is set to true.
+
+- service: Information about the services created by Kubernetes. The service configuration item contains the following sub-items:
+  - cidr: The IP address segment of the services created by Kubernetes.
+  - dnsaddr: The DNS address of the services created by Kubernetes.
+  - gateway: The gateway address of the services created by Kubernetes.
+  - dns: The configuration item of the CoreDNS created by Kubernetes. The dns configuration item contains the following sub-items:
+    - corednstype: The deployment type of the CoreDNS created by Kubernetes. The value can be pod or binary.
+    - imageversion: The CoreDNS image version of the pod deployment type.
+    - replicas: The number of CoreDNS replicas of the pod deployment type.
+
+- network: The network configuration of the Kubernetes cluster. The network configuration item contains the following sub-items:
+  - podcidr: The IP address segment of the Kubernetes cluster network.
+  - plugin: The network plugin deployed in the Kubernetes cluster.
+  - plugin-args: The path of the configuration file of the network plugin. Example: {"NetworkYamlPath": "/etc/kubernetes/addons/calico.yaml"}
+
+- apiserver-endpoint: The IP address or domain name of the APIServer service that can be accessed by external systems. If loadbalance is configured, set this parameter to the IP address of the loadbalance node.
Otherwise, set this parameter to the IP address of the first master node.
+
+- apiserver-cert-sans: The IP addresses and domain names that need to be configured in the APIServer certificate. This configuration item contains the following sub-items:
+  - dnsnames: The array list of the domain names that need to be configured in the APIServer certificate.
+  - ips: The array list of IP addresses that need to be configured in the APIServer certificate.
+
+- apiserver-timeout: The APIServer response timeout interval.
+
+- etcd-token: The etcd cluster name.
+
+- dns-vip: The virtual IP address of the DNS.
+
+- dns-domain: The DNS domain name suffix.
+
+- pause-image: The complete image name of the pause container.
+
+- network-plugin: The type of the network plugin. This parameter can only be set to cni. If this item is not configured, the default Kubernetes network is used.
+
+- cni-bin-dir: The network plugin directories. Use commas (,) to separate multiple directories. Example: /usr/libexec/cni,/opt/cni/bin
+
+- runtime: The type of the container runtime. Currently, docker and iSulad are supported.
+
+- runtime-endpoint: The endpoint of the container runtime. This parameter is optional when runtime is set to docker.
+
+- registry-mirrors: The mirror site address of the image repository used for downloading container images.
+
+- insecure-registries: The address of the image repository used for downloading container images through HTTP.
+
+- config-extra-args: The extra parameters for starting services of each component (such as kube-apiserver and etcd). This configuration item contains the following sub-items:
+  - name: The component name. The value can be etcd, kube-apiserver, kube-controller-manager, kube-scheduler, kube-proxy or kubelet.
+  - extra-args: The extended parameters of the component. The format is key: value. Note that the component parameter corresponding to key must be prefixed with a hyphen (-) or two hyphens (--).
+
+- open-ports: Configure the ports that need to be enabled additionally. The ports required by Kubernetes do not need to be configured. Other plugin ports need to be configured additionally.
+  - worker | master | etcd | loadbalance: The type of the node where the ports are enabled. Each configuration item contains one or more port and protocol sub-items.
+    - port: The port address.
+    - protocol: The port type. The value can be tcp or udp.
+
+- install: Configure the detailed information about the installation packages or binary files to be installed on each type of node. Note that the corresponding files must be packaged in a tar.gz installation package. The following describes the full configuration. Select the configuration items as needed.
+  - package-source: The detailed information about the installation package.
+    - type: The compression type of the installation package. Currently, only tar.gz installation packages are supported.
+    - dstpath: The path where the installation package is to be decompressed on the peer host. The path must be a valid absolute path.
+    - srcpath: The path for storing the installation packages of different architectures. The architecture must correspond to the host architecture. The path must be a valid absolute path.
+      - arm64: The path of the installation package of the ARM64 architecture. This parameter is required if any ARM64 node is included in the configuration.
+      - amd64: The path of the installation package of the AMD64 architecture. This parameter is required if any x86_64 node is included in the configuration.
+ > [!NOTE]**NOTE**: + > + > - In the install configuration item, the sub-items of etcd, kubernetes-master, kubernetes-worker, network, loadbalance, container, image, and dns are the same, that is, name, type, dst, schedule, and TimeOut. dst, schedule, and TimeOut are optional. You can determine whether to configure them based on the files to be installed. The following uses the etcd and kubernetes-master nodes as an example. + - etcd: The list of packages or binary files to be installed on etcd nodes. + - name: The names of the software packages or binary files to be installed. If the software package is an installation package, enter only the name and do not specify the version. During the installation, `$name*` is used for identification. Example: etcd. If there are multiple software packages, use commas (,) to separate them. + - type: The type of the configuration item. The value can be pkg, repo, bin, file, dir, image, yaml, or shell. If type is set to repo, configure the repo source on the corresponding node. + - dst: The path of the destination folder. This parameter is required when type is set to bin, file, or dir. It indicates the directory where a file or folder is stored. To prevent users from incorrectly configuring a path and deleting important files during cleanup, this parameter must be set to a path in the whitelist. For details, see "Whitelist Description." + - kubernetes-master: The list of packages or binary files to be installed on the Kubernetes master nodes. + - kubernetes-worker: The list of packages or binary files to be installed on the Kubernetes worker nodes. + - network: The list of packages or binary files to be installed for the network. + - loadbalance: The list of packages or binary files to be installed on the loadbalance nodes. + - container: The list of packages or binary files to be installed for the containers. + - image: The tar package of the container image. + - dns: Kubernetes CoreDNS installation package. If corednstype is set to pod, this parameter is not required. + - addition: The list of additional installation packages or binary files. + - master: The following configurations will be installed on all master nodes. + - name: The name of the software package or binary file to be installed. + - type: The type of the configuration item. The value can be pkg, repo, bin, file, dir, image, yaml, or shell. If type is set to repo, configure the repo source on the corresponding node. + - schedule: Valid only when type is set to shell. This parameter indicates when the user wants to execute the script. The value can be prejoin (before the node is added), postjoin (after the node is added), precleanup (before the node is removed), or postcleanup (after the node is removed). + - TimeOut: The script execution timeout interval. If the execution times out, the process is forcibly stopped. The default value is 30s. + - worker: The configurations will be installed on all worker nodes. The configuration format is the same as that of master under addition. + +### Whitelist Description + +The value of dst under install must match the whitelist rules. Set it to a path in the whitelist or a subdirectory of the path. The current whitelist is as follows: + +- /usr/bin +- /usr/local/bin +- /opt/cni/bin +- /usr/libexec/cni +- /etc/kubernetes +- /usr/lib/systemd/system +- /etc/systemd/system +- /tmp + +### Configuration Example + +The following is an example of the YAML file configuration. 
As shown in the example, nodes of different types can be deployed on the same host, but the configurations of these nodes must be the same. For example, a master node and a worker node are deployed on test0.
+
+```yaml
+cluster-id: k8s-cluster
+username: root
+private-key-path: /root/.ssh/private.key
+masters:
+- name: test0
+  ip: 192.168.0.1
+  port: 22
+  arch: arm64
+workers:
+- name: test0
+  ip: 192.168.0.1
+  port: 22
+  arch: arm64
+- name: test1
+  ip: 192.168.0.3
+  port: 22
+  arch: arm64
+etcds:
+- name: etcd-0
+  ip: 192.168.0.4
+  port: 22
+  arch: amd64
+loadbalance:
+  name: k8s-loadbalance
+  ip: 192.168.0.5
+  port: 22
+  arch: amd64
+  bind-port: 8443
+external-ca: false
+external-ca-path: /opt/externalca
+service:
+  cidr: 10.32.0.0/16
+  dnsaddr: 10.32.0.10
+  gateway: 10.32.0.1
+  dns:
+    corednstype: pod
+    imageversion: 1.8.4
+    replicas: 2
+network:
+  podcidr: 10.244.0.0/16
+  plugin: calico
+  plugin-args: {"NetworkYamlPath": "/etc/kubernetes/addons/calico.yaml"}
+apiserver-endpoint: 192.168.122.222:6443
+apiserver-cert-sans:
+  dnsnames: []
+  ips: []
+apiserver-timeout: 120s
+etcd-external: false
+etcd-token: etcd-cluster
+dns-vip: 10.32.0.10
+dns-domain: cluster.local
+pause-image: k8s.gcr.io/pause:3.2
+network-plugin: cni
+cni-bin-dir: /usr/libexec/cni,/opt/cni/bin
+runtime: docker
+runtime-endpoint: unix:///var/run/docker.sock
+registry-mirrors: []
+insecure-registries: []
+config-extra-args:
+  - name: kubelet
+    extra-args:
+      "--cgroup-driver": systemd
+open-ports:
+  worker:
+  - port: 111
+    protocol: tcp
+  - port: 179
+    protocol: tcp
+install:
+  package-source:
+    type: tar.gz
+    dstpath: ""
+    srcpath:
+      arm64: /root/rpms/packages-arm64.tar.gz
+      amd64: /root/rpms/packages-x86.tar.gz
+  etcd:
+  - name: etcd
+    type: pkg
+    dst: ""
+  kubernetes-master:
+  - name: kubernetes-client,kubernetes-master
+    type: pkg
+  kubernetes-worker:
+  - name: docker-engine,kubernetes-client,kubernetes-node,kubernetes-kubelet
+    type: pkg
+    dst: ""
+  - name: conntrack-tools,socat
+    type: pkg
+    dst: ""
+  network:
+  - name: containernetworking-plugins
+    type: pkg
+    dst: ""
+  loadbalance:
+  - name: gd,gperftools-libs,libunwind,libwebp,libxslt
+    type: pkg
+    dst: ""
+  - name: nginx,nginx-all-modules,nginx-filesystem,nginx-mod-http-image-filter,nginx-mod-http-perl,nginx-mod-http-xslt-filter,nginx-mod-mail,nginx-mod-stream
+    type: pkg
+    dst: ""
+  container:
+  - name: emacs-filesystem,gflags,gpm-libs,re2,rsync,vim-filesystem,vim-common,vim-enhanced,zlib-devel
+    type: pkg
+    dst: ""
+  - name: libwebsockets,protobuf,protobuf-devel,grpc,libcgroup
+    type: pkg
+    dst: ""
+  - name: yajl,lxc,lxc-libs,lcr,clibcni,iSulad
+    type: pkg
+    dst: ""
+  image:
+  - name: pause.tar
+    type: image
+    dst: ""
+  dns:
+  - name: coredns
+    type: pkg
+    dst: ""
+  addition:
+    master:
+    - name: prejoin.sh
+      type: shell
+      schedule: "prejoin"
+      TimeOut: "30s"
+    - name: calico.yaml
+      type: yaml
+      dst: ""
+    worker:
+    - name: docker.service
+      type: file
+      dst: /usr/lib/systemd/system/
+    - name: postjoin.sh
+      type: shell
+      schedule: "postjoin"
+```
+
+### Installation Package Structure
+
+For offline deployment, you need to prepare the Kubernetes software package and the related offline installation packages, and store the offline installation packages in a specific directory structure.
The directory structure is as follows:
+
+```shell
+package
+├── bin
+├── dir
+├── file
+├── image
+├── pkg
+└── packages_notes.md
+```
+
+The preceding directories are described as follows:
+
+- The directory structure of the offline deployment package corresponds to the package types in the cluster configuration file config. The package types include pkg, repo, bin, file, dir, image, yaml, and shell.
+
+- The bin directory stores binary files, corresponding to the bin package type.
+
+- The dir directory stores the directories that need to be copied to the target host. You need to configure the dst destination path, corresponding to the dir package type.
+
+- The file directory stores three types of files: file, yaml, and shell. The file type indicates the files to be copied to the target host, and requires the dst destination path to be configured. The yaml type indicates the user-defined YAML files, which will be applied after the cluster is deployed. The shell type indicates the scripts to be executed, and requires the schedule execution time to be configured. The execution time includes prejoin (before the node is added), postjoin (after the node is added), precleanup (before the node is removed), and postcleanup (after the node is removed).
+
+- The image directory stores the container images to be imported. The container images must be in a tar package format that is compatible with Docker (for example, images exported by Docker or isula-build).
+
+- The pkg directory stores the RPM or DEB packages to be installed, corresponding to the pkg package type. You are advised to use binary files to facilitate cross-release deployment.
+
+### Command Reference
+
+To use the cluster deployment tool provided by openEuler, run the eggo command to deploy the cluster.
+
+#### Deploying the Kubernetes Cluster
+
+Run the following command to deploy a Kubernetes cluster using the specified YAML configuration:
+
+**eggo deploy** \[ **-d** ] **-f** *deploy.yaml*
+
+| Parameter| Mandatory (Yes/No)| Description |
+| ------------- | -------- | --------------------------------- |
+| --debug \| -d | No| Displays the debugging information.|
+| --file \| -f | Yes| Specifies the path of the YAML file for the Kubernetes cluster deployment.|
+
+#### Adding a Single Node
+
+Run the following command to add a specified single node to the Kubernetes cluster:
+
+**eggo** **join** \[ **-d** ] **--id** *k8s-cluster* \[ **--type** *master,worker* ] **--arch** *arm64* **--port** *22* \[ **--name** *master1* ] *IP*
+
+| Parameter| Mandatory (Yes/No) | Description|
+| ------------- | -------- | ------------------------------------------------------------ |
+| --debug \| -d | No| Displays the debugging information.|
+| --id | Yes| Specifies the name of the Kubernetes cluster where the node is to be added.|
+| --type \| -t | No| Specifies the type of the node to be added. The value can be master or worker. Use commas (,) to separate multiple types. The default value is worker.|
+| --arch \| -a | Yes| Specifies the CPU architecture of the node to be added.|
+| --port \| -p | Yes| Specifies the port number for SSH login of the node to be added.|
+| --name \| -n | No| Specifies the name of the node to be added.|
+| *IP* | Yes| Actual IP address of the node to be added.|
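+
+For instance, using the sample values from the table above, the following hypothetical invocation adds one node that serves as both a master and a worker (a sketch; substitute your own cluster name and node IP address):
+
+```shell
+# Add node 192.168.0.6 to cluster k8s-cluster as both master and worker.
+eggo join --id k8s-cluster --type master,worker --arch arm64 --port 22 --name master1 192.168.0.6
+```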
+
+#### Adding Multiple Nodes
+
+Run the following command to add multiple specified nodes to the Kubernetes cluster:
+
+**eggo** **join** \[ **-d** ] **--id** *k8s-cluster* **-f** *nodes.yaml*
+
+| Parameter| Mandatory (Yes/No) | Description |
+| ------------- | -------- | -------------------------------- |
+| --debug \| -d | No| Displays the debugging information.|
+| --id | Yes| Specifies the name of the Kubernetes cluster where the nodes are to be added.|
+| --file \| -f | Yes| Specifies the path of the YAML configuration file for adding the nodes.|
+
+#### Deleting Nodes
+
+Run the following command to delete one or more nodes from the Kubernetes cluster:
+
+**eggo delete** \[ **-d** ] **--id** *k8s-cluster* *node* \[*node...*]
+
+| Parameter| Mandatory (Yes/No) | Description |
+| ------------- | -------- | -------------------------------------------- |
+| --debug \| -d | No| Displays the debugging information.|
+| --id | Yes| Specifies the name of the cluster from which the nodes are to be deleted.|
+| *node* | Yes| Specifies the IP addresses or names of the nodes to be deleted.|
+
+#### Deleting the Cluster
+
+Run the following command to delete the entire Kubernetes cluster:
+
+**eggo cleanup** \[ **-d** ] **--id** *k8s-cluster* \[ **-f** *deploy.yaml* ]
+
+| Parameter| Mandatory (Yes/No) | Description|
+| ------------- | -------- | ------------------------------------------------------------ |
+| --debug \| -d | No| Displays the debugging information.|
+| --id | Yes| Specifies the name of the Kubernetes cluster to be deleted.|
+| --file \| -f | No| Specifies the path of the YAML file for the Kubernetes cluster deletion. If this parameter is not specified, the cluster configuration cached during cluster deployment is used by default. In normal cases, you are advised not to set this parameter. Set this parameter only when an exception occurs.|
+
+> [!NOTE]**NOTE**:
+>
+> - The cluster configuration cached during cluster deployment is recommended when you delete the cluster. That is, you are advised not to set the `--file | -f` parameter in normal cases. Set this parameter only when the cached configuration is damaged or lost due to an exception.
+
+#### Querying the Cluster
+
+Run the following command to query all Kubernetes clusters deployed using eggo:
+
+**eggo list** \[ **-d** ]
+
+| Parameter| Mandatory (Yes/No) | Description |
+| ------------- | -------- | ------------ |
+| --debug \| -d | No| Displays the debugging information.|
+
+#### Generating the Cluster Configuration File
+
+Run the following command to quickly generate the YAML configuration file required for the Kubernetes cluster deployment.
+ +**eggo template** **-d** **-f** *template.yaml* **-n** *k8s-cluster* **-u** *username* **-p** *password* **--etcd** \[*192.168.0.1,192.168.0.2*] **--masters** \[*192.168.0.1,192.168.0.2*] **--workers** *192.168.0.3* **--loadbalance** *192.168.0.4* + +| Parameter| Mandatory (Yes/No) | Description | +| ------------------- | -------- | ------------------------------- | +| --debug \| -d | No| Displays the debugging information.| +| --file \| -f | No| Specifies the path of the generated YAML file.| +| --name \| -n | No| Specifies the name of the Kubernetes cluster.| +| --username \| -u | No| Specifies the user name for SSH login of the configured node.| +| --password \| -p | No| Specifies the password for SSH login of the configured node.| +| --etcd | No| Specifies the IP address list of the etcd nodes.| +| --masters | No| Specifies the IP address list of the master nodes.| +| --workers | No| Specifies the IP address list of the worker nodes.| +| --loadbalance \| -l | No| Specifies the IP address of the loadbalance node.| + +#### Querying the Help Information + +Run the following command to query the help information of the eggo command: + + **eggo help** + +#### Querying the Help Information of Subcommands + +Run the following command to query the help information of the eggo subcommands: + +**eggo deploy | join | delete | cleanup | list | template -h** + +| Parameter| Mandatory (Yes/No) | Description | +| ----------- | -------- | ------------ | +| --help\| -h | Yes| Displays the help information.| diff --git a/docs/en/docs/cluster_deployment/kubernetes/figures/advertiseAddress.png b/docs/en/docs/cluster_deployment/kubernetes/figures/advertiseAddress.png new file mode 100644 index 0000000000000000000000000000000000000000..b36e5c4664f2d2e5faaa23128fd4711c11e30179 Binary files /dev/null and b/docs/en/docs/cluster_deployment/kubernetes/figures/advertiseAddress.png differ diff --git a/docs/en/docs/cluster_deployment/kubernetes/figures/arch.png b/docs/en/docs/cluster_deployment/kubernetes/figures/arch.png new file mode 100644 index 0000000000000000000000000000000000000000..650e4a67959136e12e49975196a4f3af28e6170e Binary files /dev/null and b/docs/en/docs/cluster_deployment/kubernetes/figures/arch.png differ diff --git a/docs/en/docs/cluster_deployment/kubernetes/figures/flannelConfig.png b/docs/en/docs/cluster_deployment/kubernetes/figures/flannelConfig.png new file mode 100644 index 0000000000000000000000000000000000000000..dc9e7c665edd02fad16d3e6f4970e3125efcbef8 Binary files /dev/null and b/docs/en/docs/cluster_deployment/kubernetes/figures/flannelConfig.png differ diff --git a/docs/en/docs/cluster_deployment/kubernetes/figures/name.png b/docs/en/docs/cluster_deployment/kubernetes/figures/name.png new file mode 100644 index 0000000000000000000000000000000000000000..dd6ddfdc3476780e8c896bfd5095025507f62fa8 Binary files /dev/null and b/docs/en/docs/cluster_deployment/kubernetes/figures/name.png differ diff --git a/docs/en/docs/cluster_deployment/kubernetes/figures/podSubnet.png b/docs/en/docs/cluster_deployment/kubernetes/figures/podSubnet.png new file mode 100644 index 0000000000000000000000000000000000000000..b368f77dd7dfd7722dcf7751b3e37ec28755e42d Binary files /dev/null and b/docs/en/docs/cluster_deployment/kubernetes/figures/podSubnet.png differ diff --git a/docs/en/docs/cluster_deployment/kubernetes/installing_etcd.md b/docs/en/docs/cluster_deployment/kubernetes/installing_etcd.md new file mode 100644 index 0000000000000000000000000000000000000000..1f93585d6945dab27e4ae96645e0f531131b15a6 
--- /dev/null
+++ b/docs/en/docs/cluster_deployment/kubernetes/installing_etcd.md
@@ -0,0 +1,88 @@
+# Installing etcd
+
+## Preparing the Environment
+
+Run the following commands to enable the ports used by etcd:
+
+```bash
+firewall-cmd --zone=public --add-port=2379/tcp
+firewall-cmd --zone=public --add-port=2380/tcp
+```
+
+## Installing the etcd Binary Package
+
+Currently, the RPM package is used for installation.
+
+```bash
+rpm -ivh etcd*.rpm
+```
+
+Prepare the directories.
+
+```bash
+mkdir -p /etc/etcd /var/lib/etcd
+cp ca.pem /etc/etcd/
+cp kubernetes-key.pem /etc/etcd/
+cp kubernetes.pem /etc/etcd/
+# Disable SELinux.
+setenforce 0
+# Disable the default configuration in the /etc/etcd/etcd.conf file by commenting
+# out the corresponding lines, for example, ETCD_LISTEN_CLIENT_URLS="http://localhost:2379".
+```
+
+## Compiling the etcd.service File
+
+The following uses the `k8smaster0` machine as an example:
+
+```bash
+$ cat /usr/lib/systemd/system/etcd.service
+[Unit]
+Description=Etcd Server
+After=network.target
+After=network-online.target
+Wants=network-online.target
+
+[Service]
+Type=notify
+WorkingDirectory=/var/lib/etcd/
+EnvironmentFile=-/etc/etcd/etcd.conf
+# set GOMAXPROCS to number of processors
+ExecStart=/bin/bash -c "ETCD_UNSUPPORTED_ARCH=arm64 /usr/bin/etcd --name=k8smaster0 --cert-file=/etc/etcd/kubernetes.pem --key-file=/etc/etcd/kubernetes-key.pem --peer-cert-file=/etc/etcd/kubernetes.pem --peer-key-file=/etc/etcd/kubernetes-key.pem --trusted-ca-file=/etc/etcd/ca.pem --peer-trusted-ca-file=/etc/etcd/ca.pem --peer-client-cert-auth --client-cert-auth --initial-advertise-peer-urls https://192.168.122.154:2380 --listen-peer-urls https://192.168.122.154:2380 --listen-client-urls https://192.168.122.154:2379,https://127.0.0.1:2379 --advertise-client-urls https://192.168.122.154:2379 --initial-cluster-token etcd-cluster-0 --initial-cluster k8smaster0=https://192.168.122.154:2380,k8smaster1=https://192.168.122.155:2380,k8smaster2=https://192.168.122.156:2380 --initial-cluster-state new --data-dir /var/lib/etcd"
+
+Restart=always
+RestartSec=10s
+LimitNOFILE=65536
+
+[Install]
+WantedBy=multi-user.target
+```
+
+**Note:**
+
+- The boot setting `ETCD_UNSUPPORTED_ARCH=arm64` needs to be added on ARM64 platforms.
+- In this document, etcd and the Kubernetes control plane are deployed on the same machine. Therefore, the `kubernetes.pem` and `kubernetes-key.pem` certificates are used to start both etcd and the Kubernetes control plane.
+- A single CA certificate is used in the entire deployment process. etcd can also generate its own CA certificate and use it to sign other certificates, but the APIServer then needs a client certificate signed by that CA certificate to access etcd.
+- `initial-cluster` needs to be added to the etcd configuration of every deployed host.
+- To improve the storage efficiency of etcd, you can use a directory on an SSD as `data-dir`.
+
+Start the etcd service.
+
+```bash
+systemctl enable etcd
+systemctl start etcd
+```
+
+Then, deploy the other hosts in sequence.
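+
+The service file of each subsequent host differs only in the member name and the local IP addresses; the certificate paths and the `--initial-cluster` list stay the same. A minimal sketch of the options that change for the second member, assuming it is `k8smaster1` with IP address `192.168.122.155` as listed in `--initial-cluster` above:
+
+```bash
+# In /usr/lib/systemd/system/etcd.service on k8smaster1, only these options
+# differ from the k8smaster0 unit; every other option is identical.
+--name=k8smaster1
+--initial-advertise-peer-urls https://192.168.122.155:2380
+--listen-peer-urls https://192.168.122.155:2380
+--listen-client-urls https://192.168.122.155:2379,https://127.0.0.1:2379
+--advertise-client-urls https://192.168.122.155:2379
+```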
+ +## Verifying Basic Functions + +```bash +$ ETCDCTL_API=3 etcdctl -w table endpoint status --endpoints=https://192.168.122.155:2379,https://192.168.122.156:2379,https://192.168.122.154:2379 --cacert=/etc/etcd/ca.pem --cert=/etc/etcd/kubernetes.pem --key=/etc/etcd/kubernetes-key.pem ++------------------------------+------------------+---------+---------+-----------+------------+-----------+------------+--------------------+--------+ +| ENDPOINT | ID | VERSION | DB SIZE | IS LEADER | IS LEARNER | RAFT TERM | RAFT INDEX | RAFTAPPLIED INDEX | ERRORS | ++------------------------------+------------------+---------+---------+-----------+------------+-----------+------------+--------------------+--------+ +| https://192.168.122.155:2379 | b50ec873e253ebaa | 3.4.14 | 262 kB | false | false | 819 | 21 | 21 | | +| https://192.168.122.156:2379 | e2b0d126774c6d02 | 3.4.14 | 262 kB | true | false | 819 | 21 | 21 | | +| https://192.168.122.154:2379 | f93b3808e944c379 | 3.4.14 | 328 kB | false | false | 819 | 21 | 21 | | ++------------------------------+------------------+---------+---------+-----------+------------+-----------+------------+--------------------+--------+ +``` diff --git a/docs/en/docs/cluster_deployment/kubernetes/installing_the_kubernetes_software_package.md b/docs/en/docs/cluster_deployment/kubernetes/installing_the_kubernetes_software_package.md new file mode 100644 index 0000000000000000000000000000000000000000..1b9d8350d0a024530bf186f6d27537c78b28fd15 --- /dev/null +++ b/docs/en/docs/cluster_deployment/kubernetes/installing_the_kubernetes_software_package.md @@ -0,0 +1,11 @@ +# Installing the Kubernetes Software Package + +```bash +dnf install -y docker conntrack-tools socat +``` + +After the EPOL source is configured, you can directly install Kubernetes through DNF. + +```bash +dnf install kubernetes +``` diff --git a/docs/en/docs/cluster_deployment/kubernetes/kubernetes_containerd.md b/docs/en/docs/cluster_deployment/kubernetes/kubernetes_containerd.md new file mode 100644 index 0000000000000000000000000000000000000000..30f327f2eb9fe4f0fd7105707f6c1539c5be996e --- /dev/null +++ b/docs/en/docs/cluster_deployment/kubernetes/kubernetes_containerd.md @@ -0,0 +1,308 @@ +# Kubernetes Cluster Deployment Guide Based on containerd + +Starting from version 1.21, Kubernetes no longer supports the Kubernetes+Docker setup for cluster deployment. This guide demonstrates how to quickly set up a Kubernetes cluster using containerd as the container runtime. For custom cluster configurations, consult the [official documentation](https://kubernetes.io/docs/home/). + +## Software Package Installation + +### 1. Installing Required Packages + +```sh +yum install -y containerd +yum install -y kubernetes* +yum install -y cri-tools +``` + +> [!NOTE]**Note** +> +> - If Docker is already installed on the system, uninstall it before installing containerd to prevent conflicts. + +The required containerd version is 1.6.22-15 or higher. If the installed version is not supported, upgrade to version 1.6.22-15 using the following commands, or perform a manual upgrade. + +```sh +wget --no-check-certificate https://repo.openeuler.org/openEuler-24.03-LTS/update/x86_64/Packages/containerd-1.6.22-15.oe2403.x86_64.rpm +rpm -Uvh containerd-1.6.22-15.oe2403.x86_64.rpm +``` + +The package versions downloaded via `yum` in this guide are: + +```text +1. containerd + - Architecture: x86_64 + - Version: 1.6.22-15 +2. kubernetes - client/help/kubeadm/kubelet/master/node + - Architecture: x86_64 + - Version: 1.29.1-4 +3. 
cri-tools + - Architecture: X86_64 + - Version: 1.29.0-3 +``` + +### 2. Downloading CNI Components + +```sh +mkdir -p /opt/cni/bin +cd /opt/cni/bin +wget --no-check-certificate https://github.com/containernetworking/plugins/releases/download/v1.5.1/cni-plugins-linux-amd64-v1.5.1.tgz +tar -xzvf ./cni-plugins-linux-amd64-v1.5.1.tgz -C . +``` + +> [!NOTE]**Note** +> +> - The provided download link is for the AMD64 architecture. Choose the appropriate version based on your system architecture. Other versions are available in the [GitHub repository](https://github.com/containernetworking/plugins/releases/). + +### 3. Downloading CNI Plugin (Flannel) + +```sh +wget https://raw.githubusercontent.com/flannel-io/flannel/master/Documentation/kube-flannel.yml --no-check-certificate +``` + +## Environment Configuration + +This section configures the OS environment required for Kubernetes. + +### 1. Setting the Host Name + +```sh +hostnamectl set-hostname nodeName +``` + +### 2. Configuring the Firewall + +**Method 1:** + +Configure firewall rules to open ports for etcd and the API Server, ensuring proper communication between the control plane and worker nodes. + +Open ports for etcd: + +```sh +firewall-cmd --zone=public --add-port=2379/tcp --permanent +firewall-cmd --zone=public --add-port=2380/tcp --permanent +``` + +Open ports for the API Server: + +```sh +firewall-cmd --zone=public --add-port=6443/tcp --permanent +``` + +Apply the firewall rules: + +```sh +firewall-cmd --reload +``` + +> [!NOTE]**Note** +> +> - Firewall configuration may prevent certain container images from functioning properly. To ensure smooth operation, open the necessary ports based on the images being used. + +**Method 2:** + +Disable the firewall using the following commands: + +```sh +systemctl stop firewalld +systemctl disable firewalld +``` + +### 3. Disabling SELinux + +SELinux security policies may block certain operations within containers, such as writing to specific directories, accessing network resources, or executing privileged operations. This can cause critical services like CoreDNS to fail, resulting in `CrashLoopBackOff` or `Error` states. Disable SELinux using the following commands: + +```sh +setenforce 0 +sed -i "s/SELINUX=enforcing/SELINUX=disabled/g" /etc/selinux/config +``` + +### 4. Disabling Swap + +The Kubernetes scheduler allocates pods to nodes based on available memory and CPU resources. If swap is enabled on a node, the actual physical memory and logically available memory may not align, which can affect the scheduler decisions, leading to node overloading or incorrect scheduling. Therefore, disable swap: + +```sh +swapoff -a +sed -ri 's/.*swap.*/#&/' /etc/fstab +``` + +### 5. Configuring the Network + +Enable IPv6 and IPv4 traffic filtering on bridged networks using iptables, and enable IP forwarding to ensure inter-pod communication across nodes: + +```sh +$ cat > /etc/sysctl.d/k8s.conf << EOF +net.bridge.bridge-nf-call-ip6tables = 1 +net.bridge.bridge-nf-call-iptables = 1 +net.ipv4.ip_forward = 1 +vm.swappiness=0 +EOF +$ modprobe br_netfilter +$ sysctl -p /etc/sysctl.d/k8s.conf +``` + +## Configuring containerd + +This section configures containerd, including setting the pause image, cgroup driver, disabling certificate verification for the `registry.k8s.io` image repository, and configuring a proxy. 
+
+First, generate the default configuration file for containerd and output it to the file specified by `containerd_conf`:
+
+```sh
+containerd_conf="/etc/containerd/config.toml"
+mkdir -p /etc/containerd
+containerd config default > "${containerd_conf}"
+```
+
+Configure the pause image:
+
+```sh
+pause_img=$(kubeadm config images list | grep pause | tail -1)
+sed -i "/sandbox_image/s#\".*\"#\"${pause_img}\"#" "${containerd_conf}"
+```
+
+Set the cgroup driver to systemd:
+
+```sh
+sed -i "/SystemdCgroup/s/=.*/= true/" "${containerd_conf}"
+```
+
+Disable certificate verification for the `registry.k8s.io` image repository:
+
+```sh
+sed -i '/plugins."io.containerd.grpc.v1.cri".registry.configs/a\[plugins."io.containerd.grpc.v1.cri".registry.configs."registry.k8s.io".tls]\n insecure_skip_verify = true' /etc/containerd/config.toml
+```
+
+Configure the proxy (replace "***" in `HTTP_PROXY`, `HTTPS_PROXY`, and `NO_PROXY` with your proxy information):
+
+```sh
+$ server_path="/etc/systemd/system/containerd.service.d"
+$ mkdir -p "${server_path}"
+$ cat > "${server_path}"/http-proxy.conf << EOF
+[Service]
+Environment="HTTP_PROXY=***"
+Environment="HTTPS_PROXY=***"
+Environment="NO_PROXY=***"
+EOF
+```
+
+Restart containerd to apply the configurations:
+
+```sh
+systemctl daemon-reload
+systemctl restart containerd
+```
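+
+Before moving on, you can spot-check that the three edits above actually landed in the configuration file. This is only a quick sanity check; the pause image tag printed by `kubeadm` may differ on your system:
+
+```sh
+grep -nE "sandbox_image|SystemdCgroup|insecure_skip_verify" /etc/containerd/config.toml
+# Expected output (roughly):
+#   sandbox_image = "registry.k8s.io/pause:3.9"
+#   SystemdCgroup = true
+#   insecure_skip_verify = true
+```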
+
+## Configuring crictl to Use containerd as the Container Runtime
+
+```sh
+crictl config runtime-endpoint unix:///run/containerd/containerd.sock
+crictl config image-endpoint unix:///run/containerd/containerd.sock
+```
+
+## Configuring kubelet to Use systemd as the Cgroup Driver
+
+```sh
+systemctl enable kubelet.service
+echo 'KUBELET_EXTRA_ARGS="--runtime-cgroups=/systemd/system.slice --kubelet-cgroups=/systemd/system.slice"' >> /etc/sysconfig/kubelet
+systemctl restart kubelet
+```
+
+## Creating a Cluster Using Kubeadm (Control Plane Only)
+
+### 1. Configuring Cluster Information
+
+```sh
+kubeadm config print init-defaults --component-configs KubeletConfiguration >> kubeletConfig.yaml
+vim kubeletConfig.yaml
+```
+
+In the **kubeletConfig.yaml** file, configure the node name, advertise address (`advertiseAddress`), and the CIDR for the Pod network.
+
+**Modify `name` to match the hostname, consistent with the first step in the environment configuration:**
+
+![](./figures/name.png)
+
+**Change `advertiseAddress` to the IP address of the control plane:**
+
+![](./figures/advertiseAddress.png)
+
+**Add `podSubnet` under `networking` to specify the CIDR range:**
+
+![](./figures/podSubnet.png)
+
+### 2. Deploying the Cluster
+
+Use `kubeadm` to deploy the cluster. Many configurations are generated by default (such as authentication certificates). Refer to the [official documentation](https://kubernetes.io/docs/home/) for modifications.
+
+**Disable the proxy (if applicable):**
+
+```sh
+unset http_proxy https_proxy
+```
+
+Deploy the cluster using `kubeadm init`:
+
+```sh
+kubeadm init --config kubeletConfig.yaml
+```
+
+Specify the configuration file for `kubectl`:
+
+```sh
+mkdir -p "$HOME"/.kube
+cp -i /etc/kubernetes/admin.conf "$HOME"/.kube/config
+chown "$(id -u)":"$(id -g)" "$HOME"/.kube/config
+export KUBECONFIG=/etc/kubernetes/admin.conf
+```
+
+### 3. Deploying the CNI Plugin (Flannel)
+
+This tutorial uses Flannel as the CNI plugin. Below are the steps to download and deploy Flannel.
+
+The Flannel used here is downloaded from the `registry-1.docker.io` image repository. To avoid certificate verification issues, configure the image repository to skip certificate verification in the containerd configuration file (**/etc/containerd/config.toml**).
+
+![](./figures/flannelConfig.png)
+
+Use `kubectl apply` to deploy the **kube-flannel.yml** file downloaded during the software package installation.
+
+```sh
+kubectl apply -f kube-flannel.yml
+```
+
+> [!NOTE]**Note**
+>
+> The control plane may have taint issues, causing the node status in `kubectl get nodes` to remain "not ready." Refer to the [official documentation](https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/) to remove taints.
+
+## Joining the Cluster (Worker Nodes Only)
+
+**Disable the proxy (if applicable):**
+
+```sh
+unset http_proxy https_proxy
+```
+
+After installing and configuring the environment on worker nodes, join the cluster using the following command:
+
+```sh
+kubeadm join <control-plane-ip>:<port> --token <token> --discovery-token-ca-cert-hash sha256:<hash>
+```
+
+This command is generated after `kubeadm init` completes on the control plane. Alternatively, you can generate it on the control plane using the following commands:
+
+```sh
+$ kubeadm token create # Generate token.
+$ openssl x509 -pubkey -in /etc/kubernetes/pki/ca.crt | openssl rsa -pubin -outform der 2>/dev/null | \
+   openssl dgst -sha256 -hex | sed 's/^.* //' # Get hash.
+```
+
+After joining, check the status of worker nodes on the control plane using:
+
+```sh
+kubectl get nodes
+```
+
+If the node status shows "not ready," it may be due to unsuccessful Flannel plugin deployment. In this case, run the locally generated Flannel executable to complete the deployment.
+
+**Running kubectl Commands on Worker Nodes (Optional):**
+
+To run `kubectl` commands on worker nodes, copy the control plane configuration file **/etc/kubernetes/admin.conf** to the same directory, then configure it using:
+
+```sh
+export KUBECONFIG=/etc/kubernetes/admin.conf
+```
diff --git a/docs/en/docs/cluster_deployment/kubernetes/overview.md b/docs/en/docs/cluster_deployment/kubernetes/overview.md
new file mode 100644
index 0000000000000000000000000000000000000000..f49f60802f2219b9ab46f0fec436204a378e5cad
--- /dev/null
+++ b/docs/en/docs/cluster_deployment/kubernetes/overview.md
@@ -0,0 +1,12 @@
+# Kubernetes Cluster Deployment Guide
+
+This document describes how to deploy a Kubernetes cluster in binary mode on openEuler.
+
+Note: All operations in this document are performed using the `root` permission.
+
+## Cluster Status
+
+The cluster status used in this document is as follows:
+
+- Cluster structure: six VMs running the openEuler OS, with three master nodes and three worker nodes.
+- Physical machine: `x86/ARM` server running openEuler.
diff --git a/docs/en/docs/cluster_deployment/kubernetes/preparing_certificates.md b/docs/en/docs/cluster_deployment/kubernetes/preparing_certificates.md
new file mode 100644
index 0000000000000000000000000000000000000000..74d3c8233ea64c1dfd500dd4fc197bcc099ad53f
--- /dev/null
+++ b/docs/en/docs/cluster_deployment/kubernetes/preparing_certificates.md
@@ -0,0 +1,412 @@
+# Preparing Certificates
+
+**Statement: The certificate used in this document is self-signed and cannot be used in a commercial environment.**
+
+Before deploying a cluster, you need to generate certificates required for communication between components in the cluster.
This document uses the open-source CFSSL as the verification and deployment tool to help users understand the certificate configuration and the association between certificates of cluster components. You can select a tool based on the site requirements, for example, OpenSSL. + +## Building and Installing CFSSL + +The following commands for building and installing CFSSL are for your reference (the CFSSL website access permission is required, and the proxy must be configured first): + +```bash +wget --no-check-certificate https://github.com/cloudflare/cfssl/archive/v1.5.0.tar.gz +tar -zxf v1.5.0.tar.gz +cd cfssl-1.5.0/ +make -j6 +cp bin/* /usr/local/bin/ +``` + +## Generating a Root Certificate + +Compile the CA configuration file, for example, ca-config.json: + +```bash +$ cat ca-config.json | jq +{ + "signing": { + "default": { + "expiry": "8760h" + }, + "profiles": { + "kubernetes": { + "usages": [ + "signing", + "key encipherment", + "server auth", + "client auth" + ], + "expiry": "8760h" + } + } + } +} +``` + +Compile a CA CSR file, for example, ca-csr.json: + +```bash +$ cat ca-csr.json | jq +{ + "CN": "Kubernetes", + "key": { + "algo": "rsa", + "size": 2048 + }, + "names": [ + { + "C": "CN", + "L": "HangZhou", + "O": "openEuler", + "OU": "WWW", + "ST": "BinJiang" + } + ] +} +``` + +Generate the CA certificate and key: + +```bash +cfssl gencert -initca ca-csr.json | cfssljson -bare ca +``` + +The following certificates are obtained: + +```bash +ca.csr ca-key.pem ca.pem +``` + +## Generating the admin Account Certificate + +admin is an account used by K8S for system management. Compile the CSR configuration of the admin account, for example, admin-csr.json: + +```bash +cat admin-csr.json | jq +{ + "CN": "admin", + "key": { + "algo": "rsa", + "size": 2048 + }, + "names": [ + { + "C": "CN", + "L": "HangZhou", + "O": "system:masters", + "OU": "Containerum", + "ST": "BinJiang" + } + ] +} +``` + +Generate a certificate: + +```bash +cfssl gencert -ca=ca.pem -ca-key=ca-key.pem -config=ca-config.json -profile=kubernetes admin-csr.json | cfssljson -bare admin +``` + +The result is as follows: + +```bash +admin.csr admin-key.pem admin.pem +``` + +## Generating a service-account Certificate + +Compile the CSR configuration file of the service-account account, for example, service-account-csr.json: + +```bash +cat service-account-csr.json | jq +{ + "CN": "service-accounts", + "key": { + "algo": "rsa", + "size": 2048 + }, + "names": [ + { + "C": "CN", + "L": "HangZhou", + "O": "Kubernetes", + "OU": "openEuler k8s install", + "ST": "BinJiang" + } + ] +} +``` + +Generate a certificate: + +```bash +cfssl gencert -ca=../ca/ca.pem -ca-key=../ca/ca-key.pem -config=../ca/ca-config.json -profile=kubernetes service-account-csr.json | cfssljson -bare service-account +``` + +The result is as follows: + +```bash +service-account.csr service-account-key.pem service-account.pem +``` + +## Generating the kube-controller-manager Certificate + +Compile the CSR configuration of kube-controller-manager: + +```bash +{ + "CN": "system:kube-controller-manager", + "key": { + "algo": "rsa", + "size": 2048 + }, + "names": [ + { + "C": "CN", + "L": "HangZhou", + "O": "system:kube-controller-manager", + "OU": "openEuler k8s kcm", + "ST": "BinJiang" + } + ] +} +``` + +Generate a certificate: + +```bash +cfssl gencert -ca=../ca/ca.pem -ca-key=../ca/ca-key.pem -config=../ca/ca-config.json -profile=kubernetes kube-controller-manager-csr.json | cfssljson -bare kube-controller-manager +``` + +The result is as follows: + +```bash 
+kube-controller-manager.csr kube-controller-manager-key.pem kube-controller-manager.pem
+```
+
+## Generating the kube-proxy Certificate
+
+Compile the CSR configuration of kube-proxy:
+
+```bash
+{
+  "CN": "system:kube-proxy",
+  "key": {
+    "algo": "rsa",
+    "size": 2048
+  },
+  "names": [
+    {
+      "C": "CN",
+      "L": "HangZhou",
+      "O": "system:node-proxier",
+      "OU": "openEuler k8s kube proxy",
+      "ST": "BinJiang"
+    }
+  ]
+}
+```
+
+Generate a certificate:
+
+```bash
+cfssl gencert -ca=../ca/ca.pem -ca-key=../ca/ca-key.pem -config=../ca/ca-config.json -profile=kubernetes kube-proxy-csr.json | cfssljson -bare kube-proxy
+```
+
+The result is as follows:
+
+```bash
+kube-proxy.csr kube-proxy-key.pem kube-proxy.pem
+```
+
+## Generating the kube-scheduler Certificate
+
+Compile the CSR configuration of kube-scheduler:
+
+```bash
+{
+  "CN": "system:kube-scheduler",
+  "key": {
+    "algo": "rsa",
+    "size": 2048
+  },
+  "names": [
+    {
+      "C": "CN",
+      "L": "HangZhou",
+      "O": "system:kube-scheduler",
+      "OU": "openEuler k8s kube scheduler",
+      "ST": "BinJiang"
+    }
+  ]
+}
+```
+
+Generate a certificate:
+
+```bash
+cfssl gencert -ca=../ca/ca.pem -ca-key=../ca/ca-key.pem -config=../ca/ca-config.json -profile=kubernetes kube-scheduler-csr.json | cfssljson -bare kube-scheduler
+```
+
+The result is as follows:
+
+```bash
+kube-scheduler.csr kube-scheduler-key.pem kube-scheduler.pem
+```
+
+## Generating the kubelet Certificate
+
+The certificate involves the host name and IP address of the server where kubelet is located. Therefore, the configuration of each node is different. The script is compiled as follows:
+
+```bash
+$ cat node_csr_gen.bash
+
+#!/bin/bash
+
+nodes=(k8snode1 k8snode2 k8snode3)
+IPs=("192.168.122.157" "192.168.122.158" "192.168.122.159")
+
+for i in "${!nodes[@]}"; do
+
+# Write the per-node CSR configuration, following the same pattern as the
+# other certificates in this document.
+cat > "${nodes[$i]}-csr.json" <<EOF
+{
+  "CN": "system:node:${nodes[$i]}",
+  "key": {
+    "algo": "rsa",
+    "size": 2048
+  },
+  "names": [
+    {
+      "C": "CN",
+      "L": "HangZhou",
+      "O": "system:nodes",
+      "OU": "openEuler k8s kubelet",
+      "ST": "BinJiang"
+    }
+  ]
+}
+EOF
+
+# Sign the node certificate, embedding the host name and IP address.
+cfssl gencert -ca=../ca/ca.pem -ca-key=../ca/ca-key.pem -config=../ca/ca-config.json -hostname="${nodes[$i]},${IPs[$i]}" -profile=kubernetes "${nodes[$i]}-csr.json" | cfssljson -bare "${nodes[$i]}"
+
+done
+```
+
+The following uses the configuration file master.xml of the `k8smaster0` VM as an example:
+
+```xml
+<domain type='kvm'>
+    <name>k8smaster0</name>
+    <memory unit='GiB'>8</memory>
+    <vcpu>8</vcpu>
+    <os>
+        <type arch='aarch64' machine='virt'>hvm</type>
+        <loader readonly='yes' type='pflash'>/usr/share/edk2/aarch64/QEMU_EFI-pflash.raw</loader>
+        <nvram>/var/lib/libvirt/qemu/nvram/k8smaster0.fd</nvram>
+    </os>
+    <on_poweroff>destroy</on_poweroff>
+    <on_reboot>restart</on_reboot>
+    <on_crash>restart</on_crash>
+    <devices>
+        <emulator>/usr/libexec/qemu-kvm</emulator>
+        <disk type='file' device='disk'>
+            <source file='/mnt/vm/images/master0.img'/>
+            <target dev='vda' bus='virtio'/>
+        </disk>
+        <interface type='bridge'>
+            <mac address='52:54:00:00:00:80'/>
+        </interface>
+        <!-- Definitions of other devices and controllers are omitted here. -->
+    </devices>
+</domain>
+```
+
+The VM configuration must be unique. Therefore, you need to modify the following to ensure that the VM is unique:
+
+- name: host name of the VM. You are advised to use lowercase letters. In this example, the value is `k8smaster0`.
+- nvram: handle file path of the NVRAM, which must be globally unique. In this example, the value is `/var/lib/libvirt/qemu/nvram/k8smaster0.fd`.
+- disk source file: VM disk file path. In this example, the value is `/mnt/vm/images/master0.img`.
+- mac address of the interface: MAC address of the interface. In this example, the value is `52:54:00:00:00:80`.
+
+## Installing a VM
+
+1. Create and start a VM.
+
+    ```shell
+    virsh define master.xml
+    virsh start k8smaster0
+    ```
+
+2. Obtain the VNC port number of the VM.
+
+    ```shell
+    virsh vncdisplay k8smaster0
+    ```
+
+3. Use a VM connection tool, such as VNC Viewer, to remotely connect to the VM and perform configurations as prompted.
+
+4. Set the host name of the VM, for example, k8smaster0.
+ + ```shell + hostnamectl set-hostname k8smaster0 + ``` diff --git a/docs/en/docs/cluster_deployment/kubernetes/public_sys-resources/icon-note.gif b/docs/en/docs/cluster_deployment/kubernetes/public_sys-resources/icon-note.gif new file mode 100644 index 0000000000000000000000000000000000000000..6314297e45c1de184204098efd4814d6dc8b1cda Binary files /dev/null and b/docs/en/docs/cluster_deployment/kubernetes/public_sys-resources/icon-note.gif differ diff --git a/docs/en/docs/cluster_deployment/kubernetes/running_the_test_pod.md b/docs/en/docs/cluster_deployment/kubernetes/running_the_test_pod.md new file mode 100644 index 0000000000000000000000000000000000000000..036ff51eb510dea02f364560d999bfca68bf2b04 --- /dev/null +++ b/docs/en/docs/cluster_deployment/kubernetes/running_the_test_pod.md @@ -0,0 +1,42 @@ +# Running the Test Pod + +## Configuration File + +```bash +$ cat nginx.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: nginx-deployment + labels: + app: nginx +spec: + replicas: 3 + selector: + matchLabels: + app: nginx + template: + metadata: + labels: + app: nginx + spec: + containers: + - name: nginx + image: nginx:1.14.2 + ports: + - containerPort: 80 +``` + +## Starting the Pod + +Run the kubectl command to run Nginx. + +```bash +$ kubectl apply -f nginx.yaml +deployment.apps/nginx-deployment created +$ kubectl get pods +NAME READY STATUS RESTARTS AGE +nginx-deployment-66b6c48dd5-6rnwz 1/1 Running 0 33s +nginx-deployment-66b6c48dd5-9pq49 1/1 Running 0 33s +nginx-deployment-66b6c48dd5-lvmng 1/1 Running 0 34s +``` diff --git a/docs/en/docs/container_engine/docker_engine/_toc.yaml b/docs/en/docs/container_engine/docker_engine/_toc.yaml new file mode 100644 index 0000000000000000000000000000000000000000..12f34db244845e918458ce1932b9567a94e28b0f --- /dev/null +++ b/docs/en/docs/container_engine/docker_engine/_toc.yaml @@ -0,0 +1,25 @@ +label: Docker Container +isManual: true +description: Docker is an open source container engine that facilitates quick application + packaging, deployment, and delivery. +sections: + - label: Overview + href: ./overview.md + - label: Installation and Configuration + href: ./installation_and_configuration_3.md + - label: Container Management + href: ./container_management_1.md + - label: Image Management + href: ./image_management_1.md + - label: Command Reference + href: ./command_reference.md + sections: + - label: Container Engine + href: ./container_engine.md + - label: Container Management + href: ./container_management_2.md + - label: Image Management + href: ./image_management_2.md + - label: Statistics + href: ./statistics.md + diff --git a/docs/en/docs/container_engine/docker_engine/command_reference.md b/docs/en/docs/container_engine/docker_engine/command_reference.md new file mode 100644 index 0000000000000000000000000000000000000000..96cdd36e9724bf7df4df5ee032e212f4a04588d8 --- /dev/null +++ b/docs/en/docs/container_engine/docker_engine/command_reference.md @@ -0,0 +1,3 @@ +# Command Reference + +This chapter introduces Docker container commands. diff --git a/docs/en/docs/container_engine/docker_engine/container_engine.md b/docs/en/docs/container_engine/docker_engine/container_engine.md new file mode 100644 index 0000000000000000000000000000000000000000..a9f0546d9f2e63217e2db949e8080e1b2dbe28d6 --- /dev/null +++ b/docs/en/docs/container_engine/docker_engine/container_engine.md @@ -0,0 +1,304 @@ +# Container Engine + +Docker daemon is a system process that resides in the background. 
Before you run a docker subcommand, start the Docker daemon.
+
+If the Docker daemon is installed using the RPM package or system package management tool, you can run the **systemctl start docker** command to start the Docker daemon.
+
+The **docker** command supports the following parameters:
+
+1. To combine parameters of a single character, run the following command:
+
+    ```shell
+    docker run -t -i busybox /bin/sh
+    ```
+
+    The command can be written as follows:
+
+    ```shell
+    docker run -ti busybox /bin/sh
+    ```
+
+2. For **bool** command parameters such as **--icc=true**, the default value is displayed in the command help. If such a parameter is not specified, the default value displayed in the command help is used. If it is specified, the opposite of the value displayed in the command help is used. In addition, if **--icc** is not added when the Docker daemon is started, **--icc=true** is used by default. Otherwise, **--icc=false** is used.
+
+3. Parameters such as **--attach=\[\]** in the command help indicate that these parameters can be set multiple times. For example:
+
+    ```shell
+    docker run --attach=stdin --attach=stdout -i -t busybox /bin/sh
+    ```
+
+4. Parameters such as **-a** and **--attach=\[\]** in the command help indicate that the parameter can be specified using either **-a** _value_ or **--attach=**_value_. For example:
+
+    ```shell
+    docker run -a stdin --attach=stdout -i -t busybox /bin/sh
+    ```
+
+5. Parameters such as **--name=""** take a character string and can be configured only once. Parameters such as **-c=** take an integer and can be configured only once.
+
+**Table 1** Parameters specified during the Docker daemon startup
+
+| Parameter | Description |
+| --------- | ----------- |
+| --api-cors-header | CORS header information for enabling remote API calling. This interface supports the secondary development of upper-layer applications, which sets the CORS header for a remote API. |
+| --authorization-plugin=[] | Authentication plug-in. |
+| -b, --bridge="" | Existing bridge device mounted to the docker container. Note: none can be used to disable the network in the container. |
+| --bip="" | Bridge IP address, which is automatically created using the CIDR address. Note: this parameter cannot be used with -b. |
+| --cgroup-parent | cgroup parent directory configured for all containers. |
+| --config-file=/etc/docker/daemon.json | Configuration file for starting the Docker daemon. |
+| --containerd | Socket path of containerd. |
+| -D, --debug=false | Specifies whether to enable the debugging mode. |
+| --default-gateway | Default gateway of the container IPv4 address. |
+| --default-gateway-v6 | Default gateway of the container IPv6 address. |
+| --default-ulimit=[] | Default ulimit value of the container. |
+| --disable-legacy-registry | Disables the original registry. |
+| --dns=[] | DNS server forcibly used by the container. Example: --dns 8.8.x.x |
+| --dns-opt=[] | DNS option. |
+| --dns-search=[] | DNS search domain name forcibly used by the container. Example: --dns-search example.com |
+| --exec-opt=[] | Parameter to be executed when a container is started. For example, set the native.umask parameter: --exec-opt native.umask=normal sets the umask of the started container to 0022, and --exec-opt native.umask=secure sets it to 0027 (default value). Note: If native.umask is also configured in the docker create or docker run command, the configuration in the command is used. |
+| --exec-root=/var/run/docker | Root directory for storing the execution status files. |
+| --fixed-cidr="" | Fixed IP address (for example, 10.20.0.0/16) of the subnet. The IP address of the subnet must belong to the network bridge. |
+| --fixed-cidr-v6 | Fixed IPv6 address. |
+| -G, --group="docker" | Group assigned to the corresponding Unix socket in the background running mode. Note: When an empty string is configured for this parameter, the group information is removed. |
+| -g, --graph="/var/lib/docker" | Root directory for running docker. |
+| -H, --host=[] | Socket bound in background mode. One or more sockets can be configured using tcp://host:port, unix:///path/to/socket, fd://* or fd://socketfd. Example: dockerd -H tcp://0.0.0.0:2375 or export DOCKER_HOST="tcp://0.0.0.0:2375" |
+| --insecure-registry=[] | Registry for insecure connections. By default, Docker uses TLS certificates to ensure security for all connections. If the registry does not support HTTPS connections or the certificate is issued by an unknown certificate authority of the Docker daemon, you need to configure --insecure-registry=192.168.1.110:5000 when starting the daemon. This parameter needs to be configured if a private registry is used. |
+| --image-layer-check=true | Image layer integrity check. To enable the function, set this parameter to true. Otherwise, set this parameter to false. If this parameter is not configured, the function is disabled by default. When Docker is started, the image layer integrity is checked. If the image layer is damaged, the related images are unavailable. Docker cannot verify empty files, directories, or link files. Therefore, if the preceding files are lost due to a power failure, the integrity check of Docker image data may fail. When the Docker version changes, check whether the parameter is supported. If not supported, delete it from the configuration file. |
+| --icc=true | Enables communication between containers. |
+| --ip="0.0.0.0" | Default IP address used when a container is bound to a port. |
+| --ip-forward=true | Enables the net.ipv4.ip_forward setting for the container. |
+| --ip-masq=true | Enables IP masquerading. |
+| --iptables=true | Enables the iptables rules defined by the Docker container. |
+| -l, --log-level=info | Log level. |
+| --label=[] | Daemon label, in key=value format. |
+| --log-driver=json-file | Default log driver of container logs. |
+| --log-opt=map[] | Log driver parameters. |
+| --mtu=0 | MTU value of the container network. If this parameter is not configured, the value of the route MTU is used by default. If the default route is not configured, set this parameter to the constant value 1500. |
+| -p, --pidfile="/var/run/docker.pid" | PID file path of the background process. |
+| --raw-logs | Logs with all timestamps and without the ANSI color scheme. |
+| --registry-mirror=[] | Image registry preferentially used by dockerd. |
+| -s, --storage-driver="" | Storage driver that is forcibly used when a container runs. |
+| --selinux-enabled=false | Enables SELinux. If the kernel version is 3.10.0-862.14 or later, this parameter cannot be set to true. |
+| --storage-opt=[] | Storage driver parameter. This parameter is valid only when the storage driver is devicemapper. Example: dockerd --storage-opt dm.blocksize=512K |
+| --tls=false | Enables TLS authentication. |
+| --tlscacert="/root/.docker/ca.pem" | Path of the CA-signed certificate file. |
+| --tlscert="/root/.docker/cert.pem" | File path of the TLS certificate. |
+| --tlskey="/root/.docker/key.pem" | File path of the TLS key. |
+| --tlsverify=false | Verifies the communication between the background processes and the client using TLS. |
+| --insecure-skip-verify-enforce | Whether to forcibly skip the verification of the certificate host or domain name. The default value is false. |
+| --use-decrypted-key=true | Whether to use the decrypted private key. |
+| --userland-proxy=true | Whether to use the userland proxy for the container loopback device. |
+| --userns-remap | User namespace-based user mapping table in the container. Note: This parameter is not supported in the current version. |
diff --git a/docs/en/docs/container_engine/docker_engine/container_management_1.md b/docs/en/docs/container_engine/docker_engine/container_management_1.md new file mode 100644 index 0000000000000000000000000000000000000000..2a85a34796944d7cd47513a6a15d1a2b8e70b808 --- /dev/null +++ b/docs/en/docs/container_engine/docker_engine/container_management_1.md @@ -0,0 +1,698 @@ +# Container Management + +## Creating a Container + +### Downloading Images + +Only user **root** can run the **docker** command. If you log in as a common user, you need to use the **sudo** command before running the **docker** command. + +```shell +docker pull busybox +``` + +This command is used to download the **busybox:latest** image from the official Docker registry. \(If no tag is specified in the command, the default tag name **latest** is used.\) During the image download, the system checks whether the dependent layer exists locally. If yes, the image download is skipped. When downloading images from a private registry, specify the registry description. For example, if a private registry containing some common images is created and its IP address is **192.168.1.110:5000**, you can run the following command to download the image from the private registry: + +```shell +docker pull 192.168.1.110:5000/busybox +``` + +The name of the image downloaded from the private registry contains the registry address information, which may be too long. Run the **docker tag** command to generate an image with a shorter name. + +```shell +docker tag 192.168.1.110:5000/busybox busybox +``` + +Run the **docker images** command to view the local image list. + +### Running a Simple Application + +```shell +$ docker run busybox /bin/echo "Hello world" +Hello world +``` + +This command uses the **busybox:latest** image to create a container, and executes the **echo "Hello world"** command in the container. Run the following command to view the created container: + +```shell +$ docker ps -l +CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES +d8c0a3315bc0 busybox"/bin/echo 'Hello wo..." 5 seconds ago Exited (0) 3 seconds ago practical_franklin +``` + +### Creating an Interactive Container + +```shell +$ docker run -it busybox /bin/bash +root@bf22919af2cf:/# ls +bin boot dev etc home lib media mnt opt proc root run sbin srv sys tmp usr var +root@bf22919af2cf:/# pwd +/ +``` + +The **-ti** option allocates a pseudo terminal to the container and uses standard input \(STDIN\) for interaction. You can run commands in the container. In this case, the container is an independent Linux VM. Run the **exit** command to exit the container. + +### Running a Container in the Background + +Run the following command. **-d** indicates that the container is running in the background. **--name=container1** indicates that the container name is **container1**. + +```shell +$ docker run -d --name=container1 busybox /bin/sh -c "while true;do echo hello world;sleep 1;done" +7804d3e16d69b41aac5f9bf20d5f263e2da081b1de50044105b1e3f536b6db1c +``` + +The command output contains the container ID but does not contain **hello world**. In this case, the container is running in the background. You can run the **docker ps** command to view the running container. 
+
+```shell
+$ docker ps
+CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
+7804d3e16d69 busybox "/bin/sh -c 'while tr" 11 seconds ago Up 10 seconds container1
+```
+
+Run the following **docker logs** command to view the output during container running:
+
+```shell
+$ docker logs container1
+hello world
+hello world
+hello world
+...
+```
+
+### Container Network Connection
+
+By default, a container can access an external network, while port mapping is required when an external network accesses a container. The following uses how to run the private registry service in Docker as an example. In the following command, **-P** is used to expose the open ports in the registry to the host.
+
+```shell
+$ docker run --name=container_registry -d -P registry
+cb883f6216c2b08a8c439b3957fb396c847a99079448ca741cc90724de4e4731
+```
+
+The container\_registry container has been started, but the mapping between services in the container and ports on the host is not clear. You need to run the **docker port** command to view the port mapping.
+
+```shell
+$ docker port container_registry
+5000/tcp -> 0.0.0.0:49155
+```
+
+The command output shows that port 5000 in the container is mapped to port 49155 on the host. You can access the registry service through the host IP address and port **49155**. Enter **http://<host IP address>:49155** in the address box of the browser and press **Enter**. The registry version information is displayed.
+
+When running registry images, you can directly specify the port mapping, as shown in the following:
+
+```shell
+docker run --name=container_registry -d -p 5000:5000 registry
+```
+
+**-p 5000:5000** is used to map port 5000 in the container to port 5000 on the host.
+
+### Precautions
+
+- **Do Not Add -a stdin Independently During Container Startup**
+
+    When starting a container, you must add **-a stdout** or **-a stderr** together with **-a stdin** instead of **-a stdin** only. Otherwise, the device stops responding even after the container exits.
+
+- **Do Not Use the Long or Short ID of an Existing Container As the Name of a New Container**
+
+    When creating a container, do not use the long or short ID of an existing container A as the name of the new container B. If the long ID of container A is used as the name of container B, Docker will match container A even though the name of container B is used as the specified target container for operations. If the short ID of container A is used as the name of container B, Docker will match container B even though the short ID of container A is used as the specified target container for operations. This is because Docker matches the long IDs of all containers first. If the matching fails, the system performs exact matching using the value of **container\_name**. If matching failure persists, the container ID is directly matched in fuzzy mode.
+
+- **Containers That Depend on Standard Input and Output, Such As sh/bash, Must Use the -ti Parameter to Avoid Exceptions**
+
+    Normal case: If you do not use the **-ti** parameter to start a process container such as sh/bash, the container exits immediately.
+
+    The cause of this problem is that Docker first creates a stdin that matches services in the container. If interactive parameters such as **-ti** are not set, Docker closes the pipe after the container is started, and the service container process sh/bash exits after stdin is closed.
+
+    Exception: If the Docker daemon is forcibly killed in a specific phase \(before the pipe is closed\), the daemon does not close the pipe in time.
+    In this case, the sh/bash process does not exit even without **-ti**. As a result, an exception occurs, and you need to manually clear the container.
+
+    After being restarted, the daemon takes over the original container stream. Containers without the **-ti** parameter may not be able to process the stream because these containers do not have streams to be taken over in normal cases. In actual services, sh/bash without the **-ti** parameter does not take effect and is seldom used. To avoid this problem, use the **-ti** parameter to restrict interactive containers.
+
+- **Container Storage Volumes**
+
+    If you use the **-v** parameter to mount files on the host to a container when the container is started, the inodes of the files may be changed when you run the **vi** or **sed** command to modify the files on the host or in the container. As a result, files on the host and in the container are not synchronized. Do not mount files in the container in this mode \(or do not use them together with the **vi** and **sed** commands\). You can also mount the upper-layer directories of the files to avoid exceptions. The **nocopy** option can be used to prevent original files in the mount point directory of a container from being copied to the source directory of the host when Docker mounts volumes. However, this option can be used only when an anonymous volume is mounted and cannot be used in the bind mount scenario.
+
+- **Do Not Use Options That May Affect the Host**
+
+    The **--privileged** option enables all permissions for a container. On the container, mounting operations can be performed and directories such as **/proc** and **/sys** can be modified, which may affect the host. Therefore, do not use this option for common containers.
+
+    A host-shared namespace, such as the **--pid host**, **--ipc host**, or **--net host** option, can enable a container to share the namespace with the host, which will also affect the host. Therefore, do not use these options.
+
+- **Do Not Use the Unstable Kernel Memory Cgroup**
+
+    The kernel memory cgroup on Linux kernels earlier than 4.0 is still in the experimental phase and runs unstably. Therefore, do not use the kernel memory cgroup.
+
+    When the **docker run --kernel-memory** command is executed, the following alarm is generated:
+
+    ```text
+    WARNING: You specified a kernel memory limit on a kernel older than 4.0. Kernel memory limits are experimental on older kernels, it won't work as expected and can cause your system to be unstable.
+    ```
+
+- **blkio-weight Parameter Is Unavailable in the Kernel That Supports blkio Precise Control**
+
+    **--blkio-weight-device** can implement more accurate blkio control in a container. The control requires a specified disk device, which can be implemented through the **--blkio-weight-device** parameter of Docker. In this kernel, Docker does not provide the **--blkio-weight** mode to limit the container blkio. If you use this parameter to create a container, the following error is reported:
+
+    ```text
+    docker: Error response from daemon: oci runtime error: container_linux.go:247: starting container process caused "process_linux.go:398: container init caused \"process_linux.go:369: setting cgroup config for ready process caused \\\"blkio.weight not supported, use weight_device instead\\\"\""
+    ```
+
+- **Using --blkio-weight-device in CFQ Scheduling Policy**
+
+    The **--blkio-weight-device** parameter works only when the disk uses the Completely Fair Queuing \(CFQ\) scheduling policy.
### Concurrent Performance

- There is an upper limit on the Docker internal message buffer. If the number of messages exceeds this limit, messages are discarded. Therefore, it is recommended that the number of concurrently executed commands not exceed 1000. Otherwise, internal messages may be lost and containers may fail to start.
- When containers are concurrently created and restarted, the error message "oci runtime error: container init still running" is occasionally reported. This is because containerd optimizes the performance of the event waiting queue. When a container is stopped, the **runc delete** command is executed to kill the init process in the container within 1s. If the init process is not killed within 1s, runC returns this error message. The garbage collection (GC) mechanism of containerd reclaims the resources left over by **runc delete** at an interval of 10s, so operations on the container are not affected. If this error occurs, wait 4 to 5 seconds and restart the container.

### Security Feature Interpretation

1. Analysis of the default Docker permission configuration.

    In the default configuration of native Docker, each process carries the following capabilities:

    ```text
    "CAP_CHOWN",
    "CAP_DAC_OVERRIDE",
    "CAP_FSETID",
    "CAP_FOWNER",
    "CAP_MKNOD",
    "CAP_NET_RAW",
    "CAP_SETGID",
    "CAP_SETUID",
    "CAP_SETFCAP",
    "CAP_SETPCAP",
    "CAP_NET_BIND_SERVICE",
    "CAP_SYS_CHROOT",
    "CAP_KILL",
    "CAP_AUDIT_WRITE",
    ```

    The default seccomp configuration is a whitelist: for any syscall not in the whitelist, **SCMP_ACT_ERRNO** is returned by default. Docker enables different syscalls for different capabilities, and a capability that is not in the whitelist is not assigned to the container by default.

2. CAP_SYS_MODULE

    CAP_SYS_MODULE allows a container to insert or remove kernel (ko) modules. With this capability, a container can escape or even damage the kernel. Namespaces provide the strongest isolation available to a container, and a kernel module only needs to point its namespace to **init_nsproxy** to break that isolation.

3. CAP_SYS_ADMIN

    The sys_admin capability provides the following to a container:

    - File system operations: **mount**, **umount**, and **quotactl**
    - Namespace setting: **setns**, **unshare**, and **clone new namespace**
    - driver ioctl
    - PCI control: **pciconfig_read**, **pciconfig_write**, and **pciconfig_iobase**
    - **sethostname**

4. CAP_NET_ADMIN

    CAP_NET_ADMIN allows a container to access network interfaces and sniff network traffic. The container can obtain the network traffic of all containers, including the host, which severely damages network isolation.
5. CAP_DAC_READ_SEARCH

    CAP_DAC_READ_SEARCH enables the open_by_handle_at and name_to_handle_at system calls. If the host is not protected by SELinux, a container can brute-force the inode number in the file_handle structure to open any file on the host, which compromises file system isolation.

6. CAP_SYS_RAWIO

    CAP_SYS_RAWIO allows a container to perform raw writes to I/O ports on the host, which may cause the host kernel to crash.

7. CAP_SYS_PTRACE

    Granting the ptrace capability to a container enables process debugging inside the container. RunC has fixed the related vulnerability, but some tools, such as nsenter and docker-enter, are not protected. In a container, processes executed by these tools can be debugged to obtain the resources (such as namespaces and fds) that these tools bring in. In addition, ptrace can bypass seccomp, greatly increasing the attack surface of the kernel.

8. Docker capability interface: --cap-add all

    --cap-add all grants all capabilities to a container, including the dangerous ones described in this section, which allows the container to escape.

9. Do not disable the seccomp feature of Docker.

    Docker has a default seccomp configuration with a whitelist; syscalls not in the whitelist are disabled by seccomp. You can disable seccomp using **--security-opt 'seccomp:unconfined'**. If seccomp is disabled, or a user-defined seccomp configuration with an incomplete filtering list is used, the attack surface of the kernel inside the container is increased.

10. Do not set the /sys and /proc directories to writable.

    The **/sys** and **/proc** directories contain Linux kernel maintenance parameters and device management interfaces. If they are writable in a container, the container may escape.

11. Docker open capability: CAP_AUDIT_CONTROL

    This capability allows a container to control the audit system and use the **AUDIT_TTY_GET** and **AUDIT_TTY_SET** operations to obtain the TTY execution records (including the **root** password) stored in the audit system.

12. CAP_BLOCK_SUSPEND and CAP_WAKE_ALARM

    These capabilities provide a container the ability to block the system from suspending (for example, through epoll).

13. CAP_IPC_LOCK

    With this capability, a container can break the max locked memory limit in **ulimit** and mlock memory blocks of any size, causing DoS attacks.

14. CAP_SYS_LOG

    In a container with this capability, kernel logs can be read using dmesg to break through kernel KASLR protection.

15. CAP_SYS_NICE

    In a container with this capability, the scheduling policy and priority of processes can be changed, causing DoS attacks.

16. CAP_SYS_RESOURCE

    With this capability, a container can bypass resource restrictions, such as the disk space restriction, the keymaps quantity restriction, and the **pipe-size-max** restriction, causing DoS attacks.

17. CAP_SYS_TIME

    In a container with this capability, the time on the host can be changed.

18. Risk analysis of Docker default capabilities

    The default capabilities of Docker include CAP_SETUID and CAP_FSETID. If the host and a container share a directory, the container can set permissions on binary files in the shared directory, and common users on the host can use this to elevate privileges. With the CAP_AUDIT_WRITE capability, a container can write logs to the host, so the host must be configured with protection against log flooding.

19. Docker and the host share namespace parameters, such as --pid, --ipc, and --uts.

    These parameters make the container share the corresponding namespace with the host, so the container can attack the host because that namespace is not isolated. For example, if **--pid** is used to share the PID namespace with the host, the PIDs on the host are visible in the container, and processes on the host can be killed at will from the container.

20. --device maps sensitive directories or devices of the host to the container.

    The Docker management plane provides interfaces, such as **--device** and **-v**, for mapping directories or devices on a host into a container. Do not map sensitive directories or devices of the host to a container.
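Given the capability risks described above, a common hardening pattern is to start from an empty capability set and add back only what the workload needs. A minimal sketch (busybox and NET_BIND_SERVICE are placeholders for your own image and required capability):

```shell
# Drop every default capability, then re-add only the one this workload needs.
docker run -ti --cap-drop ALL --cap-add NET_BIND_SERVICE busybox sh
```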
## Creating Containers Using hook-spec

### Principles and Application Scenarios

Docker supports extension through hooks. The execution of hook applications and of the underlying runC complies with the OCI standards; for details, see the OCI runtime specification.

There are three types of hooks: prestart, poststart, and poststop. They are run, respectively, before the applications in the container are started, after the applications are started, and after the applications are stopped.

### API Reference

The **--hook-spec** parameter is added to the **docker run** and **docker create** commands and is followed by the absolute path of the **spec** file. It specifies the hooks to be added during container startup. These hooks are automatically appended after the hooks dynamically created by Docker (currently only the libnetwork prestart hook) to execute user-specified programs during container startup or destruction.

The structure of **spec** is defined as follows:

```go
// Hook specifies a command that is run at a particular event in the lifecycle of a container.
type Hook struct {
	Path    string   `json:"path"`
	Args    []string `json:"args,omitempty"`
	Env     []string `json:"env,omitempty"`
	Timeout *int     `json:"timeout,omitempty"`
}

// Hooks for container setup and teardown.
type Hooks struct {
	// Prestart is a list of hooks to be run before the container process is executed.
	// On Linux, they are run after the container namespaces are created.
	Prestart []Hook `json:"prestart,omitempty"`
	// Poststart is a list of hooks to be run after the container process is started.
	Poststart []Hook `json:"poststart,omitempty"`
	// Poststop is a list of hooks to be run after the container process exits.
	Poststop []Hook `json:"poststop,omitempty"`
}
```

- The **Path**, **Args**, and **Env** parameters are mandatory.
- **Timeout** is optional, but you are advised to set it to a value ranging from 1 to 120. The type is int; floating point numbers are not allowed.
- The content of the **spec** file must be in JSON format as shown above. If the format is incorrect, an error is reported.
- Both **docker run --hook-spec /tmp/hookspec.json** _xxx_ and **docker create --hook-spec /tmp/hookspec.json** _xxx_ **&& docker start** _xxx_ can be used.
### Customizing Hooks for a Container

Take adding a NIC during container startup as an example. The content of the **hook spec** file is as follows:

```json
{
    "prestart": [
        {
            "path": "/var/lib/docker/hooks/network-hook",
            "args": ["network-hook", "tap0", "myTap"],
            "env": [],
            "timeout": 5
        }
    ],
    "poststart": [],
    "poststop": []
}
```

Specify a prestart hook to add the configuration of a network hook. The path is **/var/lib/docker/hooks/network-hook**. **args** indicates the program arguments; by convention, the first argument is the program name, and the following arguments are the arguments accepted by the program. The network-hook program requires two arguments: the name of the NIC on the host and the name of the NIC in the container.
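The hook program itself is user-supplied. The following is only an illustrative sketch of what **network-hook** might look like: per the OCI standards, a hook receives the container state JSON on stdin, from which the container PID can be extracted to move the host NIC into the container network namespace. The crude JSON parsing and the NIC handling are assumptions for illustration, not part of the documented interface:

```shell
#!/bin/bash
# /var/lib/docker/hooks/network-hook (illustrative sketch only)
# argv: $1 = host NIC name (tap0), $2 = NIC name inside the container (myTap)
state=$(cat)                                   # OCI hooks receive the state JSON on stdin
pid=$(echo "$state" | grep -o '"pid":[0-9]*' | grep -o '[0-9]*$')
ip link set "$1" netns "$pid" name "$2"        # move the NIC into the container netns and rename it
```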
- Precautions

    1. The **hook** path must be in the **hooks** folder in the **graph** directory (**--graph**) of Docker. Its default value is **/var/lib/docker/hooks**. You can run the **docker info** command to view the root path.

        ```shell
        $ docker info
        ...
        Docker Root Dir: /var/lib/docker
        ...
        ```

        This path may change due to manual configuration or the use of user namespaces (**daemon --userns-remap**). After the symbolic links in the path are resolved, the resolved path must start with _Docker Root Dir_**/hooks** (for example, **/var/lib/docker/hooks**). Otherwise, an error message is displayed.

    2. The **hook** path must be an absolute path, because the daemon cannot properly process relative paths. In addition, an absolute path meets security requirements.

    3. Information written by the hook program to stderr is output to the client and affects the container lifecycle (for example, the container may fail to start). Information written to stdout is ignored.

    4. Do not call Docker commands back from within hooks.

    5. The execute permission must be granted on the configured hook executable files. Otherwise, an error is reported during hook execution.

    6. The execution time of a hook operation must be as short as possible. If the prestart phase takes too long (more than 2 minutes), container startup times out. If the poststop phase takes too long (more than 2 minutes), the container becomes abnormal.

        The known exception is as follows: when the **docker stop** command is executed and the clearing operation is performed after 2 minutes, the hook operation is not complete, so the system waits until the hook operation completes (the process holds a lock). As a result, all operations related to the container stop responding and can be recovered only after the hook operation completes. In addition, the 2-minute timeout handling of the **docker stop** command is an asynchronous process, so even if the **docker stop** command returns successfully, the container status is still **up**. The container status changes to **exited** only after the hook operation completes.

- Suggestions

    1. You are advised to set the hook timeout threshold to a value less than 5s.
    2. You are advised to configure only one prestart hook, one poststart hook, and one poststop hook for each container. If too many hooks are configured, container startup may take a long time.
    3. You are advised to identify the dependencies between multiple hooks and, if required, adjust the order of the hook entries accordingly, because hooks are executed in the order in which they appear in the **spec** file.

### Multiple **hook-spec**

If multiple hook configuration files are available and you need to run multiple hooks, you must manually combine them into one configuration file and specify it with the **--hook-spec** parameter; then all hooks take effect. If multiple **--hook-spec** parameters are specified, only the last one takes effect.

Configuration examples:

The content of the **hook1.json** file is as follows:

```shell
# cat /var/lib/docker/hooks/hookspec.json
{
    "prestart": [
        {
            "path": "/var/lib/docker/hooks/lxcfs-hook",
            "args": ["lxcfs-hook", "--log", "/var/log/lxcfs-hook.log"],
            "env": []
        }
    ],
    "poststart": [],
    "poststop": []
}
```

The content of the **hook2.json** file is as follows:

```shell
# cat /etc/isulad-tools/hookspec.json
{
    "prestart": [
        {
            "path": "/docker-root/hooks/docker-hooks",
            "args": ["docker-hooks", "--state", "prestart"],
            "env": []
        }
    ],
    "poststart": [],
    "poststop": [
        {
            "path": "/docker-root/hooks/docker-hooks",
            "args": ["docker-hooks", "--state", "poststop"],
            "env": []
        }
    ]
}
```

The manually combined JSON content is as follows:

```json
{
    "prestart": [
        {
            "path": "/var/lib/docker/hooks/lxcfs-hook",
            "args": ["lxcfs-hook", "--log", "/var/log/lxcfs-hook.log"],
            "env": []
        },
        {
            "path": "/docker-root/hooks/docker-hooks",
            "args": ["docker-hooks", "--state", "prestart"],
            "env": []
        }
    ],
    "poststart": [],
    "poststop": [
        {
            "path": "/docker-root/hooks/docker-hooks",
            "args": ["docker-hooks", "--state", "poststop"],
            "env": []
        }
    ]
}
```

The Docker daemon reads the hook binaries listed in the prestart (and other) arrays of the hook configuration file and executes them in array order. Therefore, you need to identify the dependencies between multiple hooks and, if required, adjust the order of the hook entries accordingly.

### Customizing Default Hooks for All Containers

The Docker daemon can also receive the **--hook-spec** parameter, with the same semantics as **--hook-spec** in **docker create** or **docker run**. You can also add the hook configuration to the **/etc/docker/daemon.json** file:

```json
{
    "hook-spec": "/tmp/hookspec.json"
}
```

When a container runs, the hooks specified by the daemon's **--hook-spec** are executed first, followed by the hooks customized for each container.
## Configuring Health Check During Container Creation

Docker provides user-defined health checks for containers. You can configure the **HEALTHCHECK CMD** option in the Dockerfile, or configure the **--health-cmd** option when a container is created, so that commands are periodically executed in the container to monitor its health status based on their return values.

### Configuration Methods

- Add the following configuration to the Dockerfile:

    ```text
    HEALTHCHECK --interval=5m --timeout=3s --health-exit-on-unhealthy=true \
      CMD curl -f http://localhost/ || exit 1
    ```

    The configurable options are as follows:

    1. **--interval=DURATION**: interval between two consecutive command executions. The default value is **30s**. After a container is started, the first check is performed after this interval.
    2. **--timeout=DURATION**: maximum duration for executing a single check command. If the execution times out, the command execution fails. The default value is **30s**.
    3. **--start-period=DURATION**: container initialization period. The default value is **0s**. Health checks are also performed during initialization, but a failed check is not counted toward the maximum number of retries. However, if a health check succeeds during initialization, the container is considered started, and all subsequent consecutive failures are counted toward the maximum number of retries.
    4. **--retries=N**: maximum number of retries for the health check. The default value is **3**.
    5. **--health-exit-on-unhealthy=BOOLEAN**: whether to kill the container when it becomes unhealthy. The default value is **false**.
    6. **CMD**: this option is mandatory. If **0** is returned after the command is run in the container, the command execution succeeded. Any other return value means the command execution failed.

    After **HEALTHCHECK** is configured, the related configuration is written into the image configuration during image creation. You can run the **docker inspect** command to view it. For example:

    ```json
    "Healthcheck": {
        "Test": [
            "CMD-SHELL",
            "/test.sh"
        ]
    },
    ```

- Configure the health check during container creation:

    ```shell
    docker run -itd --health-cmd "curl -f http://localhost/ || exit 1" --health-interval 5m --health-timeout 3s --health-exit-on-unhealthy centos bash
    ```

    The configurable options are as follows:

    1. **--health-cmd**: this option is mandatory. If **0** is returned after the command is run in the container, the command execution succeeded. Any other return value means the command execution failed.
    2. **--health-interval**: interval between two consecutive command executions. The default value is **30s**. The upper limit is the maximum value of Int64, in nanoseconds.
    3. **--health-timeout**: maximum duration for executing a single check command. If the execution times out, the command execution fails. The default value is **30s**. The upper limit is the maximum value of Int64, in nanoseconds.
    4. **--health-start-period**: container initialization period. The default value is **0s**. The upper limit is the maximum value of Int64, in nanoseconds.
    5. **--health-retries**: maximum number of retries for the health check. The default value is **3**. The upper limit is the maximum value of Int32.
    6. **--health-exit-on-unhealthy**: whether to kill the container when it becomes unhealthy. The default value is **false**.

    After the container is started, the **HEALTHCHECK** configuration is written into the container configuration. You can run the **docker inspect** command to view it. For example:

    ```json
    "Healthcheck": {
        "Test": [
            "CMD-SHELL",
            "/test.sh"
        ]
    },
    ```
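The following end-to-end sketch ties these options together; the container name, image, and check command are illustrative, not part of the documented configuration:

```shell
# Create a container whose health command tests for a marker file,
# then make the check pass and watch the status change.
docker run -itd --name health_test --health-cmd "test -e /tmp/healthy" \
    --health-interval 10s --health-retries 3 busybox sh
docker exec health_test touch /tmp/healthy
docker ps --filter name=health_test   # STATUS column shows (healthy) once a check passes
```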
### Check Rules

1. After a container is started, its status is **health:starting**.
2. After the period specified by **start-period**, the **cmd** command is periodically executed in the container at the interval specified by **interval**: after one execution completes, the next starts after the specified interval.
3. If the **cmd** command completes within the time specified by **timeout** and its return value is **0**, the check succeeds; otherwise, it fails. If the check succeeds, the container status changes to **health:healthy**.
4. If the **cmd** command fails the number of times specified by **retries**, the container status changes to **health:unhealthy**, and the container continues to be checked.
5. When the container status is **health:unhealthy**, a single successful check changes the status back to **health:healthy**.
6. If **--health-exit-on-unhealthy** is set and the container exits for a reason other than being killed (killed containers return exit code **137**), the health check takes effect again only after the container is restarted.
7. When the **cmd** command completes or times out, the Docker daemon records the start time, return value, and standard output of the check in the container configuration file. A maximum of five records are kept. The container configuration file also stores the health check parameters.

Run the **docker ps** command to view the container status.

```shell
$ docker ps
CONTAINER ID   IMAGE     COMMAND   CREATED             STATUS                         PORTS   NAMES
7de2228674a2   testimg   "bash"    About an hour ago   Up About an hour (unhealthy)           cocky_davinci
```

While the container is running, the health check status is written into the container configuration. You can run the **docker inspect** command to view it.

```json
"Health": {
    "Status": "healthy",
    "FailingStreak": 0,
    "Log": [
        {
            "Start": "2018-03-07T07:44:15.481414707-05:00",
            "End": "2018-03-07T07:44:15.556908311-05:00",
            "ExitCode": 0,
            "Output": ""
        },
        {
            "Start": "2018-03-07T07:44:18.557297462-05:00",
            "End": "2018-03-07T07:44:18.63035891-05:00",
            "ExitCode": 0,
            "Output": ""
        },
        ...
}
```

> [!NOTE]NOTE
>
> - A maximum of five health check status records can be stored in a container; the latest five records are kept.
> - Only one health check configuration item can take effect in a container at a time. Later items configured in the Dockerfile overwrite earlier ones, and configurations made during container creation overwrite those in the image.
> - In the Dockerfile, you can set **HEALTHCHECK NONE** to cancel the health check configuration of a referenced image. When running a container, you can set **--no-healthcheck** to cancel the health check configuration of the image. Do not configure health check parameters and **--no-healthcheck** at the same time during startup.
> - If the Docker daemon exits after a container with health check parameters is started, the health check is not executed. After the Docker daemon restarts, the container health status changes to **starting**; the check rules afterwards are the same as above.
> - If health check parameters are set to **0** during container image creation, the default values are used.
> - If health check parameters are set to **0** during container startup, the default values are used.
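To script against the health state, you can extract just the status field with a format template (the container name is taken from the example above):

```shell
# Print only the current health status of the container.
$ docker inspect --format '{{.State.Health.Status}}' cocky_davinci
unhealthy
```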
## Stopping and Deleting a Container

Run the **docker stop** command to stop the container named **container1**.

```shell
docker stop container1
```

Alternatively, run the **docker kill** command to kill the container.

```shell
docker kill container1
```

After the container is stopped, run the **docker rm** command to delete it.

```shell
docker rm container1
```

Alternatively, run the **docker rm -f** command to forcibly delete the container.

```shell
docker rm -f container1
```

### Precautions

- Do not run the **docker rm -f** _XXX_ command to delete a container. During a forcible deletion, the **docker rm** command ignores errors in the process, which may leave residual container metadata behind. If a container is deleted in the common way and an error occurs, the deletion fails and no metadata remains.
- Do not run the **docker kill** command. The **docker kill** command sends a signal to the service processes in a container; depending on how those processes handle signals, the signal may not produce the expected result.
- A container in the restarting state may not stop immediately when you run the **docker stop** command. If a container uses restart rules and is in the restarting state, there is a low probability that the **docker stop** command returns immediately; the container will still be restarted under the effect of the restart rule.
- Do not run the **docker restart** command on a container started with the **--rm** parameter. When a container started with **--rm** exits, it is automatically deleted, so restarting it may cause exceptions. For example, if both the **--rm** and **-ti** parameters are used when the container is started, do not perform the restart operation on it; otherwise, the container may stop responding and fail to exit.

### When Using docker stop/restart with -t and t<0, Ensure That the Applications in the Container Can Process the Stop Signal

Stop principle (the stop process is also invoked by **restart**):

1. Send the SIGTERM (15) signal to the container.
2. Wait for a period of time (**t** entered by the user).
3. If the container process still exists, send the SIGKILL (9) signal to forcibly kill it.

The meaning of the input parameter **t** (unit: second) is as follows:

- **t** < 0: Wait indefinitely for a graceful stop. Use this setting when you are sure that your application handles the stop signal properly.
- **t** = 0: Do not wait; send **kill -9** to the container immediately.
- **t** > 0: Wait for the specified period and send **kill -9** to the container if it has not stopped within that period.

Therefore, if **t** is set to a value less than 0 (for example, **t** = **-1**), ensure that the container application correctly processes the SIGTERM signal. If the container ignores this signal, the **docker stop** command hangs.
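A minimal sketch of a container entrypoint that handles SIGTERM, so that **docker stop** with **t** < 0 can terminate the container gracefully (the cleanup logic is a placeholder):

```shell
#!/bin/sh
# Trap the SIGTERM sent by "docker stop" and exit cleanly instead of hanging.
trap 'echo "SIGTERM received, exiting"; exit 0' TERM
while :; do
    sleep 1    # the trap fires once the current sleep returns
done
```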
### Manually Deleting Containers in the Dead State, as the Underlying File System May Be Busy

When Docker deletes a container, it stops the related processes, changes the container status to Dead, and then deletes the container rootfs. When the file system or devicemapper is busy, the last step, deleting the rootfs, fails. Run the **docker ps -a** command; the output shows that the container is in the Dead state. Containers in the Dead state cannot be started again. Wait until the file system is no longer busy and run the **docker rm** command again to delete the container.

### In Containers Sharing a PID Namespace, If a Child Container Is in the pause State, the Parent Container Hangs and the docker run Command Cannot Be Executed

When the **--pid** parameter is used to create parent and child containers that share a PID namespace, if any process in the child container cannot exit (for example, it is in the D or pause state) when the **docker stop** command is executed, the **docker stop** command on the parent container keeps waiting. You need to manually recover the process so that the command can proceed normally.

In this case, run the **docker inspect** command on the container in the pause state to check whether the parent container indicated by **PidMode** is the container on which **docker stop** was run. If so, run the **docker unpause** command to cancel the pause state of the child container, and then proceed to the next step.

Generally, the possible cause is that the PID namespace of the container cannot be destroyed due to residual processes. If the problem persists, use Linux tools to obtain the residual processes and locate the reason why the processes in the PID namespace failed to exit. After the problem is solved, the container can exit.

- Obtain the PID namespace ID of a container:

    ```shell
    docker inspect --format={{.State.Pid}} CONTAINERID | awk '{print "/proc/"$1"/ns/pid"}' | xargs readlink
    ```

- Obtain the threads in the namespace:

    ```shell
    ls -l /proc/*/task/*/ns/pid | grep -F PIDNAMESPACE_ID | awk '{print $9}' | awk -F \/ '{print $5}'
    ```

## Querying Container Information

In any case, do not determine the container status based on whether a **docker** command returns successfully. To view the container status, you are advised to use the following command:

```shell
docker inspect
```

## Modification Operations

### Precautions for Starting Multiple Processes in a Container Using docker exec

When the first **docker exec** command executed in a container is **bash**, ensure that all processes started by **exec** have stopped before you run the **exit** command. Otherwise, the device may stop responding when you run **exit**. To keep a process started by **exec** running in the background after **exit**, start the process with **nohup**.
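For example, the following sketch starts a long-running process from an exec'ed bash with **nohup**, so that **exit** returns normally (**container1** and the **sleep** command are placeholders):

```shell
$ docker exec -ti container1 bash
root@7804d3e16d69:/# nohup sleep 1000 &    # keep the process alive after exit
root@7804d3e16d69:/# exit
```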
### Usage Conflict Between docker rename and docker stats _container\_name_

If you run the **docker stats** _container\_name_ command to monitor a container in real time and then rename the container using **docker rename**, the name displayed in the **docker stats** output remains the original name, not the new one.

### docker rename Fails on a Container in the restarting State

When the rename operation is performed on a container in the restarting state, Docker modifies the container network configuration accordingly. Because the container may not be started yet and its network does not exist, the rename operation reports an error indicating that the sandbox does not exist. You are advised to rename only containers that are not in the restarting state.

### docker cp

1. When you run **docker cp** to copy files to a container, all other operations on the container can be performed only after the **docker cp** command completes.
2. When a container runs as a non-**root** user and you run the **docker cp** command to copy a non-**root** file from the host to the container, the ownership of the file in the container changes to **root**. Unlike the **cp** command, **docker cp** changes the UIDs and GIDs of the files copied into the container to **root**.

### docker login

After the **docker login** command is executed, **user/passwd** encrypted using AES (256-bit) is saved in **/root/.docker/config.json**. At the same time, **/root/.docker/aeskey** (permission 0600) is generated to decrypt **user/passwd** in **/root/.docker/config.json**. Currently, the AES key cannot be updated periodically; you need to delete it manually to trigger an update. After the AES key is updated, you need to log in again before pushing, regardless of whether the Docker daemon has been restarted. For example:

```text
root@hello:~/workspace/dockerfile# docker login
Login with your Docker ID to push and pull images from Docker Hub. If you don't have a Docker ID, head over to https://hub.docker.com to create one.
Username: example
Password:
Login Succeeded
root@hello:~/workspace/dockerfile# docker push example/empty
The push refers to a repository [docker.io/example/empty]
547b6288eb33: Layer already exists
latest: digest: sha256:99d4fb4ce6c6f850f3b39f54f8eca0bbd9e92bd326761a61f106a10454b8900b size: 524
root@hello:~/workspace/dockerfile# rm /root/.docker/aeskey
root@hello:~/workspace/dockerfile# docker push example/empty
WARNING: Error loading config file:/root/.docker/config.json - illegal base64 data at input byte 0
The push refers to a repository [docker.io/example/empty]
547b6288eb33: Layer already exists
errors:
denied: requested access to the resource is denied
unauthorized: authentication required
root@hello:~/workspace/dockerfile# docker login
Login with your Docker ID to push and pull images from Docker Hub. If you don't have a Docker ID, head over to https://hub.docker.com to create one.
Username: example
Password:
Login Succeeded
root@hello:~/workspace/dockerfile# docker push example/empty
The push refers to a repository [docker.io/example/empty]
547b6288eb33: Layer already exists
latest: digest: sha256:99d4fb4ce6c6f850f3b39f54f8eca0bbd9e92bd326761a61f106a10454b8900b size: 524
```

diff --git a/docs/en/docs/container_engine/docker_engine/container_management_2.md b/docs/en/docs/container_engine/docker_engine/container_management_2.md
new file mode 100644
index 0000000000000000000000000000000000000000..4d2afa2ed9c2646e3db8e1efda62c39e5f6d81fa
--- /dev/null
+++ b/docs/en/docs/container_engine/docker_engine/container_management_2.md

# Container Management

Subcommands supported by the current Docker are classified into the following groups by function:
| Function | Command | Description |
|----------|---------|-------------|
| Host environment | version | Views the Docker version. |
| | info | Views the Docker system and host environment information. |
| Container lifecycle management | create | Creates a container using an image. |
| | run | Creates and runs a container using an image. |
| | start | Starts a stopped container. |
| | stop | Stops a running container. |
| | restart | Restarts a container. |
| | wait | Waits for a container to stop and prints the exit code. |
| | rm | Deletes a container. |
| Container process management | pause | Suspends all processes in a container. |
| | unpause | Resumes the suspended processes in a container. |
| | top | Views the processes in a container. |
| | exec | Executes a process in a container. |
| Container inspection tool | ps | Views running containers (when no option is attached). |
| | logs | Displays the log information of a container. |
| | attach | Connects standard input and output to a container. |
| | inspect | Returns the low-level information of a container. |
| | port | Lists the port mappings between containers and hosts. |
| | diff | Returns the changes made by the container compared with the rootfs of the image. |
| | cp | Copies files between containers and hosts. |
| | export | Exports the file system of a container as a .tar package. |
| | stats | Views the resource usage of a container in real time. |
| Image generation | build | Creates an image using a Dockerfile. |
| | commit | Creates an image based on the container rootfs. |
| | import | Creates an image using the content of a .tar package as the file system. |
| | load | Loads an image from a .tar package. |
| Image registry | login | Logs in to a registry. |
| | logout | Logs out of a registry. |
| | pull | Pulls an image from the registry. |
| | push | Pushes an image to the registry. |
| | search | Searches for an image in the registry. |
| Image management | images | Displays images in the system. |
| | history | Displays the change history of an image. |
| | rmi | Deletes an image. |
| | tag | Adds a tag to an image. |
| | save | Saves an image to a .tar package. |
| Others | events | Obtains real-time events from the Docker daemon. |
| | rename | Renames a container. |

Some subcommands accept parameters, such as **docker run**. You can run the **docker** _command_ **--help** command to view the help information of a command. For details about the command parameters, see the preceding command parameter description. The following sections describe how to use each command.
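For example, to list the options accepted by **docker run**:

```shell
docker run --help
```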
## attach

Syntax: **docker attach [**_options_**]** _container_

Function: Attaches the current terminal to a running container.

Parameter description:

**--no-stdin=false**: Does not attach STDIN.

**--sig-proxy=true**: Proxies all signals to the container, except SIGCHLD, SIGKILL, and SIGSTOP.

Example:

```shell
$ sudo docker attach attach_test
root@2988b8658669:/# ls
bin  boot  dev  etc  home  lib  lib64  media  mnt  opt  proc  root  run  sbin  srv  sys  tmp  usr  var
```

## commit

Syntax: **docker commit [**_options_**]** _container_ **[**_repository[:tag]_**]**

Function: Creates an image from a container.

Parameter description:

**-a**, **--author=""**: Specifies the author.

**-m**, **--message=""**: Specifies the commit message.

**-p**, **--pause=true**: Pauses the container during the commit.

Example:

Run the following command to commit the **test** container as a new image:

```shell
$ sudo docker commit test busybox:test
sha256:be4672959e8bd8a4291fbdd9e99be932912fe80b062fba3c9b16ee83720c33e1

$ sudo docker images
REPOSITORY   TAG      IMAGE ID       CREATED       SIZE
busybox      latest   e02e811dd08f   2 years ago   1.09MB
```

## cp

Syntax: **docker cp [**_options_**]** _container_**:**_src\_path_ _dest\_path_**|-**

**docker cp [**_options_**]** _src\_path_**|-** _container_**:**_dest\_path_

Function: Copies a file or folder from a path in a container to a path on the host, or from the host into the container.

Precautions: The **docker cp** command does not support copying files in virtual file systems such as **/proc**, **/sys**, **/dev**, and **/tmp** in the container, or files in file systems mounted by users in the container.

Parameter description:

**-a**, **--archive**: Sets the owner of the file copied to the container to the **container** user (**--user**).

**-L**, **--follow-link**: Resolves and follows the symbolic link of a file.

Example:

Run the following command to copy the **/test** directory in the registry container to the **/home/**_aaa_ directory on the host:

```shell
sudo docker cp registry:/test /home/aaa
```

## create

Syntax: **docker create [**_options_**]** _image_ **[**_command_**] [**_arg_**...]**

Function: Creates a container using an image file and returns the ID of the container. After the container is created, run the **docker start** command to start it. _options_ are used to configure the container during creation; some parameters overwrite the container configuration in the image file. _command_ indicates the command to be executed during container startup.

Parameter description:

**Table 1** Parameter description
| Parameter | Description |
|-----------|-------------|
| -a, --attach=[] | Attaches the console to the STDIN, STDOUT, and STDERR of the process in the container. |
| --name="" | Name of the container. |
| --add-host=[host:ip] | Adds a mapping between a host name and an IP address to /etc/hosts in the container, for example, --add-host=test:10.10.10.10. |
| --annotation | Sets annotations for the container, for example, the native.umask parameter: --annotation native.umask=normal (the umask of the started container is 0022) or --annotation native.umask=secure (the umask is 0027). If this parameter is not set, the umask configuration in dockerd is used. |
| --blkio-weight | Relative weight of blkio, ranging from 10 to 1000. |
| --blkio-weight-device=[] | Blkio weight of a specific device, which configures the relative weight per device. |
| -c, --cpu-shares=0 | Relative weight of the host CPU obtained by the container. This parameter can be used to obtain a higher priority. By default, all containers obtain the same CPU priority. |
| --cap-add=[] | Adds Linux capabilities. |
| --cap-drop=[] | Drops Linux capabilities. |
| --cgroup-parent | cgroup parent directory for the container. |
| --cidfile="" | Writes the container ID to the specified file. For example, --cidfile=/home/cidfile-test writes the container ID to the /home/cidfile-test file. |
| --cpu-period | CPU CFS period. The default value is 100 ms. Generally, --cpu-period and --cpu-quota are used together. For example, --cpu-period=50000 --cpu-quota=25000 indicates that, if there is one CPU, the container can obtain 50% of the CPU every 50 ms. --cpus=0.5 has the same effect. |
| --cpu-quota | CPU CFS quota. The default value is 0, indicating no restriction. |
| --cpuset-cpus | CPUs (for example, 0-3, 0, 1) that processes in the container can use. By default, there is no restriction. |
| --cpuset-mems | Memory nodes (for example, 0-3, 0, 1) on which processes in the container run. This parameter is valid only on NUMA systems. |
| --device=[] | Adds a host device to the container, for example, --device=/dev/sdc:/dev/xvdc:rwm. |
| --dns=[] | Forces the container to use the specified DNS server. For example, --dns=114.114.xxx.xxx writes nameserver 114.114.xxx.xxx to /etc/resolv.conf of the created container and overwrites the original content. |
| --dns-opt=[] | DNS options. |
| --dns-search=[] | Forces the DNS search domain names used by the container. |
| -e, --env=[] | Sets environment variables for the container. --env=[KERNEL_MODULES=] inserts the specified kernel modules into the container. Currently, only modules on the host can be inserted; after the container is deleted, the modules still reside on the host, and the --hook-spec option must be configured for the container. Valid formats: KERNEL_MODULES=, KERNEL_MODULES=a, KERNEL_MODULES=a,b, and KERNEL_MODULES=a,b, (with a trailing comma). |
| --entrypoint="" | Overwrites the original entrypoint of the image. The entrypoint sets the command executed when the container starts. |
| --env-file=[] | Reads environment variables from a file, one variable per line. For example, --env-file=/home/test/env reads multiple environment variables from the env file. |
| --expose=[] | Exposes an internal port of the container. The -P option described below maps exposed ports to host ports. |
| --group-add=[] | Adds additional groups for the container. |
| -h, --hostname="" | Host name. |
| --health-cmd | Container health check command. |
| --health-interval | Interval between two consecutive command executions. The default value is 30s. |
| --health-timeout | Maximum duration for executing a single check command. If the execution times out, the command fails. The default value is 30s. |
| --health-start-period | Interval between container startup and the first health check. The default value is 0s. |
| --health-retries | Maximum number of retries after a health check fails. The default value is 3. |
| --health-exit-on-unhealthy | Whether to stop the container when it becomes unhealthy. The default value is false. |
| --host-channel=[] | Sets a channel for communication between processes in the container and the host, in host path:container path:rw/ro:size limit format. |
| -i, --interactive=false | Keeps STDIN open even if it is not attached. |
| --ip | IPv4 address of the container. |
| --ip6 | IPv6 address of the container. |
| --ipc | IPC namespace of the container. |
| --isolation | Container isolation policy. |
| -l, --label=[] | Label of the container. |
| --label-file=[] | Reads labels from a file. |
| --link=[] | Links to another container. This parameter adds environment variables for the IP address and port of the linked container and adds a mapping to the /etc/hosts file, for example, --link=name:alias. |
| --log-driver | Log driver of the container. |
| --log-opt=[] | Log driver options. |
| -m, --memory="" | Memory limit of the container, in the format of a number with an optional unit; available units are b, k, m, and g. The minimum value is 4m. |
| --mac-address | MAC address of the container, for example, 92:d0:c6:0a:xx:xx. |
| --memory-reservation | Soft memory limit of the container. The default value is the same as that of --memory. --memory is a hard limit while --memory-reservation is a soft limit: when the memory usage exceeds the preset value, the system tries to reduce it below the preset value when reclaiming memory, but usage may still exceed the value. This parameter is generally used together with --memory, and its value must be less than that of --memory. |
| --memory-swap | Total usage of common memory and the swap partition. -1 indicates no restriction. If this parameter is not set, the total is twice the value of --memory; that is, the swap partition can use the same amount of memory as --memory. |
| --memory-swappiness=-1 | Tendency of the container to use swap memory. The value ranges from 0 to 100, in percentage. |
| --net="bridge" | Network mode of the container. Docker 1.3.0 supports four modes: bridge (creates a network stack on the bridge when the Docker daemon starts), host (uses the network stack of the host in the container), none (does not use a network), and container:name\|id (reuses the network stack of another container). The default value is bridge. |
| --no-healthcheck | Does not perform health checks on the container. |
| --oom-kill-disable | Disables the OOM killer. You are advised not to set this parameter if -m is not set. |
| --oom-score-adj | Adjusts the OOM score of the container. The value ranges from -1000 to 1000. |
| -P, --publish-all=false | Maps all exposed ports of the container to host ports, so that the container can be accessed through the host ports. You can run the docker port command to view the mappings between container ports and host ports. |
| -p, --publish=[] | Maps a container port to a host port, in the format IP address:host port:container port, IP address::container port, host port:container port, or container port. If no IP address is specified, all NICs on the host are listened on. If no host port is specified, one is allocated automatically. |
| --pid | PID namespace of the container. |
| --privileged=false | Grants extra permissions to the container. With --privileged, the container can access all devices on the host. |
| --restart="" | Restart rule applied when the container exits. Version 1.3.1 supports three rules: no (the container is not restarted when it stops), on-failure (the container is restarted when its exit code is not 0; a maximum number of restarts can be appended, for example, on-failure:5), and always (the container is always restarted regardless of the exit code). |
| --read-only | Mounts the root file system of the container in read-only mode. |
| --security-opt=[] | Container security rules. |
| --shm-size | Size of the /dev/shm device. The default value is 64MB. |
| --stop-signal=SIGTERM | Container stop signal. The default value is SIGTERM. |
| -t, --tty=false | Allocates a pseudo terminal. |
| --tmpfs=[] | Mounts a tmpfs directory. |
| -u, --user="" | User name or UID. |
| --ulimit=[] | ulimit options. |
| --userns | User namespace of the container. |
| -v, --volume=[] | Mounts a host directory to the container, or creates a volume in the container. For example, -v /home/test:/home mounts the /home/test directory of the host to the /home directory of the container, and -v /tmp creates a tmp folder in the root directory of the container, which can be shared with other containers using the --volumes-from option. The host directory cannot be mounted to the /proc subdirectory of the container; otherwise, an error is reported when the container starts. |
| --volume-driver | Data volume driver of the container. This parameter is optional. |
| --volumes-from=[] | Mounts the volumes of another container to the current container to share them. For example, --volumes-from container_name mounts the volumes of container_name to the current container. -v and --volumes-from are two very important options for data backup and live migration. |
| -w, --workdir="" | Working directory of the container. |
Example:

Run the following command to create a container named **busybox**, and then run the **docker start** command to start it:

```shell
sudo docker create -ti --name=busybox busybox /bin/bash
```

## diff

Syntax: **docker diff** _container_

Function: Checks the changes that have been made in a container's file system since the container was created.

Parameter description: none.

Example:

```shell
$ sudo docker diff registry
C /root
A /root/.bash_history
A /test
```

## exec

Syntax: **docker exec [**_options_**]** _container_ _command_ **[**_arg..._**]**

Function: Runs a command in a container.

Parameter description:

**-d**, **--detach=false**: Runs the command in the background.

**-i**, **--interactive=false**: Keeps the STDIN of the container open.

**-t**, **--tty=false**: Allocates a pseudo terminal.

**--privileged**: Executes the command in privileged mode.

**-u**, **--user**: Specifies the user name or UID.

Example:

```shell
$ sudo docker exec -ti exec_test ls
bin  etc   lib    media  opt   root  sbin  sys   tmp   var
dev  home  lib64  mnt    proc  run   srv   test  usr
```

## export

Syntax: **docker export** _container_

Function: Exports the file system content of a container to STDOUT in .tar format.

Parameter description: none.

Example:

Run the following commands to export the contents of the container named **busybox** to the **busybox.tar** package:

```shell
$ sudo docker export busybox > busybox.tar
$ ls
busybox.tar
```

## inspect

Syntax: **docker inspect [**_options_**]** _container_**|**_image_ **[**_container_|_image..._**]**

Function: Returns the low-level information about a container or image.

Parameter description:

**-f**, **--format=""**: Outputs information in the specified format.

**-s**, **--size**: Displays the total file size of the container when the query type is container.

**--type**: Returns the JSON format of the specified type.

**-t**, **--time=120**: Timeout interval, in seconds. If the **docker inspect** command is not complete within this interval, the system stops waiting and immediately reports an error. The default value is **120**.

Example:

1. Run the following command to return the information of a container:

    ```shell
    $ sudo docker inspect busybox_test
    [
        {
            "Id": "9fbb8649d5a8b6ae106bb0ac7686c40b3cbd67ec2fd1ab03e0c419a70d755577",
            "Created": "2019-08-28T07:43:51.27745746Z",
            "Path": "bash",
            "Args": [],
            "State": {
                "Status": "running",
                "Running": true,
                "Paused": false,
                "Restarting": false,
                "OOMKilled": false,
                "Dead": false,
                "Pid": 64177,
                "ExitCode": 0,
                "Error": "",
                "StartedAt": "2019-08-28T07:43:53.021226383Z",
                "FinishedAt": "0001-01-01T00:00:00Z"
            },
    ...
    ```

2. Run the following command to return specific information of a container in the specified format. The following uses the IP address of the busybox_test container as an example.

    ```shell
    $ sudo docker inspect -f {{.NetworkSettings.IPAddress}} busybox_test
    172.17.0.91
    ```
## logs

Syntax: **docker logs [**_options_**]** _container_

Function: Retrieves the logs of a container that is in the **running** or **stopped** state.

Parameter description:

**-f**, **--follow=false**: Prints logs in real time.

**-t**, **--timestamps=false**: Displays the log timestamps.

**--since**: Displays logs generated after the specified time.

**--tail="all"**: Sets the number of lines to be displayed. By default, all lines are displayed.

Example:

1. Run the following command to check the logs of the jaegertracing container, in which a jaegertracing service runs:

    ```shell
    $ sudo docker logs jaegertracing
    {"level":"info","ts":1566979103.3696961,"caller":"healthcheck/handler.go:99","msg":"Health Check server started","http-port":14269,"status":"unavailable"}
    {"level":"info","ts":1566979103.3820567,"caller":"memory/factory.go:55","msg":"Memory storage configuration","configuration":{"MaxTraces":0}}
    {"level":"info","ts":1566979103.390773,"caller":"tchannel/builder.go:94","msg":"Enabling service discovery","service":"jaeger-collector"}
    {"level":"info","ts":1566979103.3908608,"caller":"peerlistmgr/peer_list_mgr.go:111","msg":"Registering active peer","peer":"127.0.0.1:14267"}
    {"level":"info","ts":1566979103.3922884,"caller":"all-in-one/main.go:186","msg":"Starting agent"}
    {"level":"info","ts":1566979103.4047635,"caller":"all-in-one/main.go:226","msg":"Starting jaeger-collector TChannel server","port":14267}
    {"level":"info","ts":1566979103.404901,"caller":"all-in-one/main.go:236","msg":"Starting jaeger-collector HTTP server","http-port":14268}
    {"level":"info","ts":1566979103.4577134,"caller":"all-in-one/main.go:256","msg":"Listening for Zipkin HTTP traffic","zipkin.http-port":9411}
    ```

2. Add **-f** to output the logs of the jaegertracing container in real time:

    ```shell
    $ sudo docker logs -f jaegertracing
    {"level":"info","ts":1566979103.3696961,"caller":"healthcheck/handler.go:99","msg":"Health Check server started","http-port":14269,"status":"unavailable"}
    {"level":"info","ts":1566979103.3820567,"caller":"memory/factory.go:55","msg":"Memory storage configuration","configuration":{"MaxTraces":0}}
    {"level":"info","ts":1566979103.390773,"caller":"tchannel/builder.go:94","msg":"Enabling service discovery","service":"jaeger-collector"}
    {"level":"info","ts":1566979103.3908608,"caller":"peerlistmgr/peer_list_mgr.go:111","msg":"Registering active peer","peer":"127.0.0.1:14267"}
    {"level":"info","ts":1566979103.3922884,"caller":"all-in-one/main.go:186","msg":"Starting agent"}
    ```
## pause/unpause

Syntax: **docker pause** _container_

**docker unpause** _container_

Function: These two commands are used in pairs. The **docker pause** command suspends all processes in a container, and the **docker unpause** command resumes the suspended processes.

Parameter description: none.

Example:

The following uses a container running the docker registry service as an example. After the **docker pause** command suspends the processes of the container, access to the registry service using the **curl** command is blocked. After the **docker unpause** command resumes the suspended registry service, the service can be accessed using the **curl** command again.

1. Run the following command to start a registry container:

    ```shell
    sudo docker run -d --name pause_test -p 5000:5000 registry
    ```

    Run the **curl** command to access the service and check whether the status code **200 OK** is returned:

    ```shell
    sudo curl -v 127.0.0.1:5000
    ```

2. Run the following command to suspend the processes in the container:

    ```shell
    sudo docker pause pause_test
    ```

    Run the **curl** command to access the service again to verify that it is blocked; the request waits until the service resumes.

3. Run the following command to resume the processes in the container:

    ```shell
    sudo docker unpause pause_test
    ```

    The blocked **curl** request from step 2 completes, and the status code **200 OK** is returned.

## port

Syntax: **docker port** _container_ **[**_private\_port[/proto]_**]**

Function: Lists the port mappings of a container, or queries the host port mapped to a specified container port.

Parameter description: none.

Example:

1. Run the following command to list all port mappings of a container:

    ```shell
    $ sudo docker port registry
    5000/tcp -> 0.0.0.0:5000
    ```

2. Run the following command to query the mapping of a specified container port:

    ```shell
    $ sudo docker port registry 5000
    0.0.0.0:5000
    ```

## ps

Syntax: **docker ps [**_options_**]**

Function: Lists containers in different states depending on the options. With no option, all running containers are listed.

Parameter description:

**-a**, **--all=false**: Displays all containers, including stopped ones.

**-f**, **--filter=[]**: Filters the output. Available options include **exited=**_int_ (exit code of the container) and **status=**_restarting|running|paused|exited_ (status of the container). For example, **-f status=running** lists the running containers.

**-l**, **--latest=false**: Lists the most recently created container.

**-n=-1**: Lists the _n_ most recently created containers.

**--no-trunc=false**: Displays the full 64-character container IDs. By default, 12-character IDs are displayed.

**-q**, **--quiet=false**: Displays only container IDs.

**-s**, **--size=false**: Displays container sizes.

Example:

1. Run the following command to list running containers:

    ```shell
    sudo docker ps
    ```

2. Run the following command to display all containers:

    ```shell
    sudo docker ps -a
    ```
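For example, combining the filter and quiet options prints only the IDs of exited containers, which is useful for scripted cleanup:

```shell
docker ps -a -f status=exited -q
```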

**-l** and **--link=false**: Remove the specified link without removing the underlying container.

**-v** and **--volumes=false**: Remove the volumes associated with the container.

Example:

1. Run the following command to delete a stopped container:

    ```shell
    sudo docker rm test
    ```

2. Run the following command to delete a running container:

    ```shell
    sudo docker rm -f rm_test
    ```

## run

Syntax: **docker run \[**_options_**\]** _image_ **\[**_command_**\] \[**_arg_**...\]**

Function: Creates a container from a specified image \(if the specified image does not exist, an image is downloaded from the official image registry\), starts the container, and runs the specified command in the container. This command integrates the **docker create**, **docker start**, and **docker exec** commands.

Parameter description: The parameters of this command are the same as those of the **docker create** command. For details, see the parameter description of the **docker create** command. Only the following parameters differ.

**--rm=false**: Automatically deletes the container when it exits.

**-v**: Mounts a local directory or an anonymous volume to the container. Note: When a local directory is mounted to a container with a SELinux security label, do not add or delete the local directory at the same time. Otherwise, the security label may not take effect.

**--sig-proxy=true**: Proxies signals received by the client to the container process. SIGCHLD, SIGSTOP, and SIGKILL are not proxied.

Example:

Run the busybox image to start a container and run the **/bin/sh** command after the container is started:

```shell
sudo docker run -ti busybox /bin/sh
```

## start

Syntax: **docker start \[**_options_**\]** _container_ **\[**_container_**...\]**

Function: Starts one or more containers that are not running.

Parameter description:

**-a** and **--attach=false**: Attach the standard output and standard error of the container to STDOUT and STDERR of the host.

**-i** and **--interactive=false**: Attach the standard input of the container to STDIN of the host.

Example:

Run the following command to start a container named **busybox**. The **-i -a** options attach standard input and output, so you enter the container directly after it starts. You can exit the container by entering **exit**.

If **-i -a** is not added to the command, the container is started in the background.

```shell
sudo docker start -i -a busybox
```

## stats

Syntax: **docker stats \[**_options_**\] \[**_container_**...\]**

Function: Continuously monitors and displays the resource usage of specified containers. \(If no container is specified, the resource usage of all containers is displayed by default.\)

Parameter description:

**-a** and **--all**: Display information about all containers. By default, only running containers are displayed.

**--no-stream**: Displays only the first result and does not continuously monitor the result.
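
For instance, combining **--no-stream** with a container name prints a single snapshot of that container instead of a continuously refreshing view \(a minimal sketch; the container name **stats_test** is illustrative\):

```shell
# Print one resource-usage sample for a single container, then exit.
sudo docker stats --no-stream stats_test
```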

Example:

Run the **docker run** command to create and start a container, and run the **docker stats** command to display the resource usage of the container:

```shell
$ sudo docker stats
CONTAINER ID        NAME                    CPU %               MEM USAGE / LIMIT     MEM %               NET I/O             BLOCK I/O           PIDS
2e242bcdd682        jaeger                  0.00%               77.08MiB / 125.8GiB   0.06%               42B / 1.23kB        97.9MB / 0B         38
02a06be42b2c        relaxed_chandrasekhar   0.01%               8.609MiB / 125.8GiB   0.01%               0B / 0B             0B / 0B             10
deb9e49fdef1        hardcore_montalcini     0.01%               12.79MiB / 125.8GiB   0.01%               0B / 0B             0B / 0B             9
```

## stop

Syntax: **docker stop \[**_options_**\]** _container_ **\[**_container_**...\]**

Function: Sends a SIGTERM signal to the container and then sends a SIGKILL signal to stop the container after a certain period.

Parameter description:

**-t** and **--time=10**: Number of seconds that the system waits for the container to exit before it is killed. The default value is **10**.

Example:

```shell
sudo docker stop -t=15 busybox
```

## top

Syntax: **docker top** _container_ **\[**_ps options_**\]**

Function: Displays the processes running in a container.

Parameter description: none.

Example:

Start a container named **top_test** and run the **docker top** command on it:

```shell
$ sudo docker top top_test
UID                 PID                 PPID                C                   STIME               TTY                 TIME                CMD
root                70045               70028               0                   15:52               pts/0               00:00:00            bash
```

The value of **PID** is the PID, on the host, of the process running in the container.

## update

Syntax: **docker update \[**_options_**\]** _container_ **\[**_container_**...\]**

Function: Updates the configuration of one or more containers at run time.

Parameter description:

**Table 1** Parameter description

| Parameter | Description |
| --- | --- |
| --accel=[] | Configures one or more container accelerators. |
| --blkio-weight | Relative weight of the container block I/O. The value ranges from 10 to 1000. |
| --cpu-shares | Relative weight of the host CPU obtained by the container. This parameter can be used to obtain a higher priority. By default, all containers obtain the same CPU priority. |
| --cpu-period | CPU CFS period. The default value is 100 ms. Generally, --cpu-period and --cpu-quota are used together. For example, --cpu-period=50000 --cpu-quota=25000 indicates that, if there is one CPU, the container can obtain 50% of the CPU every 50 ms. |
| --cpu-quota | CPU CFS quota. The default value is 0, indicating that there is no restriction on the quota. |
| --cpuset-cpus | CPUs \(for example, 0-3 or 0,1\) on which processes in the container can run. By default, there is no restriction on this parameter. |
| --cpuset-mems | Memory nodes \(for example, 0-3 or 0,1\) on which processes in the container can run. This parameter is valid only for NUMA systems. |
| --kernel-memory="" | Kernel memory limit of the container. The format is a number with an optional unit; available units are b, k, m, and g. |
| -m, --memory="" | Memory limit of the container. The format is a number with an optional unit; available units are b, k, m, and g. The minimum value is 4m. |
| --memory-reservation | Soft memory limit of the container. The default value is the same as that of --memory. --memory is a hard limit, and --memory-reservation is a soft limit. When the memory usage exceeds the preset value, the memory usage is dynamically adjusted \(the system attempts to reduce the memory usage to a value less than the preset value when reclaiming memory\). However, the memory usage may exceed the preset value. Generally, this parameter is used together with --memory. The value must be less than the value of --memory. |
| --memory-swap | Total usage of the common memory and swap space. The value -1 indicates no restriction. If this parameter is not set, the swap size is twice the value of --memory. That is, the swap space can use the same amount of memory as --memory. |
| --restart="" | Configures the restart rule applied when the container exits. Currently, version 1.3.1 supports the following rules:<br>• **no**: the container is not restarted when it stops.<br>• **on-failure**: the container is restarted when its exit code is not 0. A maximum number of restarts can be appended, for example, **on-failure:5**, indicating that the container can be restarted a maximum of five times.<br>• **always**: the container is always restarted regardless of the exit code. |
| --help | Help information. |
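
As an illustration of the **--cpu-period** and **--cpu-quota** pair described in the table, the following sketch caps a container at 50% of one CPU \(the container name **quota_test** is hypothetical\):

```shell
# 25000/50000 = 50% of one CPU every 50 ms.
sudo docker update --cpu-period 50000 --cpu-quota 25000 quota_test
```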

Example:

Run the following command to change the CPU and memory configurations of the container named **busybox**: set the relative weight of the host CPU obtained by the container to **512**, the CPU cores on which processes in the container can run to **0,1,2,3**, and the memory limit to **512m**.

```shell
sudo docker update --cpu-shares 512 --cpuset-cpus=0,1,2,3 --memory 512m busybox
```

## wait

Syntax: **docker wait** _container_ **\[**_container..._**\]**

Function: Waits for a container to stop and prints its exit code.

Parameter description: none.

Example:

Run the following command to start a container named **busybox**:

```shell
sudo docker start -i -a busybox
```

Run the **docker wait** command:

```shell
$ sudo docker wait busybox
0
```

Wait until the busybox container exits. After the busybox container exits, the exit code **0** is displayed.

diff --git a/docs/en/docs/container_engine/docker_engine/image_management_1.md b/docs/en/docs/container_engine/docker_engine/image_management_1.md
new file mode 100644
index 0000000000000000000000000000000000000000..9ea569234226b5461e29b7c0a1817bbb122257bd
--- /dev/null
+++ b/docs/en/docs/container_engine/docker_engine/image_management_1.md
@@ -0,0 +1,51 @@

# Image Management

## Creating an Image

You can use the **docker pull**, **docker build**, **docker commit**, **docker import**, or **docker load** command to create an image. For details about how to use these commands, see [Image Management](image_management_2.md).

### Precautions

1. Do not concurrently run the **docker load** and **docker rmi** commands. If both of the following conditions are met, concurrency problems may occur:

    - An image exists in the system.
    - The docker rmi and docker load operations are concurrently performed on the image.

    Therefore, avoid this scenario. \(Concurrently running any of the image creation commands, such as **tag**, **build**, and **load**, together with **rmi** may cause similar errors. Therefore, do not run these commands concurrently with **rmi**.\)

2. If the system is powered off while Docker is operating on an image, the image may be damaged. In this case, you need to manually restore the image.

    When Docker operates on images \(using the **pull**, **load**, **rmi**, **build**, **combine**, **commit**, or **import** commands\), image data operations are asynchronous, while image metadata operations are synchronous. Therefore, if the system loses power before all image data is flushed to the disk, the image data may be inconsistent with the metadata. Users can view the images \(possibly none images\), but cannot start containers, or the started containers are abnormal. In this case, run the **docker rmi** command to delete the image and perform the previous operations again. The system can then recover.

3. Do not store a large number of images on nodes in the production environment. Delete unnecessary images in time.

    If the number of images is too large, the execution of commands such as **docker images** is slow. As a result, the execution of commands such as **docker build** or **docker commit** fails, and memory usage may pile up. In the production environment, delete unnecessary images and intermediate process images in time.
4. When the **--no-parent** parameter is used to build images, if multiple build operations are performed at the same time and the FROM images in the Dockerfiles are the same, residual images may exist. There are two cases:

    - If the FROM images are incomplete, the images generated when the FROM images are running may remain. Names of the residual images are similar to **base\_v1.0.0-app\_v2.0.0**, or they are none images.
    - If the first several instructions in the Dockerfiles are the same, none images may remain.

### None Image May Be Generated

1. A none image is a top-level image without a tag. For example, the image ID of **ubuntu** has only one tag **ubuntu**. If the tag is removed but the image ID is still in use, the image ID becomes a none image.
2. An image is protected while its data is being exported during image saving. However, if a deletion operation is performed at the same time, the image may be successfully untagged while the image ID fails to be deleted \(because the image is protected\). As a result, the image becomes a none image.
3. If the system is powered off when you run the **docker pull** command, or if the system panics, a none image may be generated. To ensure image integrity, run the **docker rmi** command to delete the image and then pull it again.
4. If you run the **docker save** command to save an image and specify the image ID as the image name, the loaded image does not have a tag and its name is **none**.

### A Low Probability That an Image Fails to Be Built If the Image Is Deleted While Being Built

Currently, the image build process is protected by reference counting. After an image is built, its reference count is increased by 1 \(holdon operation\). Once the holdon operation succeeds, the image cannot be deleted. However, there is a low probability that the image is deleted before the holdon operation is performed, causing the image build to fail.

## Viewing Images

Run the following command to view the local image list:

```shell
docker images
```

## Deleting Images

### Precautions

Do not run the **docker rmi -f** _XXX_ command to delete images. If you forcibly delete an image, the **docker rmi** command ignores errors during the process, which may leave residual metadata of containers or images. If you delete an image in common mode and an error occurs during the deletion, the deletion fails and no metadata remains.

diff --git a/docs/en/docs/container_engine/docker_engine/image_management_2.md b/docs/en/docs/container_engine/docker_engine/image_management_2.md
new file mode 100644
index 0000000000000000000000000000000000000000..7efe0f27b4599d46e43ac9994e37d7597d0443db
--- /dev/null
+++ b/docs/en/docs/container_engine/docker_engine/image_management_2.md
@@ -0,0 +1,450 @@

# Image Management

## build

Syntax: **docker build \[**_options_**\]** _path_ **|** _URL_ **| -**

Function: Builds an image using the Dockerfile in the specified path.

Parameter description: Common parameters are as follows. For details about more parameters, see the **docker help build** command output.

**Table 1** Parameter description

| Parameter | Description |
| --- | --- |
| --force-rm=false | Deletes the containers generated during the build process even if the build fails. |
| --no-cache=false | Builds the image without using the build cache. |
| -q, --quiet=false | Suppresses redundant output during the build. |
| --rm=true | Deletes the containers generated during the build after the build succeeds. |
| -t, --tag="" | Tag name of the image generated during the build. |
| --build-arg=[] | Configures build-time variables. |
| --label=[] | Image-related parameters. The description of each parameter is similar to that of the create command. |
| --isolation | Container isolation method. |
| --pull | Pulls the latest image during the build. |
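
As an illustration of **--build-arg** from the table above, the following sketch passes a build-time variable into a Dockerfile **ARG** instruction \(the file contents and variable name are made up for this example\):

```shell
# Dockerfile using a build-time argument (illustrative).
cat > Dockerfile <<'EOF'
FROM busybox
ARG APP_VERSION=unknown
LABEL version=${APP_VERSION}
EOF
# Supply the value at build time with --build-arg.
sudo docker build --build-arg APP_VERSION=1.0.0 -t argtest:latest .
```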

**Dockerfile Commands**

A Dockerfile describes how to build an image so that containers can be built automatically. The format of every **Dockerfile** instruction is _instruction_ _arguments_.

**FROM Command**

Syntax: **FROM** _image_ or **FROM** _image_:_tag_

Function: Specifies a base image, which is the first instruction of every Dockerfile. If the tag of the base image is not specified, the default tag **latest** is used.

**RUN Command**

Syntax: **RUN** _command_ \(for example, **run in a shell - \`/bin/sh -c\`**\) or

**RUN \[**_executable_, _param1_, _param2_ ... **\]** \(in the **exec** format\)

Function: Runs any command in the image specified by the **FROM** command and then commits the result. The committed image can be used in later instructions. The **RUN** command is equivalent to:

**docker run** _image_ _command_

**docker commit** _container\_id_

**Remarks**

The number sign \(#\) starts a comment.

**MAINTAINER Command**

Syntax: **MAINTAINER** _name_

Function: Specifies the name and contact information of the maintainer.

**ENTRYPOINT Command**

Syntax: **ENTRYPOINT cmd** _param1 param2..._ or **ENTRYPOINT \[**_"cmd", "param1", "param2"..._**\]**

Function: Configures the command to be executed during container startup.

**USER Command**

Syntax: **USER** _name_

Function: Specifies the user that runs the image and the subsequent commands \(for example, a dedicated **memcached** user\).

**EXPOSE Command**

Syntax: **EXPOSE** _port_ **\[**_port_**...\]**

Function: Exposes one or more ports of the image.

**ENV Command**

Syntax: **ENV** _key value_

Function: Configures environment variables. After an environment variable is configured, it can be used by subsequent **RUN** commands.

**ADD Command**

Syntax: **ADD** _src dst_

Function: Copies a file from the _src_ path to the _dst_ path of the container. _src_ indicates a path relative to the build context. It can be the path of a file or directory, or a remote file URL. _dst_ indicates an absolute path inside the container.

**VOLUME Command**

Syntax: **VOLUME \["**_mountpoint_**"\]**

Function: Creates a mount point for sharing a directory.

**WORKDIR Command**

Syntax: **WORKDIR** _path_

Function: Sets the current working path for subsequent **RUN**, **CMD**, and **ENTRYPOINT** commands. It can be set multiple times. If the path is relative, it is relative to the path set by the previous **WORKDIR** command.

**CMD Command**

Syntax: **CMD \[**_"executable","param1","param2"_**\]** \(This is the **exec** format and is preferred.\)

**CMD \["**_param1_**","**_param2_**"\]** \(The parameters are the default parameters for ENTRYPOINT.\)

**CMD** _command_ _param1_ _param2_ \(shell format\)

Function: A Dockerfile can contain only one effective CMD command. If there are multiple CMD commands, only the last one takes effect.

**ONBUILD Command**

Syntax: **ONBUILD \[**_other commands_**\]**

Function: This command is followed by another command, such as a **RUN** or **COPY** command. The command is not executed during the image build and is executed only when the current image is used as a base image to build a next-level image.

The following is a complete Dockerfile example that builds an image with the sshd service installed.

```text
FROM ubuntu
ENV http_proxy http://192.168.0.226:3128
ENV https_proxy https://192.168.0.226:3128
RUN apt-get update && apt-get install -y openssh-server
RUN mkdir -p /var/run/sshd
EXPOSE 22
ENTRYPOINT /usr/sbin/sshd -D
```

Example:

1. Run the following command to build an image using the preceding Dockerfile \(note the trailing dot, which specifies the build context\):

    ```shell
    sudo docker build -t sshd_image:latest .
    ```

2. Run the following command to view the generated image:

    ```shell
    docker images | grep sshd_image
    ```

## history

Syntax: **docker history \[**_options_**\]** _image_

Function: Displays the change history of an image.

Parameter description:

**-H** and **--human=true**: Print sizes and dates in a human-readable format.

**--no-trunc=false**: Does not truncate any output.

**-q** and **--quiet=false**: Display only IDs.

Example:

```shell
$ sudo docker history busybox:test
IMAGE               CREATED             CREATED BY          SIZE                COMMENT
be4672959e8b        15 minutes ago      bash                23B
21970dfada48        4 weeks ago                             128MB               Imported from -
```

## images

Syntax: **docker images \[**_options_**\] \[**_name_**\]**

Function: Lists existing images. Intermediate images are not displayed if no parameter is specified.

Parameter description:

**-a** and **--all=false**: Display all images.

**-f** and **--filter=\[\]**: Specify a filter value, for example, **dangling=true**.

**--no-trunc=false**: Does not truncate any output.

**-q** and **--quiet=false**: Display only IDs.

Example:

```shell
$ sudo docker images
REPOSITORY          TAG                 IMAGE ID            CREATED             SIZE
busybox             latest              e02e811dd08f        2 years ago         1.09MB
```

## import

Syntax: **docker import URL|- \[**_repository_**\[**_:tag_**\]\]**

Function: Imports a .tar package that contains a rootfs as an image. This command corresponds to the **docker export** command.

Parameter description: none.

Example:

Run the following command to generate a new image from **busybox.tar**, which was exported using the **docker export** command:

```shell
$ sudo docker import busybox.tar busybox:test
sha256:a79d8ae1240388fd3f6c49697733c8bac4d87283920defc51fb0fe4469e30a4f
$ sudo docker images
REPOSITORY          TAG                 IMAGE ID            CREATED             SIZE
busybox             test                a79d8ae12403        2 seconds ago       1.3MB
```

## load

Syntax: **docker load \[**_options_**\]**

Function: Reloads an image from a .tar package created by the **docker save** command. This command corresponds to the **docker save** command.

Parameter description:

**-i** and **--input=""**: Read the image from a file instead of STDIN.

Example:

```shell
$ sudo docker load -i busybox.tar
Loaded image ID: sha256:e02e811dd08fd49e7f6032625495118e63f597eb150403d02e3238af1df240ba
$ sudo docker images
REPOSITORY          TAG                 IMAGE ID            CREATED             SIZE
busybox             latest              e02e811dd08f        2 years ago         1.09MB
```

## login

Syntax: **docker login \[**_options_**\] \[**_server_**\]**

Function: Logs in to an image registry. If no server is specified, the system logs in to the official image registry by default.

Parameter description:

**-e** and **--email=""**: Email address.

**-p** and **--password=""**: Password.

**-u** and **--username=""**: User name.

Example:

```shell
sudo docker login
```

## logout

Syntax: **docker logout \[**_server_**\]**

Function: Logs out of an image registry. If no server is specified, the system logs out of the official image registry by default.

Parameter description: none.

Example:

```shell
sudo docker logout
```

## pull

Syntax: **docker pull \[**_options_**\]** _name_**\[**_:tag_**\]**

Function: Pulls an image from an official or private registry.

Parameter description:

**-a** and **--all-tags=false**: Download all tagged images in a repository. \(A repository can contain multiple tags. For example, a busybox repository may have tags such as **busybox:14.04**, **busybox:13.10**, and **busybox:latest**. If **-a** is used, all tagged busybox images are pulled.\)

Example:

1. Run the following command to obtain the Nginx image from the official registry:

    ```shell
    $ sudo docker pull nginx
    Using default tag: latest
    latest: Pulling from official/nginx
    94ed0c431eb5: Pull complete
    9406c100a1c3: Pull complete
    aa74daafd50c: Pull complete
    Digest: sha256:788fa27763db6d69ad3444e8ba72f947df9e7e163bad7c1f5614f8fd27a311c3
    Status: Downloaded newer image for nginx:latest
    ```

    When an image is pulled, the system checks whether the dependent layers already exist locally. If yes, the local layers are used.

2. Pull an image from a private registry.

    Run the following command to pull the Fedora image from a private registry whose address is, for example, **192.168.1.110:5000**:

    ```shell
    sudo docker pull 192.168.1.110:5000/fedora
    ```

## push

Syntax: **docker push** _name_**\[**_:tag_**\]**

Function: Pushes an image to an image registry.

Parameter description: none.

Example:

The following pushes an image to the private image registry at 192.168.1.110:5000.

1. Label the image to be pushed. \(The **docker tag** command is described in the following section.\) In this example, the image to be pushed is busybox:sshd.

    ```shell
    sudo docker tag busybox:sshd 192.168.1.110:5000/busybox:sshd
    ```

2. Run the following command to push the tagged image to the private image registry:

    ```shell
    sudo docker push 192.168.1.110:5000/busybox:sshd
    ```

    During the push, the system automatically checks whether the dependent layers already exist in the image registry. If yes, the layers are skipped.

## rmi

Syntax: **docker rmi \[**_options_**\]** _image_ **\[**_image..._**\]**

Function: Deletes one or more images. If an image has multiple tags in the image library, only the untag operation is performed when the image is deleted. If the image has only one tag, the dependent layers are deleted in sequence.

Parameter description:

**-f** and **--force=false**: Forcibly delete an image.

**--no-prune=false**: Does not delete parent images without tags.

Example:

```shell
sudo docker rmi 192.168.1.110:5000/busybox:sshd
```

## save

Syntax: **docker save \[**_options_**\]** _image_ **\[**_image..._**\]**

Function: Saves an image to a .tar package. The output is **STDOUT** by default.

Parameter description:

**-o** and **--output=""**: Save the image to a file rather than STDOUT.

Example:

```shell
$ sudo docker save -o nginx.tar nginx:latest
$ ls
nginx.tar
```

## search

Syntax: **docker search** _options_ _TERM_

Function: Searches for a specific image in an image registry.

Parameter description:

**--automated=false**: Displays automatically built images.

**--no-trunc=false**: Does not truncate any output.

**-s** and **--stars=0**: Display only images of a specified star rating or higher.

Example:

1. Run the following command to search for Nginx in the official image library:

    ```shell
    $ sudo docker search nginx
    NAME                      DESCRIPTION                                       STARS               OFFICIAL            AUTOMATED
    nginx                     Official build of Nginx.                          11873               [OK]
    jwilder/nginx-proxy       Automated Nginx reverse proxy for docker con...   1645                                    [OK]
    richarvey/nginx-php-fpm   Container running Nginx + PHP-FPM capable of...   739                                     [OK]
    linuxserver/nginx         An Nginx container, brought to you by LinuxS...   74
    bitnami/nginx             Bitnami nginx Docker Image                        70                                      [OK]
    tiangolo/nginx-rtmp       Docker image with Nginx using the nginx-rtmp...   51                                      [OK]
    ```

2. Run the following command to search for busybox in a private image library. The address of the private image library must be included in the search.

    ```shell
    sudo docker search 192.168.1.110:5000/busybox
    ```

## tag

Syntax: **docker tag \[**_options_**\]** _image_**\[**_:tag_**\] \[**_registry host/_**\]\[**_username/_**\]**_name_**\[**_:tag_**\]**

Function: Adds a tag to an image so that the image can be pushed to a specific registry.

Parameter description:

**-f** or **--force=false**: Forcibly replaces the original image when the same tag name exists.

Example:

```shell
sudo docker tag busybox:latest busybox:test
```

diff --git a/docs/en/docs/container_engine/docker_engine/installation_and_configuration_3.md b/docs/en/docs/container_engine/docker_engine/installation_and_configuration_3.md
new file mode 100644
index 0000000000000000000000000000000000000000..cffa5a6a675ef612fa16ce41bf4053ae3c24fbbf
--- /dev/null
+++ b/docs/en/docs/container_engine/docker_engine/installation_and_configuration_3.md
@@ -0,0 +1,404 @@

# Installation and Configuration

This chapter describes important configurations related to the installation of the open-source container engine Docker.

## Precautions

- The root permission is required for installing a Docker container.
- The **docker-engine** RPM package cannot be installed together with the **containerd**, **runc**, or **podman** RPM package. The **docker-engine** RPM package contains all components required for running Docker, including the **containerd**, **runc**, and **docker** binary files, while the **containerd**, **runc**, and **podman** RPM packages also contain the corresponding binary files. Package conflicts may occur due to the duplicate installation.

## Basic Installation Configuration

### Daemon Parameter Configuration

You can add configuration items to the **/etc/docker/daemon.json** file to customize parameters. You can run the **dockerd --help** command to view related configuration items and their usage. A configuration example is as follows:

```shell
$ cat /etc/docker/daemon.json
{
    "debug": true,
    "storage-driver": "overlay2",
    "storage-opts": ["overlay2.override_kernel_check=true"]
}
```

### Daemon Running Directory Configuration

Re-configuring the various running directories and files \(including **--graph** and **--exec-root**\) may cause directory conflicts or file attribute changes, affecting the normal use of applications.

>[!TIP]NOTICE
>Therefore, the specified directories or files should be used only by Docker to avoid file attribute changes and security issues caused by conflicts.

- Take **--graph** as an example. When **/new/path/** is used as the new daemon root directory, if a file already exists in **/new/path/** and its directory or file name conflicts with one required by Docker \(for example, **containers**, **hooks**, or **tmp**\), Docker may update the original directory or file attributes, including the owner and permissions.

>[!TIP]NOTICE
>From Docker 17.05, the **--graph** parameter is marked as **Deprecated** and replaced with the **--data-root** parameter.
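
For example, a dedicated, Docker-only data directory can be configured through **daemon.json** as follows \(a sketch; **/new/path/docker** is a placeholder that must be an empty directory reserved for Docker\):

```shell
$ cat /etc/docker/daemon.json
{
    "data-root": "/new/path/docker"
}
```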

### Daemon Network Configuration

- After the network segment of the docker0 bridge is specified using the **--bip** parameter on the Docker daemon, if the **--bip** parameter is deleted during the next Docker daemon restart, the docker0 bridge keeps the previous **--bip** value, even if the docker0 bridge was deleted before the restart. The reason is that Docker saves the network configuration and restores the previous configuration by default during the next restart.
- When **docker network create** commands are run concurrently, two networks with the same name can be created. The reason is that Docker networks are distinguished by ID; the name is only an easily recognizable alias and is not guaranteed to be unique.
- In the Docker bridge network mode, a Docker container communicates with the outside through NAT on the host. When the Docker daemon starts a container, a docker-proxy process is started for each port mapped on the host. It is recommended that you map only the necessary ports when using userland-proxy to reduce the resources consumed by docker-proxy port mapping.

### Daemon umask Configuration

The default **umask** value of the main container process and exec process is **0022**. To meet security specifications and prevent containers from being attacked, the default value of **umask** was changed to **0027** when the runC implementation was modified. With this change, the "other" group can no longer access new files or directories.

The default value of **umask** is **0027** when Docker starts a container. You can change it to **0022** by adding **--exec-opt native.umask=normal** to the Docker daemon startup options.

>[!TIP]NOTICE
>If **native.umask** is configured in the **docker create** or **docker run** command, its value is used.

For details, see the parameter description in [docker create](./container_management_2.md#create) and [docker run](./container_management_2.md#run).

### Daemon Start Time

The Docker service is managed by systemd, which restricts the startup time of each service. If the Docker service fails to start within the specified time, the possible causes are as follows:

- If the Docker daemon is started for the first time using devicemapper, it needs to initialize the device. This operation performs a large number of disk I/O operations. When the disk performance is poor or many I/O conflicts exist, the daemon startup may time out. devicemapper needs to be initialized only once and does not need to be initialized again during later daemon startups.
- If the usage of the current system resources is too high, the system responds slowly and all operations in the system slow down, so the startup of the Docker service may time out.
- During a restart, the daemon traverses and reads the configuration files and the init layer and writable layer configurations of each container in the Docker working directory. If there are too many containers \(including created and exited containers\) in the current system and disk read/write performance is limited, the startup of the Docker service may time out due to the long traversal.

If the service startup times out, you are advised to rectify the fault as follows:

- Ensure that the container orchestration layer periodically deletes unnecessary containers, especially exited containers.
- Based on the performance requirements of the solution, adjust the cleanup period of the orchestration layer and the start time limit of the Docker service \(see the sketch below\).
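
If the systemd startup time limit itself needs to be relaxed, a drop-in file can raise it \(a sketch; the 300-second value is an assumption to be tuned per solution\):

```shell
# Extend the Docker service start timeout via a systemd drop-in.
sudo mkdir -p /etc/systemd/system/docker.service.d
sudo tee /etc/systemd/system/docker.service.d/timeout.conf <<'EOF'
[Service]
TimeoutStartSec=300
EOF
sudo systemctl daemon-reload
sudo systemctl restart docker
```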

### Journald Component

After systemd-journald is restarted, the Docker daemon needs to be restarted as well. Journald obtains Docker daemon logs through a pipe. If the journald service is restarted, the pipe is closed, and a subsequent write to the Docker logs triggers the SIGPIPE signal, which crashes the Docker daemon. If this signal is ignored, subsequent Docker daemon logs may fail to be recorded. Therefore, you are advised to restart the Docker daemon after the journald service is restarted or becomes abnormal, ensuring that Docker logs can be properly recorded and preventing status exceptions caused by a daemon crash.

### Firewalld Component

You need to restart the Docker service after restarting or starting firewalld.

- When the firewalld service is started, it clears the iptables rules of the current system. Therefore, if the firewalld service is restarted during Docker daemon startup, the Docker service may fail to insert its iptables rules, causing the Docker service startup to fail.
- If the firewalld service is restarted after the Docker service is started, or the status of the firewalld service \(paused or resumed\) is changed, the iptables rules of the Docker service are deleted. As a result, containers with port mappings fail to be created.

### Iptables Component

If the **--icc=false** option is added in Docker, communication between containers can be restricted. However, if the OS has certain pre-existing rules, the communication between containers may not be restricted as expected. For example:

```text
Chain FORWARD (policy ACCEPT 0 packets, 0 bytes)
...
0     0 ACCEPT     icmp --  *        *        0.0.0.0/0            0.0.0.0/0
...
0     0 DROP       all  --  docker0  docker0  0.0.0.0/0            0.0.0.0/0
...
```

In the **FORWARD** chain above, the ACCEPT icmp rule appears before the DROP rule. As a result, even with the **--icc=false** option added, containers can still ping each other, although the peer remains unreachable over UDP or TCP.

Therefore, if you want to add the **--icc=false** option when using Docker in a container OS, you are advised to clear the iptables rules on the host first.

### Audit Component

You can configure audit for Docker, but it is not mandatory. For example:

```text
-w /var/lib/docker -k docker
-w /etc/docker -k docker
-w /usr/lib/systemd/system/docker.service -k docker
-w /usr/lib/systemd/system/docker.socket -k docker
-w /etc/sysconfig/docker -k docker
-w /usr/bin/docker-containerd -k docker
-w /usr/bin/docker-runc -k docker
-w /etc/docker/daemon.json -k docker
```

Configuring audit for Docker brings certain benefits for auditing, while it does not have any substantial effect on attack defense. In addition, the audit configuration causes serious efficiency problems; for example, the system may not respond smoothly. Therefore, exercise caution in the production environment.

The following uses **-w /var/lib/docker -k docker** as an example to describe how to configure Docker audit.

```shell
$ cat /etc/audit/rules.d/audit.rules | grep docker
-w /var/lib/docker/ -k docker
$ auditctl -R /etc/audit/rules.d/audit.rules
$ auditctl -l | grep docker
-w /var/lib/docker/ -p rwxa -k docker
```

>[!NOTE]NOTE
>**-p \[r|w|x|a\]** and **-w** are used together to monitor read, write, execution, and attribute changes \(such as timestamp changes\) of the directory.
In this case, any file or directory operation in the **/var/lib/docker** directory will be recorded in the **audit.log** file. As a result, too many logs may be recorded, severely affecting the memory or CPU usage of the auditd service and further affecting the OS. For example, logs similar to the following are recorded in the **/var/log/audit/audit.log** file each time the **ls /var/lib/docker/containers** command is executed:

```text
type=SYSCALL msg=audit(1517656451.457:8097): arch=c000003e syscall=257 success=yes exit=3 a0=ffffffffffffff9c a1=1b955b0 a2=90800 a3=0 items=1 ppid=17821 pid=1925 auid=0 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 tty=pts6 ses=4 comm="ls" exe="/usr/bin/ls" subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 key="docker"type=CWD msg=audit(1517656451.457:8097): cwd="/root"type=PATH msg=audit(1517656451.457:8097): item=0 name="/var/lib/docker/containers" inode=1049112 dev=fd:00 mode=040700 ouid=0 ogid=0 rdev=00:00 obj=unconfined_u:object_r:container_var_lib_t:s0 objtype=NORMAL
```

### Security Configuration seccomp

During container network performance tests, it was found that the performance of Docker is lower than that of the native kernel namespace. With seccomp enabled, system calls \(such as sendto\) are not performed through system\_call\_fastpath. Instead, tracesys is called, which greatly deteriorates performance. Therefore, you are advised to disable seccomp in container scenarios where services require high performance. For example:

```shell
docker run -itd --security-opt seccomp=unconfined busybox:latest
```

### Do Not Modify the Private Directory of the Docker Daemon

Do not modify the root directory used by Docker \(**/var/lib/docker** by default\), the runtime directory \(**/run/docker** by default\), or the files or directories in these two directories. Forbidden operations include deleting files, adding files, creating soft or hard links to the directories or files, and modifying file attributes, permissions, or contents. If any modification is required, contact the Euler container team for review.

### Precautions for Common Users in Scenarios Where a Large Number of Containers Are Deployed

The maximum number of processes that a common user can create on an OS host can be restricted by creating the **/etc/security/limits.d/20-nproc.conf** file in the system. Similarly, the maximum number of processes that a common user can create in a container is determined by the value in the **/etc/security/limits.d/20-nproc.conf** file in the container image, as shown in the following example:

```shell
$ cat /etc/security/limits.d/20-nproc.conf
* soft nproc 4096
```

If an error is reported due to insufficient resources when a large number of containers are deployed by a common user, increase the value **4096** in the **/etc/security/limits.d/20-nproc.conf** file.

Configure the maximum value based on the maximum capability of the kernel, as shown in the following example:

```shell
$ sysctl -a | grep pid_max
kernel.pid_max = 32768
```
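
If the kernel limit itself is the bottleneck, it can be raised at run time \(a sketch; the value **4194304** is the typical 64-bit maximum and is an assumption to be sized for your workload\):

```shell
# Raise the kernel-wide PID limit; persist it in /etc/sysctl.conf if needed.
sudo sysctl -w kernel.pid_max=4194304
```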

## Storage Driver Configuration

This Docker version supports two storage drivers: overlay2 and devicemapper. Because overlay2 performs better than devicemapper, overlay2 is recommended in the production environment.

### overlay2 Storage Driver Configuration

#### Configuration Methods

overlay2 is the default storage driver of Docker. You can also use either of the following methods to explicitly configure it:

- Edit the **/etc/docker/daemon.json** file and explicitly configure the **storage-driver** field.

    ```shell
    $ cat /etc/docker/daemon.json
    {
        "storage-driver": "overlay2"
    }
    ```

- Edit the **/etc/sysconfig/docker-storage** file and explicitly configure the Docker daemon startup parameters.

    ```shell
    $ cat /etc/sysconfig/docker-storage
    DOCKER_STORAGE_OPTIONS="--storage-driver=overlay2"
    ```

#### Precautions

- When you perform lifecycle management operations on some containers, an error may be reported, indicating that the corresponding rootfs or executable file cannot be found.
- If the health check of a container is configured to execute executable files in the container, an error may be reported, which causes the container health check to fail.
- When you use overlay2 as the graphdriver and modify an image file in a container for the first time, the modification fails if the file size is greater than the remaining space of the system. Even if only a small part of the file is modified, the whole file must be copied to the upper layer; if the remaining space is insufficient, the modification fails.
- Compared with common file systems, the overlay2 file system has the following behavior differences:
    - Kernel version

        overlay2 is compatible only with native kernel 4.0 or later. You are advised to use the Ext4 file system.

    - Copy-UP performance

        Modifying files at the lower layer triggers file replication to the upper layer. Data block replication and fsync are time-consuming.

    - Directory renaming
        - The rename system call is allowed only when both the source and the destination paths are at the merged layer. Otherwise, the EXDEV error is reported.
        - Kernel 4.10 introduced the redirect directory feature to fix this issue. The corresponding kernel option is **CONFIG\_OVERLAY\_FS\_REDIRECT\_DIR**.

        When overlay2 is used, renaming a file system directory fails if the related feature in the **/sys/module/overlay/parameters/redirect\_dir** file is disabled. To use this feature, manually set **/sys/module/overlay/parameters/redirect\_dir** to **Y** \(see the sketch after this list\).

    - Hard link disconnection
        - If there are multiple hard links in a lower-layer directory, writing data to the merged layer triggers Copy-UP, which disconnects the hard links.
        - The index feature was introduced in kernel 4.13 to fix this issue. The corresponding kernel option is **CONFIG\_OVERLAY\_FS\_INDEX**. Note that this option is not forward compatible and does not support hot upgrade.

    - Changes of **st\_dev** and **st\_ino**

        After Copy-UP is triggered, you can view only the new file at the merged layer, and the inode changes. Although **attr** and **xattr** can be replicated, **st\_dev** and **st\_ino** are unique and cannot be replicated. As a result, running the **stat** and **ls** commands shows the inode changes accordingly.

    - fd change

        Before Copy-UP is triggered, opening a file in read-only mode returns descriptor fd1. After Copy-UP is triggered, opening the file with the same name returns descriptor fd2. The two descriptors point to different files. Data written to fd2 is not visible through fd1.
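
The **redirect\_dir** switch mentioned in the directory renaming item above can be inspected and enabled as follows \(a sketch; it requires a kernel built with **CONFIG\_OVERLAY\_FS\_REDIRECT\_DIR**\):

```shell
# Check whether the redirect directory feature is enabled (Y/N).
cat /sys/module/overlay/parameters/redirect_dir
# Enable it if cross-layer directory renaming fails with EXDEV.
echo Y > /sys/module/overlay/parameters/redirect_dir
```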

#### Abnormal Scenarios

When a container uses the overlay2 storage driver, mount points may be overwritten.

#### Abnormal Scenario: Mount Point Being Overwritten

In the faulty container, a mount point exists in **/var/lib/docker/overlay2**.

```shell
$ mount -l | grep overlay
overlay on /var/lib/docker/overlay2/844fd3bca8e616572935808061f009d106a8748dfd29a0a4025645457fa21785/merged type overlay (rw,relatime,seclabel,lowerdir=/var/lib/docker/overlay2/l/JL5PZQLNDCIBU3ZOG3LPPDBHIJ:/var/lib/docker/overlay2/l/ELRPYU4JJG4FDPRLZJCZZE4UO6,upperdir=/var/lib/docker/overlay2/844fd3bca8e616572935808061f009d106a8748dfd29a0a4025645457fa21785/diff,workdir=/var/lib/docker/overlay2/844fd3bca8e616572935808061f009d106a8748dfd29a0a4025645457fa21785/work)
/dev/mapper/dm-root on /var/lib/docker/overlay2 type ext4 (rw,relatime,seclabel,data=ordered)
```

An error similar to the following may occur when some Docker commands are executed:

```shell
$ docker rm 1348136d32
docker rm: Error response from daemon: driver "overlay2" failed to remove root filesystem for 1348136d32: error while removing /var/lib/docker/overlay2/844fd3bca8e616572935808061f009d106a8748dfd29a0a4025645457fa21785: invalid argument
```

In this case, the rootfs of the corresponding container cannot be found on the host. However, this does not mean that the rootfs is lost. The rootfs is overwritten by the mount point in **/var/lib/docker/overlay2**, and services are still running properly. The solutions are as follows:

- Solution 1
    1. Run the following command to check the graphdriver used by Docker:

        ```shell
        docker info | grep "Storage Driver"
        ```

    2. Run the following commands to query the current mount points:

        ```shell
        # Devicemapper
        mount -l | grep devicemapper
        # Overlay2
        mount -l | grep overlay2
        ```

        The output format is _A_ on _B_ type _C_ \(_D_\).
        - _A_: block device name or **overlay**
        - _B_: mount point
        - _C_: file system type
        - _D_: mounting attributes

    3. Run the **umount** command on the mount points \(_B_\) one by one from bottom to top.
    4. Run the **docker restart** command on all the containers or delete all the containers.
    5. Run the following command to restart Docker:

        ```shell
        systemctl restart docker
        ```

- Solution 2
    1. Migrate services.
    2. Restart the nodes.

### devicemapper Storage Driver Configuration

If you need to set the storage driver of Docker to devicemapper, use either of the following methods to explicitly configure it:

- Edit the **/etc/docker/daemon.json** file and explicitly configure the **storage-driver** field.

    ```shell
    $ cat /etc/docker/daemon.json
    {
        "storage-driver": "devicemapper"
    }
    ```

- Edit the **/etc/sysconfig/docker-storage** file and explicitly configure the Docker daemon startup parameters.

    ```shell
    $ cat /etc/sysconfig/docker-storage
    DOCKER_STORAGE_OPTIONS="--storage-driver=devicemapper"
    ```

#### Precautions

- To use devicemapper, you must use the direct-lvm mode. For details about the configuration method, refer to .
- When configuring devicemapper, if the system does not have sufficient space for automatic capacity expansion of the thinpool, disable the automatic capacity expansion function.
- Do not set both of the following two parameters in the **/etc/lvm/profile/docker-thinpool.profile** file to **100**:

    ```text
    activation {
    thin_pool_autoextend_threshold=80
    thin_pool_autoextend_percent=20
    }
    ```

- You are advised to add **--storage-opt dm.use\_deferred\_deletion=true** and **--storage-opt dm.use\_deferred\_removal=true** when using devicemapper.
- When devicemapper is used, you are advised to use Ext4 as the container file system. You need to add **--storage-opt dm.fs=ext4** to the configuration parameters of the Docker daemon.
- If the graphdriver is devicemapper and the metadata files are damaged and cannot be restored, you need to manually restore the metadata files. Do not directly operate on or tamper with the metadata of the devicemapper storage driver in the Docker daemon.
- When the devicemapper LVM is used, if the devicemapper thinpool is damaged due to an abnormal power-off, neither the data integrity nor the restorability of the damaged thinpool can be guaranteed. In this case, you need to rebuild the thinpool.

**Precautions for Switching the devicemapper Storage Pool When the User Namespace Feature Is Enabled on the Docker Daemon**

- Generally, the path of the deviceset-metadata file is **/var/lib/docker/devicemapper/metadata/deviceset-metadata** during container startup.
- If user namespaces are used, the path of the deviceset-metadata file is **/var/lib/docker/**_userNSUID.GID_**/devicemapper/metadata/deviceset-metadata**.
- When you use the devicemapper storage driver and the container is switched between the user namespace scenario and the common scenario, the **BaseDeviceUUID** content in the corresponding deviceset-metadata file needs to be cleared. In the thinpool capacity expansion or rebuild scenario, you also need to clear the **BaseDeviceUUID** content in the deviceset-metadata file. Otherwise, the Docker service fails to restart.

## Impact of Forcibly Killing Docker Background Processes

### Semaphores May Be Residual

When devicemapper is used as the graphdriver, forcible killing may leave residual semaphores. Docker creates semaphores when performing operations on devicemapper. If the daemon is forcibly killed before the semaphores are released, the release may fail. A maximum of one semaphore can be leaked at a time, and the leakage probability is low. However, the Linux OS has an upper limit on semaphores. When the number of leaked semaphores reaches the upper limit, new semaphores cannot be created. As a result, the Docker daemon fails to start. The troubleshooting method is as follows:

1. Check the residual semaphores in the system.

    ```shell
    $ ipcs
    ------ Message Queues --------
    key        msqid      owner      perms      used-bytes   messages
    ------ Shared Memory Segments --------
    key        shmid      owner      perms      bytes      nattch     status
    ------ Semaphore Arrays --------
    key        semid      owner      perms      nsems
    0x0d4d3358 238977024  root       600        1
    0x0d4d0ec9 270172161  root       600        1
    0x0d4dc02e 281640962  root       600        1
    ```

2. Run the **dmsetup** command to check the semaphores created by devicemapper. The returned set is a subset of the system semaphores queried in the previous step.

    ```shell
    dmsetup udevcookies
    ```

3. Check the upper limit of kernel semaphores. The fourth value is the upper limit of the current system semaphores.

    ```shell
    $ cat /proc/sys/kernel/sem
    250 32000 32 128
    ```

    If the number of residual semaphores in step 1 has reached the upper limit in step 3, the Docker daemon cannot start normally. You can run the following command to increase the upper limit and then restart Docker:

    ```shell
    echo 250 32000 32 1024 > /proc/sys/kernel/sem
    ```

    You can also run the following command to manually clear the residual devicemapper semaphores.
    The following example clears the devicemapper semaphores that were created at least one minute ago.

    ```shell
    $ dmsetup udevcomplete_all 1
    This operation will destroy all semaphores older than 1 minutes with keys that have a prefix 3405 (0xd4d).
    Do you really want to continue? [y/n]: y
    0 semaphores with keys prefixed by 3405 (0xd4d) destroyed. 0 skipped.
    ```

### NICs May Be Residual

When a container is started in bridge mode, forcible killing may leave residual NICs. In the bridge network mode, when Docker creates a container, a pair of veth devices is created on the host, and the NIC information is then saved to the Docker database. If the daemon is forcibly killed before the NIC information is saved to the database, the NIC cannot be associated with Docker and cannot be deleted during the next startup, because Docker only deletes unused NICs recorded in its own database.

### Failed to Restart a Container

If a container hook takes a long time and containerd is forcibly killed during container startup, the container start operation may fail. When containerd is forcibly killed during container startup, an error is returned for the Docker start operation. After containerd is restarted, the last startup may still be in the **runc create** execution phase \(executing a user-defined hook may take a long time\). If you run the **docker start** command again to start the container, the following error message may be displayed:

```text
Error response from daemon: oci runtime error: container with id exists: xxxxxx
```

This error is caused by running **runc create** on a container that already exists \(or is being created\). After the **runc create** operation corresponding to the first start operation is complete, the **docker start** command can be executed successfully.

The execution of hooks is not controlled by Docker. In this case, if the container is recycled, the containerd process may be suspended while an unknown hook program is being executed. The risk is controllable \(although the creation of the current container is affected for a short period\):

- After the first operation is complete, the container can be successfully started again.
- Generally, a new container is created after a container fails to start. A container that fails to start cannot be reused.

In conclusion, this problem is confined to specific scenarios.

### Failed to Restart the Docker Service

The Docker service cannot be restarted properly if it is restarted frequently in a short period. The Docker system service is monitored by systemd. If the Docker service is restarted more than five times within 10 seconds, the systemd service detects the abnormal startup and disables the Docker service. Docker can respond to the restart command and restart normally only in the next 10-second period.

## Impact of System Power-off

When a system is unexpectedly powered off or a system panic occurs, the Docker daemon status may not be flushed to the disk in time. As a result, the Docker daemon is abnormal after the system restarts. The possible problems include but are not limited to the following:

- A container was created before the power-off. After the restart, the container is not displayed when the **docker ps -a** command is run, because the container file status was not flushed to the disk; the daemon therefore cannot obtain the container status after the restart.
- Before the system power-off, a file was being written. After the daemon is restarted, the file format is incorrect or the file content is incomplete. As a result, loading fails.
- Because the Docker database \(DB\) may be damaged during a power-off, all DB files in **data-root** are deleted during the node restart. Therefore, the following information created before the restart is deleted after the restart:
    - Network: resources created through Docker network are deleted after the node restarts.
    - Volume: resources created through Docker volume are deleted after the node restarts.
    - Build cache: the build cache information is deleted after the node restarts.
    - Metadata stored in containerd: because the metadata stored in containerd is re-created when a container is started, the metadata stored in containerd is deleted when the node restarts.

    > [!NOTE]NOTE
    > If you want to manually clear data and restore the environment, you can set the environment variable **DISABLE\_CRASH\_FILES\_DELETE** to **true** to disable the function of clearing DB files when the daemon process is restarted due to a power-off.
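
A minimal sketch of wiring that environment variable into the service \(the drop-in file name is an assumption; the variable itself is specific to the openEuler docker-engine\):

```shell
# Keep the Docker DB files across an abnormal restart.
sudo mkdir -p /etc/systemd/system/docker.service.d
sudo tee /etc/systemd/system/docker.service.d/env.conf <<'EOF'
[Service]
Environment="DISABLE_CRASH_FILES_DELETE=true"
EOF
sudo systemctl daemon-reload
sudo systemctl restart docker
```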

diff --git a/docs/en/docs/container_engine/docker_engine/overview.md b/docs/en/docs/container_engine/docker_engine/overview.md
new file mode 100644
index 0000000000000000000000000000000000000000..ac4364ddcb2c3eece46714de173bc0d271ea8717
--- /dev/null
+++ b/docs/en/docs/container_engine/docker_engine/overview.md
@@ -0,0 +1,7 @@

# Docker Container

Docker is an open-source Linux container engine that enables quick application packaging, deployment, and delivery. The original meaning of "docker" is dock worker, whose job is to pack goods into containers and then load and move those containers. Similarly, the job of Docker in Linux is to pack applications into containers and to deploy and run them on various platforms using containers. Docker uses Linux container technology to turn applications into standardized, portable, and self-managed components, enabling the "build once, run everywhere" nature of applications. Features of Docker technology include quick application release, easy application deployment and management, and high application density.

> [!NOTE]**Note:**
>
> Root privileges are necessary for installing and operating Docker containers.

diff --git a/docs/en/docs/container_engine/docker_engine/statistics.md b/docs/en/docs/container_engine/docker_engine/statistics.md
new file mode 100644
index 0000000000000000000000000000000000000000..3cb0f2738ed28bbf253d6629af0b6c50c60b0f00
--- /dev/null
+++ b/docs/en/docs/container_engine/docker_engine/statistics.md
@@ -0,0 +1,97 @@

# Statistics

## events

Syntax: **docker events \[**_options_**\]**

Function: Obtains real-time events from the Docker daemon.

Parameter description:

**--since=""**: Displays events generated after the specified timestamp.

**--until=""**: Displays events generated before the specified timestamp.

Example:

After the **docker events** command is executed, a container is created and started by running the **docker run** command. The **create** and **start** events are then output.

```shell
$ sudo docker events
2019-08-28T16:23:09.338838795+08:00 container create 53450588a20800d8231aa1dc4439a734e16955387efb5f259c47737dba9e2b5e (image=busybox:latest, name=eager_wu)
2019-08-28T16:23:09.339909205+08:00 container attach 53450588a20800d8231aa1dc4439a734e16955387efb5f259c47737dba9e2b5e (image=busybox:latest, name=eager_wu)
2019-08-28T16:23:09.397717518+08:00 network connect e2e20f52662f1ee2b01545da3b02e5ec7ff9c85adf688dce89a9eb73661dedaa (container=53450588a20800d8231aa1dc4439a734e16955387efb5f259c47737dba9e2b5e, name=bridge, type=bridge)
2019-08-28T16:23:09.922224724+08:00 container start 53450588a20800d8231aa1dc4439a734e16955387efb5f259c47737dba9e2b5e (image=busybox:latest, name=eager_wu)
2019-08-28T16:23:09.924121158+08:00 container resize 53450588a20800d8231aa1dc4439a734e16955387efb5f259c47737dba9e2b5e (height=48, image=busybox:latest, name=eager_wu, width=210)
```

## info

Syntax: **docker info**

Function: Displays Docker system information, including the number of containers, number of images, image storage driver, container execution driver, kernel version, and host OS version.

Parameter description: none.

Example:

```shell
$ sudo docker info
Containers: 4
 Running: 3
 Paused: 0
 Stopped: 1
Images: 45
Server Version: 18.09.0
Storage Driver: devicemapper
 Pool Name: docker-thinpool
 Pool Blocksize: 524.3kB
 Base Device Size: 10.74GB
 Backing Filesystem: ext4
 Udev Sync Supported: true
 Data Space Used: 11GB
 Data Space Total: 51GB
 Data Space Available: 39.99GB
 Metadata Space Used: 5.083MB
 Metadata Space Total: 532.7MB
 Metadata Space Available: 527.6MB
 Thin Pool Minimum Free Space: 5.1GB
 Deferred Removal Enabled: true
 Deferred Deletion Enabled: true
 Deferred Deleted Device Count: 0
......
```

## version

Syntax: **docker version**

Function: Displays the Docker version information, including the client version, server version, Go version, and OS and Arch information.

Parameter description: none.

Example:

```shell
$ sudo docker version
Client:
 Version:           18.09.0
 EulerVersion:      18.09.0.48
 API version:       1.39
 Go version:        go1.11
 Git commit:        cbf6283
 Built:             Mon Apr 1 00:00:00 2019
 OS/Arch:           linux/arm64
 Experimental:      false

Server:
 Engine:
  Version:          18.09.0
  EulerVersion:     18.09.0.48
  API version:      1.39 (minimum version 1.12)
  Go version:       go1.11
  Git commit:       cbf6283
  Built:            Mon Apr 1 00:00:00 2019
  OS/Arch:          linux/arm64
  Experimental:     false
```

diff --git a/docs/en/docs/container_engine/isula_container_engine/_toc.yaml b/docs/en/docs/container_engine/isula_container_engine/_toc.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0f113ef37c1b3ea0478e0b416ce0a25b49cfc552
--- /dev/null
+++ b/docs/en/docs/container_engine/isula_container_engine/_toc.yaml
@@ -0,0 +1,50 @@

label: iSulad Container Engine
isManual: true
description: iSula is a C/C++-based container engine known for its lightweight design, flexibility, simplicity, and high performance.
sections:
- label: Overview
  href: ./overview.md
- label: Installation, Upgrade, and Uninstallation
  href: ./installation_upgrade_uninstallation.md
  sections:
  - label: Installation and Configuration
    href: ./installation_configuration.md
  - label: Upgrade
    href: ./upgrade_methods.md
  - label: Uninstallation
    href: ./uninstallation.md
- label: User Guide
  href: ./application_scenarios.md
  sections:
  - label: Container Management
    href: ./container_management.md
  - label: Interconnection with the CNI Network
    href: ./interconnection_with_the_cni_network.md
  - label: Container Resource Management
    href: ./container_resource_management.md
  - label: Privileged Container
    href: ./privileged_container.md
  - label: CRI API v1alpha2
    href: ./cri.md
  - label: CRI API v1
    href: ./cri_2.md
  - label: Image Management
    href: ./image_management.md
  - label: Checking the Container Health Status
    href: ./checking_the_container_health_status.md
  - label: Querying Information
    href: ./query_information.md
  - label: Security Features
    href: ./security_features.md
  - label: Supporting OCI hooks
    href: ./supporting_oci_hooks.md
  - label: Local Volume Management
    href: ./local_volume_management.md
  - label: Interconnecting iSulad shim v2 with StratoVirt
    href: ./interconnecting_isula_shim_v2_with_stratovirt.md
  - label: iSulad Support for cgroup v2
    href: ./isulad_support_cgroup_v2.md
  - label: iSulad Support for CDI
    href: ./isulad_support_cdi.md
- label: Appendix
  href: ./appendix.md

diff --git a/docs/en/docs/container_engine/isula_container_engine/appendix.md b/docs/en/docs/container_engine/isula_container_engine/appendix.md
new file mode 100644
index 0000000000000000000000000000000000000000..1ebde2b822d492df4e7d614cd5d651ee2817ff0b
--- /dev/null
+++ b/docs/en/docs/container_engine/isula_container_engine/appendix.md
@@ -0,0 +1,709 @@

# Appendix

## Command Line Parameters

**Table 1** login command parameters

| Command | Parameter | Description |
| ------- | --------- | ----------- |
| login | -H, --host | Specifies the iSulad socket file path to be accessed. |
| | -p, --password | Specifies the password for logging in to the registry. |
| | --password-stdin | Reads the registry login password from standard input. |
| | -u, --username | Specifies the username for logging in to the registry. |

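For example, log in to a registry by piping the password through standard input; the registry address, username, and password file below are illustrative placeholders:

```shell
# Read the password from a file and pass it via stdin (values are examples).
cat /root/registry_password.txt | isula login -u myuser --password-stdin registry.example.com
```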
**Table 2** logout command parameters

| Command | Parameter | Description |
| ------- | --------- | ----------- |
| logout | -H, --host | Specifies the iSulad socket file path to be accessed. |

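For example, log out from a registry (the address is an illustrative placeholder):

```shell
isula logout registry.example.com
```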
**Table 3** pull command parameters

| Command | Parameter | Description |
| ------- | --------- | ----------- |
| pull | -H, --host | Specifies the iSulad socket file path to be accessed. |

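For example, pull an image (the image name is illustrative):

```shell
isula pull busybox:latest
```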
**Table 4** rmi command parameters

| Command | Parameter | Description |
| ------- | --------- | ----------- |
| rmi | -H, --host | Specifies the iSulad socket file path to be accessed. |
| | -f, --force | Forcibly removes an image. |

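For example, forcibly remove an image (the image name is illustrative):

```shell
isula rmi -f busybox:latest
```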
**Table 5** load command parameters

| Command | Parameter | Description |
| ------- | --------- | ----------- |
| load | -H, --host (supported only by iSula) | Specifies the iSulad socket file path to be accessed. |
| | -i, --input | Specifies where to import an image. If the image is of the docker type, the value is the image package path. If the image is of the embedded type, the value is the image manifest path. |
| | --tag | Uses the image name specified by TAG instead of the default image name. This parameter is supported when the type is set to docker. |
| | -t, --type | Specifies the image type. The value can be embedded or docker (default value). |

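For example, load a docker-type image package from a local file and tag it; the file path and tag are illustrative placeholders:

```shell
# Import a docker-type image archive and name it explicitly (example values).
isula load -i /root/busybox.tar -t docker --tag busybox:latest
```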
**Table 6** images command parameters

| Command | Parameter | Description |
| ------- | --------- | ----------- |
| images | -H, --host | Specifies the iSulad socket file path to be accessed. |
| | -q, --quiet | Displays only the image name. |

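For example, list only image names:

```shell
isula images -q
```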
**Table 7** inspect command parameters

| Command | Parameter | Description |
| ------- | --------- | ----------- |
| inspect | -H, --host | Specifies the iSulad socket file path to be accessed. |
| | -f, --format | Outputs using a template. |
| | -t, --time | Timeout interval, in seconds. If the inspect command fails to query container information within the specified period, the system stops waiting and reports an error immediately. The default value is 120s. If the value is less than or equal to 0, the inspect command keeps waiting until the container information is obtained successfully. |

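For example, the following illustrative call prints the state block of a container as JSON, waiting at most 60 seconds; the container name is a placeholder, and lightweight containers support the `{{json .State}}` template format:

```shell
isula inspect -t 60 -f '{{json .State}}' my_container
```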
## CNI Parameters

**Table 1** CNI single network parameters

| Parameter | Type | Mandatory or Not | Description |
| --------- | ---- | ---------------- | ----------- |
| cniVersion | string | Yes | CNI version. Only 0.3.0 and 0.3.1 are supported. |
| name | string | Yes | Network name, which is user-defined and must be unique. |
| type | string | Yes | Network type. The following types are supported:<br>underlay_ipvlan<br>overlay_l2<br>underlay_l2<br>vpc-router<br>dpdk-direct<br>phy-direct |
| ipmasq | bool | No | Configures the IP masquerade. |
| ipam | structure | No | For details, see the IPAM parameter definition. |
| ipam.type | string | No | IPAM type. The following types are supported:<br>(1) For underlay_l2, overlay_l2, and vpc-router networking, only the default value distributed_l2 is supported.<br>(2) For underlay_ipvlan networking, the default value is distributed_l2. In the CCN scenario, only null and fixed are supported. In the CCE and FST 5G core scenarios, only null and distributed_l2 are supported.<br>(3) For phy-direct and dpdk-direct networking, the default value is l2, and the optional values are null and distributed_l2. In the FST 5G core scenario, only null and distributed_l2 are supported.<br>Description:<br>If the value is out of the range (for example, host-local), Canal automatically sets the value to the default value and no error is returned.<br>null: Canal is not used to manage IP addresses.<br>fixed: fixed IP address, which is used in the CCN scenario.<br>l2: this value is not used in any scenario.<br>distributed_l2: the distributed small subnet is used to manage IP addresses. |
| ipam.subnet | string | No | Subnet information. Canal supports subnet masks ranging from 8 to 29. The IP address cannot be a multicast address (for example, 224.0.0.0/4), a reserved address (240.0.0.0/4), a link-local address (169.254.0.0/16), or a loopback address (127.0.0.0/8). |
| ipam.gateway | string | No | Gateway IP address. |
| ipam.range-start | string | No | Available start IP address. |
| ipam.range-end | string | No | Available end IP address. |
| ipam.routes | structure | No | Subnet list. Each element is a route dictionary. For details, see the route definition. |
| ipam.routes.dst | string | No | Destination network. |
| ipam.routes.gw | string | No | Gateway address. |
| dns | structure | No | Contains some special DNS values. |
| dns.nameservers | []string | No | NameServers |
| dns.domain | string | No | Domain |
| dns.search | []string | No | Search |
| dns.options | []string | No | Options |
| multi_entry | int | No | Number of IP addresses required by a vNIC. The value ranges from 0 to 16. For physical passthrough, a maximum of 128 IP addresses can be applied for on a single NIC. |
| backup_mode | bool | No | Active/standby mode, which is used only for phy-direct and dpdk-direct networking. |
| vlanID | int | No | The value ranges from 0 to 4095. It can be specified through PaaS. |
| vlan_inside | bool | No | The value true indicates that the VLAN function is implemented internally on the node; false indicates that it is implemented externally. |
| vxlanID | int | No | The value ranges from 0 to 16777215. It can be specified through PaaS. |
| vxlan_inside | bool | No | The value true indicates that the VXLAN function is implemented internally on the node; false indicates that it is implemented externally. |
| action | string | No | This parameter can be used only with the special container ID 000000000000.<br>Create: creates a network.<br>Delete: deletes a network. |
| args | map[string]interface{} | No | Key-value pair type. |
| runtimeConfig | structure | No | None |
| capabilities | structure | No | None |

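Below is a minimal, illustrative sketch of a single-network configuration assembled from the fields in Table 1. The network name, subnet, and addresses are assumptions for demonstration only, not a validated deployment:

```json
{
    "cniVersion": "0.3.1",
    "name": "example-underlay",
    "type": "underlay_ipvlan",
    "ipmasq": false,
    "ipam": {
        "type": "distributed_l2",
        "subnet": "192.168.10.0/24",
        "gateway": "192.168.10.1",
        "range-start": "192.168.10.10",
        "range-end": "192.168.10.200",
        "routes": [
            {
                "dst": "0.0.0.0/0",
                "gw": "192.168.10.1"
            }
        ]
    },
    "dns": {
        "nameservers": ["192.168.10.2"]
    }
}
```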
**Table 2** CNI args parameters

| Parameter | Type | Mandatory | Description |
| --------- | ---- | --------- | ----------- |
| K8S_POD_NAME | string | No | Set this parameter when you apply for a fixed IP address (runtimeConfig.ican_caps.fixed_ip is set to true). |
| K8S_POD_NAMESPACE | string | No | Set this parameter when you apply for a fixed IP address (runtimeConfig.ican_caps.fixed_ip is set to true). |
| SECURE_CONTAINER | string | No | Secure container flag. |
| multi_port | int | No | The value ranges from 1 to 8. The default value is 1. Specifies the number of passthrough NICs. Only phy-direct and dpdk-direct networks are supported. |
| phy-direct | string | No | Specifies the NIC to be connected when you create an SR-IOV container network. |
| dpdk-direct | string | No | Specifies the NIC to be connected when you create a DPDK passthrough container network. |
| tenant_id | string | No | Indicates the tenant ID.<br>Only vpc-router networks are supported. |
| vpc_id | string | No | VPC ID.<br>Only vpc-router networks are supported. |
| secret_name | string | No | Specifies the AK/SK object name on the K8S APIServer.<br>Only vpc-router networks are supported.<br>For details, see the configuration of VPC-Router logical networks. |
| IP | string | No | IP address specified by the user, in the format of 192.168.0.10. |
| K8S_POD_NETWORK_ARGS | string | No | Specifies an IP address, in the format of 192.168.0.10. If both IP and K8S_POD_NETWORK_ARGS in args are not empty, the value of K8S_POD_NETWORK_ARGS prevails. |
| INSTANCE_NAME | string | No | Instance ID.<br>Refer to fixed IP addresses that support containers. |
| dist_gateway_disable | bool | No | The value true indicates that no gateway is created; false indicates that a gateway is created. |
| phynet | string or []string | No | Specifies the name of the physical plane to be added. The physical plane name is predefined and corresponds to that in the SNC system. When two plane names are entered, the active and standby planes are supported. Example: phy_net1 or ["phy_net2","phy_net3"] |
| endpoint_policies | struct | No | `"endpoint_policies": [{"Type": "", "ExceptionList": [""], "NeedEncap": true, "DestinationPrefix": ""}]` |
| port_map | struct | No | On a NAT network, container ports can be advertised to host ports.<br>`"port_map": [{"local_port": number, "host_port": number, "protocol": [string...]}...]` |

**Table 3** CNI multiple network parameters

| Parameter | Type | Mandatory | Description |
| --------- | ---- | --------- | ----------- |
| cniVersion | string | Yes | CNI version. Only 0.3.0 and 0.3.1 are supported. |
| name | string | Yes | Network name, which is user-defined and must be unique. |
| plugins | struct | Yes | For details, see the CNI single network parameters. |
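
A multiple-network configuration wraps one or more single-network definitions in a plugins list. The following is a minimal illustrative sketch; the names and nested fields are assumptions based on Table 1 and Table 3, not a validated deployment:

```json
{
    "cniVersion": "0.3.1",
    "name": "example-multi-net",
    "plugins": [
        {
            "type": "underlay_ipvlan",
            "ipam": {
                "type": "distributed_l2",
                "subnet": "192.168.20.0/24"
            }
        }
    ]
}
```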
diff --git a/docs/en/docs/container_engine/isula_container_engine/application_scenarios.md b/docs/en/docs/container_engine/isula_container_engine/application_scenarios.md
new file mode 100644
index 0000000000000000000000000000000000000000..17d6cbbb8d067c752d63c17b7a0fb3106e1a78b5
--- /dev/null
+++ b/docs/en/docs/container_engine/isula_container_engine/application_scenarios.md
@@ -0,0 +1,6 @@

# User Guide

This section describes how to use iSulad.

> [!NOTE]NOTE
> All iSulad operations require root privileges.

diff --git a/docs/en/docs/container_engine/isula_container_engine/checking_the_container_health_status.md b/docs/en/docs/container_engine/isula_container_engine/checking_the_container_health_status.md
new file mode 100644
index 0000000000000000000000000000000000000000..842eb9d662889dc579bcbe723f113161b424f833
--- /dev/null
+++ b/docs/en/docs/container_engine/isula_container_engine/checking_the_container_health_status.md
@@ -0,0 +1,62 @@

# Checking the Container Health Status

## Scenarios

In the production environment, bugs are inevitable in applications provided by developers or services provided by platforms. Therefore, a management system is indispensable for periodically checking and repairing applications. The container health check mechanism adds a user-defined health check function for containers. When a container is created, the **--health-cmd** option is configured so that commands are periodically executed in the container to monitor its health status based on their return values.

## Configuration Methods

Configuration during container startup:

```shell
isula run -itd --health-cmd "echo iSulad >> /tmp/health_check_file || exit 1" --health-interval 5m --health-timeout 3s --health-exit-on-unhealthy busybox bash
```

The configurable options are as follows:

- **--health-cmd**: This option is mandatory. If **0** is returned after the command is run in the container, the command execution succeeds. If a value other than **0** is returned, the command execution fails.
- **--health-interval**: interval between two consecutive command executions. The default value is **30s**. The value ranges from **1s** to the maximum value of Int64 (unit: nanosecond). If the input parameter is set to **0s**, the default value is used.
- **--health-timeout**: maximum duration for executing a single check command. If the execution times out, the command execution fails. The default value is **30s**. The value ranges from **1s** to the maximum value of Int64 (unit: nanosecond). If the input parameter is set to **0s**, the default value is used. Only containers whose runtime is of the LCR type are supported.
- **--health-start-period**: container initialization time. The default value is **0s**. The value ranges from **1s** to the maximum value of Int64 (unit: nanosecond).
- **--health-retries**: maximum number of retries for the health check. The default value is **3**. The maximum value is the maximum value of Int32.
- **--health-exit-on-unhealthy**: specifies whether to kill a container when it is unhealthy. The default value is **false**.

## Check Rules

1. After a container is started, its status is **health:starting**.
2. After the period specified by **start-period**, the **cmd** command is periodically executed in the container at the interval specified by **interval**: once the command has finished, it is executed again after the specified period.
3. If the **cmd** command is successfully executed within the time specified by **timeout** and the return value is **0**, the check is successful. Otherwise, the check fails. If the check is successful, the container status changes to **health:healthy**.
4. If the **cmd** command fails to be executed for the number of times specified by **retries**, the container status changes to **health:unhealthy**, and the container continues the health check.
5. When the container status is **health:unhealthy**, it changes back to **health:healthy** if a check succeeds.
6. If **--exit-on-unhealthy** is set, and the container exits due to reasons other than being killed (the returned exit code is **137**), the health check takes effect only after the container is restarted.
7. When the **cmd** command execution is complete or times out, the iSulad daemon records the start time, return value, and standard output of the check in the container configuration file. A maximum of five records are kept. In addition, the container configuration file stores the health check parameters.
8. When the container is running, the health check status is written into the container configurations. You can run the **isula inspect** command to view the status.

```json
"Health": {
    "Status": "healthy",
    "FailingStreak": 0,
    "Log": [
        {
            "Start": "2018-03-07T07:44:15.481414707-05:00",
            "End": "2018-03-07T07:44:15.556908311-05:00",
            "ExitCode": 0,
            "Output": ""
        },
        {
            "Start": "2018-03-07T07:44:18.557297462-05:00",
            "End": "2018-03-07T07:44:18.63035891-05:00",
            "ExitCode": 0,
            "Output": ""
        },
        ......
}
```

## Usage Restrictions

- A maximum of five health check status records can be stored in a container. The last five records are saved.
- If health check parameters are set to **0** during container startup, the default values are used.
- After a container with configured health check parameters is started, if the iSulad daemon exits, the health check is not executed. After the iSulad daemon is restarted, the health status of the running container changes to **starting**. Afterwards, the check rules are the same as above.
- If the health check fails for the first time, the health check status will not change from **starting** to **unhealthy** until the specified number of retries (**--health-retries**) is reached, or to **healthy** until the health check succeeds.
- The health check function of containers whose runtime is of the Open Container Initiative (OCI) type still needs improvement. Only containers whose runtime is of the LCR type are supported.
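
For example, to view the recorded health status of a running container, print its state block; the container name is an illustrative placeholder, and lightweight containers support the `{{json .State}}` template format:

```shell
# Print the container state as JSON; the "Health" block it contains holds
# the status and the last check records (container name is illustrative).
isula inspect -f '{{json .State}}' my_container
```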
diff --git a/docs/en/docs/container_engine/isula_container_engine/container_management.md b/docs/en/docs/container_engine/isula_container_engine/container_management.md
new file mode 100644
index 0000000000000000000000000000000000000000..a81b15a8498eb93cff285731437cc7125cbb4f28
--- /dev/null
+++ b/docs/en/docs/container_engine/isula_container_engine/container_management.md
@@ -0,0 +1,1956 @@

# Container Management

- [Container Management](#container-management)
  - [Creating a Container](#creating-a-container)
  - [Starting a Container](#starting-a-container)
  - [Running a Container](#running-a-container)
  - [Stopping a Container](#stopping-a-container)
  - [Forcibly Stopping a Container](#forcibly-stopping-a-container)
  - [Removing a Container](#removing-a-container)
  - [Attaching to a Container](#attaching-to-a-container)
  - [Renaming a Container](#renaming-a-container)
  - [Executing a Command in a Running Container](#executing-a-command-in-a-running-container)
  - [Querying Information About a Single Container](#querying-information-about-a-single-container)
  - [Querying Information About All Containers](#querying-information-about-all-containers)
  - [Restarting a Container](#restarting-a-container)
  - [Waiting for a Container to Exit](#waiting-for-a-container-to-exit)
  - [Viewing Process Information in a Container](#viewing-process-information-in-a-container)
  - [Displaying Resource Usage Statistics of a Container](#displaying-resource-usage-statistics-of-a-container)
  - [Obtaining Container Logs](#obtaining-container-logs)
  - [Copying Data Between a Container and a Host](#copying-data-between-a-container-and-a-host)
  - [Pausing a Container](#pausing-a-container)
  - [Resuming a Container](#resuming-a-container)
  - [Obtaining Event Messages from the Server in Real Time](#obtaining-event-messages-from-the-server-in-real-time)

## Creating a Container

### Description

To create a container, run the **isula create** command. The container engine uses the specified container image to create a read/write layer, or uses the specified local rootfs as the running environment of the container. After the creation is complete, the container ID is printed to standard output. You can then run the **isula start** command to start the container. The new container is in the **inited** state.

### Usage

```shell
isula create [OPTIONS] IMAGE [COMMAND] [ARG...]
```

### Parameters

The following table lists the parameters supported by the **create** command.

**Table 1** Parameter description

| Command | Parameter | Description |
| ------- | --------- | ----------- |
| create | --annotation | Sets annotations for the container. For example, set the native.umask parameter:<br>`--annotation native.umask=normal` (the umask value of the started container is 0022)<br>`--annotation native.umask=secure` (the umask value of the started container is 0027)<br>If this parameter is not set, the umask configuration in iSulad is used. |
| | --cap-drop | Drops Linux capabilities. |
| | --cgroup-parent | Specifies the cgroup parent path of the container. |
| | --cpuset-cpus | Allowed CPUs (for example, 0-3, 0,1). |
| | --cpu-shares | CPU shares (relative weight). |
| | --cpu-quota | Limits the CPU CFS quota. |
| | --device=[] | Adds a device to the container. |
| | --dns | Adds a DNS server. |
| | --dns-opt | Adds DNS options. |
| | --dns-search | Sets the search domain of a container. |
| | -e, --env | Sets environment variables. |
| | --env-file | Configures environment variables using a file. |
| | --entrypoint | Entry point to run when the container is started. |
| | --external-rootfs=PATH | Specifies a rootfs (a folder or block device) that is not managed by iSulad for the container. |
| | --files-limit | Limits the number of file handles that can be opened in a container. The value -1 indicates no limit. |
| | --group-add=[] | Adds additional user groups to the container. |
| | --help | Displays help information. |
| | --health-cmd | Command executed in the container. |
| | --health-exit-on-unhealthy | Determines whether to kill the container when it is detected as unhealthy. |
| | --health-interval | Interval between two consecutive command executions. |
| | --health-retries | Maximum number of health check retries. |
| | --health-start-period | Container initialization time. |
| | --health-timeout | Maximum time for executing a single check command. |
| | --hook-spec | Hook configuration file. |
| | -H, --host | Specifies the iSulad socket file path to be accessed. |
| | -h, --hostname | Container host name. |
| | -i, --interactive | Enables the standard input of the container even if it is not connected to the standard input of the container. |
| | --hugetlb-limit=[] | Limits the size of huge-page files, for example, --hugetlb-limit 2MB:32MB. |
| | --log-opt=[] | Log driver option. By default, the container serial port log function is disabled. You can run the --log-opt disable-log=false command to enable it. |
| | -l, --label | Sets a label for a container. |
| | --label-file | Sets container labels using files. |
| | -m, --memory | Memory limit. |
| | --memory-reservation | Sets the container memory limit. The default value is the same as that of --memory. --memory is a hard limit, and --memory-reservation is a soft limit. When the memory usage exceeds the preset value, the memory usage is dynamically adjusted (the system attempts to reduce the memory usage to a value less than the preset value when reclaiming memory). However, the memory usage may exceed the preset value. Generally, this parameter is used together with --memory. The value must be less than the preset value of --memory. The minimum value is 4 MB. |
| | --memory-swap | Memory swap space, which should be a positive integer. The value -1 indicates no limit. |
| | --memory-swappiness | The swappiness value is an integer ranging from 0 to 100. The smaller the value, the less the swap partition is used and the more memory is used in the Linux system. The larger the value, the more swap space is used by the kernel. The default value is -1, indicating that the system default value is used. |
| | --mount | Mounts a host directory, volume, or file system to the container. |
| | --no-healthcheck | Disables the health check configuration. |
| | --name=NAME | Container name. |
| | --net=none | Connects a container to a network. |
| | --pids-limit | Limits the number of processes that can be executed in the container. The value -1 indicates no limit. |
| | --privileged | Grants extended privileges to the container. |
| | -R, --runtime | Container runtime. The parameter value can be lcr, which is case insensitive. Therefore, LCR and lcr are equivalent. |
| | --read-only | Sets the rootfs of a container to read-only. |
| | --restart | Restart policy upon container exit.<br>For a system container, --restart on-reboot is supported. |
| | --storage-opt | Configures the storage driver option for a container. |
| | -t, --tty | Allocates a pseudo terminal. |
| | --ulimit | Sets the ulimit for a container. |
| | -u, --user | User name or UID, in the format of [\<name\|uid\>][:\<group\|gid\>]. |
| | -v, --volume=[] | Mounts a volume. |
| | --volumes-from=[] | Uses the mounting configuration of the specified container. |
### Constraints

- When the **--user** or **--group-add** parameter is used to verify the user or group during container startup, if the container uses an OCI image, the verification is performed against the **etc/passwd** and **etc/group** files of the actual rootfs of the image. If a folder or block device is used as the rootfs of the container, the **etc/passwd** and **etc/group** files on the host are verified. The rootfs ignores mounting parameters such as **-v** and **--mount**. That is, when these parameters are used to attempt to overwrite the **etc/passwd** and **etc/group** files, they do not take effect during the search and take effect only when the container is started. The generated configuration is saved in the **iSulad root directory/engine/container ID/start_generate_config.json** file. The file format is as follows:

  ```json
  {
      "uid": 0,
      "gid": 8,
      "additionalGids": [
          1234,
          8
      ]
  }
  ```

### Example

Create a container.

```shell
$ isula create busybox
fd7376591a9c3d8ee9a14f5d2c2e5255b02cc44cddaabca82170efd4497510e1
$ isula ps -a
STATUS PID IMAGE COMMAND EXIT_CODE RESTART_COUNT STARTAT FINISHAT RUNTIME ID NAMES
inited - busybox "sh" 0 0 - - lcr fd7376591a9c fd7376591a9c4521...
```

## Starting a Container

### Description

To start one or more containers, run the **isula start** command.

### Usage

```shell
isula start [OPTIONS] CONTAINER [CONTAINER...]
```

### Parameters

The following table lists the parameters supported by the **start** command.

**Table 1** Parameter description

| Command | Parameter | Description |
| ------- | --------- | ----------- |
| start | -H, --host | Specifies the iSulad socket file path to be accessed. |
| | -R, --runtime | Container runtime. The parameter value can be lcr, which is case insensitive. Therefore, LCR and lcr are equivalent. |
### Example

Start a new container.

```shell
isula start fd7376591a9c3d8ee9a14f5d2c2e5255b02cc44cddaabca82170efd4497510e1
```

## Running a Container

### Description

To create and start a container, run the **isula run** command. You can use a specified container image to create a container read/write layer and prepare for running the specified command. After the container is created, the specified command is run to start the container. The **run** command is equivalent to creating and then starting a container.

### Usage

```shell
isula run [OPTIONS] ROOTFS|IMAGE [COMMAND] [ARG...]
```

### Parameters

The following table lists the parameters supported by the **run** command.

**Table 1** Parameter description

| Command | Parameter | Description |
| ------- | --------- | ----------- |
| run | --annotation | Sets annotations for the container. For example, set the native.umask option:<br>`--annotation native.umask=normal` (the umask value of the started container is 0022)<br>`--annotation native.umask=secure` (the umask value of the started container is 0027)<br>If this parameter is not set, the umask configuration in iSulad is used. |
| | --cap-add | Adds Linux capabilities. |
| | --cap-drop | Drops Linux capabilities. |
| | --cgroup-parent | Specifies the cgroup parent path of the container. |
| | --cpuset-cpus | Allowed CPUs (for example, 0-3, 0,1). |
| | --cpu-shares | CPU shares (relative weight). |
| | --cpu-quota | Limits the CPU CFS quota. |
| | -d, --detach | Runs the container in the background and displays the container ID. |
| | --device=[] | Adds a device to the container. |
| | --dns | Adds a DNS server. |
| | --dns-opt | Adds DNS options. |
| | --dns-search | Sets the search domain of a container. |
| | -e, --env | Sets environment variables. |
| | --env-file | Configures environment variables using a file. |
| | --entrypoint | Entry point to run when the container is started. |
| | --external-rootfs=PATH | Specifies a rootfs (a folder or block device) that is not managed by iSulad for the container. |
| | --files-limit | Limits the number of file handles that can be opened in the container. The value -1 indicates no limit. |
| | --group-add=[] | Adds additional user groups to the container. |
| | --help | Displays help information. |
| | --health-cmd | Command executed in the container. |
| | --health-exit-on-unhealthy | Determines whether to kill the container when it is detected as unhealthy. |
| | --health-interval | Interval between two consecutive command executions. |
| | --health-retries | Maximum number of health check retries. |
| | --health-start-period | Container initialization interval. |
| | --health-timeout | Maximum time for executing a single check command. |
| | --hook-spec | Hook configuration file. |
| | -H, --host | Specifies the iSulad socket file path to be accessed. |
| | -h, --hostname | Container host name. |
| | --hugetlb-limit=[] | Limits the size of huge-page files, for example, --hugetlb-limit 2MB:32MB. |
| | -i, --interactive | Enables the standard input of the container even if it is not connected to the standard input of the container. |
| | --log-opt=[] | Log driver option. By default, the container serial port log function is disabled. You can run the --log-opt disable-log=false command to enable it. |
| | -m, --memory | Memory limit. |
| | --memory-reservation | Sets the container memory limit. The default value is the same as that of --memory. --memory is a hard limit, and --memory-reservation is a soft limit. When the memory usage exceeds the preset value, the memory usage is dynamically adjusted (the system attempts to reduce the memory usage to a value less than the preset value when reclaiming memory). However, the memory usage may exceed the preset value. Generally, this parameter is used together with --memory. The value must be less than the preset value of --memory. The minimum value is 4 MB. |
| | --memory-swap | Memory swap space, which should be a positive integer. The value -1 indicates no limit. |
| | --memory-swappiness | The swappiness value is an integer ranging from 0 to 100. The smaller the value, the less the swap partition is used and the more memory is used in the Linux system. The larger the value, the more swap space is used by the kernel. The default value is -1, indicating that the system default value is used. |
| | --mount | Mounts a host directory to the container. |
| | --no-healthcheck | Disables the health check configuration. |
| | --name=NAME | Container name. |
| | --net=none | Connects a container to a network. |
| | --pids-limit | Limits the number of processes that can be executed in the container. The value -1 indicates no limit. |
| | --privileged | Grants extended privileges to the container. |
| | -R, --runtime | Container runtime. The parameter value can be lcr, which is case insensitive. Therefore, LCR and lcr are equivalent. |
| | --read-only | Sets the rootfs of a container to read-only. |
| | --restart | Restart policy upon container exit.<br>For a system container, --restart on-reboot is supported. |
| | --rm | Automatically removes the container upon exit. |
| | --storage-opt | Configures the storage driver option for a container. |
| | -t, --tty | Allocates a pseudo terminal. |
| | --ulimit | Sets the ulimit for a container. |
| | -u, --user | User name or UID, in the format of [\<name\|uid\>][:\<group\|gid\>]. |
| | -v, --volume=[] | Mounts a volume. |
### Constraints

- When the parent process of a container exits, the container automatically exits.
- When a common container is created, the parent process cannot be initiated because the permissions of common containers are insufficient. As a result, the container does not respond when you run the **attach** command, even though it is created successfully.
- If **--net** is not specified when the container is running, the default host name is **localhost**.
- If the **--files-limit** parameter is set to a small value, for example, 1, when the container is started, iSulad creates a cgroup, sets the files.limit value, and writes the PID of the container process to the **cgroup.procs** file of the cgroup. At this time, the container process has already opened more than one handle. As a result, a write error is reported, and the container fails to start.
- If both **--mount** and **--volume** exist and their destination paths conflict, **--mount** will be processed after **--volume** (that is, the mount point in **--volume** will be overwritten).

  Note: The value of the **type** parameter of lightweight containers can be **bind** or **squashfs**. When **type** is set to **squashfs**, **src** is the image path. The value of the **type** parameter of native Docker can be **bind**, **volume**, or **tmpfs**.

- The restart policy does not support **unless-stopped**.
- The values returned by Docker and lightweight containers are 127 and 125, respectively, in the following three scenarios:

  The host device specified by **--device** does not exist.

  The hook JSON file specified by **--hook-spec** does not exist.

  The entry point specified by **--entrypoint** does not exist.

- When the **--volume** parameter is used, /dev/ptmx will be deleted and recreated during container startup. Therefore, do not mount the **/dev** directory to that of the container. Use **--device** to mount the devices in **/dev** of the container.
- Do not use the echo option to input data to the standard input of the **run** command. Otherwise, the client will be suspended. The echo value should be directly transferred to the container as a command line parameter.

  ```shell
  [root@localhost ~]# echo ls | isula run -i busybox /bin/sh


  ^C
  [root@localhost ~]#
  ```

  The client is suspended when the preceding command is executed because the command is equivalent to inputting **ls** to **stdin**. Then EOF is read, so the client stops sending data and waits for the server to exit. However, the server cannot determine whether the client needs to continue sending data. As a result, the server is suspended reading data, and both parties are suspended.

  The correct execution method is as follows:

  ```shell
  [root@localhost ~]# isula run -i busybox ls
  bin
  dev
  etc
  home
  proc
  root
  sys
  tmp
  usr
  var
  [root@localhost ~]#
  ```

- If the root directory (/) of the host is used as the file system of the container, the following situations may occur during the mounting:

  **Table 2** Mounting scenarios

  | Host Path (Source) | Container Path (Destination) |
  | ------------------ | ---------------------------- |
  | /home/test1 | /mnt/ |
  | /home/test2 | /mnt/abc |
  > [!TIP]NOTICE
  > Scenario 1: Mount **/home/test1** and then **/home/test2**. In this case, the content in **/home/test1** overwrites the content in **/mnt**. As a result, the **abc** directory does not exist in **/mnt**, and mounting **/home/test2** to **/mnt/abc** fails.
  > Scenario 2: Mount **/home/test2** and then **/home/test1**. In this case, the content of **/mnt** is replaced with the content of **/home/test1** during the second mounting. In this way, the content mounted during the first mounting from **/home/test2** to **/mnt/abc** is overwritten.
  > The first scenario is not supported. For the second scenario, users need to understand the risk of data access failures.

- Exercise caution when configuring the **/sys** and **/proc** directories as writable. The **/sys** and **/proc** directories contain interfaces for Linux to manage kernel parameters and devices. Configuring these directories as writable in a container may lead to container escape.
- Exercise caution when configuring containers to share namespaces with the host. For example, using **--pid**, **--ipc**, **--uts**, or **--net** to share namespaces between the container and the host eliminates namespace isolation between them. This allows attacks on the host from within the container. For instance, using **--pid** to share the PID namespace with the host enables the container to view and kill processes on the host.
- Exercise caution when using parameters like **--device** or **-v** to mount host resources. Avoid mapping sensitive directories or devices of the host into the container, to prevent sensitive information leakage.
- Exercise caution when starting containers with the **--privileged** option. The **--privileged** option grants excessive permissions to the container, which can affect the host configuration.

  > [!TIP]NOTICE
  > In high-concurrency scenarios (200 containers started concurrently), the memory management mechanism of glibc may cause memory holes and large virtual memory usage (for example, 10 GB). This problem is caused by a restriction of the glibc memory management mechanism in high-concurrency scenarios, not by a memory leak. Therefore, memory consumption does not increase infinitely. You can set the **MALLOC_ARENA_MAX** environment variable to reduce the virtual memory and increase the probability of reducing the physical memory. However, this environment variable causes iSulad concurrency performance to deteriorate. Set it based on site requirements.
  >
  > To balance performance and memory usage, set MALLOC_ARENA_MAX to 4. (On ARM64 servers, the resulting iSulad performance deterioration is kept below 10%.)
  > Configuration method:
  > 1. To manually start iSulad, run the export MALLOC_ARENA_MAX=4 command and then start iSulad.
  > 2. If systemd manages iSulad, modify the /etc/sysconfig/iSulad file by adding MALLOC_ARENA_MAX=4.

### Example

Run a new container.

```shell
$ isula run -itd busybox
9c2c13b6c35f132f49fb7ffad24f9e673a07b7fe9918f97c0591f0d7014c713b
```

## Stopping a Container

### Description

To stop a container, run the **isula stop** command. The SIGTERM signal is sent to the first process in the container. If the container is not stopped within the specified time (10s by default), the SIGKILL signal is sent.

### Usage

```shell
isula stop [OPTIONS] CONTAINER [CONTAINER...]
```

### Parameters

The following table lists the parameters supported by the **stop** command.

**Table 1** Parameter description

| Command | Parameter | Description |
| ------- | --------- | ----------- |
| stop | -f, --force | Forcibly stops a running container. |
| | -H, --host | Specifies the iSulad socket file path to be accessed. |
| | -t, --time | Time for graceful stop. If the time exceeds the value of this parameter, the container is forcibly stopped. |
### Constraints

- If the **t** parameter is specified and the value of **t** is less than 0, ensure that the application in the container can process the stop signal.

  Principle of the stop command: send the SIGTERM signal to the container, and then wait for a period of time (**t** entered by the user). If the container is still running after that period, the SIGKILL signal is sent to forcibly kill the container.

- The meaning of the input parameter **t** is as follows:

  **t** < 0: Wait for graceful stop. This setting is preferred when users are assured that their applications have a proper stop signal processing mechanism.

  **t** = 0: Do not wait and send **kill -9** to the container immediately.

  **t** > 0: Wait for the specified period and send **kill -9** to the container if it does not stop within that period.

  Therefore, if **t** is set to a value less than 0 (for example, **t** = -1), ensure that the container application correctly processes the SIGTERM signal. If the container ignores this signal, it will be suspended when the **isula stop** command is run.

### Example

Stop a container.

```shell
$ isula stop fd7376591a9c3d8ee9a14f5d2c2e5255b02cc44cddaabca82170efd4497510e1
fd7376591a9c3d8ee9a14f5d2c2e5255b02cc44cddaabca82170efd4497510e1
```

## Forcibly Stopping a Container

### Description

To forcibly stop one or more running containers, run the **isula kill** command.

### Usage

```shell
isula kill [OPTIONS] CONTAINER [CONTAINER...]
```

### Parameters

The following table lists the parameters supported by the **kill** command.

**Table 1** Parameter description

| Command | Parameter | Description |
| ------- | --------- | ----------- |
| kill | -H, --host | Specifies the iSulad socket file path to be accessed. |
| | -s, --signal | Signal sent to the container. |
### Example

Kill a container.

```shell
$ isula kill fd7376591a9c3d8ee9a14f5d2c2e5255b02cc44cddaabca82170efd4497510e1
fd7376591a9c3d8ee9a14f5d2c2e5255b02cc44cddaabca82170efd4497510e1
```

## Removing a Container

### Description

To remove a container, run the **isula rm** command.

### Usage

```shell
isula rm [OPTIONS] CONTAINER [CONTAINER...]
```

### Parameters

The following table lists the parameters supported by the **rm** command.

**Table 1** Parameter description

| Command | Parameter | Description |
| ------- | --------- | ----------- |
| rm | -f, --force | Forcibly removes a running container. |
| | -H, --host | Specifies the iSulad socket file path to be accessed. |
| | -v, --volume | Removes a volume mounted to a container. (Note: Currently, iSulad does not use this function.) |
### Constraints

- In normal I/O scenarios, it takes T1 to delete a running container in an empty environment (with only one container). In an environment with 200 containers (without a large number of I/O operations and with normal host I/O), it takes T2 to delete a running container. The specification of T2 is as follows: T2 = max{T1 x 3, 5}s.

### Example

Delete a stopped container.

```shell
$ isula rm fd7376591a9c3d8ee9a14f5d2c2e5255b02cc44cddaabca82170efd4497510e1
fd7376591a9c3d8ee9a14f5d2c2e5255b02cc44cddaabca82170efd4497510e1
```

## Attaching to a Container

### Description

To attach the standard input, standard output, and standard error of the current terminal to a running container, run the **isula attach** command. Only containers whose runtime is of the LCR type are supported.

### Usage

```shell
isula attach [OPTIONS] CONTAINER
```

### Parameters

The following table lists the parameters supported by the **attach** command.

**Table 1** Parameter description

| Command | Parameter | Description |
| ------- | --------- | ----------- |
| attach | --help | Displays help information. |
| | -H, --host | Specifies the iSulad socket file path to be accessed. |
| | -D, --debug | Enables the debug mode. |
### Constraints

- For native Docker, running the **attach** command directly enters the container. For an iSulad container, you have to run the **attach** command and press **Enter** to enter the container.

### Example

Attach to a running container.

```shell
$ isula attach fd7376591a9c3d8ee9a14f5d2c2e5255b02cc44cddaabca82170efd4497510e1
/ #
/ #
```

## Renaming a Container

### Description

To rename a container, run the **isula rename** command.

### Usage

```shell
isula rename [OPTIONS] OLD_NAME NEW_NAME
```

### Parameters

The following table lists the parameters supported by the **rename** command.

**Table 1** Parameter description

| Command | Parameter | Description |
| ------- | --------- | ----------- |
| rename | -H, --host | Specifies the iSulad socket file path to be accessed. |
### Example

Rename a container.

```shell
isula rename my_container my_new_container
```

## Executing a Command in a Running Container

### Description

To execute a command in a running container, run the **isula exec** command. The command is executed in the default directory of the container. If a user-defined directory is specified in the base image, the user-defined directory is used.

### Usage

```shell
isula exec [OPTIONS] CONTAINER COMMAND [ARG...]
```

### Parameters

The following table lists the parameters supported by the **exec** command.

**Table 1** Parameter description

| Command | Parameter | Description |
| ------- | --------- | ----------- |
| exec | -d, --detach | Runs a command in the background. |
| | -e, --env | Sets environment variables. (Note: Currently, iSulad does not use this function.) |
| | -H, --host | Specifies the iSulad socket file path to be accessed. |
| | -i, --interactive | Enables the standard input though no connection is set up. (Note: Currently, iSulad does not use this function.) |
| | -t, --tty | Allocates a pseudo terminal. (Note: Currently, iSulad does not use this function.) |
| | -u, --user | Logs in to the container as a specified user. |
### Constraints

- If no parameter is specified in the **isula exec** command, the **-it** parameter is used by default, indicating that a pseudo terminal is allocated and the container is accessed in interactive mode.
- When you run the **isula exec** command to execute a script and run a background process in the script, you need to use the **nohup** flag to ignore the **SIGHUP** signal.

  Otherwise, the kernel sends the **SIGHUP** signal to the process executed in the background when the process (the first process of the session) exits. As a result, the background process exits and zombie processes occur.

- After running the **isula exec** command to access the container process, do not run background programs. Otherwise, the system will be suspended.

  To run the **isula exec** command to execute a background process, perform the following steps:

  1. Run the **isula exec container_name bash** command to access the container.
  2. After entering the container, run the **script &** command.
  3. Run the **exit** command. The terminal stops responding.

  After the **isula exec** command is executed to enter the container and a background while1 program is run, when Bash exits, the while1 program does not exit and becomes an orphan process, which is taken over by process 1.
  The while1 process is executed by the initial Bash process **fork &exec** of the container. The while1 process copies the file handles of the Bash process. As a result, the handles are not completely closed when the Bash process exits. The console process cannot receive the handle closing event, epoll_wait stops responding, and the process does not exit.

- Do not run the **isula exec** command in the background. Otherwise, the system may be suspended.

  Running the **isula exec** command in the background behaves as follows:

  Run the **isula exec script &** command in the background, for example, **isula exec container_name script &**. The command is executed in the background, and the script continuously displays a file by running the **cat** command. Normally, there is output on the current terminal. If you press **Enter** on the current terminal, the client exits the stdout read operation due to the I/O read failure. As a result, the terminal does not output data. The server continues to write data to the buffer of the FIFO because the process is still displaying files by running the **cat** command. When the buffer is full, the process in the container is suspended in the write operation.

- When a lightweight container uses the **exec** command to execute commands with pipe operations, you are advised to run the **/bin/bash -c** command.

  Typical application scenario:

  Run the **isula exec container_name -it ls /test | grep "xx" | wc -l** command to count the number of xx files in the test directory. The output is processed by **grep** and **wc** through the pipe because **ls /test** is executed with **exec**. The output of **ls /test** executed by **exec** contains line breaks. When the output is processed, the result is incorrect.

  Cause: Run the **ls /test** command using **exec**. The command output contains line feed characters. Running the **| grep "xx" | wc -l** command on that output produces the result 2 (two lines).

  ```shell
  [root@localhost ~]# isula exec -it container ls /test
  xx xx10 xx12 xx14 xx3 xx5 xx7 xx9
  xx1 xx11 xx13 xx2 xx4 xx6 xx8
  [root@localhost ~]#
  ```

  Suggestion: When running the **run/exec** command to perform pipe operations, run the **/bin/bash -c** command so that the pipe operations are performed inside the container.

  ```shell
  [root@localhost ~]# isula exec -it container /bin/sh -c "ls /test | grep "xx" | wc -l"
  15
  [root@localhost ~]#
  ```

- Do not use the **echo** option to input data to the standard input of the **exec** command. Otherwise, the client will be suspended. The echo value should be directly transferred to the container as a command line parameter.

  ```shell
  [root@localhost ~]# echo ls | isula exec 38 /bin/sh


  ^C
  [root@localhost ~]#
  ```

  The client is suspended when the preceding command is executed because the command is equivalent to inputting **ls** to **stdin**. Then EOF is read, so the client stops sending data and waits for the server to exit. However, the server cannot determine whether the client needs to continue sending data. As a result, the server is suspended reading data, and both parties are suspended.

  The correct execution method is as follows:

  ```shell
  [root@localhost ~]# isula exec 38 ls
  bin dev etc home proc root sys tmp usr var
  ```

### Example

Run the echo command in a running container.

```shell
$ isula exec c75284634bee echo "hello,world"
hello,world
```

## Querying Information About a Single Container

### Description

To query information about a single container, run the **isula inspect** command.

### Usage

```shell
isula inspect [OPTIONS] CONTAINER|IMAGE [CONTAINER|IMAGE...]
```

### Parameters

The following table lists the parameters supported by the **inspect** command.

**Table 1** Parameter description

| Command | Parameter | Description |
| ------- | --------- | ----------- |
| inspect | -H, --host | Specifies the iSulad socket file path to be accessed. |
| | -f, --format | Output format. |
| | -t, --time | Timeout interval, in seconds. If the inspect command fails to query container information within the specified period, the system stops waiting and reports an error immediately. The default value is 120s. If the value is less than or equal to 0, the inspect command keeps waiting until the container information is obtained successfully. |
### Constraints

- Lightweight containers do not support output in the `{{.State}}` format but support output in the `{{json .State}}` format. The **-f** parameter is not supported when the object is an image.

### Example

Query information about a container.

```shell
$ isula inspect c75284634bee
[
    {
        "Id": "c75284634beeede3ab86c828790b439d16b6ed8a537550456b1f94eb852c1c0a",
        "Created": "2019-08-01T22:48:13.993304927-04:00",
        "Path": "sh",
        "Args": [],
        "State": {
            "Status": "running",
            "Running": true,
            "Paused": false,
            "Restarting": false,
            "Pid": 21164,
            "ExitCode": 0,
            "Error": "",
            "StartedAt": "2019-08-02T06:09:25.535049168-04:00",
            "FinishedAt": "2019-08-02T04:28:09.479766839-04:00",
            "Health": {
                "Status": "",
                "FailingStreak": 0,
                "Log": []
            }
        },
        "Image": "busybox",
        "ResolvConfPath": "",
        "HostnamePath": "",
        "HostsPath": "",
        "LogPath": "none",
        "Name": "c75284634beeede3ab86c828790b439d16b6ed8a537550456b1f94eb852c1c0a",
        "RestartCount": 0,
        "HostConfig": {
            "Binds": [],
            "NetworkMode": "",
            "GroupAdd": [],
            "IpcMode": "",
            "PidMode": "",
            "Privileged": false,
            "SystemContainer": false,
            "NsChangeFiles": [],
            "UserRemap": "",
            "ShmSize": 67108864,
            "AutoRemove": false,
            "AutoRemoveBak": false,
            "ReadonlyRootfs": false,
            "UTSMode": "",
            "UsernsMode": "",
            "Sysctls": {},
            "Runtime": "lcr",
            "RestartPolicy": {
                "Name": "no",
                "MaximumRetryCount": 0
            },
            "CapAdd": [],
            "CapDrop": [],
            "Dns": [],
            "DnsOptions": [],
            "DnsSearch": [],
            "ExtraHosts": [],
            "HookSpec": "",
            "CPUShares": 0,
            "Memory": 0,
            "OomScoreAdj": 0,
            "BlkioWeight": 0,
            "BlkioWeightDevice": [],
            "CPUPeriod": 0,
            "CPUQuota": 0,
            "CPURealtimePeriod": 0,
            "CPURealtimeRuntime": 0,
            "CpusetCpus": "",
            "CpusetMems": "",
            "SecurityOpt": [],
            "StorageOpt": {},
            "KernelMemory": 0,
            "MemoryReservation": 0,
            "MemorySwap": 0,
            "OomKillDisable": false,
            "PidsLimit": 0,
            "FilesLimit": 0,
            "Ulimits": [],
            "Hugetlbs": [],
            "HostChannel": {
                "PathOnHost": "",
                "PathInContainer": "",
                "Permissions": "",
                "Size": 0
            },
            "EnvTargetFile": "",
            "ExternalRootfs": ""
        },
        "Mounts": [],
        "Config": {
            "Hostname": "localhost",
            "User": "",
            "Env": [
                "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
                "TERM=xterm",
                "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
            ],
            "Tty": true,
            "Cmd": [
                "sh"
            ],
            "Entrypoint": [],
            "Labels": {},
            "Annotations": {
                "log.console.file": "none",
                "log.console.filerotate": "7",
                "log.console.filesize": "1MB",
                "rootfs.mount": "/var/lib/isulad/mnt/rootfs",
                "native.umask": "secure"
            },
            "HealthCheck": {
                "Test": [],
                "Interval": 0,
                "Timeout": 0,
                "StartPeriod": 0,
                "Retries": 0,
                "ExitOnUnhealthy": false
            }
        },
        "NetworkSettings": {
            "IPAddress": ""
        }
    }
]
```

## Querying Information About All Containers

### Description

To query information about all containers, run the **isula ps** command.

### Usage

```shell
isula ps [OPTIONS]
```

### Parameters

The following table lists the parameters supported by the **ps** command.

**Table 1** Parameter description

| Command | Parameter | Description |
| ------- | --------- | ----------- |
| ps | -a, --all | Displays all containers. |
| | -H, --host | Specifies the iSulad socket file path to be accessed. |
| | -q, --quiet | Displays only the container name. |
| | -f, --filter | Adds filter criteria. |
| | --format | Output format. |
| | --no-trunc | Does not truncate the container ID. |
### Example

Query information about all containers.

```shell
$ isula ps -a

ID IMAGE STATUS PID COMMAND EXIT_CODE RESTART_COUNT STARTAT FINISHAT RUNTIME NAMES
e84660aa059c rnd-dockerhub.huawei.com/official/busybox running 304765 "sh" 0 0 13 minutes ago - lcr e84660aa059cafb0a77a4002e65cc9186949132b8e57b7f4d76aa22f28fde016
$ isula ps -a --format "table {{.ID}} {{.Image}}" --no-trunc
ID IMAGE
e84660aa059cafb0a77a4002e65cc9186949132b8e57b7f4d76aa22f28fde016 rnd-dockerhub.huawei.com/official/busybox
```

## Restarting a Container

### Description

To restart one or more containers, run the **isula restart** command.

### Usage

```shell
isula restart [OPTIONS] CONTAINER [CONTAINER...]
```

### Parameters

The following table lists the parameters supported by the **restart** command.

**Table 1** Parameter description

+| Command | Parameter | Description |
+| --- | --- | --- |
+| restart | -H, --host | Specifies the iSulad socket file path to be accessed. |
+| | -t, --time | Time to wait for a graceful stop. If the container does not stop within this time, it is forcibly stopped. |
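+
+For example, to allow a container up to 10 seconds to stop gracefully before it is killed (a sketch; the container ID is taken from the example below and is illustrative):
+
+```shell
+isula restart -t 10 c75284634bee
+```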
+ +### Constraints + +- If the **t** parameter is specified and the value of **t** is less than 0, ensure that the application in the container can process the stop signal. + + The restart command first calls the stop command to stop the container. Send the SIGTERM signal to the container, and then wait for a period of time \(**t** entered by the user\). If the container is still running after the period of time, the SIGKILL signal is sent to forcibly kill the container. + +- The meaning of the input parameter **t** is as follows: + + **t** < 0: Wait for graceful stop. This setting is preferred when users are assured that their applications have a proper stop signal processing mechanism. + + **t** = 0: Do not wait and send **kill -9** to the container immediately. + + **t** \> 0: Wait for a specified period and send **kill -9** to the container if the container does not stop within the specified period. + + Therefore, if **t** is set to a value less than 0 \(for example, **t** = -1\), ensure that the container application correctly processes the SIGTERM signal. If the container ignores this signal, the container will be suspended when the **isula stop** command is run. + +### Example + +Restart a container. + +```shell +$ isula restart c75284634beeede3ab86c828790b439d16b6ed8a537550456b1f94eb852c1c0a + c75284634beeede3ab86c828790b439d16b6ed8a537550456b1f94eb852c1c0a +``` + +## Waiting for a Container to Exit + +### Description + +To wait for one or more containers to exit, run the **isula wait** command. Only containers whose runtime is of the LCR type are supported. + +### Usage + +```shell +isula wait [OPTIONS] CONTAINER [CONTAINER...] +``` + +### Parameters + +The following table lists the parameters supported by the **wait** command. + +**Table 1** Parameter description + + + + + + + + + + + + + + +

+| Command | Parameter | Description |
+| --- | --- | --- |
+| wait | -H, --host | Specifies the iSulad socket file path to be accessed. |
+| | / | Blocks until the container stops and displays the exit code. |
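+
+Because **isula wait** blocks and then prints the exit code, it can gate follow-up steps in scripts. A minimal sketch (the container ID is illustrative):
+
+```shell
+rc=$(isula wait c75284634bee)
+echo "container exited with code ${rc}"
+```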
+ +### Example + +Wait for a single container to exit. + +```shell +$ isula wait c75284634beeede3ab86c828790b439d16b6ed8a537550456b1f94eb852c1c0a + 137 +``` + +## Viewing Process Information in a Container + +### Description + +To view process information in a container, run the **isula top** command. Only containers whose runtime is of the LCR type are supported. + +### Usage + +```shell +isula top [OPTIONS] container [ps options] +``` + +### Parameters + +The following table lists the parameters supported by the **top** command. + +**Table 1** Parameter description + + + + + + + + + + + + + + +

+| Command | Parameter | Description |
+| --- | --- | --- |
+| top | -H, --host | Specifies the iSulad socket file path to be accessed. |
+| | / | Queries the process information of a running container. |
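+
+The trailing *ps options* in the usage line are passed through to the **ps** tool. A sketch that requests full-format output (assuming the host **ps** accepts **-ef**; the container ID is illustrative):
+
+```shell
+isula top 21fac8bb9ea8 -ef
+```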
+ +### Example + +Query process information in a container. + +```shell +$ isula top 21fac8bb9ea8e0be4313c8acea765c8b4798b7d06e043bbab99fc20efa72629c +UID PID PPID C STIME TTY TIME CMD +root 22166 22163 0 23:04 pts/1 00:00:00 sh +``` + +## Displaying Resource Usage Statistics of a Container + +### Description + +To display resource usage statistics in real time, run the **isula stats** command. Only containers whose runtime is of the LCR type are supported. + +### Usage + +```shell +isula stats [OPTIONS] [CONTAINER...] +``` + +### Parameters + +The following table lists the parameters supported by the **stats** command. + +**Table 1** Parameter description + + + + + + + + + + + + + + + + + +

+| Command | Parameter | Description |
+| --- | --- | --- |
+| stats | -H, --host | Specifies the iSulad socket file path to be accessed. |
+| | -a, --all | Displays all containers. (By default, only running containers are displayed.) |
+| | --no-stream | Displays only the first result (statistics in non-stream mode). |
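+
+A one-shot snapshot that also covers stopped containers can be taken by combining the two options (a sketch):
+
+```shell
+isula stats -a --no-stream
+```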
+ +### Example + +Display resource usage statistics. + +```shell +$ isula stats --no-stream 21fac8bb9ea8e0be4313c8acea765c8b4798b7d06e043bbab99fc20efa72629c CONTAINER CPU % MEM USAGE / LIMIT MEM % BLOCK I / O PIDS +21fac8bb9ea8 0.00 56.00 KiB / 7.45 GiB 0.00 0.00 B / 0.00 B 1 +``` + +## Obtaining Container Logs + +### Description + +To obtain container logs, run the **isula logs** command. Only containers whose runtime is of the LCR type are supported. + +### Usage + +```shell +isula logs [OPTIONS] [CONTAINER...] +``` + +### Parameters + +The following table lists the parameters supported by the **logs** command. + +**Table 1** Parameter description + + + + + + + + + + + + + + + + + +

+| Command | Parameter | Description |
+| --- | --- | --- |
+| logs | -H, --host | Specifies the iSulad socket file path to be accessed. |
+| | -f, --follow | Traces log output. |
+| | --tail | Displays only the specified number of the most recent log records. |
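+
+The two options can be combined, for example, to print the last 10 records and then keep following new output (a sketch; the container ID is illustrative):
+
+```shell
+isula logs --tail 10 -f 6a144695f5da
+```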
+ +### Constraints + +- By default, the container log function is enabled. To disable this function, run the **isula create --log-opt disable-log=true** or **isula run --log-opt disable-log=true** command. + +### Example + +Obtain container logs. + +```shell +$ isula logs 6a144695f5dae81e22700a8a78fac28b19f8bf40e8827568b3329c7d4f742406 +hello, world +hello, world +hello, world +``` + +## Copying Data Between a Container and a Host + +### Description + +To copy data between a host and a container, run the **isula cp** command. Only containers whose runtime is of the LCR type are supported. + +### Usage + +```shell +isula cp [OPTIONS] CONTAINER:SRC_PATH DEST_PATH +isula cp [OPTIONS] SRC_PATH CONTAINER:DEST_PATH +``` + +### Parameters + +The following table lists the parameters supported by the **cp** command. + +**Table 1** Parameter description + + + + + + + + + + + +

+| Command | Parameter | Description |
+| --- | --- | --- |
+| cp | -H, --host | Specifies the iSulad socket file path to be accessed. |
+ +### Constraints + +- When iSulad copies files, note that the **/etc/hostname**, **/etc/resolv.conf**, and **/etc/hosts** files are not mounted to the host, neither the **--volume** and **--mount** parameters. Therefore, the original files in the image instead of the files in the real container are copied. + + ```shell + [root@localhost tmp]# isula cp b330e9be717a:/etc/hostname /tmp/hostname + [root@localhost tmp]# cat /tmp/hostname + [root@localhost tmp]# + ``` + +- When decompressing a file, iSulad does not check the type of the file or folder to be overwritten in the file system. Instead, iSulad directly overwrites the file or folder. Therefore, if the source is a folder, the file with the same name is forcibly overwritten as a folder. If the source file is a file, the folder with the same name will be forcibly overwritten as a file. + + ```shell + [root@localhost tmp]# rm -rf /tmp/test_file_to_dir && mkdir /tmp/test_file_to_dir + [root@localhost tmp]# isula exec b330e9be717a /bin/sh -c "rm -rf /tmp/test_file_to_dir && touch /tmp/test_file_to_dir" + [root@localhost tmp]# isula cp b330e9be717a:/tmp/test_file_to_dir /tmp + [root@localhost tmp]# ls -al /tmp | grep test_file_to_dir + -rw-r----- 1 root root 0 Apr 26 09:59 test_file_to_dir + ``` + +- iSulad freezes the container during the copy process and restores the container after the copy is complete. + +### Example + +Copy the **/test/host** directory on the host to the **/test** directory on container 21fac8bb9ea8. + +```shell +isula cp /test/host 21fac8bb9ea8:/test +``` + +Copy the **/www** directory on container 21fac8bb9ea8 to the **/tmp** directory on the host. + +```shell +isula cp 21fac8bb9ea8:/www /tmp/ +``` + +## Pausing a Container + +### Description + +To pause all processes in a container, run the **isula pause** command. Only containers whose runtime is of the LCR type are supported. + +### Usage + +```shell +isula pause CONTAINER [CONTAINER...] +``` + +### Parameters + + + + + + + + + + + +

+| Command | Parameter | Description |
+| --- | --- | --- |
+| pause | -H, --host | Specifies the iSulad socket file path to be accessed. |
+ +### Constraints + +- Only containers in the running state can be paused. +- After a container is paused, other lifecycle management operations \(such as **restart**, **exec**, **attach**, **kill**, **stop**, and **rm**\) cannot be performed. +- After a container with health check configurations is paused, the container status changes to unhealthy. + +### Example + +Pause a running container. + +```shell +$ isula pause 8fe25506fb5883b74c2457f453a960d1ae27a24ee45cdd78fb7426d2022a8bac + 8fe25506fb5883b74c2457f453a960d1ae27a24ee45cdd78fb7426d2022a8bac +``` + +## Resuming a Container + +### Description + +To resume all processes in a container, run the **isula unpause** command. It is the reverse process of **isula pause**. Only containers whose runtime is of the LCR type are supported. + +### Usage + +```shell +isula unpause CONTAINER [CONTAINER...] +``` + +### Parameters + + + + + + + + + + + +

+| Command | Parameter | Description |
+| --- | --- | --- |
+| unpause | -H, --host | Specifies the iSulad socket file path to be accessed. |
+ +### Constraints + +- Only containers in the paused state can be unpaused. + +### Example + +Resume a paused container. + +```shell +$ isula unpause 8fe25506fb5883b74c2457f453a960d1ae27a24ee45cdd78fb7426d2022a8bac + 8fe25506fb5883b74c2457f453a960d1ae27a24ee45cdd78fb7426d2022a8bac +``` + +## Obtaining Event Messages from the Server in Real Time + +### **Description** + +The **isula events** command is used to obtain event messages such as container image lifecycle and running event from the server in real time. Only containers whose runtime type is **lcr** are supported. + +### Usage + +```shell +isula events [OPTIONS] +``` + +### Parameter + + + + + + + + + + + + + + + + + + +

+| Command | Parameter | Description |
+| --- | --- | --- |
+| events | -H, --host | Specifies the iSulad socket file path to be accessed. |
+| | -n, --name | Obtains event messages of a specified container. |
+| | -S, --since | Obtains event messages generated since a specified time. |
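+
+For instance, a sketch that watches only the events of a single container (the container name `test` is hypothetical):
+
+```shell
+isula events -n test
+```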
+ +### Example + +Run the following command to obtain event messages from the server in real time: + +```shell +isula events +``` diff --git a/docs/en/docs/container_engine/isula_container_engine/container_resource_management.md b/docs/en/docs/container_engine/isula_container_engine/container_resource_management.md new file mode 100644 index 0000000000000000000000000000000000000000..5fdd16d14f1b7e0b7c8e60f207973183c530d6c3 --- /dev/null +++ b/docs/en/docs/container_engine/isula_container_engine/container_resource_management.md @@ -0,0 +1,717 @@ +# Container Resource Management + +## Introduction + +Container resource management can be implemented through features like namespace and cgroup. iSula supports both cgroup v1 and v2 for resource constraints, with cgroup v2 being an experimental feature not approved for commercial use. When the system is configured to exclusively support cgroup v2 and mounts it to the /sys/fs/cgroup directory, iSula utilizes cgroup v2 for resource management. In both cgroup v1 and cgroup v2 implementations, iSula provides users with a consistent interface for setting resource limits. + +## Sharing Resources + +### Description + +Containers or containers and hosts can share namespace information mutually, including PID, network, IPC, and UTS information. + +> [!NOTE]Note +> Sharing host namespaces (and thereby bypassing namespace isolation) enables containers to access and modify host system information, introducing serious security vulnerabilities. For instance, sharing a host process namespace via `--pid=host` exposes all host processes, which could lead to information disclosure or allow termination of critical system processes. This functionality should be used with extreme caution and only in properly secured environments. + +### Usage + +When running the **isula create/run** command, you can set the namespace parameters to share resources. For details, see the following parameter description table. + +### Parameters + +You can specify the following parameters when running the **isula create/run** command: + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

+| Parameter | Description | Value Range | Mandatory or Not |
+| --- | --- | --- | --- |
+| --pid | Specifies the PID namespace to be shared. | [none, host, container:\<containerID\>]: **none** indicates that the namespace is not shared. **host** indicates that the namespace is shared with the host. **container:\<containerID\>** indicates that the namespace is shared with the specified container. | No |
+| --net | Specifies the network namespace to be shared. | [none, host, container:\<containerID\>]: **none** indicates that the namespace is not shared. **host** indicates that the namespace is shared with the host. **container:\<containerID\>** indicates that the namespace is shared with the specified container. | No |
+| --ipc | Specifies the IPC namespace to be shared. | [none, host, container:\<containerID\>]: **none** indicates that the namespace is not shared. **host** indicates that the namespace is shared with the host. **container:\<containerID\>** indicates that the namespace is shared with the specified container. | No |
+| --uts | Specifies the UTS namespace to be shared. | [none, host, container:\<containerID\>]: **none** indicates that the namespace is not shared. **host** indicates that the namespace is shared with the host. **container:\<containerID\>** indicates that the namespace is shared with the specified container. | No |
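+
+As a further illustration, a container can instead share the host network namespace (use this cautiously, per the security note above; a sketch):
+
+```shell
+isula run -tid --name test_net --net host busybox sh
+```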
+ +### Example + +If two containers need to share the same PID namespace, add **--pid container:** when running the container. For example: + +```shell +isula run -tid --name test_pid busybox sh +isula run -tid --name test --pid container:test_pid busybox sh +``` + +## Restricting CPU Resources of a Running Container + +### Description + +You can set parameters to restrict the CPU resources of a container. + +### Usage + +When running the **isula create/run** command, you can set CPU-related parameters to limit the CPU resources of a container. For details about the parameters and values, see the following table. + +### Parameters + +You can specify the following parameters when running the **isula create/run** command: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

+| Parameter | Description | Value Range | Mandatory or Not |
+| --- | --- | --- | --- |
+| --cpu-period | Limits the CPU CFS period in a container. | 64-bit integer | No |
+| --cpu-quota | Limits the CPU CFS quota. | 64-bit integer | No |
+| --cpu-shares | Limits the CPU share (relative weight). | 64-bit integer | No |
+| --cpuset-cpus | Limits the CPUs that the container can use. | A character string listing the CPUs to use, for example, **0-3** or **0,1**. | No |
+| --cpuset-mems | Limits the memory nodes used by cpuset in the container. | A character string listing the memory nodes to use, for example, **0-3** or **0,1**. | No |
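+
+The CFS period and quota work together: with a period of 100000 µs and a quota of 50000 µs, the container is capped at half of one CPU (a sketch):
+
+```shell
+isula run -tid --cpu-period 100000 --cpu-quota 50000 busybox sh
+```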
+ +### Example + +To restrict a container to use a specific CPU, add **--cpuset-cpus number** when running the container. For example: + +```shell +isula run -tid --cpuset-cpus 0,2-3 busybox sh +``` + +>[!NOTE]NOTE +>You can check whether the configuration is successful. For details, see "Querying Information About a Single Container." + +## Restricting the Memory Usage of a Running Container + +### Description + +You can set parameters to restrict the memory usage of a container. + +### Usage + +When running the **isula create/run** command, you can set memory-related parameters to restrict memory usage of containers. For details about the parameters and values, see the following table. + +### Parameters + +You can specify the following parameters when running the **isula create/run** command: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

+| Parameter | Description | Value Range | Mandatory or Not |
+| --- | --- | --- | --- |
+| --memory | Specifies the upper limit of the memory usage of a container. | 64-bit integer. The value is a non-negative number. The value 0 indicates that no limit is set. The unit can be empty (byte), KB, MB, GB, TB, or PB. | No |
+| --memory-reservation | Specifies the soft upper limit of the memory of a container. | 64-bit integer. The value is a non-negative number. The value 0 indicates that no limit is set. The unit can be empty (byte), KB, MB, GB, TB, or PB. | No |
+| --memory-swap | Specifies the upper limit of the swap memory of the container. | 64-bit integer. The value can be -1 or a non-negative number. The value -1 indicates no limit, and the value 0 indicates that no limit is set. The unit can be empty (byte), KB, MB, GB, TB, or PB. | No |
+| --kernel-memory | Specifies the upper limit of the kernel memory of the container. | 64-bit integer. The value is a non-negative number. The value 0 indicates that no limit is set. The unit can be empty (byte), KB, MB, GB, TB, or PB. | No |
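+
+A sketch that sets both a hard memory cap and a swap memory cap (the values are illustrative; see the table above for the exact semantics of **--memory-swap**):
+
+```shell
+isula run -tid --memory 500M --memory-swap 1G busybox sh
+```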
+ +### Example + +To set the upper limit of the memory of a container, add **--memory \[\]** when running the container. For example: + +```shell +isula run -tid --memory 1G busybox sh +``` + +## Restricting I/O Resources of a Running Container + +### Description + +You can set parameters to limit the read/write speed of devices in the container. + +### Usage + +When running the **isula create/run** command, you can set **--device-read-bps/--device-write-bps :\[\]** to limit the read/write speed of devices in the container. + +### Parameters + +When running the **isula create/run** command, set **--device-read/write-bps**. + + + + + + + + + + + + + + +

+| Parameter | Description | Value Range | Mandatory or Not |
+| --- | --- | --- | --- |
+| --device-read-bps/--device-write-bps | Limits the read/write speed of devices in the container. | 64-bit integer. The value is a non-negative integer; the value 0 indicates that no limit is set. The unit can be empty (byte), KB, MB, GB, TB, or PB. | No |
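+
+Note that the full option names are **--device-read-bps** and **--device-write-bps**, each taking an argument of the form `<device>:<size>[unit]`. A sketch limiting reads from **/dev/sda** inside the container to 1 MB/s (the device path is illustrative):
+
+```shell
+isula run -tid --device-read-bps /dev/sda:1mb busybox sh
+```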
+ +### Example + +To limit the read/write speed of devices in the container, add **--device-write-bps/--device-read-bps :\[\]** when running the container. For example, to limit the read speed of the device **/dev/sda** in the container **busybox** to 1 Mbit/s, run the following command: + +```shell +isula run -tid --device-write /dev/sda:1mb busybox sh +``` + +To limit the write speed, run the following command: + +```shell +isula run -tid read-bps /dev/sda:1mb busybox sh +``` + +## Restricting the Rootfs Storage Space of a Container + +### Description + +When the overlay2 storage driver is used on the EXT4 file system, the file system quota of a single container can be set. For example, the quota of container A is set to 5 GB, and the quota of container B is set to 10 GB. + +This feature is implemented by the project quota function of the EXT4 file system. If the kernel supports this function, use the syscall SYS\_IOCTL to set the project ID of a directory, and then use the syscall SYS\_QUOTACTL to set the hard limit and soft limit of the corresponding project ID. + +### Usage + +1. Prepare the environment. + + Ensure that the file system supports the **Project ID** and **Project Quota** attributes, the kernel version is 4.19 or later, and the version of the peripheral package e2fsprogs is 1.43.4-2 or later. + +2. Before mounting overlayfs to a container, set different project IDs for the upper and work directories of different containers and set inheritance options. After overlayfs is mounted to a container, the project IDs and inherited attributes cannot be modified. +3. Set the quota as a privileged user outside the container. +4. Add the following configuration to daemon: + + ```shell + -s overlay2 --storage-opt overlay2.override_kernel_check=true + ``` + +5. Daemon supports the following options for setting default restrictions for containers: + + **--storage-opt overlay2.basesize=128M** specifies the default limit. If **--storeage-opt size** is also specified when you run the **isula run** command, the value of this parameter takes effect. If no size is specified during the daemon process or when you run the **isula run** command, the size is not limited. + +6. Enable the **Project ID** and **Project Quota** attributes of the file system. + - Format and mount the file system. + + ```shell + # mkfs.ext4 -O quota,project /dev/sdb + # mount -o prjquota /dev/sdb /var/lib/isulad + ``` + +### Parameters + +When running the **create/run** command, set **--storage-opt**. + + + + + + + + + + + + + + +

+| Parameter | Description | Value Range | Mandatory or Not |
+| --- | --- | --- | --- |
+| --storage-opt size=${rootfsSize} | Restricts the root file system (rootfs) storage space of the container. | The size parsed from rootfsSize is a positive 64-bit integer expressed in bytes. A unit suffix matching ([kKmMgGtTpP])?([iI])?[bB]? can also be appended. | No |
+ +### Example + +In the **isula run/create** command, use the existing parameter **--storage-opt size=**_value_ to set the quota. The value is a positive number in the unit of **\[kKmMgGtTpP\]?\[iI\]?\[bB\]?**. If the value does not contain a unit, the default unit is byte. + +```shell +$ [root@localhost ~]# isula run -ti --storage-opt size=10M busybox +/ # df -h +Filesystem Size Used Available Use% Mounted on +overlay 10.0M 48.0K 10.0M 0% / +none 64.0M 0 64.0M 0% /dev +none 10.0M 0 10.0M 0% /sys/fs/cgroup +tmpfs 64.0M 0 64.0M 0% /dev +shm 64.0M 0 64.0M 0% /dev/shm +/dev/mapper/vg--data-ext41 + 9.8G 51.5M 9.2G 1% /etc/hostname +/dev/mapper/vg--data-ext41 + 9.8G 51.5M 9.2G 1% /etc/resolv.conf +/dev/mapper/vg--data-ext41 + 9.8G 51.5M 9.2G 1% /etc/hosts +tmpfs 3.9G 0 3.9G 0% /proc/acpi +tmpfs 64.0M 0 64.0M 0% /proc/kcore +tmpfs 64.0M 0 64.0M 0% /proc/keys +tmpfs 64.0M 0 64.0M 0% /proc/timer_list +tmpfs 64.0M 0 64.0M 0% /proc/sched_debug +tmpfs 3.9G 0 3.9G 0% /proc/scsi +tmpfs 64.0M 0 64.0M 0% /proc/fdthreshold +tmpfs 64.0M 0 64.0M 0% /proc/fdenable +tmpfs 3.9G 0 3.9G 0% /sys/firmware +/ # +/ # dd if=/dev/zero of=/home/img bs=1M count=12 && sync +dm-4: write failed, project block limit reached. +10+0 records in +9+0 records out +10432512 bytes (9.9MB) copied, 0.011782 seconds, 844.4MB/s +/ # df -h | grep overlay +overlay 10.0M 10.0M 0 100% / +/ # +``` + +### Constraints + +1. The quota applies only to the rw layer. + + The quota of overlay2 is for the rw layer of the container. The image size is not included. + +2. The kernel supports and enables this function. + + The kernel must support the EXT4 project quota function. When running **mkfs**, add **-O quota,project**. When mounting the file system, add **-o prjquota**. If any of the preceding conditions is not met, an error is reported when **--storage-opt size=**_value_ is used. + + ```shell + $ [root@localhost ~]# isula run -it --storage-opt size=10Mb busybox df -h + Error response from daemon: Failed to prepare rootfs with error: time="2019-04-09T05:13:52-04:00" level=fatal msg="error creating read- + write layer with ID "a4c0e55e82c55e4ee4b0f4ee07f80cc2261cf31b2c2dfd628fa1fb00db97270f": --storage-opt is supported only for overlay over + xfs or ext4 with 'pquota' mount option" + ``` + +3. Description of the limit of quota: + 1. If the quota is greater than the size of the partition where user **root** of iSulad is located, the file system quota displayed by running the **df** command in the container is the size of the partition where user **root** of iSulad is located, not the specified quota. + 2. **--storage-opt size=0** indicates that the size is not limited and the value cannot be less than 4096. The precision of size is one byte. If the specified precision contains decimal bytes, the decimal part is ignored. For example, if size is set to **0.1**, the size is not limited. \(The value is restricted by the precision of the floating point number stored on the computer. That is, 0.999999999999999999999999999 is equal to 1. The number of digits 9 may vary according to computers. Therefore, 4095.999999999999999999999999999 is equal to 4096.\) Note that running **isula inspect** displays the original command line specified format. If the value contains decimal bytes, you need to ignore the decimal part. + 3. If the quota is too small, for example,**--storage-opt size=4k**, the container may fail to be started because some files need to be created for starting the container. + 4. 
The **-o prjquota** option is added to the root partition of iSulad when iSulad is started last time. If this option is not added during this startup, the setting of the container with quota created during the last startup does not take effect. + 5. The value range of the daemon quota **--storage-opt overlay2.basesize** is the same as that of **--storage-opt size**. + +4. When **storage-opt** is set to 4 KB, the lightweight container startup is different from that of Docker. + + Use the **storage-opt size=4k** and image **rnd-dockerhub.huawei.com/official/ubuntu-arm64:latest** to run the container. + + Docker fails to be started. + + ```shell + [root@localhost ~]# docker run -itd --storage-opt size=4k rnd-dockerhub.huawei.com/official/ubuntu-arm64:latest + docker: Error response from daemon: symlink /proc/mounts /var/lib/docker/overlay2/e6e12701db1a488636c881b44109a807e187b8db51a50015db34a131294fcf70-init/merged/etc/mtab: disk quota exceeded. + See 'docker run --help'. + ``` + + The lightweight container is started properly and no error is reported. + + ```shell + [root@localhost ~]# isula run -itd --storage-opt size=4k rnd-dockerhub.huawei.com/official/ubuntu-arm64:latest + 636480b1fc2cf8ac895f46e77d86439fe2b359a1ff78486ae81c18d089bbd728 + [root@localhost ~]# isula ps + STATUS PID IMAGE COMMAND EXIT_CODE RESTART_COUNT STARTAT FINISHAT RUNTIME ID NAMES + running 17609 rnd-dockerhub.huawei.com/official/ubuntu-arm64:latest /bin/bash 0 0 2 seconds ago - lcr 636480b1fc2c 636480b1fc2cf8ac895f46e77d86439fe2b359a1ff78486ae81c18d089bbd728 + ``` + + During container startup, if you need to create a file in the **rootfs** directory of the container, the image size exceeds 4 KB, and the quota is set to 4 KB, the file creation will fail. + + When Docker starts the container, it creates more mount points than iSulad to mount some directories on the host to the container, such as **/proc/mounts** and **/dev/shm**. If these files do not exist in the image, the creation will fail, therefore, the container fails to be started. + + When a lightweight container uses the default configuration during container startup, there are few mount points. The lightweight container is created only when the directory like **/proc** or **/sys** does not exist. The image **rnd-dockerhub.huawei.com/official/ubuntu-arm64:latest** in the test case contains **/proc** and **/sys**. Therefore, no new file or directory is generated during the container startup. As a result, no error is reported during the lightweight container startup. To verify this process, when the image is replaced with **rnd-dockerhub.huawei.com/official/busybox-aarch64:latest**, an error is reported when the lightweight container is started because **/proc** does not exist in the image. + + ```shell + [root@localhost ~]# isula run -itd --storage-opt size=4k rnd-dockerhub.huawei.com/official/busybox-aarch64:latest + 8e893ab483310350b8caa3b29eca7cd3c94eae55b48bfc82b350b30b17a0aaf4 + Error response from daemon: Start container error: runtime error: 8e893ab483310350b8caa3b29eca7cd3c94eae55b48bfc82b350b30b17a0aaf4:tools/lxc_start.c:main:404 starting container process caused "Failed to setup lxc, + please check the config file." + ``` + +5. Other description: + + When using iSulad with the quota function to switch data drives, ensure that the data drives to be switched are mounted using the **prjquota** option and the mounting mode of the **/var/lib/isulad/storage/overlay2** directory is the same as that of the **/var/lib/isulad** directory. 
+ + > [!NOTE]NOTE + > Before switching the data drive, ensure that the mount point of **/var/lib/isulad/storage/overlay2** is unmounted. + +## Restricting the Number of File Handles in a Container + +### Description + +You can set parameters to limit the number of file handles that can be opened in a container. + +### Usage + +When running the **isula create/run** command, set the **--files-limit** parameter to limit the number of file handles that can be opened in a container. + +### Parameters + +Set the **--files-limit** parameter when running the **isula create/run** command. + + + + + + + + + + + + + + +

+| Parameter | Description | Value Range | Mandatory or Not |
+| --- | --- | --- | --- |
+| --files-limit | Limits the number of file handles that can be opened in a container. | 64-bit integer. The value can be 0 or a negative number, but cannot be greater than 2 to the power of 63 minus 1. The value 0 or a negative number indicates no limit. Because some handles are opened temporarily during container creation, the value cannot be too small; otherwise, the container may not be restricted by the file limit, and if the value is less than the number of opened handles, the cgroup file cannot be written. A value greater than 30 is recommended. | No |
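+
+The limit can be verified from inside the container by reading the files cgroup (a sketch; the container name `flimit` is hypothetical, and the cgroup path assumes cgroup v1 with the files controller, as in the constraint example below):
+
+```shell
+isula run -tid --name flimit --files-limit 1024 busybox sh
+isula exec flimit cat /sys/fs/cgroup/files/files.limit
+```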
+ +### Example + +When running the container, add **--files-limit n**. For example: + +```shell +isula run -ti --files-limit 1024 busybox bash +``` + +### Constraints + +1. If the **--files-limit** parameter is set to a small value, for example, 1, the container may fail to be started. + + ```shell + [root@localhost ~]# isula run -itd --files-limit 1 rnd-dockerhub.huawei.com/official/busybox-aarch64 + 004858d9f9ef429b624f3d20f8ba12acfbc8a15bb121c4036de4e5745932eff4 + Error response from daemon: Start container error: Container is not running:004858d9f9ef429b624f3d20f8ba12acfbc8a15bb121c4036de4e5745932eff4 + ``` + + Docker will be started successfully, and the value of **files.limit cgroup** is **max**. + + ```shell + [root@localhost ~]# docker run -itd --files-limit 1 rnd-dockerhub.huawei.com/official/busybox-aarch64 + ef9694bf4d8e803a1c7de5c17f5d829db409e41a530a245edc2e5367708dbbab + [root@localhost ~]# docker exec -it ef96 cat /sys/fs/cgroup/files/files.limit + max + ``` + + The root cause is that the startup principles of the lxc and runc processes are different. After the lxc process creates the cgroup, the files.limit value is set, and then the PID of the container process is written into the cgroup.procs file of the cgroup. At this time, the process has opened more than one handle. As a result, an error is reported, and the startup fails. After you create a cgroup by running the **runc** command, the PID of the container process is written to the cgroup.procs file of the cgroup, and then the files.limit value is set. Because more than one handle is opened by the process in the cgroup, the file.limit value does not take effect, the kernel does not report any error, and the container is started successfully. + +## Restricting the Number of Processes or Threads that Can Be Created in a Container + +### Description + +You can set parameters to limit the number of processes or threads that can be created in a container. + +### Usage + +When creating or running a container, use the **--pids-limit** parameter to limit the number of processes or threads that can be created in the container. + +### Parameters + +When running the **create/run** command, set the **--pids-limit** parameter. + + + + + + + + + + + + + + +

+| Parameter | Description | Value Range | Mandatory or Not |
+| --- | --- | --- | --- |
+| --pids-limit | Limits the number of processes or threads that can be created in a container. | 64-bit integer. The value can be 0 or a negative number, but cannot be greater than 2 to the power of 63 minus 1. The value 0 or a negative number indicates no limit. | No |
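+
+A sketch that applies the limit and checks the pids cgroup from inside the container (the container name `plimit` is hypothetical; the path assumes cgroup v1):
+
+```shell
+isula run -tid --name plimit --pids-limit 128 busybox sh
+isula exec plimit cat /sys/fs/cgroup/pids/pids.max
+```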
+ +### Example + +When running the container, add **--pids-limit n**. For example: + +```shell +isula run -ti --pids-limit 1024 busybox bash +``` + +### Constraints + +During container creation, some processes are created temporarily. Therefore, the value cannot be too small. Otherwise, the container may fail to be started. It is recommended that the value be greater than 10. + +## Configuring the ulimit Value in a Container + +### Description + +You can use parameters to control the resources for executed programs. + +### Usage + +Set the **--ulimit** parameter when creating or running a container, or configure the parameter on the daemon to control the resources for executed programs in the container. + +### Parameters + +Use either of the following methods to configure ulimit: + +1. When running the **isula create/run** command, use **--ulimit =\[:\]** to control the resources of the executed shell program. + + + + + + + + + + + + + + +

+    | Parameter | Description | Value Range | Mandatory or Not |
+    | --- | --- | --- | --- |
+    | --ulimit | Limits the resources of the executed shell program. | 64-bit integer. The value of the soft limit must be less than or equal to that of the hard limit. If only the soft limit is specified, the hard limit is set equal to the soft limit. Some types of resources do not support negative numbers. For details, see the following table. | No |
+ +2. Use daemon parameters or configuration files. + + For details, see **--default-ulimits** in [Configuration Mode](installation-configuration.md#configuration-mode). + + **--ulimit** can limit the following types of resources: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

+    | Type | Description | Value Range |
+    | --- | --- | --- |
+    | core | limits the core file size (KB) | 64-bit integer, without unit. The value can be 0 or a negative number. The value -1 indicates no limit. Other negative numbers are forcibly converted into a large positive integer. |
+    | cpu | max CPU time (MIN) | Same as **core**. |
+    | data | max data size (KB) | Same as **core**. |
+    | fsize | maximum filesize (KB) | Same as **core**. |
+    | locks | max number of file locks the user can hold | Same as **core**. |
+    | memlock | max locked-in-memory address space (KB) | Same as **core**. |
+    | msgqueue | max memory used by POSIX message queues (bytes) | Same as **core**. |
+    | nice | nice priority | Same as **core**. |
+    | nproc | max number of processes | Same as **core**. |
+    | rss | max resident set size (KB) | Same as **core**. |
+    | rtprio | max realtime priority | Same as **core**. |
+    | rttime | realtime timeout | Same as **core**. |
+    | sigpending | max number of pending signals | Same as **core**. |
+    | stack | max stack size (KB) | Same as **core**. |
+    | nofile | max number of open file descriptors | 64-bit integer, without unit. The value cannot be negative. A negative number is forcibly converted to a large positive number, and "Operation not permitted" is displayed during the setting. |
+ +### Example + +When creating or running a container, add **--ulimit =\[:\]**. For example: + +```shell +isula create/run -tid --ulimit nofile=1024:2048 busybox sh +``` + +### Constraints + +The ulimit cannot be configured in the **daemon.json** and **/etc/sysconfig/iSulad** files \(or the iSulad command line\). Otherwise, an error is reported when iSulad is started. diff --git a/docs/en/docs/container_engine/isula_container_engine/cri.md b/docs/en/docs/container_engine/isula_container_engine/cri.md new file mode 100644 index 0000000000000000000000000000000000000000..49c57b164e02f211c4f9f62f7e14d3e80c87e143 --- /dev/null +++ b/docs/en/docs/container_engine/isula_container_engine/cri.md @@ -0,0 +1,1271 @@ +# CRI API v1alpha2 + +## Description + +CRI API is the container runtime APIs provided by Kubernetes. CRI defines service interfaces for containers and images. iSulad uses CRI API to interconnect with Kubernetes. + +The lifecycle of a container is isolated from that of an image. Therefore, two services are required. CRI API is defined using [Protocol Buffers](https://developers.google.com/protocol-buffers/) and is based on [gRPC](https://grpc.io/). + +Currently, the default CRI API version used by iSulad is v1alpha2. The official API description file is as follows: + +[https://github.com/kubernetes/kubernetes/blob/release-1.14/pkg/kubelet/apis/cri/runtime/v1alpha2/api.proto](https://github.com/kubernetes/kubernetes/blob/release-1.14/pkg/kubelet/apis/cri/runtime/v1alpha2/api.proto), + +iSulad uses the API description file of version 1.14 used by Pass, which is slightly different from the official API. The interfaces in this document prevail. + +> [!NOTE]NOTE +> For the WebSocket streaming service of CRI API, the listening address of the server is 127.0.0.1, and the port number is 10350. The port number can be configured through the `--websocket-server-listening-port` command option or in the **daemon.json** configuration file. + +## Interfaces + +The following tables list the parameters that may be used by the interfaces. Some parameters cannot be configured. + +### Interface Parameters + +- **DNSConfig** + + Specifies the DNS servers and search domains of a sandbox. + + | Member | Description | + | ------------------------ | ------------------------------------------------------------------- | + | repeated string servers | List of DNS servers of the cluster | + | repeated string searches | List of DNS search domains of the cluster | + | repeated string options | List of DNS options. See . | + +- **Protocol** + + Enum values of the protocols. + + | Member | Description | + | ------- | ----------- | + | TCP = 0 | TCP | + | UDP = 1 | UDP | + +- **PortMapping** + + Specifies the port mapping configurations of a sandbox. + + | Member | Description | + | -------------------- | -------------------------------- | + | Protocol protocol | Protocol of the port mapping | + | int32 container_port | Port number within the container | + | int32 host_port | Port number on the host | + | string host_ip | Host IP address | + +- **MountPropagation** + + Enum values for mount propagation. 
+ + | Member | Description | + | --------------------------------- | ------------------------------------------------------------------------------------------------------------ | + | PROPAGATION_PRIVATE = 0 | No mount propagation ("rprivate" in Linux) | + | PROPAGATION_HOST_TO_CONTAINER = 1 | Mounts get propagated from the host to the container ("rslave" in Linux) | + | PROPAGATION_BIDIRECTIONAL = 2 | Mounts get propagated from the host to the container and from the container to the host ("rshared" in Linux) | + +- **Mount** + + Specifies a host volume to mount into a container. (Only files and folders are supported.) + + | Member | Description | + | ---------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------- | + | string container_path | Path in the container | + | string host_path | Path on the host | + | bool readonly | Whether the configuration is read-only in the container. The default value is **false**. | + | bool selinux_relabel | Whether to set the SELinux label (not supported) | + | MountPropagation propagation | Mount propagation configuration. The value can be **0**, **1**, or **2**, corresponding to **rprivate**, **rslave**, or **rshared**. The default value is **0**. | + +- **NamespaceOption** + + | Member | Description | + | ----------------- | ------------------------------------------------ | + | bool host_network | Whether to use the network namespace of the host | + | bool host_pid | Whether to use the PID namespace of the host | + | bool host_ipc | Whether to use the IPC namespace of the host | + +- **Capability** + + Contains information about the capabilities to add or drop. + + | Member | Description | + | --------------------------------- | -------------------- | + | repeated string add_capabilities | Capabilities to add | + | repeated string drop_capabilities | Capabilities to drop | + +- **Int64Value** + + Wrapper of the int64 type. + + | Member | Description | + | ----------- | ------------------ | + | int64 value | Actual int64 value | + +- **UInt64Value** + + Wrapper of the uint64 type. + + | Member | Description | + | ------------ | ------------------- | + | uint64 value | Actual uint64 value | + +- **LinuxSandboxSecurityContext** + + Specifies Linux security options for a sandbox. + + Note that these security options are not applied to containers in the sandbox and may not be applicable to a sandbox without any running process. + + | Member | Description | + | -------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | + | NamespaceOption namespace_options | Options for namespaces of the sandbox | + | SELinuxOption selinux_options | SELinux options (not supported) | + | Int64Value run_as_user | UID to run sandbox processes | + | bool readonly_rootfs | Whether the root file system of the sandbox is read-only | + | repeated int64 supplemental_groups | User group information of process 1 in the sandbox besides the primary group | + | bool privileged | Whether the sandbox can run a privileged container | + | string seccomp_profile_path | Path of the seccomp configuration file. Valid values are:
+  **unconfined**: seccomp is not used. **localhost/**_profile-path_: path of the configuration file installed in the system. _profile-path_: full path of the configuration file. By default, this parameter is not set, which is identical to **unconfined**.|
+
+- **LinuxPodSandboxConfig**
+
+  Sets configurations related to Linux hosts and containers.
+
+  | Member | Description |
+  | --- | --- |
+  | string cgroup_parent | Parent cgroup path of the sandbox. The runtime can convert it to the cgroupfs or systemd semantics as required. (Not configurable) |
+  | LinuxSandboxSecurityContext security_context | Security attributes of the sandbox |
+  | map\<string, string\> sysctls | Linux sysctls configurations of the sandbox |
+
+- **PodSandboxMetadata**
+
+  Stores all necessary information for building the sandbox name. The container runtime is encouraged to expose the metadata in its user interface for better user experience. For example, the runtime can construct a unique sandbox name based on the metadata.
+
+  | Member | Description |
+  | --- | --- |
+  | string name | Sandbox name |
+  | string uid | Sandbox UID |
+  | string namespace | Sandbox namespace |
+  | uint32 attempt | Number of attempts to create the sandbox. The default value is **0**. |
+
+- **PodSandboxConfig**
+
+  Contains all the required and optional fields for creating a sandbox.
+
+  | Member | Description |
+  | --- | --- |
+  | PodSandboxMetadata metadata | Metadata of the sandbox. This information uniquely identifies the sandbox, and the runtime should leverage this to ensure correct operation. The runtime may also use this information to improve user experience, such as by constructing a readable sandbox name. |
+  | string hostname | Host name of the sandbox |
+  | string log_directory | Directory for storing log files of containers in the sandbox |
+  | DNSConfig dns_config | DNS configuration of the sandbox |
+  | repeated PortMapping port_mappings | Port mappings of the sandbox |
+  | map\<string, string\> labels | Key-value pairs that may be used to identify a single sandbox or a series of sandboxes |
+  | map\<string, string\> annotations | Key-value pair holding arbitrary data. The value cannot be modified and can be queried by using **PodSandboxStatus**. |
+  | LinuxPodSandboxConfig linux | Options related to the Linux host |
+
+- **PodSandboxNetworkStatus**
+
+  Describes the network status of the sandbox.
+
+  | Member | Description |
+  | --- | --- |
+  | string ip | IP address of the sandbox |
+  | string name | Name of the network interface in the sandbox |
+  | string network | Name of the additional network |
+
+- **Namespace**
+
+  Stores namespace options.
+
+  | Member | Description |
+  | --- | --- |
+  | NamespaceOption options | Linux namespace options |
+
+- **LinuxPodSandboxStatus**
+
+  Describes the status of the Linux sandbox.
+
+  | Member | Description |
+  | --- | --- |
+  | Namespace namespaces | Sandbox namespace |
+
+- **PodSandboxState**
+
+  Enum values for sandbox states.
+ + | Member | Description | + | -------------------- | ------------------------------ | + | SANDBOX_READY = 0 | Ready state of the sandbox | + | SANDBOX_NOTREADY = 1 | Non-ready state of the sandbox | + +- **PodSandboxStatus** + + Describes the podsandbox status. + + | Member | Description | + | ----------------------------------------- | -------------------------------------------------------------------------------------- | + | string id | Sandbox ID | + | PodSandboxMetadata metadata | Sandbox metadata | + | PodSandboxState state | Sandbox state | + | int64 created_at | Creation timestamps of the sandbox in nanoseconds | + | repeated PodSandboxNetworkStatus networks | Multi-plane network status of the sandbox | + | LinuxPodSandboxStatus linux | Status specific to Linux sandboxes | + | map\ labels | Key-value pairs that may be used to identify a single sandbox or a series of sandboxes | + | map\ annotations | Key-value pair holding arbitrary data. The value cannot be modified by the runtime. | + +- **PodSandboxStateValue** + + Wrapper of **PodSandboxState**. + + | Member | Description | + | --------------------- | ------------- | + | PodSandboxState state | Sandbox state | + +- **PodSandboxFilter** + + Filtering conditions when listing sandboxes. The intersection of multiple conditions is displayed. + + | Member | Description | + | ----------------------------------- | ------------------------------------------------------------------------------------ | + | string id | Sandbox ID | + | PodSandboxStateValue state | Sandbox state | + | map\ label_selector | Sandbox labels. Only full match is supported. Regular expressions are not supported. | + +- **PodSandbox** + + Minimal data that describes a sandbox. + + | Member | Description | + | -------------------------------- | -------------------------------------------------------------------------------------- | + | string id | Sandbox ID | + | PodSandboxMetadata metadata | Sandbox metadata | + | PodSandboxState state | Sandbox state | + | int64 created_at | Creation timestamps of the sandbox in nanoseconds | + | map\ labels | Key-value pairs that may be used to identify a single sandbox or a series of sandboxes | + | map\ annotations | Key-value pair holding arbitrary data. The value cannot be modified by the runtime | + +- **KeyValue** + + Wrapper of a key-value pair. + + | Member | Description | + | ------------ | ----------- | + | string key | Key | + | string value | Value | + +- **SELinuxOption** + + SELinux labels to be applied to the container. + + | Member | Description | + | ------------ | ----------- | + | string user | User | + | string role | Role | + | string type | Type | + | string level | Level | + +- **ContainerMetadata** + + ContainerMetadata contains all necessary information for building the container name. The container runtime is encouraged to expose the metadata in its user interface for better user experience. For example, the runtime can construct a unique container name based on the metadata. + + | Member | Description | + | -------------- | ----------------------------------------------------------------------- | + | string name | Name of a container | + | uint32 attempt | Number of attempts to create the container. The default value is **0**. | + +- **ContainerState** + + Enum values for container states. 
+ + | Member | Description | + | --------------------- | ---------------------------------- | + | CONTAINER_CREATED = 0 | The container is created | + | CONTAINER_RUNNING = 1 | The container is running | + | CONTAINER_EXITED = 2 | The container is in the exit state | + | CONTAINER_UNKNOWN = 3 | The container state is unknown | + +- **ContainerStateValue** + + Wrapper of ContainerState. + + | Member | Description | + | -------------------- | --------------------- | + | ContainerState state | Container state value | + +- **ContainerFilter** + + Filtering conditions when listing containers. The intersection of multiple conditions is displayed. + + | Member | Description | + | ----------------------------------- | -------------------------------------------------------------------------------------- | + | string id | Container ID | + | PodSandboxStateValue state | Container state | + | string pod_sandbox_id | Sandbox ID | + | map\ label_selector | Container labels. Only full match is supported. Regular expressions are not supported. | + +- **LinuxContainerSecurityContext** + + Security configuration that will be applied to a container. + + | Member | Description | + | --------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------- | + | Capability capabilities | Capabilities to add or drop | + | bool privileged | Whether the container is in privileged mode. The default value is **false**. | + | NamespaceOption namespace_options | Namespace options of the container | + | SELinuxOption selinux_options | SELinux context to be optionally applied (**not supported currently**) | + | Int64Value run_as_user | UID to run container processes. Only one of **run_as_user** and **run_as_username** can be specified at a time. **run_as_username** takes effect preferentially. | + | string run_as_username | User name to run container processes. If specified, the user must exist in the container image (that is, in **/etc/passwd** inside the image) and be resolved there by the runtime. Otherwise, the runtime must throw an error.| + | bool readonly_rootfs | Whether the root file system in the container is read-only. The default value is configured in **config.json**. | + | repeated int64 supplemental_groups | List of groups of the first process in the container besides the primary group | + | string apparmor_profile | AppArmor configuration file for the container (**not supported currently**) | + | string seccomp_profile_path | Seccomp configuration file for the container | + | bool no_new_privs | Whether to set the **no_new_privs** flag on the container | + +- **LinuxContainerResources** + + Resource specification for the Linux container. + + | Member | Description | + | --------------------------- | ----------------------------------------------------------------------------- | + | int64 cpu_period | CPU Completely Fair Scheduler (CFS) period. The default value is **0**. | + | int64 cpu_quota | CPU CFS quota. The default value is **0**. | + | int64 cpu_shares | CPU shares (weight relative to other containers). The default value is **0**. | + | int64 memory_limit_in_bytes | Memory limit, in bytes. The default value is **0**. | + | int64 oom_score_adj | oom-killer score. The default value is **0**. | + | string cpuset_cpus | CPU cores to be used by the container. The default value is **""**. | + | string cpuset_mems | Memory nodes to be used by the container. The default value is **""**. 
| + +- **Image** + + Basic information about a container image. + + | Member | Description | + | ---------------------------- | ------------------------------ | + | string id | Image ID | + | repeated string repo_tags | Image tag name (**repo_tags**) | + | repeated string repo_digests | Image digest information | + | uint64 size | Image size | + | Int64Value uid | UID of the default image user | + | string username | Name of the default image user | + +- **ImageSpec** + + Internal data structure that represents an image. Currently, **ImageSpec** wraps only the container image name. + + | Member | Description | + | ------------ | -------------------- | + | string image | Container image name | + +- **StorageIdentifier** + + Unique identifier of a storage device. + + | Member | Description | + | ----------- | ------------------ | + | string uuid | UUID of the device | + +- **FilesystemUsage** + + | Member | Description | + | ---------------------------- | ------------------------------------------------ | + | int64 timestamp | Timestamp at which the information was collected | + | StorageIdentifier storage_id | UUID of the file system that stores the image | + | UInt64Value used_bytes | Space size used for storing image metadata | + | UInt64Value inodes_used | Number of inodes for storing image metadata | + +- **AuthConfig** + + | Member | Description | + | --------------------- | --------------------------------------------------------------------------------------------- | + | string username | User name used for downloading images | + | string password | Password used for downloading images | + | string auth | Base64-encoded authentication information used for downloading images | + | string server_address | Address of the server for downloaded images (not supported currently) | + | string identity_token | Token information used for authentication with the image repository (not supported currently) | + | string registry_token | Token information used for interaction with the image repository (not supported currently) | + +- **Container** + + Container description information, such as the ID and state. + + | Member | Description | + | -------------------------------- | ----------------------------------------------------------------------------------------- | + | string id | Container ID | + | string pod_sandbox_id | ID of the sandbox to which the container belongs | + | ContainerMetadata metadata | Container metadata | + | ImageSpec image | Image specifications | + | string image_ref | Reference to the image used by the container. For most runtimes, this is an image ID. | + | ContainerState state | Container state | + | int64 created_at | Creation timestamps of the container in nanoseconds | + | map\ labels | Key-value pairs that may be used to identify a single container or a series of containers | + | map\ annotations | Key-value pair holding arbitrary data. The value cannot be modified by the runtime | + +- **ContainerStatus** + + Container status information. 
+ + | Member | Description | + | -------------------------------- | -------------------------------------------------------------------------------------------------------- | + | string id | Container ID | + | ContainerMetadata metadata | Container metadata | + | ContainerState state | Container state | + | int64 created_at | Creation timestamps of the container in nanoseconds | + | int64 started_at | Startup timestamps of the container in nanoseconds | + | int64 finished_at | Exit timestamps of the container in nanoseconds | + | int32 exit_code | Container exit code | + | ImageSpec image | Image specifications | + | string image_ref | Reference to the image used by the container. For most runtimes, this is an image ID. | + | string reason | Brief explanation of why the container is in its current state | + | string message | Human-readable message explaining why the container is in its current state | + | map\ labels | Key-value pairs that may be used to identify a single container or a series of containers | + | map\ annotations | Key-value pair holding arbitrary data. The value cannot be modified by the runtime. | + | repeated Mount mounts | Container mount point information | + | string log_path | Container log file path. The file is in the **log_directory** folder configured in **PodSandboxConfig**. | + +- **ContainerStatsFilter** + + Filtering conditions when listing container states. The intersection of multiple conditions is displayed. + + | Member | Description | + | ----------------------------------- | -------------------------------------------------------------------------------------- | + | string id | Container ID | + | string pod_sandbox_id | Sandbox ID | + | map\ label_selector | Container labels. Only full match is supported. Regular expressions are not supported. | + +- **ContainerStats** + + Filtering conditions when listing container states. The intersection of multiple conditions is displayed. + + | Member | Description | + | ------------------------------ | --------------------------- | + | ContainerAttributes attributes | Container Information | + | CpuUsage cpu | CPU usage | + | MemoryUsage memory | Memory usage | + | FilesystemUsage writable_layer | Usage of the writable layer | + +- **ContainerAttributes** + + Basic information about the container. + + | Member | Description | + | ------------------------------- | ----------------------------------------------------------------------------------------- | + | string id | Container ID | + | ContainerMetadata metadata | Container metadata | + | map\ labels | Key-value pairs that may be used to identify a single container or a series of containers | + | map\ annotations | Key-value pair holding arbitrary data. The value cannot be modified by the runtime. | + +- **CpuUsage** + + Container CPU usage. + + | Member | Description | + | ----------------------------------- | ---------------------------------- | + | int64 timestamp | Timestamp | + | UInt64Value usage_core_nano_seconds | CPU usage duration, in nanoseconds | + +- **MemoryUsage** + + Container memory usage. + + | Member | Description | + | ----------------------------- | ------------ | + | int64 timestamp | Timestamp | + | UInt64Value working_set_bytes | Memory usage | + +- **FilesystemUsage** + + Usage of the writable layer of the container. 
+
+    | Member                       | Description                                                  |
+    | ---------------------------- | ------------------------------------------------------------ |
+    | int64 timestamp              | Timestamp                                                    |
+    | StorageIdentifier storage_id | Writable layer directory                                     |
+    | UInt64Value used_bytes       | Number of bytes occupied by the image at the writable layer  |
+    | UInt64Value inodes_used      | Number of inodes occupied by the image at the writable layer |
+
+- **Device**
+
+    Host device to mount into a container.
+
+    | Member                | Description |
+    | --------------------- | ----------- |
+    | string container_path | Mount path within the container |
+    | string host_path      | Mount path on the host |
+    | string permissions    | cgroup permissions of the device (**r** allows the container to read from the specified device; **w** allows the container to write to the specified device; **m** allows the container to create device files that do not yet exist). |
+
+- **LinuxContainerConfig**
+
+    Configuration specific to Linux containers.
+
+    | Member                                         | Description                            |
+    | ---------------------------------------------- | -------------------------------------- |
+    | LinuxContainerResources resources              | Container resource specifications      |
+    | LinuxContainerSecurityContext security_context | Linux container security configuration |
+
+- **ContainerConfig**
+
+    Required and optional fields for creating a container.
+
+    | Member                            | Description |
+    | --------------------------------- | ----------- |
+    | ContainerMetadata metadata        | Container metadata. This information uniquely identifies the container, and the runtime should leverage this to ensure correct operation. The runtime may also use this information to improve user experience, such as by constructing a readable container name. (**Required**) |
+    | ImageSpec image                   | Image used by the container. (**Required**) |
+    | repeated string command           | Command to be executed. The default value is **"/bin/sh"**. |
+    | repeated string args              | Arguments of the command to be executed |
+    | string working_dir                | Current working directory of the command to be executed |
+    | repeated KeyValue envs            | Environment variables to set in the container |
+    | repeated Mount mounts             | Mount points in the container |
+    | repeated Device devices           | Devices to be mapped in the container |
+    | map\<string, string\> labels      | Key-value pairs that may be used to index and select individual resources |
+    | map\<string, string\> annotations | Unstructured key-value map that may be used to store and retrieve arbitrary metadata |
+    | string log_path                   | Path relative to **PodSandboxConfig.LogDirectory** for the container to store its logs (STDOUT and STDERR) on the host |
+    | bool stdin                        | Whether to enable STDIN of the container |
+    | bool stdin_once                   | Whether to immediately disconnect all data streams connected to STDIN when a data stream connected to STDIN is disconnected (**not supported currently**) |
+    | bool tty                          | Whether to use a pseudo terminal to connect to STDIO of the container |
+    | LinuxContainerConfig linux        | Configuration specific to Linux containers |
+
+- **NetworkConfig**
+
+    Runtime network configuration.
+
+    | Member          | Description               |
+    | --------------- | ------------------------- |
+    | string pod_cidr | CIDR for pod IP addresses |
+
+- **RuntimeConfig**
+
+    Runtime configuration information.
+
+    | Member                       | Description                   |
+    | ---------------------------- | ----------------------------- |
+    | NetworkConfig network_config | Runtime network configuration |
+
+- **RuntimeCondition**
+
+    Runtime condition information.
+
+    | Member         | Description                                                                    |
+    | -------------- | ------------------------------------------------------------------------------ |
+    | string type    | Runtime condition type                                                         |
+    | bool status    | Runtime status                                                                 |
+    | string reason  | Brief description of the reason for the runtime condition change               |
+    | string message | Human-readable message describing the reason for the runtime condition change  |
+
+- **RuntimeStatus**
+
+    Runtime status.
+
+    | Member                               | Description                |
+    | ------------------------------------ | -------------------------- |
+    | repeated RuntimeCondition conditions | Current runtime conditions |
+
+### Runtime Service
+
+The runtime service contains interfaces for operating pods and containers, and interfaces for querying the configuration and status of the runtime service.
+
+#### RunPodSandbox
+
+#### Interface Prototype
+
+```protobuf
+rpc RunPodSandbox(RunPodSandboxRequest) returns (RunPodSandboxResponse) {}
+```
+
+#### Interface Description
+
+Creates and starts a pod sandbox. The sandbox is in the ready state on success.
+
+#### Precautions
+
+1. The default image for starting the sandbox is **rnd-dockerhub.huawei.com/library/pause-$\{machine\}:3.0**, where **$\{machine\}** indicates the architecture. On x86\_64, the value of **machine** is **amd64**; on ARM64, the value of **machine** is **aarch64**. Currently, only the **amd64** and **aarch64** images can be downloaded from the rnd-dockerhub repository. If the images do not exist on the host, ensure that the host can download them from the rnd-dockerhub repository.
+2. The sandbox container name is generated from the fields in **PodSandboxMetadata**, joined by underscores (\_). Therefore, the data in metadata cannot contain underscores. Otherwise, the sandbox runs successfully, but the **ListPodSandbox** interface cannot query it.
+
+#### Parameter
+
+| Member                  | Description                                                                            |
+| ----------------------- | --------------------------------------------------------------------------------------- |
+| PodSandboxConfig config | Sandbox configuration                                                                  |
+| string runtime_handler  | Runtime to use for the sandbox. Currently, **lcr** and **kata-runtime** are supported. |
+
+#### Returns
+
+| Return                | Description                                     |
+| --------------------- | ------------------------------------------------ |
+| string pod_sandbox_id | ID of the started sandbox, returned on success. |
+
+#### StopPodSandbox
+
+#### Interface Prototype
+
+```protobuf
+rpc StopPodSandbox(StopPodSandboxRequest) returns (StopPodSandboxResponse) {}
+```
+
+#### Interface Description
+
+Stops the pod sandbox, stops the sandbox container, and reclaims the network resources (such as IP addresses) allocated to the sandbox. If any running container belongs to the sandbox, the container must be forcibly terminated.
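+
+For a quick manual check, this interface can be exercised from the command line with a CRI client such as crictl. The sketch below is illustrative only; it assumes crictl is installed, that iSulad listens on its default socket, and the sandbox ID is a placeholder:
+
+```shell
+# Point crictl at the iSulad CRI socket (assumed default path).
+export CONTAINER_RUNTIME_ENDPOINT=unix:///var/run/isulad.sock
+
+# Stop a running pod sandbox: its containers are terminated and the
+# network resources (such as the pod IP) allocated to it are reclaimed.
+crictl stopp <pod-sandbox-id>
+```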
+
+#### Parameter
+
+| Member                | Description |
+| --------------------- | ----------- |
+| string pod_sandbox_id | Sandbox ID  |
+
+#### Returns
+
+| Return | Description |
+| ------ | ----------- |
+| None   | None        |
+
+#### RemovePodSandbox
+
+#### Interface Prototype
+
+```text
+rpc RemovePodSandbox(RemovePodSandboxRequest) returns (RemovePodSandboxResponse) {}
+```
+
+#### Interface Description
+
+Removes a sandbox. If there are any running containers in the sandbox, they must be forcibly terminated and removed. This interface must not return an error if the sandbox has already been removed.
+
+#### Precautions
+
+1. When a sandbox is deleted, its network resources are not deleted with it. Before deleting the pod, you must call **StopPodSandbox** to remove the network resources. Ensure that **StopPodSandbox** is called at least once before deleting the sandbox.
+2. If a container in a sandbox fails to be deleted when the sandbox is deleted, the sandbox is deleted but the container remains. In this case, you need to manually delete the residual container.
+
+#### Parameter
+
+| Member                | Description |
+| --------------------- | ----------- |
+| string pod_sandbox_id | Sandbox ID  |
+
+#### Returns
+
+| Return | Description |
+| ------ | ----------- |
+| None   | None        |
+
+#### PodSandboxStatus
+
+#### Interface Prototype
+
+```text
+rpc PodSandboxStatus(PodSandboxStatusRequest) returns (PodSandboxStatusResponse) {}
+```
+
+#### Interface Description
+
+Queries the status of the sandbox. If the sandbox does not exist, this interface returns an error.
+
+#### Parameter
+
+| Member                | Description                                                                        |
+| --------------------- | ----------------------------------------------------------------------------------- |
+| string pod_sandbox_id | Sandbox ID                                                                         |
+| bool verbose          | Whether to return extra information about the sandbox (not configurable currently) |
+
+#### Returns
+
+| Return                     | Description |
+| -------------------------- | ----------- |
+| PodSandboxStatus status    | Sandbox status information |
+| map\<string, string\> info | Extra information about the sandbox. The **key** can be an arbitrary string, and the **value** is in JSON format. **info** can include any debug information. When **verbose** is set to **true**, **info** cannot be empty (not configurable currently). |
+
+#### ListPodSandbox
+
+#### Interface Prototype
+
+```text
+rpc ListPodSandbox(ListPodSandboxRequest) returns (ListPodSandboxResponse) {}
+```
+
+#### Interface Description
+
+Returns sandbox information. Conditional filtering is supported.
+
+#### Parameter
+
+| Member                  | Description                      |
+| ----------------------- | -------------------------------- |
+| PodSandboxFilter filter | Conditional filtering parameters |
+
+#### Returns
+
+| Return                    | Description |
+| ------------------------- | ----------- |
+| repeated PodSandbox items | Sandboxes   |
+
+#### CreateContainer
+
+#### Interface Prototype
+
+```text
+rpc CreateContainer(CreateContainerRequest) returns (CreateContainerResponse) {}
+```
+
+#### Interface Description
+
+Creates a container in a PodSandbox.
+
+#### Precautions
+
+- **sandbox\_config** in **CreateContainerRequest** is the same as the configuration passed to **RunPodSandboxRequest** to create the PodSandbox. It is passed again for reference. **PodSandboxConfig** is immutable and remains unchanged throughout the lifecycle of a pod.
+- The container name is generated from the fields in **ContainerMetadata**, joined by underscores (\_). Therefore, the data in metadata cannot contain underscores. Otherwise, the container runs successfully, but the **ListContainers** interface cannot query it.
+- **CreateContainerRequest** does not contain the **runtime\_handler** field. The runtime type of the created container is the same as that of the corresponding sandbox.
+
+#### Parameter
+
+| Member                          | Description                                               |
+| ------------------------------- | ---------------------------------------------------------- |
+| string pod_sandbox_id           | ID of the PodSandbox where the container is to be created |
+| ContainerConfig config          | Container configuration information                       |
+| PodSandboxConfig sandbox_config | PodSandbox configuration information                      |
+
+#### Supplementary Information
+
+The unstructured key-value map in the container configuration may be used to store and retrieve arbitrary metadata. Because CRI does not provide dedicated parameters for some configurations, they can be passed through this field.
+
+- Customization
+
+    | Custom Key:Value        | Description                                                                         |
+    | ----------------------- | ------------------------------------------------------------------------------------ |
+    | cgroup.pids.max:int64_t | Limits the number of processes/threads in a container. (Set **-1** for unlimited.) |
+
+#### Returns
+
+| Return              | Description                 |
+| ------------------- | --------------------------- |
+| string container_id | ID of the created container |
+
+#### StartContainer
+
+#### Interface Prototype
+
+```text
+rpc StartContainer(StartContainerRequest) returns (StartContainerResponse) {}
+```
+
+#### Interface Description
+
+Starts a container.
+
+#### Parameter
+
+| Member              | Description  |
+| ------------------- | ------------ |
+| string container_id | Container ID |
+
+#### Returns
+
+| Return | Description |
+| ------ | ----------- |
+| None   | None        |
+
+#### StopContainer
+
+#### Interface Prototype
+
+```text
+rpc StopContainer(StopContainerRequest) returns (StopContainerResponse) {}
+```
+
+#### Interface Description
+
+Stops a running container. The graceful stop timeout can be configured. If the container has already been stopped, no error is returned.
+
+#### Parameter
+
+| Member              | Description |
+| ------------------- | ----------- |
+| string container_id | Container ID |
+| int64 timeout       | Waiting time before a container is forcibly stopped. The default value is **0**, indicating that the container is forcibly stopped immediately. |
+
+#### Returns
+
+None
+
+#### RemoveContainer
+
+#### Interface Prototype
+
+```text
+rpc RemoveContainer(RemoveContainerRequest) returns (RemoveContainerResponse) {}
+```
+
+#### Interface Description
+
+Deletes a container. If the container is running, it must be forcibly stopped. If the container has already been deleted, no error is returned.
+
+#### Parameter
+
+| Member              | Description  |
+| ------------------- | ------------ |
+| string container_id | Container ID |
+
+#### Returns
+
+None
+
+#### ListContainers
+
+#### Interface Prototype
+
+```text
+rpc ListContainers(ListContainersRequest) returns (ListContainersResponse) {}
+```
+
+#### Interface Description
+
+Returns container information. Conditional filtering is supported.
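+
+As a hedged usage sketch (assuming crictl is configured for the iSulad CRI endpoint; the label is a made-up example), the interface maps to the following commands:
+
+```shell
+# List all containers known to the runtime, including stopped ones.
+crictl ps -a
+
+# Apply conditional filtering, mirroring ContainerFilter: restrict the
+# listing to one sandbox and to containers carrying a given label.
+crictl ps --pod <pod-sandbox-id> --label app=nginx
+```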
+
+#### Parameter
+
+| Member                 | Description                      |
+| ---------------------- | -------------------------------- |
+| ContainerFilter filter | Conditional filtering parameters |
+
+#### Returns
+
+| Return                        | Description |
+| ----------------------------- | ----------- |
+| repeated Container containers | Containers  |
+
+#### ContainerStatus
+
+#### Interface Prototype
+
+```text
+rpc ContainerStatus(ContainerStatusRequest) returns (ContainerStatusResponse) {}
+```
+
+#### Interface Description
+
+Returns container status information. If the container does not exist, an error is returned.
+
+#### Parameter
+
+| Member              | Description                                                                                |
+| ------------------- | -------------------------------------------------------------------------------------------- |
+| string container_id | Container ID                                                                               |
+| bool verbose        | Whether to display additional information about the container (not configurable currently) |
+
+#### Returns
+
+| Return                     | Description |
+| -------------------------- | ----------- |
+| ContainerStatus status     | Container status information |
+| map\<string, string\> info | Extra information about the container. The **key** can be an arbitrary string, and the **value** is in JSON format. **info** can include any debug information. When **verbose** is set to **true**, **info** cannot be empty (not configurable currently). |
+
+#### UpdateContainerResources
+
+#### Interface Prototype
+
+```text
+rpc UpdateContainerResources(UpdateContainerResourcesRequest) returns (UpdateContainerResourcesResponse) {}
+```
+
+#### Interface Description
+
+Updates container resource configurations.
+
+#### Precautions
+
+- This interface is used exclusively to update the resource configuration of a container, not a pod.
+- Currently, the **oom\_score\_adj** configuration of containers cannot be updated.
+
+#### Parameter
+
+| Member                        | Description                              |
+| ----------------------------- | ----------------------------------------- |
+| string container_id           | Container ID                             |
+| LinuxContainerResources linux | Linux resource configuration information |
+
+#### Returns
+
+None
+
+#### ExecSync
+
+#### Interface Prototype
+
+```text
+rpc ExecSync(ExecSyncRequest) returns (ExecSyncResponse) {}
+```
+
+#### Interface Description
+
+Runs a command synchronously in a container and communicates using gRPC.
+
+#### Precautions
+
+This interface runs a single command and cannot open a terminal to interact with the container.
+
+#### Parameter
+
+| Member              | Description |
+| ------------------- | ----------- |
+| string container_id | Container ID |
+| repeated string cmd | Command to be executed |
+| int64 timeout       | Timeout period, in seconds, after which the command is forcibly terminated. The default value is **0**, indicating that there is no timeout limit (**not supported currently**). |
+
+#### Returns
+
+| Return          | Description                                                                          |
+| --------------- | -------------------------------------------------------------------------------------- |
+| bytes stdout    | Captures the standard output of the command                                          |
+| bytes stderr    | Captures the standard error output of the command                                    |
+| int32 exit_code | Exit code the command finished with. The default value is **0**, indicating success. |
+
+#### Exec
+
+#### Interface Prototype
+
+```text
+rpc Exec(ExecRequest) returns (ExecResponse) {}
+```
+
+#### Interface Description
+
+Runs a command in the container, obtains a URL from the CRI server using gRPC, and establishes a persistent connection with the WebSocket server based on the obtained URL to interact with the container.
+
+#### Precautions
+
+This interface runs a single command and can open a terminal to interact with the container. At least one of **stdin**, **stdout**, and **stderr** must be true. If **tty** is true, **stderr** must be false, because multiplexing is not supported; in that case, the outputs of **stdout** and **stderr** are combined into a single stream.
+
+#### Parameter
+
+| Member              | Description                             |
+| ------------------- | ---------------------------------------- |
+| string container_id | Container ID                            |
+| repeated string cmd | Command to be executed                  |
+| bool tty            | Whether to run the command in a TTY     |
+| bool stdin          | Whether to stream standard input        |
+| bool stdout         | Whether to stream standard output       |
+| bool stderr         | Whether to stream standard error output |
+
+#### Returns
+
+| Return     | Description                                      |
+| ---------- | ------------------------------------------------ |
+| string url | Fully qualified URL of the exec streaming server |
+
+#### Attach
+
+#### Interface Prototype
+
+```text
+rpc Attach(AttachRequest) returns (AttachResponse) {}
+```
+
+#### Interface Description
+
+Takes over process 1 of the container, obtains a URL from the CRI server using gRPC, and establishes a persistent connection with the WebSocket server based on the obtained URL to interact with the container.
+
+#### Parameter
+
+| Member              | Description                             |
+| ------------------- | ---------------------------------------- |
+| string container_id | Container ID                            |
+| bool tty            | Whether to run the command in a TTY     |
+| bool stdin          | Whether to stream standard input        |
+| bool stdout         | Whether to stream standard output       |
+| bool stderr         | Whether to stream standard error output |
+
+#### Returns
+
+| Return     | Description                                        |
+| ---------- | -------------------------------------------------- |
+| string url | Fully qualified URL of the attach streaming server |
+
+#### ContainerStats
+
+#### Interface Prototype
+
+```text
+rpc ContainerStats(ContainerStatsRequest) returns (ContainerStatsResponse) {}
+```
+
+#### Interface Description
+
+Returns information about the resources occupied by a single container. Only containers whose runtime type is lcr are supported.
+
+#### Parameter
+
+| Member              | Description  |
+| ------------------- | ------------ |
+| string container_id | Container ID |
+
+#### Returns
+
+| Return               | Description                                                                                                                         |
+| -------------------- | -------------------------------------------------------------------------------------------------------------------------------------- |
+| ContainerStats stats | Container information. Information about drives and inodes can be returned only for containers started using images in OCI format. |
+
+#### ListContainerStats
+
+#### Interface Prototype
+
+```text
+rpc ListContainerStats(ListContainerStatsRequest) returns (ListContainerStatsResponse) {}
+```
+
+#### Interface Description
+
+Returns information about resources occupied by multiple containers. Conditional filtering is supported.
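+
+A minimal sketch of querying these statistics from the command line (assuming a crictl client pointed at the iSulad socket):
+
+```shell
+# Resource statistics for all running containers (ListContainerStats).
+crictl stats
+
+# Restrict the query to a single container, mirroring ContainerStatsFilter.
+crictl stats <container-id>
+```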
+
+#### Parameter
+
+| Member                      | Description                      |
+| --------------------------- | -------------------------------- |
+| ContainerStatsFilter filter | Conditional filtering parameters |
+
+#### Returns
+
+| Return                        | Description                                                                                                                                 |
+| ----------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------- |
+| repeated ContainerStats stats | List of container information. Information about drives and inodes can be returned only for containers started using images in OCI format. |
+
+#### UpdateRuntimeConfig
+
+#### Interface Prototype
+
+```text
+rpc UpdateRuntimeConfig(UpdateRuntimeConfigRequest) returns (UpdateRuntimeConfigResponse);
+```
+
+#### Interface Description
+
+Provides the standard CRI interface for updating the pod CIDR of the network plugin. Currently, CNI network plugins do not need to update the pod CIDR. Therefore, this interface only records access logs.
+
+#### Precautions
+
+This interface does not modify the system management information, but only records logs.
+
+#### Parameter
+
+| Member                       | Description                                  |
+| ---------------------------- | --------------------------------------------- |
+| RuntimeConfig runtime_config | Information to be configured for the runtime |
+
+#### Returns
+
+None
+
+#### Status
+
+#### Interface Prototype
+
+```text
+rpc Status(StatusRequest) returns (StatusResponse) {};
+```
+
+#### Interface Description
+
+Obtains the network status of the runtime and pod. Obtaining the network status triggers an update of the network configuration.
+
+#### Precautions
+
+If the network configuration fails to be updated, the original configuration is not affected. The original configuration is overwritten only when the update succeeds.
+
+#### Parameter
+
+| Member       | Description                                                                  |
+| ------------ | ------------------------------------------------------------------------------ |
+| bool verbose | Whether to display additional runtime information (not supported currently) |
+
+#### Returns
+
+| Return                     | Description |
+| -------------------------- | ----------- |
+| RuntimeStatus status       | Runtime status |
+| map\<string, string\> info | Additional runtime information. The **key** of **info** can be an arbitrary string, and the **value** is in JSON format and can contain any debug information. Additional information is displayed only when **verbose** is set to **true**. |
+
+### Image Service
+
+Provides gRPC APIs for pulling, viewing, and removing images from the image repository.
+
+#### ListImages
+
+#### Interface Prototype
+
+```text
+rpc ListImages(ListImagesRequest) returns (ListImagesResponse) {}
+```
+
+#### Interface Description
+
+Lists information about existing images.
+
+#### Precautions
+
+This interface is a unified interface. Images of embedded format can be queried using **cri images**. However, because embedded images do not comply with the OCI standard, the query result has the following restrictions:
+
+- The displayed image ID is the **digest** of the image **config**, because embedded images do not have image IDs.
+- The image **digest** cannot be displayed, because embedded images have only the **digest** of their **config**, not a **digest** of the images themselves, and that **digest** does not comply with OCI specifications.
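+
+A hedged sketch of listing images through the CRI image service (assuming crictl; the flags shown are standard crictl options, not iSulad-specific):
+
+```shell
+# List images known to the image service; for embedded-format images the
+# displayed image ID is the digest of the image config, as noted above.
+crictl images
+
+# Also print the digest column where one is available.
+crictl images --digests
+```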
+
+#### Parameter
+
+| Member           | Description                   |
+| ---------------- | ----------------------------- |
+| ImageSpec filter | Name of images to be filtered |
+
+#### Returns
+
+| Return                | Description    |
+| --------------------- | -------------- |
+| repeated Image images | List of images |
+
+#### ImageStatus
+
+#### Interface Prototype
+
+```text
+rpc ImageStatus(ImageStatusRequest) returns (ImageStatusResponse) {}
+```
+
+#### Interface Description
+
+Queries the details about a specified image.
+
+#### Precautions
+
+1. This interface is used to query information about a specified image. If the image does not exist, **ImageStatusResponse** is returned, in which **Image** is **nil**.
+2. This interface is a unified interface. Images of embedded format cannot be queried because they do not comply with the OCI specification and lack some fields.
+
+#### Parameter
+
+| Member          | Description                                                                                                |
+| --------------- | ------------------------------------------------------------------------------------------------------------ |
+| ImageSpec image | Image name                                                                                                 |
+| bool verbose    | Queries extra information. This parameter is not supported currently and no extra information is returned. |
+
+#### Returns
+
+| Return                     | Description                                                                                              |
+| -------------------------- | ------------------------------------------------------------------------------------------------------------ |
+| Image image                | Image information                                                                                        |
+| map\<string, string\> info | Extra image information. This parameter is not supported currently and no extra information is returned. |
+
+#### PullImage
+
+#### Interface Prototype
+
+```text
+rpc PullImage(PullImageRequest) returns (PullImageResponse) {}
+```
+
+#### Interface Description
+
+Downloads an image.
+
+#### Precautions
+
+You can download public images, or download private images using a username, password, and authentication information. The **server_address**, **identity_token**, and **registry_token** fields in **AuthConfig** are not supported.
+
+#### Parameter
+
+| Member                          | Description                                                             |
+| ------------------------------- | ------------------------------------------------------------------------- |
+| ImageSpec image                 | Name of the image to download                                           |
+| AuthConfig auth                 | Authentication information for downloading a private image              |
+| PodSandboxConfig sandbox_config | Pod context in which the image is downloaded (not supported currently)  |
+
+#### Returns
+
+| Return           | Description                            |
+| ---------------- | --------------------------------------- |
+| string image_ref | Information about the downloaded image |
+
+#### RemoveImage
+
+#### Interface Prototype
+
+```text
+rpc RemoveImage(RemoveImageRequest) returns (RemoveImageResponse) {}
+```
+
+#### Interface Description
+
+Deletes a specified image.
+
+#### Precautions
+
+This interface is a unified interface. Images of embedded format cannot be deleted based on the image ID because they do not comply with the OCI specification and lack some fields.
+
+#### Parameter
+
+| Member          | Description                           |
+| --------------- | -------------------------------------- |
+| ImageSpec image | Name or ID of the image to be deleted |
+
+#### Returns
+
+None
+
+#### ImageFsInfo
+
+#### Interface Prototype
+
+```text
+rpc ImageFsInfo(ImageFsInfoRequest) returns (ImageFsInfoResponse) {}
+```
+
+#### Interface Description
+
+Queries information about the file systems of an image.
+
+#### Precautions
+
+The queried information is the file system information in the image metadata.
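+
+For reference, a sketch of invoking this interface (assuming crictl; output formatting varies by client version):
+
+```shell
+# Query the file system usage recorded in the image metadata (ImageFsInfo).
+crictl imagefsinfo
+```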
+
+#### Parameter
+
+None
+
+#### Returns
+
+| Return                                     | Description                   |
+| ------------------------------------------ | ----------------------------- |
+| repeated FilesystemUsage image_filesystems | Image file system information |
+
+### Constraints
+
+1. If **log_directory** is configured in **PodSandboxConfig** when a sandbox is created, **log_path** must be specified in **ContainerConfig** when a container of the sandbox is created. Otherwise, the container may fail to be started or even deleted through the CRI API.
+
+    The actual **LOGPATH** of the container is **log_directory/log_path**. If **log_path** is not configured, the final **LOGPATH** changes to **log_directory**.
+
+    - If the path does not exist, iSulad creates a soft link pointing to the final path of the container logs when starting the container, and **log_directory** becomes a soft link. In this case, there are two situations:
+
+        1. If **log_path** is not configured for the other containers in the sandbox, when another container is started, **log_directory** is deleted and re-pointed to **log_path** of the newly started container. As a result, the logs of the previously started container point to the logs of the container started later.
+        2. If **log_path** is configured for the other containers in the sandbox, **LOGPATH** of such a container is **log_directory/log_path**. Because **log_directory** is a soft link, if **log_directory/log_path** is used as the soft link target to point to the actual log path of the container, the container creation fails.
+    - If the path exists, iSulad attempts to delete it (non-recursively) when starting the container. If the path is a folder that contains content, the deletion fails. As a result, the soft link fails to be created and the container fails to be started. The same symptom occurs when the container is deleted, so the container deletion fails.
+2. If **log_directory** is configured in **PodSandboxConfig** when a sandbox is created and **log_path** is configured in **ContainerConfig** when a container is created, the final **LOGPATH** is **log_directory/log_path**. iSulad does not create **LOGPATH** recursively. Therefore, you must ensure that **dirname(LOGPATH)**, that is, the parent directory of the final log directory, exists.
+3. If **log_directory** is configured in **PodSandboxConfig** when a sandbox is created, and the same **log_path** is specified in **ContainerConfig** when two or more containers are created, or containers in different sandboxes point to the same **LOGPATH**, then, when the containers are started successfully, the log path of the container started later overwrites that of the container started earlier.
+4. If the image content in the remote image repository changes and the CRI image pulling interface is used to download the image again, the image name and tag of the local original image (if it exists) change to `none`.
+
+    Example:
+
+    Local image:
+
+    ```text
+    IMAGE                                        TAG      IMAGE ID        SIZE
+    rnd-dockerhub.huawei.com/pproxyisulad/test   latest   99e59f495ffaa   753kB
+    ```
+
+    After the **rnd-dockerhub.huawei.com/pproxyisulad/test:latest** image in the remote repository is updated and downloaded again:
+
+    ```text
+    IMAGE                                        TAG      IMAGE ID        SIZE
+    <none>                                       <none>   99e59f495ffaa   753kB
+    rnd-dockerhub.huawei.com/pproxyisulad/test   latest   d8233ab899d41   1.42MB
+    ```
+
+    Run the `isula images` command. **REF** is displayed as **-**.
+
+    ```text
+    REF                                                 IMAGE ID        CREATED               SIZE
+    rnd-dockerhub.huawei.com/pproxyisulad/test:latest   d8233ab899d41   2019-02-14 19:19:37   1.42MB
+    -                                                   99e59f495ffaa   2016-05-04 02:26:41   753kB
+    ```
+
+5. The exec and attach interfaces of the iSulad CRI API are implemented using WebSocket. Clients interact with iSulad using the same protocol. These interfaces are intended only for basic command interaction; do not use them to transfer large amounts of data or files. If the user side does not process the data in a timely manner, data may be lost. In addition, do not use the exec or attach interface to transfer binary data or files.
+6. The iSulad CRI API exec/attach depends on libwebsockets (LWS). It is recommended that the streaming API be used only for persistent connection interaction and not in high-concurrency scenarios, because the connection may fail due to insufficient host resources. It is recommended that the number of concurrent connections be less than or equal to 100.
diff --git a/docs/en/docs/container_engine/isula_container_engine/cri_2.md b/docs/en/docs/container_engine/isula_container_engine/cri_2.md
new file mode 100644
index 0000000000000000000000000000000000000000..419170b47659cd42dcba25d4903848af5daf88c7
--- /dev/null
+++ b/docs/en/docs/container_engine/isula_container_engine/cri_2.md
@@ -0,0 +1,205 @@
+# CRI API v1
+
+## Overview
+
+Container Runtime Interface (CRI) is the main protocol used by kubelet to communicate with container engines.
+Kubernetes 1.25 and earlier versions support CRI v1alpha2 and CRI v1. Kubernetes 1.26 and later versions support only CRI v1.
+
+iSulad supports both [CRI v1alpha2](cri.md) and CRI v1.
+For CRI v1, iSulad supports the functions described in [CRI v1alpha2](cri.md) plus the new interfaces and fields defined in CRI v1.
+
+Currently, iSulad supports CRI v1 1.29. The API described on the official website is as follows:
+
+[https://github.com/kubernetes/cri-api/blob/kubernetes-1.29.0/pkg/apis/runtime/v1/api.proto](https://github.com/kubernetes/cri-api/blob/kubernetes-1.29.0/pkg/apis/runtime/v1/api.proto)
+
+The API description file used by iSulad is slightly different from the official API. The interfaces in this document prevail.
+
+## New Fields of CRI v1
+
+- **CgroupDriver**
+
+    Enum values for cgroup drivers.
+
+    | Member       | Description           |
+    | ------------ | --------------------- |
+    | SYSTEMD = 0  | systemd-cgroup driver |
+    | CGROUPFS = 1 | cgroupfs driver       |
+
+- **LinuxRuntimeConfiguration**
+
+    cgroup driver used by the container engine.
+
+    | Member                     | Description                                                   |
+    | -------------------------- | -------------------------------------------------------------- |
+    | CgroupDriver cgroup_driver | Enum value for the cgroup driver used by the container engine |
+
+- **ContainerEventType**
+
+    Enum values for container event types.
+
+    | Member                      | Description              |
+    | --------------------------- | ------------------------ |
+    | CONTAINER_CREATED_EVENT = 0 | Container creation event |
+    | CONTAINER_STARTED_EVENT = 1 | Container startup event  |
+    | CONTAINER_STOPPED_EVENT = 2 | Container stop event     |
+    | CONTAINER_DELETED_EVENT = 3 | Container deletion event |
+
+- **SwapUsage**
+
+    Virtual memory usage.
+
+    | Member                           | Description                    |
+    | -------------------------------- | ------------------------------ |
+    | int64 timestamp                  | Timestamp information          |
+    | UInt64Value swap_available_bytes | Available virtual memory bytes |
+    | UInt64Value swap_usage_bytes     | Used virtual memory bytes      |
+
+## New Interfaces
+
+### RuntimeConfig
+
+#### Interface Prototype
+
+```text
+rpc RuntimeConfig(RuntimeConfigRequest) returns (RuntimeConfigResponse) {}
+```
+
+#### Interface Description
+
+Obtains the cgroup driver configuration (cgroupfs or systemd-cgroup).
+
+#### Parameter: RuntimeConfigRequest
+
+None (the message contains no fields)
+
+#### Returns: RuntimeConfigResponse
+
+| Return                          | Description                                            |
+| ------------------------------- | ------------------------------------------------------- |
+| LinuxRuntimeConfiguration linux | CgroupDriver enum value for cgroupfs or systemd-cgroup |
+
+### GetContainerEvents
+
+#### Interface Prototype
+
+```text
+rpc GetContainerEvents(GetEventsRequest) returns (stream ContainerEventResponse) {}
+```
+
+#### Interface Description
+
+Obtains the pod lifecycle event stream.
+
+#### Parameter: GetEventsRequest
+
+None (the message contains no fields)
+
+#### Returns: ContainerEventResponse
+
+| Return                                        | Description                                                           |
+| --------------------------------------------- | ----------------------------------------------------------------------- |
+| string container_id                           | Container ID                                                          |
+| ContainerEventType container_event_type      | Container event type                                                  |
+| int64 created_at                              | Time when the container event is generated                            |
+| PodSandboxStatus pod_sandbox_status           | Status of the pod to which the container belongs                      |
+| repeated ContainerStatus containers_statuses  | Statuses of all containers in the pod to which the container belongs  |
+
+## Change Description
+
+### CRI V1.29
+
+#### [Obtaining the cgroup Driver Configuration](https://github.com/kubernetes/kubernetes/pull/118770)
+
+`RuntimeConfig` obtains the cgroup driver configuration (cgroupfs or systemd-cgroup).
+
+#### [GetContainerEvents Supports Pod Lifecycle Events](https://github.com/kubernetes/kubernetes/pull/111384)
+
+`GetContainerEvents` provides event streams related to the pod lifecycle.
+
+`PodSandboxStatus` is adjusted accordingly. `ContainerStatuses` is added to provide status information about the containers in the sandbox.
+
+#### [ContainerStats Virtual Memory Information](https://github.com/kubernetes/kubernetes/pull/118865)
+
+The virtual memory usage information `SwapUsage` is added to `ContainerStats`.
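+
+A hedged way to observe the new field from the command line (assuming a CRI v1-capable crictl and jq; the JSON field names below follow the SwapUsage message and may differ by client version):
+
+```shell
+# Dump container stats as JSON and extract the swap usage per container.
+crictl stats -o json | jq '.stats[] | {id: .attributes.id, swap: .swap}'
+```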
+
+#### [OOMKilled Setting in the Reason Field of ContainerStatus](https://github.com/kubernetes/kubernetes/pull/112977)
+
+The **Reason** field in **ContainerStatus** should be set to OOMKilled when a cgroup out-of-memory event occurs.
+
+#### [Modification of PodSecurityContext.SupplementalGroups Description](https://github.com/kubernetes/kubernetes/pull/113047)
+
+The comments of **PodSecurityContext.SupplementalGroups** are optimized to clarify the behavior when the main UID defined by the container image is not in the list.
+
+#### [ExecSync Output Restriction](https://github.com/kubernetes/kubernetes/pull/110435)
+
+The output returned by **ExecSync** is limited to less than 16 MB.
+
+## User Guide
+
+### Configuring iSulad to Support CRI V1
+
+Configure iSulad to support CRI v1 1.29 used by the new Kubernetes version.
+
+For CRI 1.25 or earlier, CRI v1alpha2 and CRI v1 provide the same functions. New features of CRI 1.26 or later, including the features of this upgrade, are supported only in CRI v1. Therefore, you need to enable CRI v1 as follows.
+
+To enable CRI v1, set **enable-cri-v1** in the iSulad **daemon.json** file to **true** and restart iSulad.
+
+```json
+{
+    "group": "isula",
+    "default-runtime": "runc",
+    ...
+    "enable-cri-v1": true
+}
+```
+
+If iSulad is installed from source, enable the **ENABLE_CRI_API_V1** compile option.
+
+```bash
+cmake ../ -D ENABLE_CRI_API_V1=ON
+```
+
+### Using RuntimeConfig to Obtain the cgroup Driver Configuration
+
+#### systemd-cgroup Configuration
+
+iSulad supports both the systemd and cgroupfs cgroup drivers; cgroupfs is used by default. The systemd-cgroup driver is supported only when the runtime is runc. To use it, set **systemd-cgroup** to **true** in the iSulad configuration file **daemon.json** and restart iSulad.
+
+```json
+{
+    "group": "isula",
+    "default-runtime": "runc",
+    ...
+    "enable-cri-v1": true,
+    "systemd-cgroup": true
+}
+```
+
+### Using GetContainerEvents to Generate Pod Lifecycle Events
+
+#### Pod Events Configuration
+
+In the iSulad configuration file **daemon.json**, set **enable-pod-events** to **true** and restart iSulad.
+
+```json
+{
+    "group": "isula",
+    "default-runtime": "runc",
+    ...
+    "enable-cri-v1": true,
+    "enable-pod-events": true
+}
+```
+
+## Constraints
+
+1. The preceding new features are supported by iSulad only when the container runtime is runc.
+2. A cgroup out-of-memory (OOM) event triggers the deletion of the cgroup path of the container. If iSulad processes the OOM event after the cgroup path is deleted, iSulad cannot capture the OOM event of the container. As a result, the **Reason** field in **ContainerStatus** may be incorrect.
+3. iSulad does not support the mixed use of different cgroup drivers to manage containers. After a container is started, the cgroup driver configuration in iSulad should not change.
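+
+To verify the CRI v1 configuration described in the user guide above, a quick sanity check can be made with a CRI client (a sketch; assumes crictl is installed and iSulad uses its default socket):
+
+```shell
+# The reported RuntimeApiVersion indicates which CRI version is being served.
+crictl --runtime-endpoint unix:///var/run/isulad.sock version
+```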
diff --git a/docs/en/docs/container_engine/isula_container_engine/figures/en-us_image_0183048952.png b/docs/en/docs/container_engine/isula_container_engine/figures/en-us_image_0183048952.png new file mode 100644 index 0000000000000000000000000000000000000000..fe9074f8fba969795f1e1d40fb879e21d5fc2a7c Binary files /dev/null and b/docs/en/docs/container_engine/isula_container_engine/figures/en-us_image_0183048952.png differ diff --git a/docs/en/docs/container_engine/isula_container_engine/image_management.md b/docs/en/docs/container_engine/isula_container_engine/image_management.md new file mode 100644 index 0000000000000000000000000000000000000000..1eedf7d85372e67c9a493f36200fb9c98ecfc8a3 --- /dev/null +++ b/docs/en/docs/container_engine/isula_container_engine/image_management.md @@ -0,0 +1,354 @@ +# Image Management + +## Docker Image Management + +### Logging In to a Registry + +#### Description + +The **isula login** command is run to log in to a registry. After successful login, you can run the **isula pull** command to pull images from the registry. If the registry does not require a password, you do not need to run this command before pulling images. + +#### Usage + +```shell +isula login [OPTIONS] SERVER +``` + +#### Parameters + +For details about parameters in the **login** command, see Table 1 in [Command Line Parameters](./appendix.md#command-line-parameters). + +#### Example + +```shell +$ isula login -u abc my.csp-edge.com:5000 + +Login Succeeded +``` + +### Logging Out of a Registry + +#### Description + +The **isula logout** command is run to log out of a registry. If you run the **isula pull** command to pull images from the registry after logging out of the system, the image will fail to be pulled because you are not authenticated. + +#### Usage + +```shell +isula logout SERVER +``` + +#### Parameters + +For details about parameters in the **logout** command, see Table 2 in [Command Line Parameters](./appendix.md#command-line-parameters). + +#### Example + +```shell +$ isula logout my.csp-edge.com:5000 +Logout Succeeded +``` + +### Pulling Images from a Registry + +#### Description + +Pull images from a registry to the local host. + +#### Usage + +```shell +isula pull [OPTIONS] NAME[:TAG|@DIGEST] +``` + +#### Parameters + +For details about parameters in the **pull** command, see Table 3 in [Command Line Parameters](./appendix.md#command-line-parameters). + +#### Example + +```shell +$ isula pull localhost:5000/official/busybox +Image "localhost:5000/official/busybox" pulling +Image "localhost:5000/official/busybox@sha256:bf510723d2cd2d4e3f5ce7e93bf1e52c8fd76831995ac3bd3f90ecc866643aff" pulled +``` + +### Deleting Images + +#### Description + +Delete one or more images. + +#### Usage + +```shell +isula rmi [OPTIONS] IMAGE [IMAGE...] +``` + +#### Parameters + +For details about parameters in the **rmi** command, see Table 4 in [Command Line Parameters](./appendix.md#command-line-parameters). + +#### Example + +```shell +$ isula rmi rnd-dockerhub.huawei.com/official/busybox +Image "rnd-dockerhub.huawei.com/official/busybox" removed +``` + +### Loading Images + +#### Description + +Load images from a .tar package. The .tar package must be exported by using the **docker save** command or must be in the same format. + +#### Usage + +```shell +isula load [OPTIONS] +``` + +#### Parameters + +For details about parameters in the **load** command, see Table 5 in [Command Line Parameters](./appendix.md#command-line-parameters). 
+
+#### Example
+
+```shell
+$ isula load -i busybox.tar
+Load image from "/root/busybox.tar" success
+```
+
+### Listing Images
+
+#### Description
+
+List all images in the current environment.
+
+#### Usage
+
+```shell
+isula images
+```
+
+#### Parameters
+
+For details about parameters in the **images** command, see Table 6 in [Command Line Parameters](./appendix.md#command-line-parameters).
+
+#### Example
+
+```shell
+$ isula images
+REF                                                IMAGE ID       CREATED               SIZE
+rnd-dockerhub.huawei.com/official/busybox:latest   e4db68de4ff2   2019-06-15 08:19:54   1.376 MB
+```
+
+### Inspecting Images
+
+#### Description
+
+Query the configuration information of an image. You can use the **-f** parameter to filter the output as needed.
+
+#### Usage
+
+```shell
+isula inspect [options] CONTAINER|IMAGE [CONTAINER|IMAGE...]
+```
+
+#### Parameters
+
+For details about parameters in the **inspect** command, see Table 7 in [Command Line Parameters](./appendix.md#command-line-parameters).
+
+#### Example
+
+```shell
+$ isula inspect -f "{{json .image.id}}" rnd-dockerhub.huawei.com/official/busybox
+"e4db68de4ff27c2adfea0c54bbb73a61a42f5b667c326de4d7d5b19ab71c6a3b"
+```
+
+### Two-Way Authentication
+
+#### Description
+
+After this function is enabled, iSulad and image repositories communicate over HTTPS, and both iSulad and the image repositories verify the validity of each other.
+
+#### Usage
+
+The corresponding registry needs to support this function and iSulad needs to be configured as follows:
+
+1. Modify the iSulad configuration \(default path: **/etc/isulad/daemon.json**\) and set **use-decrypted-key** to **false**.
+2. Place the related certificates in the folder named after the registry in the **/etc/isulad/certs.d** directory. For details about how to generate certificates, visit the official Docker website:
+    - [https://docs.docker.com/engine/security/certificates/](https://docs.docker.com/engine/security/certificates/)
+    - [https://docs.docker.com/engine/security/https/](https://docs.docker.com/engine/security/https/)
+
+3. Run the **systemctl restart isulad** command to restart iSulad.
+
+#### Parameters
+
+Parameters can be configured in the **/etc/isulad/daemon.json** file or carried when iSulad is started.
+
+```shell
+isulad --use-decrypted-key=false
+```
+
+#### Example
+
+Set **use-decrypted-key** to **false**.
+
+```shell
+$ cat /etc/isulad/daemon.json
+{
+    "group": "isulad",
+    "graph": "/var/lib/isulad",
+    "state": "/var/run/isulad",
+    "engine": "lcr",
+    "log-level": "ERROR",
+    "pidfile": "/var/run/isulad.pid",
+    "log-opts": {
+        "log-file-mode": "0600",
+        "log-path": "/var/lib/isulad",
+        "max-file": "1",
+        "max-size": "30KB"
+    },
+    "log-driver": "stdout",
+    "hook-spec": "/etc/default/isulad/hooks/default.json",
+    "start-timeout": "2m",
+    "storage-driver": "overlay2",
+    "storage-opts": [
+        "overlay2.override_kernel_check=true"
+    ],
+    "registry-mirrors": [
+        "docker.io"
+    ],
+    "insecure-registries": [
+        "rnd-dockerhub.huawei.com"
+    ],
+    "pod-sandbox-image": "",
+    "image-opt-timeout": "5m",
+    "native.umask": "secure",
+    "network-plugin": "",
+    "cni-bin-dir": "",
+    "cni-conf-dir": "",
+    "image-layer-check": false,
+    "use-decrypted-key": false,
+    "insecure-skip-verify-enforce": false
+}
+```
+
+Place the certificate in the corresponding directory.
+
+```shell
+$ pwd
+/etc/isulad/certs.d/my.csp-edge.com:5000
+$ ls
+ca.crt  tls.cert  tls.key
+```
+
+Restart iSulad.
+
+```shell
+systemctl restart isulad
+```
+
+Run the **pull** command to download images from the registry:
+
+```shell
+$ isula pull my.csp-edge.com:5000/busybox
+Image "my.csp-edge.com:5000/busybox" pulling
+Image "my.csp-edge.com:5000/busybox@sha256:f1bdc62115dbfe8f54e52e19795ee34b4473babdeb9bc4f83045d85c7b2ad5c0" pulled
+```
+
+## Embedded Image Management
+
+### Loading Images
+
+#### Description
+
+Load images based on the **manifest** files of embedded images. The value of **--type** must be set to **embedded**.
+
+#### Usage
+
+```shell
+isula load [OPTIONS] --input=FILE --type=TYPE
+```
+
+#### Parameters
+
+For details about parameters in the **load** command, see Table 5 in [Command Line Parameters](./appendix.md#command-line-parameters).
+
+#### Example
+
+```shell
+$ isula load -i test.manifest --type embedded
+Load image from "/root/work/bugfix/tmp/ci_testcase_data/embedded/img/test.manifest" success
+```
+
+### Listing Images
+
+#### Description
+
+List all images in the current environment.
+
+#### Usage
+
+```shell
+isula images [OPTIONS]
+```
+
+#### Parameters
+
+For details about parameters in the **images** command, see Table 6 in [Command Line Parameters](./appendix.md#command-line-parameters).
+
+#### Example
+
+```shell
+$ isula images
+REF         IMAGE ID       CREATED               SIZE
+test:v1     9319da1f5233   2018-03-01 10:55:44   1.273 MB
+```
+
+### Inspecting Images
+
+#### Description
+
+Query the configuration information of an image. You can use the **-f** parameter to filter the output as needed.
+
+#### Usage
+
+```shell
+isula inspect [options] CONTAINER|IMAGE [CONTAINER|IMAGE...]
+```
+
+#### Parameters
+
+For details about parameters in the **inspect** command, see Table 7 in [Command Line Parameters](./appendix.md#command-line-parameters).
+
+#### Example
+
+```shell
+$ isula inspect -f "{{json .created}}" test:v1
+"2018-03-01T15:55:44.322987811Z"
+```
+
+### Deleting Images
+
+#### Description
+
+Delete one or more images.
+
+#### Usage
+
+```shell
+isula rmi [OPTIONS] IMAGE [IMAGE...]
+```
+
+#### Parameters
+
+For details about parameters in the **rmi** command, see Table 4 in [Command Line Parameters](./appendix.md#command-line-parameters).
+
+#### Example
+
+```shell
+$ isula rmi test:v1
+Image "test:v1" removed
+```
diff --git a/docs/en/docs/container_engine/isula_container_engine/installation_configuration.md b/docs/en/docs/container_engine/isula_container_engine/installation_configuration.md
new file mode 100644
index 0000000000000000000000000000000000000000..54588f5e902f24624e1308a6e0c67bbbc380f1d9
--- /dev/null
+++ b/docs/en/docs/container_engine/isula_container_engine/installation_configuration.md
@@ -0,0 +1,961 @@
+# Installation and Configuration
+
+This chapter covers the installation, configuration, upgrade, and removal of iSulad.
+
+> [!NOTE]Note
+>
+> Root privileges are required for installing, upgrading, or uninstalling iSulad.
+
+## Installation Methods
+
+iSulad can be installed by running the **yum** or **rpm** command. The **yum** command is recommended because dependencies can be installed automatically.
+
+This section describes two installation methods.
+
+- \(Recommended\) Run the following command to install iSulad:
+
+    ```shell
+    sudo yum install -y iSulad
+    ```
+
+- If the **rpm** command is used to install iSulad, you need to download and manually install the RPM packages of iSulad and all its dependencies.
To install the RPM package of a single iSulad \(the same for installing dependency packages\), run the following command: + + ```shell + sudo rpm -ihv iSulad-xx.xx.xx-YYYYmmdd.HHMMSS.gitxxxxxxxx.aarch64.rpm + ``` + +## Deployment Configuration + +### Configuration Mode + +The iSulad server daemon **isulad** can be configured with a configuration file or by running the **isulad --xxx** command. The priority in descending order is as follows: CLI \> configuration file \> default configuration in code. + +> [!NOTE]NOTE +> +> If systemd is used to manage the iSulad process, modify the **OPTIONS** field in the **/etc/sysconfig/iSulad** file, which functions the same as using the CLI. + +- **CLI** + + During service startup, configure iSulad using the CLI. To view the configuration options, run the following command: + + ```shell + # isulad --help + isulad + + lightweight container runtime daemon + + Usage: isulad [global options] + + GLOBAL OPTIONS: + + --authorization-plugin Use authorization plugin + --cgroup-parent Set parent cgroup for all containers + --cni-bin-dir The full path of the directory in which to search for CNI plugin binaries. Default: /opt/cni/bin + --cni-conf-dir The full path of the directory in which to search for CNI config files. Default: /etc/cni/net.d + --container-log-driver Set default container log driver, such as: json-file + --container-log-opts Set default container log driver options, such as: max-file=7 to set max number of container log files + --default-ulimit Default ulimits for containers (default []) + -e, --engine Select backend engine + -g, --graph Root directory of the iSulad runtime + -G, --group Group for the unix socket(default is isulad) + --help Show help + --hook-spec Default hook spec file applied to all containers + -H, --host The socket name used to create gRPC server + --image-layer-check Check layer integrity when needed + --insecure-registry Disable TLS verification for the given registry + --insecure-skip-verify-enforce Force to skip the insecure verify(default false) + --log-driver Set daemon log driver, such as: file + -l, --log-level Set log level, the levels can be: FATAL ALERT CRIT ERROR WARN NOTICE INFO DEBUG TRACE + --log-opt Set daemon log driver options, such as: log-path=/tmp/logs/ to set directory where to store daemon logs + --native.umask Default file mode creation mask (umask) for containers + --network-plugin Set network plugin, default is null, support null and cni + -p, --pidfile Save pid into this file + --pod-sandbox-image The image whose network/ipc namespaces containers in each pod will use. 
(default "pause-${machine}:3.0")
+      --registry-mirrors                 Registry to be prepended when pulling unqualified images, can be specified multiple times
+      --selinux-enabled                  Enable selinux support
+      --start-timeout                    timeout duration for waiting on a container to start before it is killed
+  -S, --state                            Root directory for execution state files
+      --storage-driver                   Storage driver to use(default overlay2)
+  -s, --storage-opt                      Storage driver options
+      --tls                              Use TLS; implied by --tlsverify
+      --tlscacert                        Trust certs signed only by this CA (default "/root/.iSulad/ca.pem")
+      --tlscert                          Path to TLS certificate file (default "/root/.iSulad/cert.pem")
+      --tlskey                           Path to TLS key file (default "/root/.iSulad/key.pem")
+      --tlsverify                        Use TLS and verify the remote
+      --use-decrypted-key                Use decrypted private key by default(default true)
+      --userns-remap                     User/Group setting for user namespaces
+  -V, --version                          Print the version
+      --websocket-server-listening-port  CRI websocket streaming service listening port (default 10350)
+    ```
+
+    Example: Start iSulad and change the log level to DEBUG.
+
+    ```shell
+    isulad -l DEBUG
+    ```
+
+- **Configuration file**
+
+    The iSulad configuration file is **/etc/isulad/daemon.json**. The parameters in the file are described as follows:
+
+    | Parameter | Example | Description | Remarks |
+    | --------- | ------- | ----------- | ------- |
+    | -e, --engine | "engine": "lcr" | iSulad runtime engine, which is lcr by default. | None |
+    | -G, --group | "group": "isulad" | Socket group. | None |
+    | --hook-spec | "hook-spec": "/etc/default/isulad/hooks/default.json" | Default hook configuration file for all containers. | None |
+    | -H, --host | "hosts": "unix:///var/run/isulad.sock" | Communication mode. | In addition to the local socket, the tcp://ip:port mode is supported. The port number ranges from 0 to 65535, excluding occupied ports. |
+    | --log-driver | "log-driver": "file" | Log driver configuration. | None |
+    | -l, --log-level | "log-level": "ERROR" | Log output level. | None |
+    | --log-opt | "log-opts": {"log-file-mode": "0600", "log-path": "/var/lib/isulad", "max-file": "1", "max-size": "30KB"} | Log-related configuration. | You can specify **max-file**, **max-size**, and **log-path**. **max-file** is the number of log files. **max-size** is the size threshold that triggers log rotation; if **max-file** is **1**, **max-size** is invalid. **log-path** specifies the path for storing log files. **log-file-mode** sets the permissions for reading and writing log files; the value must be in octal format, for example, **0666**. |
+    | --start-timeout | "start-timeout": "2m" | Time allowed for starting a container. | None |
+    | --runtime | "default-runtime": "lcr" | Container runtime, which is lcr by default. | If neither the CLI nor the configuration file specifies the runtime, lcr is used by default. The priority of the three specifying methods is: CLI > configuration file > default value lcr. Currently, lcr and kata-runtime are supported. |
+    | None | "runtimes": {"kata-runtime": {"path": "/usr/bin/kata-runtime", "runtime-args": ["--kata-config", "/usr/share/defaults/kata-containers/configuration.toml"]}} | Set this parameter to specify multiple runtimes when starting containers. Runtimes in this set are valid for container startup. | Runtime whitelist of a container. The customized runtimes in this set are valid; kata-runtime is used as the example. |
+    | -p, --pidfile | "pidfile": "/var/run/isulad.pid" | File for storing PIDs. | This parameter is required only when more than two container engines need to be started. |
+    | -g, --graph | "graph": "/var/lib/isulad" | Root directory of the iSulad runtime. | None |
+    | -S, --state | "state": "/var/run/isulad" | Root directory of execution state files. | None |
+    | --storage-driver | "storage-driver": "overlay2" | Image storage driver, which is overlay2 by default. | Only overlay2 is supported. |
+    | -s, --storage-opt | "storage-opts": [ "overlay2.override_kernel_check=true" ] | Image storage driver configuration options. | The options are as follows:<br>**overlay2.override_kernel_check=true**: ignore the kernel version check.<br>**overlay2.size=${size}**: set the rootfs quota to ${size}.<br>**overlay2.basesize=${size}**: equivalent to **overlay2.size**. |
+    | --image-opt-timeout | "image-opt-timeout": "5m" | Image operation timeout interval, which is 5m by default. | The value **-1** indicates that the timeout interval is not limited. |
+    | --registry-mirrors | "registry-mirrors": [ "docker.io" ] | Registry address. | None |
+    | --insecure-registry | "insecure-registries": [ ] | Registry without TLS verification. | None |
+    | --native.umask | "native.umask": "secure" | Container umask policy. The default value is **secure**. The value **normal** indicates an insecure configuration. | Sets the container umask value. The value can be null (**0027** by default), **normal**, or **secure**.<br>**normal**: the umask of a started container is **0022**.<br>**secure**: the umask of a started container is **0027** (default value). |
+    | --pod-sandbox-image | "pod-sandbox-image": "rnd-dockerhub.huawei.com/library/pause-aarch64:3.0" | Default image used by pods. The default value is **rnd-dockerhub.huawei.com/library/pause-${machine}:3.0**. | None |
+    | --network-plugin | "network-plugin": "" | Specifies the network plugin. The value is empty by default, indicating that no network configuration is available and a created sandbox has only the loopback NIC. | The values **cni** and the empty string are supported. Other invalid values cause iSulad startup failure. |
+    | --cni-bin-dir | "cni-bin-dir": "" | Specifies the directory of binary files on which the CNI plugin depends. | The default value is **/opt/cni/bin**. |
+    | --cni-conf-dir | "cni-conf-dir": "" | Specifies the directory of CNI network configuration files. | The default value is **/etc/cni/net.d**. |
+    | --image-layer-check=false | "image-layer-check": false | Image layer integrity check. Set it to **true** to enable the function; it is disabled (**false**) by default. | When iSulad starts, it checks the integrity of image layers. If an image layer is damaged, the related images are unavailable. iSulad cannot verify empty files, directories, and link files; therefore, if such files are lost due to a power failure, the integrity check may fail to identify the damage. When the iSulad version changes, check whether the parameter is supported; if not, delete it from the configuration file. |
+    | --insecure-skip-verify-enforce=false | "insecure-skip-verify-enforce": false | Whether to forcibly skip verification of the certificate host name/domain name. The value is of the Boolean type, and the default value is **false**. If set to **true**, the verification is skipped. | The default value is **false** (verification is not skipped). Note: restricted by the YAJL JSON parsing library, if a non-Boolean value that meets the JSON format requirements is configured in **/etc/isulad/daemon.json**, iSulad uses the default value **false**. |
+    | --use-decrypted-key=true | "use-decrypted-key": true | Whether to use an unencrypted private key. The value is of the Boolean type. If set to **true**, an unencrypted private key is used; if set to **false**, an encrypted private key is used, that is, two-way authentication is required. | The default value is **true** (an unencrypted private key is used). Note: restricted by the YAJL JSON parsing library, if a non-Boolean value that meets the JSON format requirements is configured in **/etc/isulad/daemon.json**, iSulad uses the default value **true**. |
+    | --tls | "tls": false | Whether to use TLS. The value is of the Boolean type. | Used only in **-H tcp://IP:PORT** mode. The default value is **false**. |
+    | --tlsverify | "tlsverify": false | Whether to use TLS and verify remote access. The value is of the Boolean type. | |

+

This parameter is used only in -H tcp://IP:PORT mode.

+

--tlscacert

+

--tlscert

+

--tlskey

+

"tls-config": {

+

"CAFile": "/root/.iSulad/ca.pem",

+

"CertFile": "/root/.iSulad/server-cert.pem",

+

"KeyFile":"/root/.iSulad/server-key.pem"

+

}

+

TLS certificate-related configuration.

+

This parameter is used only in -H tcp://IP:PORT mode.

+

--authorization-plugin

+

"authorization-plugin": "authz-broker"

+

User permission authentication plugin.

+

Only authz-broker is supported.

+

--cgroup-parent

+

"cgroup-parent": "lxc/mycgroup"

+

Default cgroup parent path of a container, which is of the string type.

+

Specifies the cgroup parent path of a container. If --cgroup-parent is specified on the client, the client parameter prevails.

+

Note: If container A is started before container B, the cgroup parent path of container B is specified as the cgroup path of container A. When deleting a container, you need to delete container B and then container A in sequence. Otherwise, residual cgroup resources exist.

+

--default-ulimits

+

"default-ulimits": {

+

"nofile": {

+

"Name": "nofile",

+

"Hard": 6400,

+

"Soft": 3200

+

}

+

}

+

Specifies the ulimit restriction type, soft value, and hard value.

+

Specifies the restricted resource type, for example, nofile. The two field names must be the same, that is, nofile. Otherwise, an error is reported. The value of Hard must be greater than or equal to that of Soft. If the Hard or Soft field is not set, the default value 0 is used.

+

--websocket-server-listening-port

+

"websocket-server-listening-port": 10350

+

Specifies the listening port of the CRI WebSocket streaming service. The default port number is 10350.

+

Specifies the listening port of the CRI websocket streaming service.

+

If the client specifies --websocket-server-listening-port, the specified value is used. The port number ranges from 1024 to 49151.

+
  Example:

  ```shell
  $ cat /etc/isulad/daemon.json
  {
      "group": "isulad",
      "default-runtime": "lcr",
      "graph": "/var/lib/isulad",
      "state": "/var/run/isulad",
      "engine": "lcr",
      "log-level": "ERROR",
      "pidfile": "/var/run/isulad.pid",
      "log-opts": {
          "log-file-mode": "0600",
          "log-path": "/var/lib/isulad",
          "max-file": "1",
          "max-size": "30KB"
      },
      "log-driver": "stdout",
      "hook-spec": "/etc/default/isulad/hooks/default.json",
      "start-timeout": "2m",
      "storage-driver": "overlay2",
      "storage-opts": [
          "overlay2.override_kernel_check=true"
      ],
      "registry-mirrors": [
          "docker.io"
      ],
      "insecure-registries": [
          "rnd-dockerhub.huawei.com"
      ],
      "pod-sandbox-image": "",
      "image-opt-timeout": "5m",
      "native.umask": "secure",
      "network-plugin": "",
      "cni-bin-dir": "",
      "cni-conf-dir": "",
      "image-layer-check": false,
      "use-decrypted-key": true,
      "insecure-skip-verify-enforce": false
  }
  ```

  > [!TIP]NOTICE
  > The default configuration file **/etc/isulad/daemon.json** is for reference only. Configure it based on site requirements.

### Storage Description

| File | Directory | Description |
| --- | --- | --- |
| \* | /etc/default/isulad/ | Stores the OCI configuration file and hook template file of iSulad. The file configuration permission is set to 0640, and the sysmonitor check permission is set to 0550. |
| \* | /etc/isulad/ | Default configuration files of iSulad and seccomp. |
| isulad.sock | /var/run/ | Pipe communication file, which is used for the communication between the client and iSulad. |
| isulad.pid | /var/run/ | File for storing the iSulad PIDs. It is also a file lock that prevents multiple iSulad instances from being started. |
| \* | /run/lxc/ | Lock file, which is created during iSulad running. |
| \* | /var/run/isulad/ | Real-time communication cache file, which is created during iSulad running. |
| \* | /var/run/isula/ | Real-time communication cache file, which is created during iSulad running. |
| \* | /var/lib/lcr/ | Temporary directory of the LCR component. |
| \* | /var/lib/isulad/ | Root directory where iSulad runs, which stores the created container configurations, default log path, database file, and mount points.<br>/var/lib/isulad/mnt/: mount point of the container rootfs.<br>/var/lib/isulad/engines/lcr/: directory for storing LCR container configurations. Each container has a directory named after it. |
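
The table above can be cross-checked on a running system. The following sketch assumes the default **--graph** and **--state** settings; adjust the paths if you changed them:

```shell
# List the runtime artifacts described above (paths assume default settings)
ls /var/lib/isulad              # container configurations, logs, database, mount points
ls /var/lib/isulad/engines/lcr  # one directory per LCR container
ls /var/run/isulad              # real-time communication cache files
```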
### Constraints

- In high concurrency scenarios (200 containers started concurrently), the memory management mechanism of glibc may cause memory holes and large virtual memory usage (for example, 10 GB). This is a restriction of the glibc memory management mechanism under high concurrency, not a memory leak, so memory consumption does not increase indefinitely. You can set the **MALLOC_ARENA_MAX** environment variable to reduce virtual memory usage and increase the probability that physical memory is released. However, this environment variable degrades iSulad concurrency performance, so set it based on site requirements.

    To balance performance and memory usage, set MALLOC_ARENA_MAX to 4. (On ARM64 servers, the impact on iSulad performance is less than 10%.)

    Configuration method:

    1. To start iSulad manually, run **export MALLOC_ARENA_MAX=4** and then start iSulad.
    2. If systemd manages iSulad, add **MALLOC_ARENA_MAX=4** to the **/etc/sysconfig/iSulad** file.

- Precautions for specifying the daemon running directories

    Take **--root** as an example. When **/new/path/** is used as the new daemon root directory, if a file or directory already exists in **/new/path/** whose name conflicts with one that iSulad requires (for example, **engines** and **mnt**), iSulad may update the attributes of the original directory or file, including the owner and permissions.

    Therefore, be aware of the impact of re-specifying running directories and files on their attributes. You are advised to specify a new directory or file for iSulad to avoid file attribute changes and security issues caused by conflicts.

- Log file management:

    > [!TIP]NOTICE
    > Log function interconnection: because iSulad is managed by systemd, its logs are collected by systemd and then transmitted to rsyslogd. By default, rsyslog restricts the log writing speed. To remove this restriction, add the configuration item **$imjournalRatelimitInterval 0** to the **/etc/rsyslog.conf** file and restart the rsyslogd service.

- Restrictions on command line parameter parsing

    When the iSulad command line interface is used, parameter parsing differs slightly from Docker. For flags that take parameters, regardless of whether a long or short flag is used, only the first space after the flag, or the character string after the equal sign (=) directly attached to the flag, is treated as the flag parameter. The details are as follows:

    1. When a short flag is used, each character in the string attached to the hyphen (-) is considered a short flag. If there is an equal sign (=), the string following the equal sign is considered the parameter of the short flag before it.

        **isula run -du=root busybox** is equivalent to **isula run -du root busybox**, **isula run -d -u=root busybox**, or **isula run -d -u root busybox**. When **isula run -du:root** is used, an error is reported because **-:** is not a valid short flag. The preceding command is equivalent to **isula run -ud root busybox**; however, this writing style is not recommended because it may cause semantic problems.

    2. When a long flag is used, the string attached to **--** is treated as a long flag. If the string contains an equal sign, the string before the equal sign is the long flag, and the string after it is the parameter.
        ```shell
        isula run --user=root busybox
        ```

        or

        ```shell
        isula run --user root busybox
        ```

- After an iSulad container is started, you cannot run the **isula run -i/-t/-ti** and **isula attach/exec** commands as a non-root user.
- When iSulad connects to an OCI container, only kata-runtime can be used to start the OCI container.

### Daemon Multi-Port Binding

#### Description

The daemon can bind multiple UNIX sockets or TCP ports and listen on these ports. The client can interact with the daemon through these ports.

#### Port

Users can configure one or more ports in the hosts field in the **/etc/isulad/daemon.json** file, or choose not to specify hosts.

```json
{
    "hosts": [
        "unix:///var/run/isulad.sock",
        "tcp://localhost:5678",
        "tcp://127.0.0.1:6789"
    ]
}
```

Users can also use the **-H** or **--host** option in the **/etc/sysconfig/iSulad** file to configure ports, or choose not to specify hosts.

```ini
OPTIONS='-H unix:///var/run/isulad.sock --host tcp://127.0.0.1:6789'
```

If hosts are specified in neither the **daemon.json** file nor the **/etc/sysconfig/iSulad** file, the daemon listens on **unix:///var/run/isulad.sock** by default after startup.

#### Restrictions

- Users cannot specify hosts in the **/etc/isulad/daemon.json** and **/etc/sysconfig/iSulad** files at the same time. Otherwise, an error will occur and iSulad cannot be started.

    ```text
    unable to configure the isulad with file /etc/isulad/daemon.json: the following directives are specified both as a flag and in the configuration file: hosts: (from flag: [unix:///var/run/isulad.sock tcp://127.0.0.1:6789], from file: [unix:///var/run/isulad.sock tcp://localhost:5678 tcp://127.0.0.1:6789])
    ```

- If the specified host is a UNIX socket, the socket must start with **unix://** followed by a valid absolute path.
- If the specified host is a TCP port, the TCP port number must start with **tcp://** followed by a valid IP address and port number. The IP address can be that of the local host.
- A maximum of 10 valid ports can be specified. If more than 10 ports are specified, an error will occur and iSulad cannot be started.

### Configuring TLS Authentication and Enabling Remote Access

#### Description

iSulad is designed in C/S mode. By default, the iSulad daemon process listens only on the local **/var/run/isulad.sock**, so containers can be operated only through the local iSula client. To enable remote access to containers, the iSulad daemon process needs to listen on a remote access port over TCP/IP. However, if listening is enabled by simply configuring tcp://ip:port, any host can communicate with iSulad by calling **isula -H tcp://**_remote server IP address_**:port**, which may cause security problems. Therefore, it is recommended that a more secure mechanism, namely Transport Layer Security (TLS), be used for remote access.

#### Generating TLS Certificates

- Example of generating a plaintext private key and certificate

    ```shell
    #!/bin/bash
    set -e
    echo -n "Enter pass phrase:"
    read password
    echo -n "Enter public network ip:"
    read publicip
    echo -n "Enter host:"
    read HOST

    echo " => Using hostname: $publicip, You MUST connect to iSulad using this host!"

    mkdir -p $HOME/.iSulad
    cd $HOME/.iSulad
    rm -rf $HOME/.iSulad/*

    echo " => Generating CA key"
    openssl genrsa -passout pass:$password -aes256 -out ca-key.pem 4096
    echo " => Generating CA certificate"
    openssl req -passin pass:$password -new -x509 -days 365 -key ca-key.pem -sha256 -out ca.pem -subj "/C=CN/ST=zhejiang/L=hangzhou/O=Huawei/OU=iSulad/CN=iSulad@huawei.com"
    echo " => Generating server key"
    openssl genrsa -passout pass:$password -out server-key.pem 4096
    echo " => Generating server CSR"
    openssl req -passin pass:$password -subj /CN=$HOST -sha256 -new -key server-key.pem -out server.csr
    echo subjectAltName = DNS:$HOST,IP:$publicip,IP:127.0.0.1 >> extfile.cnf
    echo extendedKeyUsage = serverAuth >> extfile.cnf
    echo " => Signing server CSR with CA"
    openssl x509 -req -passin pass:$password -days 365 -sha256 -in server.csr -CA ca.pem -CAkey ca-key.pem -CAcreateserial -out server-cert.pem -extfile extfile.cnf
    echo " => Generating client key"
    openssl genrsa -passout pass:$password -out key.pem 4096
    echo " => Generating client CSR"
    openssl req -passin pass:$password -subj '/CN=client' -new -key key.pem -out client.csr
    echo " => Creating extended key usage"
    echo extendedKeyUsage = clientAuth > extfile-client.cnf
    echo " => Signing client CSR with CA"
    openssl x509 -req -passin pass:$password -days 365 -sha256 -in client.csr -CA ca.pem -CAkey ca-key.pem -CAcreateserial -out cert.pem -extfile extfile-client.cnf
    rm -v client.csr server.csr extfile.cnf extfile-client.cnf
    chmod -v 0400 ca-key.pem key.pem server-key.pem
    chmod -v 0444 ca.pem server-cert.pem cert.pem
    ```

- Example of generating an encrypted private key and certificate request file

    ```shell
    #!/bin/bash

    echo -n "Enter public network ip:"
    read publicip
    echo -n "Enter pass phrase:"
    read password

    # validity period of the certificates, in days
    DAYS=365

    # remove certificates from previous execution
    rm -f *.pem *.srl *.csr *.cnf

    # generate CA private and public keys
    echo 01 > ca.srl
    openssl genrsa -aes256 -out ca-key.pem -passout pass:$password 2048
    openssl req -subj '/C=CN/ST=zhejiang/L=hangzhou/O=Huawei/OU=iSulad/CN=iSulad@huawei.com' -new -x509 -days $DAYS -passin pass:$password -key ca-key.pem -out ca.pem

    # create a server key and certificate signing request (CSR)
    openssl genrsa -aes256 -out server-key.pem -passout pass:$password 2048
    openssl req -new -key server-key.pem -out server.csr -passin pass:$password -subj '/CN=iSulad'

    echo subjectAltName = DNS:iSulad,IP:${publicip},IP:127.0.0.1 > extfile.cnf
    echo extendedKeyUsage = serverAuth >> extfile.cnf
    # sign the server key with our CA
    openssl x509 -req -days $DAYS -passin pass:$password -in server.csr -CA ca.pem -CAkey ca-key.pem -out server-cert.pem -extfile extfile.cnf

    # create a client key and certificate signing request (CSR)
    openssl genrsa -aes256 -out key.pem -passout pass:$password 2048
    openssl req -subj '/CN=client' -new -key key.pem -out client.csr -passin pass:$password

    # create an extensions config file and sign
    echo extendedKeyUsage = clientAuth > extfile.cnf
    openssl x509 -req -days $DAYS -passin pass:$password -in client.csr -CA ca.pem -CAkey ca-key.pem -out cert.pem -extfile extfile.cnf

    # remove the passphrase from the client and server key
    openssl rsa -in server-key.pem -out server-key.pem -passin pass:$password
    openssl rsa -in key.pem -out key.pem -passin pass:$password

    # remove generated files that are no longer required
    rm -f ca-key.pem ca.srl client.csr extfile.cnf server.csr
    ```

#### APIs

```json
{
    "tls": true,
    "tls-verify": true,
    "tls-config": {
        "CAFile": "/root/.iSulad/ca.pem",
        "CertFile": "/root/.iSulad/server-cert.pem",
        "KeyFile": "/root/.iSulad/server-key.pem"
    }
}
```

#### Restrictions

The server supports the following modes:

- Mode 1 (client verified): tlsverify, tlscacert, tlscert, tlskey
- Mode 2 (client not verified): tls, tlscert, tlskey

The client supports the following modes:

- Mode 1 (verify the identity based on the client certificate, and verify the server based on the specified CA): tlsverify, tlscacert, tlscert, tlskey
- Mode 2 (server verified): tlsverify, tlscacert

If the two-way authentication mode is used for communication, the server adopts mode 1 and the client adopts mode 1.

If the unidirectional authentication mode is used for communication, the server adopts mode 2 and the client adopts mode 2.

> [!TIP]NOTICE
>
> - If RPM is used for installation, the server configuration can be modified in the **/etc/isulad/daemon.json** and **/etc/sysconfig/iSulad** files.
> - Two-way authentication is recommended as it is more secure than non-authentication or unidirectional authentication.
> - gRPC open-source component logs are not taken over by iSulad. To view gRPC logs, set the environment variables **gRPC_VERBOSITY** and **gRPC_TRACE** as required.
#### Example

On the server:

```shell
isulad -H=tcp://0.0.0.0:2376 --tlsverify --tlscacert ~/.iSulad/ca.pem --tlscert ~/.iSulad/server-cert.pem --tlskey ~/.iSulad/server-key.pem
```

On the client:

```shell
isula version -H=tcp://$HOSTIP:2376 --tlsverify --tlscacert ~/.iSulad/ca.pem --tlscert ~/.iSulad/cert.pem --tlskey ~/.iSulad/key.pem
```

### devicemapper Storage Driver Configuration

To use the devicemapper storage driver, you need to configure a thinpool device, which requires an independent block device with sufficient free space. Take the independent block device **/dev/xvdf** as an example. The configuration method is as follows.

#### Configuring a Thinpool

1. Stop the iSulad service.

    ```shell
    # systemctl stop isulad
    ```

2. Create a logical volume manager (LVM) physical volume based on the block device.

    ```shell
    # pvcreate /dev/xvdf
    ```

3. Create a volume group based on the created physical volume.

    ```shell
    # vgcreate isula /dev/xvdf
    Volume group "isula" successfully created
    ```

4. Create two logical volumes named **thinpool** and **thinpoolmeta**.

    ```shell
    # lvcreate --wipesignatures y -n thinpool isula -l 95%VG
    Logical volume "thinpool" created.
    ```

    ```shell
    # lvcreate --wipesignatures y -n thinpoolmeta isula -l 1%VG
    Logical volume "thinpoolmeta" created.
    ```

5. Convert the two logical volumes into a thinpool and the metadata used by the thinpool.

    ```shell
    # lvconvert -y --zero n -c 512K --thinpool isula/thinpool --poolmetadata isula/thinpoolmeta

    WARNING: Converting logical volume isula/thinpool and isula/thinpoolmeta to
    thin pool's data and metadata volumes with metadata wiping.
    THIS WILL DESTROY CONTENT OF LOGICAL VOLUME (filesystem etc.)
    Converted isula/thinpool to thin pool.
    ```

#### Modifying the iSulad Configuration Files

1. If iSulad has been used in the environment, back up the running data first.

    ```shell
    # mkdir /var/lib/isulad.bk
    # mv /var/lib/isulad/* /var/lib/isulad.bk
    ```

2. Modify the configuration files.

    Two configuration methods are provided. Select one based on site requirements.

    - Edit the **/etc/isulad/daemon.json** file, set **storage-driver** to **devicemapper**, and set the parameters related to the **storage-opts** field. For details about related parameters, see [Parameter Description](#en-us_topic_0222861454_section1712923715282). The following is a configuration reference:

        ```json
        {
            "storage-driver": "devicemapper",
            "storage-opts": [
                "dm.thinpooldev=/dev/mapper/isula-thinpool",
                "dm.fs=ext4",
                "dm.min_free_space=10%"
            ]
        }
        ```

    - Edit **/etc/sysconfig/iSulad** to explicitly specify the related iSulad startup parameters. For details about related parameters, see [Parameter Description](#en-us_topic_0222861454_section1712923715282). The following is a configuration reference:

        ```ini
        OPTIONS="--storage-driver=devicemapper --storage-opt dm.thinpooldev=/dev/mapper/isula-thinpool --storage-opt dm.fs=ext4 --storage-opt dm.min_free_space=10%"
        ```

3. Start iSulad for the settings to take effect.

    ```shell
    # systemctl start isulad
    ```

#### Parameter Description

For details about the parameters supported by storage-opts, see [Table 1](#en-us_topic_0222861454_table3191161993812).

**Table 1** Parameter description

| Parameter | Mandatory or Not | Description |
| --- | --- | --- |
| dm.fs | Yes | Specifies the type of the file system used by a container. This parameter must be set to ext4, that is, dm.fs=ext4. |
| dm.basesize | No | Specifies the maximum storage space of a single container. The unit can be k, m, g, t, or p. An uppercase letter can also be used, for example, dm.basesize=50G. This parameter is valid only during the first initialization. |
| dm.mkfsarg | No | Specifies additional mkfs parameters when a basic device is created. For example: dm.mkfsarg=-O ^has_journal |
| dm.mountopt | No | Specifies additional mount parameters when a container is mounted. For example: dm.mountopt=nodiscard |
| dm.thinpooldev | No | Specifies the thinpool device used for container or image storage. |
| dm.min_free_space | No | Specifies the minimum percentage of reserved space. For example, dm.min_free_space=10% indicates that storage-related operations such as container creation will fail when the remaining storage space falls below 10%. |
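
As a reference, a **daemon.json** fragment combining several of these options might look as follows. This is a sketch only; values such as the 50 GB basesize and the nodiscard mount option are illustrative and must be adapted to the site:

```json
{
    "storage-driver": "devicemapper",
    "storage-opts": [
        "dm.thinpooldev=/dev/mapper/isula-thinpool",
        "dm.fs=ext4",
        "dm.basesize=50G",
        "dm.mountopt=nodiscard",
        "dm.min_free_space=10%"
    ]
}
```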
+ +#### Precautions + +- When configuring devicemapper, if the system does not have sufficient space for automatic capacity expansion of thinpool, disable the automatic capacity expansion function. + + To disable automatic capacity expansion, set both **thin\_pool\_autoextend\_threshold** and **thin\_pool\_autoextend\_percent** in the **/etc/lvm/profile/isula-thinpool.profile** file to **100**. + + ```text + activation { + thin_pool_autoextend_threshold=100 + thin_pool_autoextend_percent=100 + } + ``` + +- When devicemapper is used, use Ext4 as the container file system. You need to add **--storage-opt dm.fs=ext4** to the iSulad configuration parameters. +- If graphdriver is devicemapper and the metadata files are damaged and cannot be restored, you need to manually restore the metadata files. Do not directly operate or tamper with metadata of the devicemapper storage driver in Docker daemon. +- When the devicemapper LVM is used, if the devicemapper thinpool is damaged due to abnormal power-off, you cannot ensure the data integrity or whether the damaged thinpool can be restored. Therefore, you need to rebuild the thinpool. + +**Precautions for Switching the devicemapper Storage Pool When the User Namespace Feature Is Enabled on iSula** + +- Generally, the path of the deviceset-metadata file is **/var/lib/isulad/devicemapper/metadata/deviceset-metadata** during container startup. +- If user namespaces are used, the path of the deviceset-metadata file is **/var/lib/isulad/**_userNSUID.GID_**/devicemapper/metadata/deviceset-metadata**. +- When you use the devicemapper storage driver and the container is switched between the user namespace scenario and common scenario, the **BaseDeviceUUID** content in the corresponding deviceset-metadata file needs to be cleared. In the thinpool capacity expansion or rebuild scenario, you also need to clear the **BaseDeviceUUID** content in the deviceset-metadata file. Otherwise, the iSulad service fails to be restarted. diff --git a/docs/en/docs/container_engine/isula_container_engine/installation_upgrade_uninstallation.md b/docs/en/docs/container_engine/isula_container_engine/installation_upgrade_uninstallation.md new file mode 100644 index 0000000000000000000000000000000000000000..2f38bababddaf2976a50b29a6deeb91663df4f2e --- /dev/null +++ b/docs/en/docs/container_engine/isula_container_engine/installation_upgrade_uninstallation.md @@ -0,0 +1,3 @@ +# Installation, Upgrade and Uninstallation + +This chapter describes how to install, configure, upgrade, and uninstall iSulad. diff --git a/docs/en/docs/container_engine/isula_container_engine/interconnecting_isula_shim_v2_with_stratovirt.md b/docs/en/docs/container_engine/isula_container_engine/interconnecting_isula_shim_v2_with_stratovirt.md new file mode 100644 index 0000000000000000000000000000000000000000..d27192d848e9ab8751dd3c2f15eae55f5c6e4b4a --- /dev/null +++ b/docs/en/docs/container_engine/isula_container_engine/interconnecting_isula_shim_v2_with_stratovirt.md @@ -0,0 +1,219 @@ +# Interconnecting iSula with the shim v2 Secure Container + +## Overview + +shim v2 is a next-generation shim solution. Compared with shim v1, shim v2 features shorter call chains, clearer architecture, and lower memory overhead in multi-service container scenarios. iSula can run secure containers through isulad-shim or containerd-shim-kata-v2. 
The isulad-shim component is the implementation of the shim v1 solution, and the containerd-shim-kata-v2 component is the implementation of the shim v2 solution in the secure container scenario. This document describes how to interconnect iSula with containerd-shim-kata-v2.

## Interconnecting with containerd-shim-kata-v2

### Prerequisites

Before interconnecting iSula with containerd-shim-kata-v2, ensure that the following prerequisites are met:

- iSulad, lib-shim-v2, and kata-containers have been installed.
- StratoVirt supports only the devicemapper storage driver. Therefore, you need to configure the devicemapper environment and ensure that the devicemapper storage driver used by iSulad works properly.

### Environment Setup

The following describes how to install and configure iSulad and kata-containers.

#### Installing Dependencies

Configure the YUM source based on the OS version and install iSulad, lib-shim-v2, and kata-containers as the **root** user.

```shell
# yum install iSulad
# yum install kata-containers
# yum install lib-shim-v2
```

#### Creating and Configuring a Storage Device

Prepare a drive, for example, **/dev/sdx**. The drive will be formatted. This section uses the block device **/dev/sda** as an example.

I. Creating devicemapper

1. Create a physical volume (PV).

    ```shell
    $ pvcreate /dev/sda
    Physical volume "/dev/sda" successfully created.
    ```

2. Create a volume group (VG).

    ```shell
    $ vgcreate isula /dev/sda
    Volume group "isula" successfully created
    ```

3. Create the logical volumes **thinpool** and **thinpoolmeta**.

    ```shell
    $ lvcreate --wipesignatures y -n thinpool isula -l 95%VG
    Logical volume "thinpool" created.

    $ lvcreate --wipesignatures y -n thinpoolmeta isula -l 1%VG
    Logical volume "thinpoolmeta" created.
    ```

4. Convert the created logical volumes to a thin pool.

    ```shell
    $ lvconvert -y --zero n -c 64K \
        --thinpool isula/thinpool \
        --poolmetadata isula/thinpoolmeta
    WARNING: Converting isula/thinpool and isula/thinpoolmeta to thin pool's data and metadata volumes with metadata wiping.
    THIS WILL DESTROY CONTENT OF LOGICAL VOLUME (filesystem etc.)
    Converted isula/thinpool and isula/thinpoolmeta to thin pool.
    ```

5. Configure automatic extension of the thin pool using LVM.

    ```shell
    $ touch /etc/lvm/profile/isula-thinpool.profile
    $ cat << EOF > /etc/lvm/profile/isula-thinpool.profile
    activation {
        thin_pool_autoextend_threshold=80
        thin_pool_autoextend_percent=20
    }
    EOF
    $ lvchange --metadataprofile isula-thinpool isula/thinpool
    Logical volume isula/thinpool changed.
    ```

II. Changing the iSulad Storage Driver Type and Setting the Default Runtime

Modify the **/etc/isulad/daemon.json** configuration file. Set **default-runtime** to **io.containerd.kata.v2** and **storage-driver** to **devicemapper**. The modification result is as follows:

```json
{
    "default-runtime": "io.containerd.kata.v2",
    "storage-driver": "devicemapper",
    "storage-opts": [
        "dm.thinpooldev=/dev/mapper/isula-thinpool",
        "dm.fs=ext4",
        "dm.min_free_space=10%"
    ]
}
```

III. Making the Configuration Take Effect

1. Restart iSulad for the configuration to take effect.

    ```shell
    # systemctl daemon-reload
    # systemctl restart isulad
    ```

2. Check whether the iSula storage driver is successfully configured.

    ```shell
    # isula info
    ```

    If the following information is displayed, the configuration is successful:

    ```text
    Storage Driver: devicemapper
    ```

### Interconnection Guide

This section describes how to interconnect iSula with containerd-shim-kata-v2.

By default, containerd-shim-kata-v2 uses QEMU as the virtualization component. The following describes how to configure QEMU and StratoVirt.

#### Using QEMU

If containerd-shim-kata-v2 uses QEMU as the virtualization component, perform the following operations to interconnect iSula with containerd-shim-kata-v2:

1. Modify the kata configuration file **/usr/share/defaults/kata-containers/configuration.toml**.

    Set **sandbox_cgroup_with_emulator** to **false**. Currently, shim v2 does not support this function. Other parameters are the same as the kata configuration parameters in shim v1 or use the default values.

    ```toml
    sandbox_cgroup_with_emulator = false
    ```

2. Use the BusyBox image to run a secure container and check whether the runtime in use is io.containerd.kata.v2.

    ```bash
    $ id=`isula run -tid busybox /bin/sh`
    $ isula inspect -f '{{ json .HostConfig.Runtime }}' $id
    "io.containerd.kata.v2"
    ```

3. Verify that the QEMU-based VM process is started. If it is started, QEMU is successfully interconnected with the shim v2 secure container.

    ```bash
    ps -ef | grep qemu
    ```

#### Using StratoVirt

If containerd-shim-kata-v2 uses StratoVirt as the virtualization component, perform the following operations to interconnect iSula with containerd-shim-kata-v2:

1. Create the **stratovirt.sh** script in any directory (for example, **/home**) and add the execute permission to the file as the **root** user.

    ```shell
    # touch /home/stratovirt.sh
    # chmod +x /home/stratovirt.sh
    ```

    The content of **stratovirt.sh** is as follows; it specifies the path of StratoVirt:

    ```shell
    #!/bin/bash
    export STRATOVIRT_LOG_LEVEL=info # set log level: trace, debug, info, warn, or error
    /usr/bin/stratovirt $@
    ```

2. Modify the kata configuration file. Set **hypervisor** of the secure container to **stratovirt**, **kernel** to the absolute path of the StratoVirt kernel image, and **initrd** to the initrd image file of kata-containers (if you use YUM to install kata-containers, the initrd image file is downloaded by default and stored in the **/var/lib/kata/** directory). Because StratoVirt supports only the devicemapper storage mode, prepare the environment in advance and set iSulad to the devicemapper mode.

    The configurations are as follows:

    ```toml
    [hypervisor.stratovirt]
    path = "/home/stratovirt.sh"
    kernel = "/var/lib/kata/vmlinux.bin"
    initrd = "/var/lib/kata/kata-containers-initrd.img"
    block_device_driver = "virtio-mmio"
    use_vsock = true
    enable_netmon = true
    internetworking_model="tcfilter"
    sandbox_cgroup_with_emulator = false
    disable_new_netns = false
    disable_block_device_use = false
    disable_vhost_net = true
    ```

    To use the vsock function in StratoVirt, enable the vhost_vsock kernel module and check whether the module is successfully loaded.

    ```bash
    modprobe vhost_vsock
    lsmod | grep vhost_vsock
    ```

    Download the kernel of the required version and architecture and save it to the **/var/lib/kata/** directory. For example, download the x86_64 kernel of openEuler 21.03 from the [openEuler repo](https://repo.openeuler.org/).
+ + ```bash + cd /var/lib/kata + wget https://archives.openeuler.openatom.cn/openEuler-21.03/stratovirt_img/x86_64/vmlinux.bin + ``` + +3. Use the BusyBox image to run the secure container and check whether the used runtime is io.containerd.kata.v2. + + ```bash + $ id=`isula run -tid busybox sh` + $ isula inspect -f '{{ json .HostConfig.Runtime }}' $id + "io.containerd.kata.v2" + ``` + +4. Verify that the StratoVirt-based VM process is started. If it is started, StratoVirt is successfully interconnected with the shim v2 secure container. + + ```bash + ps -ef | grep stratovirt + ``` diff --git a/docs/en/docs/container_engine/isula_container_engine/interconnection_with_the_cni_network.md b/docs/en/docs/container_engine/isula_container_engine/interconnection_with_the_cni_network.md new file mode 100644 index 0000000000000000000000000000000000000000..765293d5ba84e1c9ad3364f0f5c3003944e12d88 --- /dev/null +++ b/docs/en/docs/container_engine/isula_container_engine/interconnection_with_the_cni_network.md @@ -0,0 +1,115 @@ +# Interconnection with the CNI Network + +## Overview + +The container runtime interface \(CRI\) is provided to connect to the CNI network, including parsing the CNI network configuration file and adding or removing a pod to or from the CNI network. When a pod needs to support a network through a container network plug-in such as Canal, the CRI needs to be interconnected to Canal so as to provide the network capability for the pod. + +## Common CNIs + +Common CNIs include CNI network configuration items in the CNI network configuration and pod configuration. These CNIs are visible to users. + +- CNI network configuration items in the CNI network configuration refer to those used to specify the path of the CNI network configuration file, path of the binary file of the CNI network plug-in, and network mode. For details, see [Table 1](#en-us_topic_0183259146_table18221919589). +- CNI network configuration items in the pod configuration refer to those used to set the additional CNI network list to which the pod is added. By default, the pod is added only to the default CNI network plane. You can add the pod to multiple CNI network planes as required. + +**Table 1** CNI network configuration items + + + + + + + + + + + + + + + + + + + + + + + + +

| Function | Command | Configuration File | Description |
| --- | --- | --- | --- |
| Path of the binary file of the CNI network plug-in | --cni-bin-dir | "cni-bin-dir": "", | The default value is /opt/cni/bin. |
| Path of the CNI network configuration file | --cni-conf-dir | "cni-conf-dir": "", | The system traverses all files with the extension .conf, .conflist, or .json in the directory. The default value is /etc/cni/net.d. |
| Network mode | --network-plugin | "network-plugin": "", | Specifies a network plug-in. The value is an empty string by default, indicating that no network configuration is available and the created sandbox has only the loopback NIC. cni and the empty string are supported; other invalid values cause iSulad startup failure. |
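
Taken together, a **daemon.json** fragment that enables the CNI network plug-in with the default paths named above might look like this (a sketch; merge it into your existing configuration rather than replacing it):

```json
{
    "network-plugin": "cni",
    "cni-bin-dir": "/opt/cni/bin",
    "cni-conf-dir": "/etc/cni/net.d"
}
```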
Additional CNI network configuration mode:

Add the network plane configuration item "network.alpha.kubernetes.io/network" to annotations in the pod configuration file.

The network plane is configured in JSON format, including:

- **name**: specifies the name of the CNI network plane.
- **interface**: specifies the name of a network interface.

The following is an example of the CNI network configuration method:

```json
"annotations" : {
    "network.alpha.kubernetes.io/network": "{\"name\": \"mynet\", \"interface\": \"eth1\"}"
}
```

### CNI Network Configuration Description

The CNI network configuration includes two types, both of which are in the .json file format.

- Single-network plane configuration file with the file name extension .conf or .json. For details about the configuration items, see [Table 1](#cni-parameters.md#en-us_topic_0184347952_table425023335913) in the appendix.
- Multi-network plane configuration file with the file name extension .conflist. For details about the configuration items, see [Table 3](#cni-parameters.md#en-us_topic_0184347952_table657910563105) in the appendix.

### Adding a Pod to the CNI Network List

If **--network-plugin=cni** is configured for iSulad and the default network plane is configured, a pod is automatically added to the default network plane when the pod is started. If additional network configuration is present in the pod configuration, the pod is also added to these additional network planes when it is started.

**port_mappings** in the pod configuration is also a network configuration item, which is used to set the port mapping of the pod. It is configured as follows:

```json
"port_mappings":[
    {
        "protocol": 1,
        "container_port": 80,
        "host_port": 8080
    }
]
```

- **protocol**: protocol used for mapping. The value can be **tcp** (identified by 0) or **udp** (identified by 1).
- **container_port**: port through which the container is mapped.
- **host_port**: port mapped to the host.

### Removing a Pod from the CNI Network List

When StopPodSandbox is called, the interface for removing a pod from the CNI network list will be called to clear network resources.

> [!NOTE]NOTE
>
> 1. Before calling the RemovePodSandbox interface, you must call the StopPodSandbox interface at least once.
> 2. If StopPodSandbox fails to call the CNI, residual network resources may exist.

## Usage Restrictions

- Currently, only CNI 0.3.0 and CNI 0.3.1 are supported. In later versions, CNI 0.1.0 and CNI 0.2.0 may need to be supported. Therefore, when error logs are displayed, the information about CNI 0.1.0 and CNI 0.2.0 is reserved.
- name: The value must contain lowercase letters, digits, hyphens (-), and periods (.) and cannot start or end with a hyphen or period. The value can contain a maximum of 200 characters.
- The number of configuration files cannot exceed 200, and the size of a single configuration file cannot exceed 1 MB.
- Extended parameters need to be configured based on the actual network requirements. Optional parameters do not need to be written into the netconf.json file.
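
For illustration, a minimal single-network plane configuration file (for example, **/etc/cni/net.d/mynet.conf**) using the standard bridge plug-in might look as follows. This is a sketch: the bridge and host-local plug-ins and the IPAM subnet are assumptions and must match the binaries actually present in the cni-bin-dir:

```json
{
    "cniVersion": "0.3.1",
    "name": "mynet",
    "type": "bridge",
    "bridge": "cni0",
    "isGateway": true,
    "ipam": {
        "type": "host-local",
        "subnet": "10.244.0.0/16"
    }
}
```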
diff --git a/docs/en/docs/container_engine/isula_container_engine/isulad_support_cdi.md b/docs/en/docs/container_engine/isula_container_engine/isulad_support_cdi.md new file mode 100644 index 0000000000000000000000000000000000000000..ea68290a0ff7df3675bcfdf1f4972dc53cae3447 --- /dev/null +++ b/docs/en/docs/container_engine/isula_container_engine/isulad_support_cdi.md @@ -0,0 +1,120 @@ +# iSulad Support for CDI + +## Overview + +Container Device Interface (CDI) is a container runtime specification used to support third-party devices. + +CDI solves the following problems: +In Linux, only one device node needed to be exposed in a container in the past to enable device awareness of the container. However, as devices and software become more complex, vendors want to perform more operations, such as: + +- Exposing multiple device nodes to a container, mounting files from a runtime namespace to a container, or hiding procfs entries. +- Checking the compatibility between containers and devices. For example, checking whether a container can run on a specified device. +- Performing runtime-specific operations, such as virtual machines and Linux container-based runtimes. +- Performing device-specific operations, such as GPU memory cleanup and FPGA re-programming. + +In the absence of third-party device standards, vendors often have to write and maintain multiple plugins for different runtimes, or even contribute vendor-specific code directly in a runtime. In addition, the runtime does not expose the plugin system in a unified manner (or even not at all), resulting in duplication of functionality in higher-level abstractions (such as Kubernetes device plugins). + +To solve the preceding problem, CDI provides the following features: +CDI describes a mechanism that allows third-party vendors to interact with devices without modifying the container runtime. + +The mechanism is exposed as a JSON file (similar to the container network interface CNI), which allows vendors to describe the operations that the container runtime should perform on the OCI-based container. + +Currently, iSulad supports the [CDI v0.6.0](https://github.com/cncf-tags/container-device-interface/blob/v0.6.0/SPEC.md) specification. + +## Configuring iSulad to Support CDI + +Modify the **daemon.json** file as follows and restart iSulad: + +```json +{ + ... + "enable-cri-v1": true, + "cdi-spec-dirs": ["/etc/cdi", "/var/run/cdi"], + "enable-cdi": true +} +``` + +**cdi-spec-dirs** specifies the directory where CDI specifications are stored. If this parameter is not specified, the default value **/etc/cdi** or **/var/run/cdi** is used. + +## Examples + +### CDI Specification Example + +For details about each field, see [CDI v0.6.0](https://github.com/cncf-tags/container-device-interface/blob/v0.6.0/SPEC.md). + +```bash +$ mkdir /etc/cdi +$ cat > /etc/cdi/vendor.json < + +![](./figures/en-us_image_0183048952.png) diff --git a/docs/en/docs/container_engine/isula_container_engine/privileged_container.md b/docs/en/docs/container_engine/isula_container_engine/privileged_container.md new file mode 100644 index 0000000000000000000000000000000000000000..f682212b7a21885e1e3102d00629cf7c2167bb9b --- /dev/null +++ b/docs/en/docs/container_engine/isula_container_engine/privileged_container.md @@ -0,0 +1,230 @@ +# Privileged Container + +## Scenarios + +By default, iSulad starts common containers that are suitable for starting common processes. 
However, common containers have only the default permissions defined by capabilities in the **/etc/default/isulad/config.json** file. To perform privileged operations (such as using devices in the **/sys** directory), a privileged container is required. With this feature, user **root** in the container has the **root** permissions of the host; otherwise, user **root** in the container has only common user permissions of the host.

## Usage Restrictions

Privileged containers provide all capabilities to containers and remove all restrictions enforced by the device cgroup controller. A privileged container has the following features:

- Seccomp does not block any system call.
- The **/sys** and **/proc** directories are writable.
- All devices on the host can be accessed in the container.
- All system capabilities are enabled.

Default capabilities of a common container are as follows:

| Capability Key | Description |
| --- | --- |
| SETPCAP | Modifies the process capabilities. |
| MKNOD | Allows using the system call mknod() to create special files. |
| AUDIT_WRITE | Writes records to kernel auditing logs. |
| CHOWN | Modifies UIDs and GIDs of files. For details, see chown(2). |
| NET_RAW | Uses RAW and PACKET sockets and binds any IP address to the transparent proxy. |
| DAC_OVERRIDE | Ignores the discretionary access control (DAC) restrictions on files. |
| FOWNER | Ignores the restriction that the file owner ID must be the same as the process user ID. |
| FSETID | Allows setting the setuid bits of files. |
| KILL | Allows sending signals to processes that do not belong to itself. |
| SETGID | Allows the change of the process group ID. |
| SETUID | Allows the change of the process user ID. |
| NET_BIND_SERVICE | Allows binding to a port whose number is smaller than 1024. |
| SYS_CHROOT | Allows using the system call chroot(). |
| SETFCAP | Allows transferring and deleting capabilities to other processes. |
When a privileged container is enabled, the following capabilities are added:

| Capability Key | Description |
| --- | --- |
| SYS_MODULE | Loads and unloads kernel modules. |
| SYS_RAWIO | Allows direct access to /dev/port, /dev/mem, /dev/kmem, and raw block devices. |
| SYS_PACCT | Allows the process BSD audit. |
| SYS_ADMIN | Allows executing system management tasks, such as loading or unloading file systems and setting disk quotas. |
| SYS_NICE | Allows increasing the priority and setting the priorities of other processes. |
| SYS_RESOURCE | Ignores resource restrictions. |
| SYS_TIME | Allows changing the system clock. |
| SYS_TTY_CONFIG | Allows configuring TTY devices. |
| AUDIT_CONTROL | Enables and disables kernel auditing, modifies audit filter rules, and retrieves audit status and filtering rules. |
| MAC_ADMIN | Overrides the mandatory access control (MAC), which is implemented for the Smack Linux Security Module (LSM). |
| MAC_OVERRIDE | Allows MAC configuration or state changes, which is implemented for Smack LSM. |
| NET_ADMIN | Allows executing network management tasks. |
| SYSLOG | Performs privileged syslog(2) operations. |
| DAC_READ_SEARCH | Ignores the DAC access restrictions on file reading and directory search. |
| LINUX_IMMUTABLE | Allows modifying the IMMUTABLE and APPEND attributes of a file. |
| NET_BROADCAST | Allows network broadcast and multicast access. |
| IPC_LOCK | Allows locking shared memory segments. |
| IPC_OWNER | Ignores the IPC ownership check. |
| SYS_PTRACE | Allows tracing any process. |
| SYS_BOOT | Allows restarting the OS. |
| LEASE | Allows modifying the FL_LEASE flag of a file lock. |
| WAKE_ALARM | Triggers the system wakeup function, for example, sets the CLOCK_REALTIME_ALARM and CLOCK_BOOTTIME_ALARM timers. |
| BLOCK_SUSPEND | Allows blocking system suspension. |
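
One way to observe the effect of these additional capabilities is to compare the effective capability mask of the init process in a common container and in a privileged container. This is a sketch; the busybox image name follows the examples in this document:

```shell
# Effective capabilities of a common container (limited default set)
isula run --rm busybox grep CapEff /proc/1/status

# Effective capabilities of a privileged container (all capabilities enabled)
isula run --rm --privileged busybox grep CapEff /proc/1/status
```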
+ +## Usage Guide + +iSulad runs the **--privileged** command to enable the privilege mode for containers. Do not add privileges to containers unless necessary. Comply with the principle of least privilege to reduce security risks. + +```shell +isula run --rm -it --privileged busybox +``` diff --git a/docs/en/docs/container_engine/isula_container_engine/query_information.md b/docs/en/docs/container_engine/isula_container_engine/query_information.md new file mode 100644 index 0000000000000000000000000000000000000000..0467a52c6196f9dd25112a8b3a23ca7fde7f6fc4 --- /dev/null +++ b/docs/en/docs/container_engine/isula_container_engine/query_information.md @@ -0,0 +1,89 @@ +# Querying Information + +## Querying the Service Version + +### Description + +The **isula version** command is run to query the version of the iSulad service. + +### Usage + +```shell +isula version +``` + +### Example + +Query the version information. + +```shell +isula version +``` + +If the iSulad service is running properly, you can view the information about versions of the client, server, and **OCI config**. + +```text +Client: + Version: 1.0.31 + Git commit: fa7f9902738e8b3d7f2eb22768b9a1372ddd1199 + Built: 2019-07-30T04:21:48.521198248-04:00 + +Server: + Version: 1.0.31 + Git commit: fa7f9902738e8b3d7f2eb22768b9a1372ddd1199 + Built: 2019-07-30T04:21:48.521198248-04:00 + +OCI config: + Version: 1.0.0-rc5-dev + Default file: /etc/default/isulad/config.json +``` + +If the iSulad service is not running, only the client information is queried and a message is displayed indicating that the connection times out. + +```text +Client: + Version: 1.0.31 + Git commit: fa7f9902738e8b3d7f2eb22768b9a1372ddd1199 + Built: 2019-07-30T04:21:48.521198248-04:00 + +Can not connect with server.Is the iSulad daemon running on the host? +``` + +Therefore, the **isula version** command is often used to check whether the iSulad service is running properly. + +## Querying System-level Information + +### Description + +The **isula info** command is run to query the system-level information, number of containers, and number of images. + +### Usage + +```shell +isula info +``` + +### Example + +Query system-level information, including the number of containers, number of images, kernel version, and operating system \(OS\). + +```shell +$ isula info +Containers: 2 + Running: 0 + Paused: 0 + Stopped: 2 +Images: 8 +Server Version: 1.0.31 +Logging Driver: json-file +Cgroup Driverr: cgroupfs +Hugetlb Pagesize: 2MB +Kernel Version: 4.19 +Operating System: Fedora 29 (Twenty Nine) +OSType: Linux +Architecture: x86_64 +CPUs: 8 +Total Memory: 7 GB +Name: localhost.localdomain +iSulad Root Dir: /var/lib/isulad +``` diff --git a/docs/en/docs/container_engine/isula_container_engine/security_features.md b/docs/en/docs/container_engine/isula_container_engine/security_features.md new file mode 100644 index 0000000000000000000000000000000000000000..75f953a6c49b6560c6bbe657466f8e0088549a1b --- /dev/null +++ b/docs/en/docs/container_engine/isula_container_engine/security_features.md @@ -0,0 +1,238 @@ +# Security Features + +## Seccomp Security Configuration + +### Scenarios + +Secure computing mode \(seccomp\) is a simple sandboxing mechanism introduced to the Linux kernel from version 2.6.23. In some specific scenarios, you may want to perform some privileged operations in a container without starting the privileged container. You can add **--cap-add** at runtime to obtain some small-scope permissions. 
For container instances with strict security requirements, the capability granularity may not meet the requirements, and finer-grained methods can be used to control the permission scope.

- Example

    In a common container scenario, you can use the **-v** flag to map a directory (including a binary file that cannot be executed by common users) on the host to the container.

    In the container, you can run **chmod 4777** on the binary file to set its setuid (S) bit. In this way, on the host, common users who cannot run the binary file (or whose running permission is restricted) can obtain the permissions of the binary file (such as the **root** permission) when running it, so as to escalate privileges or access other files.

    In this scenario, if strict security requirements must be met, the chmod, fchmod, and fchmodat system calls need to be tailored using seccomp.

### Usage Restrictions

- Seccomp may affect performance. Before setting seccomp, evaluate the scenario and add the configuration only if necessary.

### Usage Guide

Use **--security-opt** to pass the configuration file to the container where system calls need to be filtered.

```shell
isula run -itd --security-opt seccomp=/path/to/seccomp/profile.json rnd-dockerhub.huawei.com/official/busybox
```

> [!NOTE]NOTE
>
> 1. When the configuration file is not transferred to the container by using **--security-opt** during container creation, the default configuration file (**/etc/isulad/seccomp_default.json**) is used.
> 2. When **--security-opt** is set to **unconfined** during container creation, system calls are not filtered for the container.
> 3. **/path/to/seccomp/profile.json** must be an absolute path.

#### Obtaining the Default Seccomp Configuration of a Common Container

- Start a common container (or a container with **--cap-add**) and check its default permission configuration.

    ```shell
    cat /etc/isulad/seccomp_default.json | python -m json.tool > profile.json
    ```

    The **seccomp** field contains many **syscalls** fields. Extract only the **syscalls** fields and perform the customization by referring to the customization of the seccomp configuration file.

    ```json
    "defaultAction": "SCMP_ACT_ERRNO",
    "syscalls": [
        {
            "action": "SCMP_ACT_ALLOW",
            "name": "accept"
        },
        {
            "action": "SCMP_ACT_ALLOW",
            "name": "accept4"
        },
        {
            "action": "SCMP_ACT_ALLOW",
            "name": "access"
        },
        {
            "action": "SCMP_ACT_ALLOW",
            "name": "alarm"
        },
        {
            "action": "SCMP_ACT_ALLOW",
            "name": "bind"
        },
    ]...
    ```

- Check the seccomp configuration that can be identified by the LXC.

    ```shell
    cat /var/lib/isulad/engines/lcr/74353e38021c29314188e29ba8c1830a4677ffe5c4decda77a1e0853ec8197cd/seccomp
    ```

    ```text
    ...
    waitpid allow
    write allow
    writev allow
    ptrace allow
    personality allow [0,0,SCMP_CMP_EQ,0]
    personality allow [0,8,SCMP_CMP_EQ,0]
    personality allow [0,131072,SCMP_CMP_EQ,0]
    personality allow [0,131080,SCMP_CMP_EQ,0]
    personality allow [0,4294967295,SCMP_CMP_EQ,0]
    ...
    ```

#### Customizing the Seccomp Configuration File

When starting a container, use **--security-opt** to introduce the seccomp configuration file. Container instances will restrict the running of system APIs based on the configuration file.
Obtain the default seccomp configuration of common containers, obtain the complete template, and customize the configuration file by referring to this section to start the container. + +```shell +isula run --rm -it --security-opt seccomp:/path/to/seccomp/profile.json rnd-dockerhub.huawei.com/official/busybox +``` + +The configuration file template is as follows: + +```json +{ +"defaultAction": "SCMP_ACT_ALLOW", +"syscalls": [ +{ +"name": "syscall-name", +"action": "SCMP_ACT_ERRNO", +"args": null +} +] +} +``` + +>[!TIP]NOTICE +> +> - **defaultAction** and **syscalls**: The types of their corresponding actions are the same, but their values must be different. The purpose is to ensure that each syscall has a default action. Clear definitions in the syscall array shall prevail. As long as the values of **defaultAction** and **action** are different, no action conflicts will occur. The following actions are supported: +> **SCMP\_ACT\_ERRNO**: forbids calling syscalls and displays error information. +> **SCMP\_ACT\_ALLOW**: allows calling syscalls. +> - **syscalls**: array, which can contain one or more syscalls. **args** is optional. +> - **name**: syscalls to be filtered. +> - **args**: array. The definition of each object in the array is as follows: +> +> ```go +> type Arg struct { +> Index uint `json:"index"` // Parameter ID. Take open(fd, buf, len) as an example. The fd corresponds to 0 and buf corresponds to 1. +> Value uint64 `json:"value"` // Value to be compared with the parameter. +> ValueTwo uint64 `json:"value_two"` // It is valid only when Op is set to MaskEqualTo. After the bitwise AND operation is performed on the user-defined value and the value of Value, the result is compared with the value of ValueTwo. If they are the same, the action is executed. +> Op Operator `json:"op"` +> } +> ``` +> +> The value of **Op** in **args** can be any of the following: +> "SCMP\_CMP\_NE": NotEqualTo +> "SCMP\_CMP\_LT": LessThan +> "SCMP\_CMP\_LE": LessThanOrEqualTo +> "SCMP\_CMP\_EQ": EqualTo +> "SCMP\_CMP\_GE": GreaterThanOrEqualTo +> "SCMP\_CMP\_GT": GreaterThan +> "SCMP\_CMP\_MASKED\_EQ": MaskEqualTo + +## capabilities Security Configuration + +### Scenarios + +The capability mechanism is a security feature introduced to Linux kernel after version 2.2. The super administrator permission is controlled at a smaller granularity to prevent the root permission from being used. The root permission is divided based on different domains so that the divided permissions can be enabled or disabled separately. For details about capabilities, see the _Linux Programmer's Manual_ \([capabilities\(7\) - Linux man page](http://man7.org/linux/man-pages/man7/capabilities.7.html)\). + +```shell +man capabilities +``` + +### Usage Restrictions + +- The default capability list \(allowlist\) of the iSulad service, which is carried by common container processes by default, are as follows: + + ```text + "CAP_CHOWN", + "CAP_DAC_OVERRIDE", + "CAP_FSETID", + "CAP_FOWNER", + "CAP_MKNOD", + "CAP_NET_RAW", + "CAP_SETGID", + "CAP_SETUID", + "CAP_SETFCAP", + "CAP_SETPCAP", + "CAP_NET_BIND_SERVICE", + "CAP_SYS_CHROOT", + "CAP_KILL", + "CAP_AUDIT_WRITE" + ``` + +- Default configurations of capabilities include **CAP\_SETUID** and **CAP\_FSETID**. If the host and a container share a directory, the container can set permissions for the binary file in the shared directory. Common users on the host can use this feature to elevate privileges. The container can write **CAP\_AUDIT\_WRITE** to the host, which may cause risks. 
  If the application scenario does not require such a capability, you are advised to use **--cap-drop** to remove it when starting the container.
- Adding capabilities means the container process gains more privileges than before, and more system call interfaces are opened up accordingly.

### Usage Guide

iSulad uses **--cap-add** or **--cap-drop** to add or remove specific capabilities for a container. Do not add extra capabilities unless necessary, and consider removing default capabilities the container does not need.

```shell
isula run --rm -it --cap-add all --cap-drop SYS_ADMIN rnd-dockerhub.huawei.com/official/busybox
```

## SELinux Security Configuration

### Scenarios

Security-Enhanced Linux (SELinux) is a Linux kernel security module that provides a mechanism for supporting access control security policies. Through Multi-Category Security (MCS), iSulad labels processes in containers to control containers' access to resources, reducing privilege escalation risks and preventing further damage.

### Usage Restrictions

- Ensure that SELinux is enabled for both the host and the daemon (the **selinux-enabled** field in the **/etc/isulad/daemon.json** file is set to **true**, or **--selinux-enabled** is added to the command line parameters).
- Ensure that a proper SELinux policy has been configured on the host. container-selinux is recommended.
- SELinux affects performance, so evaluate the scenario before using it. Enable the SELinux function for the daemon and set the SELinux configuration in the container only when necessary.
- When you configure labels for a mounted volume, the source directory cannot be a subdirectory of **/**, **/usr**, **/etc**, **/tmp**, **/home**, **/run**, **/var**, or **/root**.

> [!NOTE]NOTE
>
> - iSulad does not support labeling the container file system. To ensure that the container file system and configuration directory are labeled with the container access permission, run the **chcon** command to label them.
> - If SELinux access control is enabled for iSulad, you are advised to add a label to the **/var/lib/isulad** directory before starting the daemon. Files and folders generated in the directory during container creation then inherit the label by default. For example:
>
> ```shell
> chcon -R system_u:object_r:container_file_t:s0 /var/lib/isulad
> ```

### Usage Guide

- Enable SELinux for the daemon.

  ```shell
  isulad --selinux-enabled
  ```

- Configure SELinux security context labels during container startup.

  **--security-opt="label=user:USER"**: sets the label user for the container.

  **--security-opt="label=role:ROLE"**: sets the label role for the container.

  **--security-opt="label=type:TYPE"**: sets the label type for the container.

  **--security-opt="label=level:LEVEL"**: sets the label level for the container.

  **--security-opt="label=disable"**: disables the SELinux configuration for the container.

  ```shell
  $ isula run -itd --security-opt label=type:container_t --security-opt label=level:s0:c1,c2 rnd-dockerhub.huawei.com/official/centos
  9be82878a67e36c826b67f5c7261c881ff926a352f92998b654bc8e1c6eec370
  ```

- Add the SELinux label to a mounted volume (**z** indicates the shared mode).
+ + ```shell + $ isula run -itd -v /test:/test:z rnd-dockerhub.huawei.com/official/centos + 9be82878a67e36c826b67f5c7261c881ff926a352f92998b654bc8e1c6eec370 + + $ls -Z /test + system_u:object_r:container_file_t:s0 file + ``` diff --git a/docs/en/docs/container_engine/isula_container_engine/supporting_oci_hooks.md b/docs/en/docs/container_engine/isula_container_engine/supporting_oci_hooks.md new file mode 100644 index 0000000000000000000000000000000000000000..49857482cd339544ea6a8e61aa015a034a58cea5 --- /dev/null +++ b/docs/en/docs/container_engine/isula_container_engine/supporting_oci_hooks.md @@ -0,0 +1,77 @@ +# Supporting OCI hooks + +## Description + +The running of standard OCI hooks within the lifecycle of a container is supported. There are three types of standard hooks: + +- prestart hook: executed after the **isula start** command is executed and before the init process of the container is started. +- poststart hook: executed after the init process is started and before the **isula start** command is returned. +- poststop hook: executed after the container is stopped and before the stop command is returned. + +The configuration format specifications of OCI hooks are as follows: + +- **path**: \(Mandatory\) The value must be a character string and must be an absolute path. The specified file must have the execute permission. +- **args**: \(Optional\) The value must be a character string array. The syntax is the same as that of **args** in **execv**. +- **env**: \(Optional\) The value must be a character string array. The syntax is the same as that of environment variables. The content is a key-value pair, for example, **PATH=/usr/bin**. +- **timeout**: \(Optional\) The value must be an integer that is greater than 0. It indicates the timeout interval for hook execution. If the running time of the hook process exceeds the configured time, the hook process is killed. + +The hook configuration is in JSON format and usually stored in a file ended with **json**. An example is as follows: + +```json +{ + "prestart": [ + { + "path": "/usr/bin/echo", + "args": ["arg1", "arg2"], + "env": [ "key1=value1"], + "timeout": 30 + }, + { + "path": "/usr/bin/ls", + "args": ["/tmp"] + } + ], + "poststart": [ + { + "path": "/usr/bin/ls", + "args": ["/tmp"], + "timeout": 5 + } + ], + "poststop": [ + { + "path": "/tmp/cleanup.sh", + "args": ["cleanup.sh", "-f"] + } + ] +} +``` + +## APIs + +Both iSulad and iSula provide the hook APIs. The default hook configurations provided by iSulad apply to all containers. The hook APIs provided by iSula apply only to the currently created container. + +The default OCI hook configurations provided by iSulad are as follows: + +- Set the configuration item **hook-spec** in the **/etc/isulad/daemon.json** configuration file to specify the path of the hook configuration file. Example: **"hook-spec": "/etc/default/isulad/hooks/default.json"** +- Use the **isulad --hook-spec** parameter to set the path of the hook configuration file. + +The OCI hook configurations provided by iSula are as follows: + +- **isula create --hook-spec**: specifies the path of the hook configuration file in JSON format. +- **isula run --hook-spec**: specifies the path of the hook configuration file in JSON format. + +The configuration for **run** takes effect in the creation phase. + +## Usage Restrictions + +- The path specified by **hook-spec** must be an absolute path. +- The file specified by **hook-spec** must exist. 
- The path specified by **hook-spec** must point to a regular text file in JSON format.
- The file specified by **hook-spec** cannot exceed 10 MB.
- **path** configured for hooks must be an absolute path.
- The file designated by **path** configured for hooks must exist.
- The file designated by **path** configured for hooks must have the execute permission.
- The owner of the file designated by **path** configured for hooks must be user **root**.
- Only user **root** may have the write permission on the file designated by **path** configured for hooks.
- The value of **timeout** configured for hooks must be greater than **0**.
diff --git a/docs/en/docs/container_engine/isula_container_engine/uninstallation.md b/docs/en/docs/container_engine/isula_container_engine/uninstallation.md
new file mode 100644
index 0000000000000000000000000000000000000000..dfe604c7f451f2c443d095c3a8834851eeab6a31
--- /dev/null
+++ b/docs/en/docs/container_engine/isula_container_engine/uninstallation.md
@@ -0,0 +1,22 @@
# Uninstallation

To uninstall iSulad, perform the following operations:

1. Uninstall iSulad and its dependent software packages.
   - If the **yum** command was used to install iSulad, run the following command to uninstall it:

     ```shell
     sudo yum remove iSulad
     ```

   - If the **rpm** command was used to install iSulad, uninstall iSulad and its dependent software packages by running the following command:

     ```shell
     sudo rpm -e iSulad-xx.xx.xx-YYYYmmdd.HHMMSS.gitxxxxxxxx.aarch64.rpm
     ```

2. Images, containers, volumes, and related configuration files are not deleted automatically. To delete them, run the following command:

   ```shell
   sudo rm -rf /var/lib/iSulad
   ```
diff --git a/docs/en/docs/container_engine/isula_container_engine/upgrade_methods.md b/docs/en/docs/container_engine/isula_container_engine/upgrade_methods.md
new file mode 100644
index 0000000000000000000000000000000000000000..0a5dfda3ad222af401e81f96533369a49d097d3a
--- /dev/null
+++ b/docs/en/docs/container_engine/isula_container_engine/upgrade_methods.md
@@ -0,0 +1,24 @@
# Upgrade Methods

- For an upgrade between patch versions of the same major version, for example, from one 2.x.x release to a later 2.x.x release, run the following command:

  ```shell
  sudo yum update -y iSulad
  ```

- For an upgrade between major versions, for example, from 1.x.x to 2.x.x, save the current configuration file **/etc/isulad/daemon.json**, uninstall the existing iSulad software package, install the new iSulad software package, and then restore the configuration file.

> [!NOTE]NOTE
>
> - You can run the **sudo rpm -qa |grep iSulad** or **isula version** command to check the iSulad version.
> - To manually upgrade between patch versions of a major version, download the RPM packages of iSulad and all its dependent libraries and run the following command:
>
>   ```shell
>   sudo rpm -Uhv iSulad-xx.xx.xx-YYYYmmdd.HHMMSS.gitxxxxxxxx.aarch64.rpm
>   ```
>
>   If the upgrade fails, run the following command to force the upgrade:
>
>   ```shell
>   sudo rpm -Uhv --force iSulad-xx.xx.xx-YYYYmmdd.HHMMSS.gitxxxxxxxx.aarch64.rpm
>   ```
diff --git a/docs/en/docs/container_form/secure_container/_toc.yaml b/docs/en/docs/container_form/secure_container/_toc.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8cc3fe58423b796b43c6bbe4d67cf242fe9af3c6
--- /dev/null
+++ b/docs/en/docs/container_form/secure_container/_toc.yaml
@@ -0,0 +1,19 @@
label: Secure Container
isManual: true
description: Secure containers integrate virtualization and container technologies to provide enhanced isolation.
sections:
  - label: Overview
    href: ./overview.md
  - label: Installation and Deployment
    href: ./installation_and_deployment_2.md
  - label: Application Scenarios
    href: ./application_scenarios_2.md
    sections:
      - label: Managing the Lifecycle of a Secure Container
        href: ./managing_the_lifecycle_of_a_secure_container.md
      - label: Configuring Resources for a Secure Container
        href: ./configuring_resources_for_a_secure_container.md
      - label: Monitoring Secure Containers
        href: ./monitoring_secure_containers.md
  - label: Appendix
    href: ./appendix_2.md
diff --git a/docs/en/docs/container_form/secure_container/appendix_2.md b/docs/en/docs/container_form/secure_container/appendix_2.md
new file mode 100644
index 0000000000000000000000000000000000000000..fe0daee801bfb5ea61ecb1a144f2acebf82e7768
--- /dev/null
+++ b/docs/en/docs/container_form/secure_container/appendix_2.md
@@ -0,0 +1,483 @@
# Appendix

## configuration.toml

> [!NOTE]NOTE
> The value of each field in the **configuration.toml** file is subject to the **configuration.toml** file in the **kata-containers-\<version\>.rpm** package. You cannot set fields in the configuration file arbitrarily.

```conf
[hypervisor.qemu]
path: specifies the execution path of QEMU.
kernel: specifies the execution path of the guest kernel.
initrd: specifies the execution path of the guest initrd.
image: specifies the execution path of the guest image (not applicable).
machine_type: specifies the emulated machine type. The value is virt for the ARM architecture and pc for the x86 architecture.
kernel_params: specifies the running parameters of the guest kernel.
firmware: specifies the firmware path. If this parameter is left blank, the default firmware is used.
machine_accelerators: specifies an accelerator.
default_vcpus: specifies the default number of vCPUs for each SB/VM.
default_maxvcpus: specifies the default maximum number of vCPUs for each SB/VM.
default_root_ports: specifies the default number of root ports for each SB/VM.
default_bridges: specifies the default number of bridges for each SB/VM.
default_memory: specifies the default memory size of each SB/VM. The default value is 1024 MiB.
memory_slots: specifies the number of memory slots for each SB/VM. The default value is 10.
memory_offset: specifies the memory offset. The default value is 0.
disable_block_device_use: disables the block device from being used by the rootfs of the container.
shared_fs: specifies the type of the shared file system. The default value is virtio-9p.
virtio_fs_daemon: specifies the path of the vhost-user-fs daemon process.
virtio_fs_cache_size: specifies the default size of the DAX cache.
virtio_fs_cache: specifies the cache mode.
block_device_driver: specifies the driver of a block device.
block_device_cache_set: specifies whether to set cache-related options for a block device. The default value is false.
block_device_cache_direct: specifies whether to enable O_DIRECT. The default value is false.
block_device_cache_noflush: specifies whether to ignore device update requests. The default value is false.
enable_iothreads: enables iothreads.
enable_mem_prealloc: enables VM RAM pre-allocation. The default value is false.
enable_hugepages: enables huge pages. The default value is false.
enable_swap: enables the swap function. The default value is false.
enable_debug: enables QEMU debugging. The default value is false.
disable_nesting_checks: disables nested checks.
msize_9p = 8192: specifies the number of bytes transmitted in each 9p packet.
use_vsock: uses vsocks to directly communicate with the agent (the prerequisite is that vsocks is supported). The default value is false.
hotplug_vfio_on_root_bus: enables the hot swap of the VFIO device on the root bus. The default value is false.
disable_vhost_net: disables vhost_net. The default value is false.
entropy_source: specifies the default entropy source.
guest_hook_path: specifies the binary path of the guest hook.

[factory]
enable_template: enables the VM template. The default value is false.
template_path: specifies the template path.
vm_cache_number: specifies the number of VM caches. The default value is 0.
vm_cache_endpoint: specifies the address of the Unix socket used by the VMCache. The default value is /var/run/kata-containers/cache.sock.

[proxy.kata]
path: specifies the kata-proxy running path.
enable_debug: enables proxy debugging. The default value is false.

[shim.kata]
path: specifies the running path of kata-shim.
enable_debug: enables shim debugging. The default value is false.
enable_tracing: enables shim opentracing.

[agent.kata]
enable_debug: enables the agent debugging function. The default value is false.
enable_tracing: enables the agent tracing function.
trace_mode: specifies the trace mode.
trace_type: specifies the trace type.
enable_blk_mount: enables guest mounting of the block device.

[netmon]
enable_netmon: enables network monitoring. The default value is false.
path: specifies the kata-netmon running path.
enable_debug: enables netmon debugging. The default value is false.

[runtime]
enable_debug: enables runtime debugging. The default value is false.
enable_cpu_memory_hotplug: enables CPU and memory hot swap. The default value is false.
internetworking_model: specifies the network interconnection mode between VMs and containers.
disable_guest_seccomp: disables the seccomp security mechanism in the guest application. The default value is true.
enable_tracing: enables runtime opentracing. The default value is false.
disable_new_netns: disables network namespace creation for the shim and hypervisor processes. The default value is false.
experimental: enables the experimental feature, which does not support user-defined configurations.
```

## APIs

**Table 1** Commands related to the kata-runtime network

All subcommands in Table 1 belong to **kata-runtime kata-network**.

> [!NOTE]NOTE
>
> - The kata-network subcommands must be used in groups: network devices that were not added using **kata-runtime kata-network** cannot be deleted or listed using **kata-runtime kata-network**, and vice versa.
> - **kata-runtime kata-network** imports configuration parameters through a file or stdin.

| Subcommand | Input Example | Field | Description | Remarks |
| ---------- | ------------- | ----- | ----------- | ------- |
| add-iface (an interface can be added to only one container; the execution result is subject to the return value, where a non-zero value indicates failure) | `{"device":"tap1","name":"eth1","IPAddresses":[{"address":"172.17.1.10","mask":"24"}],"mtu":1300,"hwAddr":"02:42:20:6f:a2:80","vhostUserSocket":"/usr/local/var/run/openvswitch/vhost-user1"}` | device | Sets the name of the NIC on the host. | Mandatory. The value can contain a maximum of 15 characters, including letters, digits, underscores (_), hyphens (-), and periods (.). It must start with a letter and must be unique on the same host. |
| | | name | Sets the name of the NIC in the container. | Mandatory. The value can contain a maximum of 15 characters, including letters, digits, underscores (_), hyphens (-), and periods (.). It must start with a letter and must be unique in the same sandbox. |
| | | IPAddresses | Sets the IP address of the NIC. | Optional. Currently, one IP address can be configured per NIC. If no IP address is configured for the NIC, no IP address is configured in the container either. |
| | | mtu | Sets the MTU of the NIC. | Mandatory. The value ranges from 46 to 9600. |
| | | hwAddr | Sets the MAC address of the NIC. | Mandatory. |
| | | vhostUserSocket | Sets the DPDK polling socket path. | Optional. The path contains a maximum of 128 bytes and can contain digits, letters, and hyphens (-). It must start with a letter. |
| del-iface | `{"name":"eth1"}` | None | Deletes an NIC from a container. | Only the **name** field is used to match the NIC to be deleted; Kata ignores other fields. |
| list-ifaces | None | None | Queries the NIC list in a container. | None |
| add-route | `{"dest":"172.17.10.10/24","gateway":"","device":"eth1"}` | dest | Sets the network segment corresponding to the route. | The value is in the format of `<ip>/<mask>`, where `<ip>` is mandatory. Three cases are supported: both IP address and mask are configured; only an IP address is configured (the default mask is 32); `"dest":"default"` is configured (there is no destination, and the gateway must then be configured). |
| | | gateway | Sets the next-hop gateway of the route. | Mandatory when `"dest":"default"` is configured; optional in other cases. |
| | | device | Sets the name of the NIC corresponding to the route. | Mandatory. The value contains a maximum of 15 characters. |
| del-route | `{"dest":"172.17.10.10/24"}` | None | Deletes a container routing rule. | **dest** is mandatory; **device** and **gateway** are optional. Kata performs fuzzy matching on the provided fields and deletes the matching routing rules. |
| list-routes | None | None | Queries the route list in a container. | None |
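As noted above, kata-network takes its parameters from a file or stdin. As a hedged sketch of the stdin form, hot-adding the interface described in Table 1 could look like the following; the tap device, addresses, and `<container-id>` are placeholders:

```shell
# Illustrative sketch: pass the add-iface JSON from Table 1 through stdin.
cat <<'EOF' | kata-runtime kata-network add-iface <container-id>
{
    "device": "tap1",
    "name": "eth1",
    "IPAddresses": [{"address": "172.17.1.10", "mask": "24"}],
    "mtu": 1300,
    "hwAddr": "02:42:20:6f:a2:80"
}
EOF
# List the result; the interface can later be removed with a matching del-iface call.
kata-runtime kata-network list-ifaces <container-id>
```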
**Table 2** kata-ipvs command line interfaces

All subcommands in Table 2 belong to **kata-runtime kata-ipvs**. For both **ipvsadm** and **cleanup**, the rule is passed as a single string through the **--parameters** field.

| Subcommand | Parameter | Sub-parameter | Description | Remarks |
| ---------- | --------- | ------------- | ----------- | ------- |
| ipvsadm | -A, --add-service | -t, --tcp-service / -u, --udp-service | Virtual service type. | Mandatory. Select either --tcp-service or --udp-service. The format is `ip:port`, and port ranges from 1 to 65535. Example: `kata-runtime kata-ipvs ipvsadm --parameters "--add-service --tcp-service 172.17.0.7:80 --scheduler rr --persistent 3000" <container-id>` |
| | | -s, --scheduler | Load balancing scheduling algorithm. | Mandatory. Value range: rr\|wrr\|lc\|wlc\|lblc\|lblcr\|dh\|sh\|sed\|nq. |
| | | -p, --persistent | Service duration. | Mandatory. The value ranges from 1 to 2678400, in seconds. |
| | -E, --edit-service | -t, --tcp-service / -u, --udp-service | Virtual service type. | Mandatory. Select either --tcp-service or --udp-service. The format is `ip:port`, and port ranges from 1 to 65535. |
| | | -s, --scheduler | Load balancing scheduling algorithm. | Mandatory. Value range: rr\|wrr\|lc\|wlc\|lblc\|lblcr\|dh\|sh\|sed\|nq. |
| | | -p, --persistent | Service duration. | Mandatory. The value ranges from 1 to 2678400, in seconds. |
| | -D, --delete-service | -t, --tcp-service / -u, --udp-service | Virtual service type. | Mandatory. Select either --tcp-service or --udp-service. The format is `ip:port`, and port ranges from 1 to 65535. |
| | -a, --add-server | -t, --tcp-service / -u, --udp-service | Virtual service type. | Mandatory. Select either --tcp-service or --udp-service. The format is `ip:port`, and port ranges from 1 to 65535. Example: `kata-runtime kata-ipvs ipvsadm --parameters "--add-server --tcp-service 172.17.0.7:80 --real-server 172.17.0.4:80 --weight 100" <container-id>` |
| | | -r, --real-server | Real server address. | Mandatory. The format is `ip:port`, and port ranges from 1 to 65535. |
| | | -w, --weight | Weight. | Optional. The value ranges from 0 to 65535. |
| | -e, --edit-server | -t, --tcp-service / -u, --udp-service | Virtual service type. | Mandatory. Select either --tcp-service or --udp-service. The format is `ip:port`, and port ranges from 1 to 65535. |
| | | -r, --real-server | Real server address. | Mandatory. The format is `ip:port`, and port ranges from 1 to 65535. |
| | | -w, --weight | Weight. | Optional. The value ranges from 0 to 65535. |
| | -d, --delete-server | -t, --tcp-service / -u, --udp-service | Virtual service type. | Mandatory. Select either --tcp-service or --udp-service. The format is `ip:port`, and port ranges from 1 to 65535. |
| | | -r, --real-server | Real server address. | Mandatory. The format is `ip:port`, and port ranges from 1 to 65535. |
| | -L, --list | -t, --tcp-service / -u, --udp-service | Queries virtual service information. | Optional. Example: `kata-runtime kata-ipvs ipvsadm --parameters "--list --tcp-service ip:port" <container-id>` |
| | --set | --tcp | TCP timeout. | Mandatory. The value ranges from 0 to 1296000. Example: `kata-runtime kata-ipvs ipvsadm --parameters "--set 100 100 200" <container-id>` |
| | | --tcpfin | TCP FIN timeout. | Mandatory. The value ranges from 0 to 1296000. |
| | | --udp | UDP timeout. | Mandatory. The value ranges from 0 to 1296000. |
| | --restore | - | Imports standard input in batches. | Rule files can be specified. Example: `kata-runtime kata-ipvs ipvsadm --restore - < <rule file path> <container-id>` |
| cleanup | -d, --orig-dst | | Specifies the IP address. | Mandatory. Example: `kata-runtime kata-ipvs cleanup --parameters "--orig-dst 172.17.0.4 --protonum tcp" <container-id>` |
| | -p, --protonum | | Protocol type. | Mandatory. The value can be tcp or udp. |

> [!NOTE]NOTE
>
> By default, the NAT mode is used when adding a single real server. To add real servers in batches through **--restore**, manually add the **-m** option to use the NAT mode. The following is an example of the rule file content:
>
> ```text
> -A -t 10.10.11.12:100 -s rr -p 3000
> -a -t 10.10.11.12:100 -r 172.16.0.1:80 -m
> -a -t 10.10.11.12:100 -r 172.16.0.1:81 -m
> -a -t 10.10.11.12:100 -r 172.16.0.1:82 -m
> ```
diff --git a/docs/en/docs/container_form/secure_container/application_scenarios_2.md b/docs/en/docs/container_form/secure_container/application_scenarios_2.md
new file mode 100644
index 0000000000000000000000000000000000000000..b9b27b73ea5850fd818daf18819246481c83eec5
--- /dev/null
+++ b/docs/en/docs/container_form/secure_container/application_scenarios_2.md
@@ -0,0 +1,6 @@
# Application Scenarios

This section describes how to use a secure container.

> [!NOTE]Note
> Using secure containers requires root privileges.
diff --git a/docs/en/docs/container_form/secure_container/configuring_resources_for_a_secure_container.md b/docs/en/docs/container_form/secure_container/configuring_resources_for_a_secure_container.md
new file mode 100644
index 0000000000000000000000000000000000000000..0195cd673781b5bdc219a05cf089331bb275f97f
--- /dev/null
+++ b/docs/en/docs/container_form/secure_container/configuring_resources_for_a_secure_container.md
@@ -0,0 +1,31 @@
# Configuring Resources for a Secure Container

A secure container runs on a virtualized, isolated lightweight VM, so resource configuration falls into two parts: resource configuration for the lightweight VM itself (host resource configuration), and resource configuration for the containers inside the VM (guest container resource configuration). The following describes both parts in detail.

## Sharing Resources

Because secure containers run on a virtualized, isolated lightweight VM, resources in some namespaces on the host cannot be accessed. Therefore, **--net host**, **--ipc host**, **--pid host**, and **--uts host** are not supported during startup.

When a pod is started, all containers in the pod share the same net namespace and ipc namespace by default. If containers in the same pod need to share the pid namespace, you can enable this through Kubernetes. In Kubernetes 1.11, the pid namespace is disabled by default.

## Limiting Resources

Limitations on sandbox resources should be configured in **configuration.toml**.
Common fields are:

- **default_vcpus**: specifies the default number of virtual CPUs.
- **default_maxvcpus**: specifies the maximum number of virtual CPUs.
- **default_root_ports**: specifies the default number of root ports in an SB/VM.
- **default_bridges**: specifies the default number of bridges.
- **default_memory**: specifies the memory size. The default size is 1024 MiB.
- **memory_slots**: specifies the number of memory slots. The default number is **10**.

## Limiting Memory Resources Through the Memory Hotplug Feature

Memory hotplug is a key feature for dynamically allocating memory to containers after deployment. Because Kata containers are based on VMs, this feature requires support from both the VMM and the guest kernel. It is fully supported by the default QEMU version and guest kernel used by Kata on ARM64; for other VMMs, such as Cloud Hypervisor, the enablement work is still in progress. Besides the VMM and guest kernel, memory hotplug also depends on ACPI, which in turn depends on firmware. On x86, a VM booted by QEMU has ACPI enabled directly because it boots with firmware implicitly. On ARM64, however, the firmware must be specified explicitly. That is, to run an otherwise normal Kata container on ARM64, the extra step is to install the UEFI ROM before using the memory hotplug feature:
+ +```shell +pushd $GOPATH/src/github.com/kata-containers/tests +sudo .ci/aarch64/install_rom_aarch64.sh # For Ubuntu only +popd +``` diff --git a/docs/en/docs/container_form/secure_container/figures/kata-arch.png b/docs/en/docs/container_form/secure_container/figures/kata-arch.png new file mode 100644 index 0000000000000000000000000000000000000000..60fbb602d94cf7a8e13bd6ecb520c99e574037e6 Binary files /dev/null and b/docs/en/docs/container_form/secure_container/figures/kata-arch.png differ diff --git a/docs/en/docs/container_form/secure_container/figures/relationship-between-the-secure-container-and-peripheral-components.png b/docs/en/docs/container_form/secure_container/figures/relationship-between-the-secure-container-and-peripheral-components.png new file mode 100644 index 0000000000000000000000000000000000000000..454fc025ecb88fef09472eef7cb29ca7a8164856 Binary files /dev/null and b/docs/en/docs/container_form/secure_container/figures/relationship-between-the-secure-container-and-peripheral-components.png differ diff --git a/docs/en/docs/container_form/secure_container/installation_and_deployment_2.md b/docs/en/docs/container_form/secure_container/installation_and_deployment_2.md new file mode 100644 index 0000000000000000000000000000000000000000..10a75a9ac2113ad4d7348d9bab7f3b999673baa5 --- /dev/null +++ b/docs/en/docs/container_form/secure_container/installation_and_deployment_2.md @@ -0,0 +1,114 @@ +# Installation and Deployment + +## Installation Methods + +### Prerequisites + +- The root permission is required for installing a Kata container. +- For better performance experience, a Kata container needs to run on the bare metal server and cannot run on VMs. +- A Kata container depends on the following components \(openEuler 1.0 version\). Ensure that the required components have been installed in the environment. To install iSulad, refer to [Installation Configuration](../../container_engine/isula_container_engine/installation_configuration.md). + - docker-engine + - qemu + +### Installation Procedure + +Released Kata container components are integrated in the **kata-containers-**_version_**.rpm** package. You can run the **rpm** command to install the corresponding software. + +```shell +rpm -ivh kata-containers-.rpm +``` + +## Deployment Configuration + +### Configuring the Docker Engine + +To enable the Docker engine to support kata-runtime, perform the following steps to configure the Docker engine: + +1. Ensure that all software packages \(**docker-engine** and **kata-containers**\) have been installed in the environment. +2. Stop the Docker engine. + + ```shell + systemctl stop docker + ``` + +3. Modify the configuration file **/etc/docker/daemon.json** of the Docker engine and add the following configuration: + + ```json + { + "runtimes": { + "kata-runtime": { + "path": "/usr/bin/kata-runtime", + "runtimeArgs": [ + "--kata-config", + "/usr/share/defaults/kata-containers/configuration.toml" + ] + } + } + } + ``` + +4. Restart the Docker engine. + + ```shell + systemctl start docker + ``` + +### iSulad Configuration + +To enable the iSulad to support the new container runtime kata-runtime, perform the following steps which are similar to those for the container engine docker-engine: + +1. Ensure that all software packages \(iSulad and kata-containers\) have been installed in the environment. +2. Stop iSulad. + + ```shell + systemctl stop isulad + ``` + +3. 
Modify the **/etc/isulad/daemon.json** configuration file of iSulad and add the following configuration:

   ```json
   {
       "runtimes": {
           "kata-runtime": {
               "path": "/usr/bin/kata-runtime",
               "runtime-args": [
                   "--kata-config",
                   "/usr/share/defaults/kata-containers/configuration.toml"
               ]
           }
       }
   }
   ```

4. Restart iSulad.

   ```shell
   systemctl start isulad
   ```

### Configuration.toml

The Kata container provides a global configuration file **configuration.toml**. Users can also customize the path and configuration options of the Kata container configuration file.

In the **runtimeArgs** field of the Docker engine configuration, you can use **--kata-config** to specify a private file. The default configuration file path is **/usr/share/defaults/kata-containers/configuration.toml**.

The following lists the common fields in the configuration file. For details about the configuration file options, see [configuration.toml](./appendix_2.md#configurationtoml).

1. hypervisor.qemu

   - **path**: specifies the execution path of QEMU.
   - **kernel**: specifies the execution path of the guest kernel.
   - **initrd**: specifies the execution path of the guest initrd.
   - **machine_type**: specifies the emulated machine type. The value is **virt** for the ARM architecture and **pc** for the x86 architecture.
   - **kernel_params**: specifies the running parameters of the guest kernel.

2. proxy.kata

   - **path**: specifies the kata-proxy running path.
   - **enable_debug**: enables the debugging function for the kata-proxy process.

3. agent.kata

   - **enable_blk_mount**: enables guest mounting of the block device.
   - **enable_debug**: enables the debugging function for the kata-agent process.

4. runtime

   - **enable_cpu_memory_hotplug**: enables CPU and memory hot swap.
   - **enable_debug**: enables debugging for the kata-runtime process.
diff --git a/docs/en/docs/container_form/secure_container/managing_the_lifecycle_of_a_secure_container.md b/docs/en/docs/container_form/secure_container/managing_the_lifecycle_of_a_secure_container.md
new file mode 100644
index 0000000000000000000000000000000000000000..d8b2ac745ca4c9de680d5abcc6f36647105bd3ee
--- /dev/null
+++ b/docs/en/docs/container_form/secure_container/managing_the_lifecycle_of_a_secure_container.md
@@ -0,0 +1,93 @@
# Managing the Lifecycle of a Secure Container

## Starting a Secure Container

You can use the Docker engine or iSulad as the container engine of the secure container. The invocation methods of the two engines are similar, and you can select either of them to start a secure container.

To start a secure container, perform the following steps:

1. Ensure that the secure container component has been correctly installed and deployed.
2. Prepare the container image. If the container image is busybox, run the following commands to download it using the Docker engine or iSulad:

   ```shell
   docker pull busybox
   ```

   ```shell
   isula pull busybox
   ```

3. Start a secure container. Run the following commands using the Docker engine and iSulad, respectively:

   ```shell
   docker run -tid --runtime kata-runtime --network none busybox
   ```

   ```shell
   isula run -tid --runtime kata-runtime --network none busybox
   ```

   > [!NOTE]NOTE
   > The secure container supports the CNI network only and does not support the CNM network. The **-p** and **--expose** options cannot be used to expose container ports.
   > When using a secure container, you must specify the **--net=none** option.

4. Start a pod.
   1. Start the pause container and obtain the sandbox ID of the pod from the command output. Run the following commands using the Docker engine and iSulad, respectively:

      ```shell
      docker run -tid --runtime kata-runtime --network none --annotation io.kubernetes.docker.type=podsandbox <pause-image>
      ```

      ```shell
      isula run -tid --runtime kata-runtime --network none --annotation io.kubernetes.cri.container-type=sandbox <pause-image>
      ```

   2. Create a service container and add it to the pod. Run the following commands using the Docker engine and iSulad, respectively:

      ```shell
      docker run -tid --runtime kata-runtime --network none --annotation io.kubernetes.docker.type=container --annotation io.kubernetes.sandbox.id=<sandbox-id> busybox
      ```

      ```shell
      isula run -tid --runtime kata-runtime --network none --annotation io.kubernetes.cri.container-type=container --annotation io.kubernetes.cri.sandbox-id=<sandbox-id> busybox
      ```

   **--annotation** is used to mark the container type. It is provided by the Docker engine and iSulad, but not by the open source Docker engine in the upstream community.

## Stopping a Secure Container

- Run the following command to stop a secure container:

  ```shell
  docker stop <container-id>
  ```

- Stop a pod.

  When stopping a pod, note that the lifecycle of the pause container is the same as that of the pod. Therefore, stop the service containers before the pause container.

## Deleting a Secure Container

Ensure that the container has been stopped, and run the following command:

```shell
docker rm <container-id>
```

To forcibly delete a running container, use the **-f** option:

```shell
docker rm -f <container-id>
```

## Running a New Command in the Container

The pause container functions only as a placeholder. Therefore, if you started a pod, run new commands in the service container; the pause container does not execute them. If only one container was started, run the following command directly:

```shell
docker exec -ti <container-id> <command>
```

> [!NOTE]NOTE
>
> 1. If the preceding command has no response because another host runs the **docker restart** or **docker stop** command on the same container, press **Ctrl**+**P**+**Q** to exit the operation.
> 2. If the **-d** option is used, the command is executed in the background and no error information is displayed. The exit code cannot be used to determine whether the command was executed correctly.
diff --git a/docs/en/docs/container_form/secure_container/monitoring_secure_containers.md b/docs/en/docs/container_form/secure_container/monitoring_secure_containers.md
new file mode 100644
index 0000000000000000000000000000000000000000..c6a2ef3285ea5b3504521315d711ab81f0644dbf
--- /dev/null
+++ b/docs/en/docs/container_form/secure_container/monitoring_secure_containers.md
@@ -0,0 +1,54 @@
# Monitoring Secure Containers

## Description

In Kata 2.x, the **events** subcommand is removed and replaced by **kata-runtime metrics**, which gathers the metrics associated with the infrastructure used to run a sandbox, including VM stats, shim v2 CPU seconds, and the CPU stats of the guest OS. Metrics are organized in a Prometheus-compatible format so that they can be easily uploaded to Prometheus when working with kata-monitor.

## Usage

```shell
kata-runtime metrics <sandbox-id>
```

## Prerequisites

The sandbox ID must be the full ID, and the sandbox to be queried must be in the **running** state.
Otherwise, the following error message will be displayed: "Container ID \(\) does not exist". + +When using annotation to make a container run in a specific sandbox, clients should not use kata-runtime metrics to gather metrics of that container. The correct way is to query the corresponding sandbox. + +This command can be used to query the status of only one container. + +## Example + +```shell +$ kata-runtime metrics e2270357d23f9d3dd424011e1e70aa8defb267d813c3d451db58f35aeac97a04 + +# HELP go_gc_duration_seconds A summary of the pause duration of garbage collection cycles. +# TYPE go_gc_duration_seconds summary +go_gc_duration_seconds{quantile="0"} 2.656e-05 +go_gc_duration_seconds{quantile="0.25"} 3.345e-05 +go_gc_duration_seconds{quantile="0.5"} 3.778e-05 +go_gc_duration_seconds{quantile="0.75"} 4.657e-05 +go_gc_duration_seconds{quantile="1"} 0.00023001 +go_gc_duration_seconds_sum 0.00898126 +go_gc_duration_seconds_count 195 +# HELP go_goroutines Number of goroutines that currently exist. +# TYPE go_goroutines gauge +go_goroutines 27 +# HELP go_info Information about the Go environment. +# TYPE go_info gauge +go_info{version="go1.17.3"} 1 +# HELP kata_hypervisor_netdev Net devices statistics. +# TYPE kata_hypervisor_netdev gauge +kata_hypervisor_netdev{interface="lo",item="recv_bytes"} 0 +kata_hypervisor_netdev{interface="lo",item="recv_compressed"} 0 +kata_hypervisor_netdev{interface="lo",item="recv_drop"} 0 +kata_hypervisor_netdev{interface="lo",item="recv_errs"} 0 +kata_hypervisor_netdev{interface="lo",item="recv_fifo"} 0 +kata_hypervisor_netdev{interface="lo",item="recv_frame"} 0 +kata_hypervisor_netdev{interface="lo",item="recv_multicast"} 0 +kata_hypervisor_netdev{interface="lo",item="recv_packets"} 0 +kata_hypervisor_netdev{interface="lo",item="sent_bytes"} 0 +kata_hypervisor_netdev{interface="lo",item="sent_carrier"} 0 +kata_hypervisor_netdev{interface="lo",item="sent_colls"} 0 +``` diff --git a/docs/en/docs/container_form/secure_container/overview.md b/docs/en/docs/container_form/secure_container/overview.md new file mode 100644 index 0000000000000000000000000000000000000000..888acc462c78f44e3b8d7fa09eac4141d14111b7 --- /dev/null +++ b/docs/en/docs/container_form/secure_container/overview.md @@ -0,0 +1,29 @@ +# Secure Container + +## Overview + +The secure container technology is an organic combination of virtualization and container technologies. Compared with a common Linux container, a secure container has better isolation performance. + +Common Linux containers use namespaces to isolate the running environment between processes and use cgroups to limit resources. Essentially, these common Linux containers share the same kernel. Therefore, if a single container affects the kernel intentionally or unintentionally, the containers on the same host will be affected. + +Secure containers are isolated by the virtualization layers. Containers on the same host do not affect each other. + +**Figure 1** Secure container architecture + +![](./figures/kata-arch.png) + +Secure containers are closely related to the concept of pod in Kubernetes. Kubernetes is the open-source ecosystem standard for the container scheduling management platform. It defines a group of container runtime interfaces \(CRIs\). + +In the CRI standards, a pod is a logical grouping of one or more containers, which are scheduled together and share interprocess communication \(IPC\) and network namespaces. As the smallest unit for scheduling, a pod must contain a pause container and one or more service containers. 
The lifecycle of a pause container is the same as that of the pod.

A lightweight virtual machine (VM) in a secure container is a pod. The first container started in the VM is the pause container, and the containers started later are service containers.

In a secure container, you can start a single container or start a pod.

Figure 2 shows the relationship between the secure container and peripheral components.

**Figure 2** Relationship between the secure container and peripheral components
![](./figures/relationship-between-the-secure-container-and-peripheral-components.png)

> [!NOTE]Note
> Root privileges are necessary for installing and operating secure containers.
diff --git a/docs/en/docs/container_form/system_container/_toc.yaml b/docs/en/docs/container_form/system_container/_toc.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..787973f150ba9ad0f1218d84c8494d52c441cd35
--- /dev/null
+++ b/docs/en/docs/container_form/system_container/_toc.yaml
@@ -0,0 +1,35 @@
label: System Container
isManual: true
description: System containers tackle the issues of migrating heavy applications and services to the cloud in scenarios requiring intensive computation, high performance, and massive concurrency.
sections:
  - label: Overview
    href: ./overview.md
  - label: Installation Guideline
    href: ./installation_guideline.md
  - label: Usage Guide
    href: ./usage_guide.md
    sections:
      - label: Specifying Rootfs to Create a Container
        href: ./specifying_rootfs_to_create_a_container.md
      - label: Using systemd to Start a Container
        href: ./using_systemd_to_start_a_container.md
      - label: Reboot or Shutdown in a Container
        href: ./reboot_or_shutdown_in_a_container.md
      - label: Configurable Cgroup Path
        href: ./configurable_cgroup_path.md
      - label: Writable Namespace Kernel Parameters
        href: ./writable_namespace_kernel_parameters.md
      - label: Shared Memory Channels
        href: ./shared_memory_channels.md
      - label: Dynamically Loading the Kernel Module
        href: ./dynamically_loading_the_kernel_module.md
      - label: Environment Variable Persisting
        href: ./environment_variable_persisting.md
      - label: Maximum Number of Handles
        href: ./maximum_number_of_handles.md
      - label: Security and Isolation
        href: ./security_and_isolation.md
      - label: Dynamically Managing Container Resources (syscontainer_tools)
        href: ./dynamically_managing_container_resources_syscontainer_tools.md
      - label: Appendix
        href: ./appendix_1.md
diff --git a/docs/en/docs/container_form/system_container/appendix_1.md b/docs/en/docs/container_form/system_container/appendix_1.md
new file mode 100644
index 0000000000000000000000000000000000000000..cc93feee79fbfbabfc57d3b8c8ad28e0d06c46e9
--- /dev/null
+++ b/docs/en/docs/container_form/system_container/appendix_1.md
@@ -0,0 +1,88 @@
# Appendix

## Command Line Interface List

This section lists the commands in system containers that differ from those in common containers. For details about other commands, see the sections related to the iSulad container engine or run the **isula _XXX_ --help** command.

| Command | Parameter | Value Description |
| ------- | --------- | ----------------- |
| isula create/run | --external-rootfs | String. Absolute path on the host. Specifies the rootfs of a VM when running a system container. |
| | --system-container | Boolean. Specifies whether a container is a system container. This option must be enabled in system container scenarios. |
| | --add-host | String. Specifies the hosts configuration for a container in the format hostname:ip. Multiple values can be set. |
| | --dns, --dns-option, --dns-search | String. Specifies the DNS configuration for a container. Multiple values can be set. |
| | --ns-change-opt | String. Container namespace kernel parameter. The value can only be net or ipc. If multiple values are set, separate them with commas (,), for example, --ns-change-opt=net,ipc. |
| | --oom-kill-disable | Boolean. Indicates whether to enable the oom-kill-disable function. |
| | --shm-size | String. Sets the size of /dev/shm. The default value is 64 MB. The unit can be byte (B), kilobyte (KB), megabyte (MB), gigabyte (GB), terabyte (TB), or petabyte (PB). |
| | --sysctl | String. Specifies container kernel parameters in the format key=value. Multiple values can be set. The sysctl allowlist is: kernel.msgmax, kernel.msgmnb, kernel.msgmni, kernel.sem, kernel.shmall, kernel.shmmax, kernel.shmmni, kernel.shm_rmid_forced, kernel.pid_max, net., and fs.mqueue. The kernel.pid_max kernel parameter in a container must support namespacing; otherwise, an error is reported. Restrictions on the allowlisted parameters (including parameter types and value ranges) must be the same as those of the kernel parameters on the physical machine. |
| | --env-target-file | String. Specifies the env persistent file path. The path must be an absolute path, the file must be in the rootfs directory, and the file size cannot exceed 10 MB. If the value of --env conflicts with env in the file, the value of --env takes effect. The root of the absolute path is the rootfs root directory; that is, to use /etc/environment in the container, specify env-target-file=/etc/environment only. |
| | --cgroup-parent | String. Specifies the cgroup parent directory of a container. The cgroup root directory is `/sys/fs/cgroup/<controller>`. |
| | --host-channel | String. Specifies the memory space shared between the host and a container (tmpfs), in the format host path:container path:rw/ro:size limit. |
| | --files-limit | String. Specifies the maximum number of file handles in a container. The value must be an integer. |
| | --user-remap | String. The format is uid:gid:offset. |
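To illustrate how several of these options combine, the following hedged sketch starts a system container with a handful of the flags above; the rootfs path, host entry, and size values are placeholders, and the `--external-rootfs ... none init` pattern follows the examples elsewhere in this guide:

```shell
# Illustrative sketch: a system container with DNS, handle-limit, and shared-memory settings.
isula run -tid --system-container --external-rootfs /root/myrootfs \
    --add-host db:172.17.0.10 --dns 8.8.8.8 \
    --files-limit 10240 --shm-size 128MB none init
```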
diff --git a/docs/en/docs/container_form/system_container/configurable_cgroup_path.md b/docs/en/docs/container_form/system_container/configurable_cgroup_path.md
new file mode 100644
index 0000000000000000000000000000000000000000..d8579d723b8ad836f93fc039efa03460b70625da
--- /dev/null
+++ b/docs/en/docs/container_form/system_container/configurable_cgroup_path.md
@@ -0,0 +1,94 @@
# Configurable Cgroup Path

## Function Description

System containers provide the capability of isolating and reserving container resources on hosts. You can use the **--cgroup-parent** parameter to point the cgroup directory used by a container to another directory, thereby flexibly allocating host resources. For example, if the cgroup parent path of containers A, B, and C is set to **/lxc/cgroup1**, and that of containers D, E, and F is set to **/lxc/cgroup2**, the containers are divided into two groups through their cgroup paths, implementing resource isolation at the cgroup level.

## Parameter Description

| Command | Parameter | Value Description |
| ------- | --------- | ----------------- |
| isula create/run | --cgroup-parent | String. Specifies the cgroup parent path of the container. |
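As a quick illustration of the flag (a fuller version appears in the Example section below; paths are placeholders):

```shell
# Illustrative sketch: place the container under a dedicated cgroup subtree.
isula run -tid --cgroup-parent /lxc/mygroup --system-container --external-rootfs /root/myrootfs none init
```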
In addition to specifying the cgroup parent path for a system container on the command line, you can also specify the cgroup paths of all containers by modifying the startup configuration files of the iSulad container engine.

| Configuration File Path | Parameter | Description |
| ----------------------- | --------- | ----------- |
| /etc/isulad/daemon.json | cgroup-parent | String. Specifies the default cgroup parent path of containers. Example: `"cgroup-parent": "/lxc/mycgroup"` |
## Constraints

- If the **cgroup parent** parameter is set on both the daemon and the client, the value specified on the client takes effect.
- If container A is started before container B, and the cgroup parent path of container B is specified as the cgroup path of container A, delete container B before container A. Otherwise, residual cgroup resources are left behind.

## Example

Start a system container and specify the **--cgroup-parent** parameter.

```shell
[root@localhost ~]# isula run -tid --cgroup-parent /lxc/cgroup123 --system-container --external-rootfs /root/myrootfs none init
115878a4dfc7c5b8c62ef8a4b44f216485422be9a28f447a4b9ecac4609f332e
```

Check the cgroup information of the init process in the container.

```shell
[root@localhost ~]# isula inspect -f "{{json .State.Pid}}" 11
22167
[root@localhost ~]# cat /proc/22167/cgroup
13:blkio:/lxc/cgroup123/115878a4dfc7c5b8c62ef8a4b44f216485422be9a28f447a4b9ecac4609f332e
12:perf_event:/lxc/cgroup123/115878a4dfc7c5b8c62ef8a4b44f216485422be9a28f447a4b9ecac4609f332e
11:cpuset:/lxc/cgroup123/115878a4dfc7c5b8c62ef8a4b44f216485422be9a28f447a4b9ecac4609f332e
10:pids:/lxc/cgroup123/115878a4dfc7c5b8c62ef8a4b44f216485422be9a28f447a4b9ecac4609f332e
9:rdma:/lxc/cgroup123/115878a4dfc7c5b8c62ef8a4b44f216485422be9a28f447a4b9ecac4609f332e
8:devices:/lxc/cgroup123/115878a4dfc7c5b8c62ef8a4b44f216485422be9a28f447a4b9ecac4609f332e
7:hugetlb:/lxc/cgroup123/115878a4dfc7c5b8c62ef8a4b44f216485422be9a28f447a4b9ecac4609f332e
6:memory:/lxc/cgroup123/115878a4dfc7c5b8c62ef8a4b44f216485422be9a28f447a4b9ecac4609f332e
5:net_cls,net_prio:/lxc/cgroup123/115878a4dfc7c5b8c62ef8a4b44f216485422be9a28f447a4b9ecac4609f332e
4:cpu,cpuacct:/lxc/cgroup123/115878a4dfc7c5b8c62ef8a4b44f216485422be9a28f447a4b9ecac4609f332e
3:files:/lxc/cgroup123/115878a4dfc7c5b8c62ef8a4b44f216485422be9a28f447a4b9ecac4609f332e
2:freezer:/lxc/cgroup123/115878a4dfc7c5b8c62ef8a4b44f216485422be9a28f447a4b9ecac4609f332e
1:name=systemd:/lxc/cgroup123/115878a4dfc7c5b8c62ef8a4b44f216485422be9a28f447a4b9ecac4609f332e/init.scope
0::/lxc/cgroup123/115878a4dfc7c5b8c62ef8a4b44f216485422be9a28f447a4b9ecac4609f332e
```

The cgroup parent path of the container is set to **/sys/fs/cgroup/\<controller\>/lxc/cgroup123**.

In addition, you can configure the container daemon file to set the cgroup parent path for all containers. For example:

```text
{
    "cgroup-parent": "/lxc/cgroup123"
}
```

Restart the container engine for the configuration to take effect.
diff --git a/docs/en/docs/container_form/system_container/dynamically_loading_the_kernel_module.md b/docs/en/docs/container_form/system_container/dynamically_loading_the_kernel_module.md
new file mode 100644
index 0000000000000000000000000000000000000000..9a7b159f56f5ed8efc21109b3edf4b3bfe969e8d
--- /dev/null
+++ b/docs/en/docs/container_form/system_container/dynamically_loading_the_kernel_module.md
@@ -0,0 +1,52 @@
# Dynamically Loading the Kernel Module

## Function Description

Services in a container may depend on certain kernel modules. You can set environment variables to dynamically load the kernel modules required by services in the container to the host before the system container starts. This feature must be used together with isulad-tools. For details, see **Dynamically Managing Container Resources (syscontainer-tools)**.

## Parameter Description

| Command | Parameter | Value Description |
| ------- | --------- | ----------------- |
| isula create/run | -e KERNEL_MODULES=module_name1,module_name2 | String. Multiple modules can be specified; separate module names with commas (,). |
## Constraints

- If loaded kernel modules are not verified or conflict with existing modules on the host, an unpredictable error may occur on the host. Exercise caution when loading kernel modules.
- Dynamic kernel module loading transfers the kernel modules to be loaded into the container. This function is implemented by isulad-tools capturing the environment variables used for container startup, so it relies on the proper installation and deployment of isulad-tools.
- Loaded kernel modules need to be deleted manually.

## Example

When starting a system container, specify the **-e KERNEL_MODULES** parameter. After the system container is started, the ip_vs module is successfully loaded into the kernel.

```shell
[root@localhost ~]# lsmod | grep ip_vs
[root@localhost ~]# isula run -tid -e KERNEL_MODULES=ip_vs,ip_vs_wrr --hook-spec /etc/isulad-tools/hookspec.json --system-container --external-rootfs /root/myrootfs none init
ae18c4281d5755a1e153a7bff6b3b4881f36c8e528b9baba8a3278416a5d0980
[root@localhost ~]# lsmod | grep ip_vs
ip_vs_wrr 16384 0
ip_vs 176128 2 ip_vs_wrr
nf_conntrack 172032 7 xt_conntrack,nf_nat,nf_nat_ipv6,ipt_MASQUERADE,nf_nat_ipv4,nf_conntrack_netlink,ip_vs
nf_defrag_ipv6 20480 2 nf_conntrack,ip_vs
libcrc32c 16384 3 nf_conntrack,nf_nat,ip_vs
```

> [!NOTE]NOTE
>
> - isulad-tools must be installed on the host.
> - **--hook-spec** must be set to the isulad hooks configuration.
diff --git a/docs/en/docs/container_form/system_container/dynamically_managing_container_resources_syscontainer_tools.md b/docs/en/docs/container_form/system_container/dynamically_managing_container_resources_syscontainer_tools.md
new file mode 100644
index 0000000000000000000000000000000000000000..f58f738605349cea307220a02a7a0debb9a350ac
--- /dev/null
+++ b/docs/en/docs/container_form/system_container/dynamically_managing_container_resources_syscontainer_tools.md
@@ -0,0 +1,474 @@
# Dynamically Managing Container Resources (syscontainer-tools)

Resources in common containers cannot be managed dynamically. For example, a block device cannot be added to a common container, and a physical or virtual NIC cannot be inserted into one. In the system container scenario, syscontainer-tools can be used to dynamically mount or unmount block devices, network devices, routes, and volumes for containers.

To use this function, install syscontainer-tools first:

```shell
[root@localhost ~]# yum install syscontainer-tools
```

## Device Management

### Function Description

syscontainer-tools allows you to add block devices (such as disks and logical volume managers) or character devices (such as GPUs, binners, and FUSEs) on the host to a container, where they can be used as usual. For example, you can run the **fdisk** command to format a disk and write data to its file system. If the devices are no longer required, syscontainer-tools can delete them from the container and return them to the host.

### Command Format

```shell
syscontainer-tools [COMMAND] [OPTIONS] <container_id> [ARG...]
```

In the preceding format:

**COMMAND**: command related to device management.

**OPTIONS**: options supported by the device management command.

**container_id**: container ID.

**ARG**: parameters corresponding to the command.

### Parameter Description

| Command | Function Description | Option Description | Parameter Description |
|---|---|---|---|
| add-device | Adds block devices or character devices on the host to a container. | - --blkio-weight-device: sets the I/O weight (relative weight, ranging from 10 to 1000) of a block device.<br>- --device-read-bps: sets the read rate limit for the block device (byte/s).<br>- --device-read-iops: sets the read rate limit for the block device (I/O/s).<br>- --device-write-bps: sets the write rate limit for the block device (byte/s).<br>- --device-write-iops: sets the write rate limit for the block device (I/O/s).<br>- --follow-partition: If a block device is a basic block device (primary SCSI block disk), set this parameter to add all partitions of the primary disk.<br>- --force: If any block device or character device already exists in the container, use this parameter to overwrite the old block device or character device files.<br>- --update-config-only: updates configuration files only and does not add disks. | Parameter format: `hostdevice[:containerdevice][:permission] [hostdevice[:containerdevice][:permission] ...]`<br>- hostdevice: path of the device on the host.<br>- containerdevice: path of the device in the container.<br>- permission: operation permission on the device within the container. |
| remove-device | Deletes block devices or character devices from a container and restores them to the host. | --follow-partition: If a block device is a basic block device (primary SCSI block disk), set this parameter to delete all partitions of the primary disk in the container and restore them to the host. | Parameter format: `hostdevice[:containerdevice] [hostdevice[:containerdevice] ...]`<br>- hostdevice: path of the device on the host.<br>- containerdevice: path of the device in the container. |
| list-device | Lists all block devices or character devices in a container. | - --pretty: outputs data in JSON format.<br>- --sub-partition: For a primary disk, add this flag to display the primary disk and its sub-partitions. | None |
| update-device | Updates the disk QoS. | - --device-read-bps: sets the read rate limit for the block device (byte/s). You are advised to set this parameter to a value greater than or equal to 1024.<br>- --device-read-iops: sets the read rate limit for the block device (I/O/s).<br>- --device-write-bps: sets the write rate limit for the block device (byte/s). You are advised to set this parameter to a value greater than or equal to 1024.<br>- --device-write-iops: sets the write rate limit for the block device (I/O/s). | None |
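For instance, after devices are added, you can confirm what the container currently sees with **list-device**. The following is a minimal sketch built only from the commands in the table above; the container ID **ee** is a placeholder, and the exact JSON fields of the output may vary by syscontainer-tools version.

```shell
# List all block and character devices in container "ee", rendered as JSON.
[root@localhost ~]# syscontainer-tools list-device --pretty ee
```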
+ +### Constraints + +- You can add or delete devices when container instances are not running. After the operation is complete, you can start the container to view the device status. You can also dynamically add a device when the container is running. +- Do not concurrently run the **fdisk** command to format disks in a container and on the host. Otherwise, the container disk usage will be affected. +- When you run the **add-device** command to add a disk to a specific directory of a container, if the parent directory in the container is a multi-level directory \(for example, **/dev/a/b/c/d/e**\) and the directory level does not exist, syscontainer-tools will automatically create the corresponding directory in the container. When the disk is deleted, the created parent directory is not deleted. If you run the **add-device** command to add a device to this parent directory again, a message is displayed, indicating that a device already exists and cannot be added. +- When you run the**add-device** command to add a disk or update disk parameters, you need to configure the disk QoS. Do not set the write or read rate limit for the block device \(I/O/s or byte/s\) to a small value. If the value is too small, the disk may be unreadable \(the actual reason is the speed is too slow\), affecting service functions. +- When you run the **--blkio-weight-device** command to limit the weight of a specified block device, if the block device supports only the BFQ mode, an error may be reported, prompting you to check whether the current OS environment supports setting the weight of the BFQ block device. + +### Example + +- Start a system container, and set **hook spec** to the isulad hook execution script. + + ```shell + [root@localhost ~]# isula run -tid --hook-spec /etc/syscontainer-tools/hookspec.json --system-container --external-rootfs /root/root-fs none init + eed1096c8c7a0eca6d92b1b3bc3dd59a2a2adf4ce44f18f5372408ced88f8350 + ``` + +- Add a block device to a container. + + ```shell + [root@localhost ~]# syscontainer-tools add-device ee /dev/sdb:/dev/sdb123 + Add device (/dev/sdb) to container(ee,/dev/sdb123) done. + [root@localhost ~]# isula exec ee fdisk -l /dev/sdb123 + Disk /dev/sdb123: 50 GiB, 53687091200 bytes, 104857600 sectors + Units: sectors of 1 * 512 = 512 bytes + Sector size (logical/physical): 512 bytes / 512 bytes + I/O size (minimum/optimal): 512 bytes / 512 bytes + Disklabel type: dos + Disk identifier: 0xda58a448 + + Device Boot Start End Sectors Size Id Type + /dev/sdb123p1 2048 104857599 104855552 50G 5 Extended + /dev/sdb123p5 4096 104857599 104853504 50G 83 Linux + ``` + +- Update the device information. + + ```shell + [root@localhost ~]# syscontainer-tools update-device --device-read-bps /dev/sdb:10m ee + Update read bps for device (/dev/sdb,10485760) done. + ``` + +- Delete a device. + + ```shell + [root@localhost ~]# syscontainer-tools remove-device ee /dev/sdb:/dev/sdb123 + Remove device (/dev/sdb) from container(ee,/dev/sdb123) done. + Remove read bps for device (/dev/sdb) done. + ``` + +## NIC Management + +### Function Description + +syscontainer-tools allows you to insert physical or virtual NICs on the host to a container. If the NICs are not required, syscontainer-tools allows you to delete them from the container and return them to the host. In addition, the NIC configurations can be dynamically modified. To insert a physical NIC, add the NIC on the host to the container. To insert a virtual NIC, create a veth pair and insert its one end to the container. 
+ +### Command Format + +```shell +syscontainer-tools [COMMAND][OPTIONS] +``` + +In the preceding format: + +**COMMAND**: command related to NIC management. + +**OPTIONS**: option supported by the NIC management command. + +**container\_id**: container ID. + +### Parameter Description + + + + + + + + + + + + + + + + + + + + + + + +

| Command | Function Description | Option Description |
|---|---|---|
| add-nic | Creates a NIC for a container. | - --type: specifies the NIC type. Only eth and veth are supported.<br>- --name: specifies the NIC name. The format is `[host:]container`. If host is not specified, a random value is used.<br>- --ip: specifies the NIC IP address.<br>- --mac: specifies the NIC MAC address.<br>- --bridge: specifies the network bridge bound to the NIC.<br>- --mtu: specifies the MTU value of the NIC. The default value is 1500.<br>- --update-config-only: If this flag is set, only configuration files are updated and NICs are not added.<br>- --qlen: specifies the value of QLEN. The default value is 1000. |
| remove-nic | Deletes NICs from a container and restores them to the host. | - --type: specifies the NIC type.<br>- --name: specifies the name of the NIC. The format is `[host:]container`. |
| list-nic | Lists all NICs in a container. | - --pretty: outputs data in JSON format.<br>- --filter: outputs filtered data in the specific format, for example, `--filter '{"ip":"192.168.3.4/24", "Mtu":1500}'`. |
| update-nic | Modifies configuration parameters of a specified NIC in a container. | - --name: specifies the name of the NIC in the container. This parameter is mandatory.<br>- --ip: specifies the NIC IP address.<br>- --mac: specifies the NIC MAC address.<br>- --bridge: specifies the network bridge bound to the NIC.<br>- --mtu: specifies the MTU value of the NIC.<br>- --update-config-only: If this flag is set, only configuration files are updated and NICs are not updated.<br>- --qlen: specifies the value of QLEN. |
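As a complement to the table above, the following sketch shows how an existing NIC might be updated and then removed. The container ID and NIC names are placeholders, and confirmation output is omitted because it may vary by version.

```shell
# Change the MTU of the in-container NIC named "bcd2".
[root@localhost ~]# syscontainer-tools update-nic --name bcd2 --mtu 1400 2aaca5c1af7c
# Remove the veth NIC and return the host end to the host.
[root@localhost ~]# syscontainer-tools remove-nic --type "veth" --name abc2:bcd2 2aaca5c1af7c
```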
+ +### Constraints + +- Physical NICs \(eth\) and virtual NICs \(veth\) can be added. +- When adding a NIC, you can also configure the NIC. The configuration parameters include **--ip**, **--mac**, **--bridge**, **--mtu**, **--qlen**. +- A maximum of eight physical NICs can be added to a container. +- If you run the **syscontainer-tools add-nic** command to add an eth NIC to a container and do not add a hook, you must manually delete the NIC before the container exits. Otherwise, the name of the eth NIC on the host will be changed to the name of that in the container. +- For a physical NIC \(except 1822 VF NIC\), use the original MAC address when running the **add-nic** command. Do not change the MAC address in the container, or when running the **update-nic** command. +- When using the **syscontainer-tools add-nic** command, set the MTU value. The value range depends on the NIC model. +- When using syscontainer-tools to add NICs and routes to containers, you are advised to run the **add-nic** command to add NICs and then run the **add-route** command to add routes. When using syscontainer-tools to delete NICs and routes from a container, you are advised to run the **remove-route** command to delete routes and then run the **remove-nic** command to delete NICs. +- When using syscontainer-tools to add NICs, add a NIC to only one container. + +### Example + +- Start a system container, and set **hook spec** to the isulad hook execution script. + + ```shell + [root@localhost ~]# isula run -tid --hook-spec /etc/syscontainer-tools/hookspec.json --system-container --external-rootfs /root/root-fs none init + 2aaca5c1af7c872798dac1a468528a2ccbaf20b39b73fc0201636936a3c32aa8 + ``` + +- Add a virtual NIC to a container. + + ```shell + [root@localhost ~]# syscontainer-tools add-nic --type "veth" --name abc2:bcd2 --ip 172.17.28.5/24 --mac 00:ff:48:13:xx:xx --bridge docker0 2aaca5c1af7c + Add network interface to container 2aaca5c1af7c (bcd2,abc2) done + ``` + +- Add a physical NIC to a container. + + ```shell + [root@localhost ~]# syscontainer-tools add-nic --type "eth" --name eth3:eth1 --ip 172.17.28.6/24 --mtu 1300 --qlen 2100 2aaca5c1af7c + Add network interface to container 2aaca5c1af7c (eth3,eth1) done + ``` + + > [!NOTE]NOTE + > When adding a virtual or physical NIC, ensure that the NIC is in the idle state. Adding a NIC in use will disconnect the system network. + +## Route Management + +### Function Description + +syscontainer-tools can be used to dynamically add or delete routing tables for system containers. + +### Command Format + +```shell +syscontainer-tools [COMMAND][OPTIONS] [ARG...] +``` + +In the preceding format: + +**COMMAND**: command related to route management. + +**OPTIONS**: option supported by the route management command. + +**container\_id**: container ID. + +**ARG**: parameter corresponding to the command. + +### API Description + + + + + + + + + + + + + + + + + + + + + + + +

| Command | Function Description | Option Description | Parameter Description |
|---|---|---|---|
| add-route | Adds network routing rules to a container. | --update-config-only: If this parameter is configured, only configuration files are updated and routing tables are not updated. | Parameter format: `[{rule1},{rule2}]`<br>Example of a rule: `'[{"dest":"default", "gw":"192.168.10.1"},{"dest":"192.168.0.0/16","dev":"eth0","src":"192.168.1.2"}]'`<br>- dest: target network. If this parameter is left blank, the default gateway is used.<br>- src: source IP address of the route.<br>- gw: route gateway.<br>- dev: network device. |
| remove-route | Deletes a route from a container. | --update-config-only: If this parameter is configured, only configuration files are updated and routes are not deleted from the container. | Parameter format: `[{rule1},{rule2}]`<br>Example of a rule: `'[{"dest":"default", "gw":"192.168.10.1"},{"dest":"192.168.0.0/16","dev":"eth0","src":"192.168.1.2"}]'`<br>- dest: target network. If this parameter is left blank, the default gateway is used.<br>- src: source IP address of the route.<br>- gw: route gateway.<br>- dev: network device. |
| list-route | Lists all routing rules in a container. | - --pretty: outputs data in JSON format.<br>- --filter: outputs filtered data in the specific format, for example, `--filter '{"ip":"192.168.3.4/24", "Mtu":1500}'`. | None |
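The listing and deletion commands mirror **add-route**. A minimal sketch follows, with a placeholder container ID and a rule matching the one added in the example further below:

```shell
# Show the routing rules configured in the container, rendered as JSON.
[root@localhost ~]# syscontainer-tools list-route --pretty 0d2d68b45aa0
# Delete a rule; the rule object should match the one that was added.
[root@localhost ~]# syscontainer-tools remove-route 0d2d68b45aa0 '[{"dest":"172.17.28.0/32","gw":"172.17.28.5","dev":"eth123"}]'
```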
+ +### Constraints + +- When using syscontainer-tools to add NICs and routes to containers, you are advised to run the **add-nic** command to add NICs and then run the **add-route** command to add routes. When using syscontainer-tools to delete NICs and routes from a container, you are advised to run the **remove-route** command to delete routes and then run the **remove-nic** command to delete NICs. +- When adding a routing rule to a container, ensure that the added routing rule does not conflict with existing routing rules in the container. + +### Example + +- Start a system container, and set **hook spec** to the isulad hook execution script. + + ```shell + [root@localhost ~]# isula run -tid --hook-spec /etc/syscontainer-tools/hookspec.json --system-container --external-rootfs /root/root-fs none init + 0d2d68b45aa0c1b8eaf890c06ab2d008eb8c5d91e78b1f8fe4d37b86fd2c190b + ``` + +- Use syscontainer-tools to add a physical NIC to the system container. + + ```shell + [root@localhost ~]# syscontainer-tools add-nic --type "eth" --name enp4s0:eth123 --ip 172.17.28.6/24 --mtu 1300 --qlen 2100 0d2d68b45aa0 + Add network interface (enp4s0) to container (0d2d68b45aa0,eth123) done + ``` + +- syscontainer-tools adds a routing rule to the system container. Format example: **\[\{"dest":"default", "gw":"192.168.10.1"\},\{"dest":"192.168.0.0/16","dev":"eth0","src":"192.168.1.2"\}\]**. If **dest** is left blank, its value will be **default**. + + ```shell + [root@localhost ~]# syscontainer-tools add-route 0d2d68b45aa0 '[{"dest":"172.17.28.0/32", "gw":"172.17.28.5","dev":"eth123"}]' + Add route to container 0d2d68b45aa0, route: {dest:172.17.28.0/32,src:,gw:172.17.28.5,dev:eth123} done + ``` + +- Check whether a routing rule is added in the container. + + ```shell + [root@localhost ~]# isula exec -it 0d2d68b45aa0 route + Kernel IP routing table + Destination Gateway Genmask Flags Metric Ref Use Iface + 172.17.28.0 172.17.28.5 255.255.255.255 UGH 0 0 0 eth123 + 172.17.28.0 0.0.0.0 255.255.255.0 U 0 0 0 eth123 + ``` + +## Volume Mounting Management + +### Function Description + +In a common container, you can set the **--volume** parameter during container creation to mount directories or volumes of the host to the container for resource sharing. However, during container running, you cannot unmount directories or volumes that are mounted to the container, or mount directories or volumes of the host to the container. Only the system container can use the syscontainer-tools tool to dynamically mount directories or volumes of the host to the container and unmount directories or volumes from the container. + +### Command Format + +```shell +syscontainer-tools [COMMAND][OPTIONS] [ARG...] +``` + +In the preceding format: + +**COMMAND**: command related to route management. + +**OPTIONS**: option supported by the route management command. + +**container\_id**: container ID. + +**ARG**: parameter corresponding to the command. + +### API Description + +**Table 1** + + + + + + + + + + + + + + + + + + + + + + + + +

| Command | Function Description | Option Description | Parameter Description |
|---|---|---|---|
| add-path | Adds files or directories on the host to a container. | None | Parameter format: `hostpath:containerpath:permission [hostpath:containerpath:permission ...]`<br>- hostpath: path of the volume on the host.<br>- containerpath: path of the volume in the container.<br>- permission: operation permission on the mount path within the container. |
| remove-path | Deletes directories or files from a container and restores them to the host. | None | Parameter format: `hostpath:containerpath [hostpath:containerpath ...]`<br>- hostpath: path of the volume on the host.<br>- containerpath: path of the volume in the container. |
| list-path | Lists all path directories in a container. | --pretty: outputs data in JSON format. | None |
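Mounted paths can be reviewed at any time with **list-path**. A minimal sketch, with a placeholder container ID:

```shell
# List all host paths currently mounted into the container, rendered as JSON.
[root@localhost ~]# syscontainer-tools list-path --pretty e45970a522d1
```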
+ +### Constraints + +- When running the **add-path** command, specify an absolute path as the mount path. +- The mount point /.sharedpath is generated on the host after the mount path is specified by running the **add-path** command. +- A maximum of 128 volumes can be added to a container. +- Do not overwrite the root directory \(/\) in a container with the host directory by running the **add-path** command. Otherwise, the function is affected. + +### Example + +- Start a system container, and set **hook spec** to the isulad hook execution script. + + ```shell + [root@localhost ~]# isula run -tid --hook-spec /etc/syscontainer-tools/hookspec.json --system-container --external-rootfs /root/root-fs none init + e45970a522d1ea0e9cfe382c2b868d92e7b6a55be1dd239947dda1ee55f3c7f7 + ``` + +- Use syscontainer-tools to mount a directory on the host to a container, implementing resource sharing. + + ```shell + [root@localhost ~]# syscontainer-tools add-path e45970a522d1 /home/test123:/home/test123 + Add path (/home/test123) to container(e45970a522d1,/home/test123) done. + ``` + +- Create a file in the **/home/test123** directory on the host and check whether the file can be accessed in the container. + + ```shell + [root@localhost ~]# echo "hello world" > /home/test123/helloworld + [root@localhost ~]# isula exec e45970a522d1 bash + [root@localhost /]# cat /home/test123/helloworld + hello world + ``` + +- Use syscontainer-tools to delete the mount directory from the container. + + ```shell + [root@localhost ~]# syscontainer-tools remove-path e45970a522d1 /home/test123:/home/test123 + Remove path (/home/test123) from container(e45970a522d1,/home/test123) done + [root@localhost ~]# isula exec e45970a522d1 bash + [root@localhost /]# ls /home/test123/helloworld + ls: cannot access '/home/test123/helloworld': No such file or directory + ``` diff --git a/docs/en/docs/container_form/system_container/environment_variable_persisting.md b/docs/en/docs/container_form/system_container/environment_variable_persisting.md new file mode 100644 index 0000000000000000000000000000000000000000..33837f2c6ae5d34bc685d50468a2bef455e94eb0 --- /dev/null +++ b/docs/en/docs/container_form/system_container/environment_variable_persisting.md @@ -0,0 +1,46 @@ +# Environment Variable Persisting + +## Function Description + +In a system container, you can make the **env** variable persistent to the configuration file in the rootfs directory of the container by specifying the **--env-target-file** interface parameter. + +## Parameter Description + + + + + + + + + + + +

| Command | Parameter | Value Description |
|---|---|---|
| isula create/run | --env-target-file | - Variable of the string type.<br>- The env persistent file must be in the rootfs directory and must be an absolute path. |
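Because the value of **--env** takes precedence over an entry of the same name in the target file (see Constraints below), restarting a container with a changed **--env** value effectively overrides the persisted one. A sketch, under the assumption that **/etc/environment** in the rootfs already contains an older **abc** entry:

```shell
# The persisted file is assumed to already hold abc=old; the command-line
# value wins, so /etc/environment in the container ends up with abc=123.
[root@localhost ~]# isula run -tid -e abc=123 --env-target-file /etc/environment --system-container --external-rootfs /root/myrootfs none init
```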
+ +## Constraints + +- If the target file specified by **--env-target-file** exists, the size cannot exceed 10 MB. +- The parameter specified by **--env-target-file** must be an absolute path in the rootfs directory. +- If the value of **--env** conflicts with that of **env** in the target file, the value of **--env** prevails. + +## Example + +Start a system container and specify the **env** environment variable and **--env-target-file** parameter. + +```shell +[root@localhost ~]# isula run -tid -e abc=123 --env-target-file /etc/environment --system-container --external-rootfs /root/myrootfs none init +b75df997a64da74518deb9a01d345e8df13eca6bcc36d6fe40c3e90ea1ee088e +[root@localhost ~]# isula exec b7 cat /etc/environment +PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin +TERM=xterm +abc=123 +``` + +The preceding information indicates that the **env** variable \(**abc=123**\) of the container has been made persistent to the **/etc/environment** configuration file. diff --git a/docs/en/docs/container_form/system_container/installation_guideline.md b/docs/en/docs/container_form/system_container/installation_guideline.md new file mode 100644 index 0000000000000000000000000000000000000000..0d917827b384d38e33f1b5bba96a98911b2cb52f --- /dev/null +++ b/docs/en/docs/container_form/system_container/installation_guideline.md @@ -0,0 +1,26 @@ +# Installation Guideline + +1. Install the container engine iSulad. + + ```shell + # yum install iSulad + ``` + +2. Install dependent packages of system containers. + + ```shell + # yum install isulad-tools authz isulad-lxcfs-toolkit lxcfs + ``` + +3. Run the following command to check whether iSulad is started: + + ```shell + # systemctl status isulad + ``` + +4. Enable the lxcfs and authz services. + + ```shell + # systemctl start lxcfs + # systemctl start authz + ``` diff --git a/docs/en/docs/container_form/system_container/maximum_number_of_handles.md b/docs/en/docs/container_form/system_container/maximum_number_of_handles.md new file mode 100644 index 0000000000000000000000000000000000000000..b582e3be00e66328b754c7fe1e1f03c969a88d4e --- /dev/null +++ b/docs/en/docs/container_form/system_container/maximum_number_of_handles.md @@ -0,0 +1,55 @@ +# Maximum Number of Handles + +## Function Description + +System containers support limit on the number of file handles. File handles include common file handles and network sockets. When starting a container, you can specify the **--files-limit** parameter to limit the maximum number of handles opened in the container. + +## Parameter Description + + + + + + + + + + + + +

| Command | Parameter | Value Description |
|---|---|---|
| isula create/run | --files-limit | - The value cannot be negative and must be an integer.<br>- The value 0 indicates that the number is not limited by the parameter. The maximum number is determined by the current kernel files cgroup. |
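If the limit is set too low, even **isula exec** may fail because it needs to open files in the container (see Constraints below). The following sketch checks current handle consumption against the limit; the **files.usage** node is an assumption and may differ by kernel version:

```shell
# Inside the container, compare current handle usage with the configured limit.
# files.usage is assumed to exist alongside files.limit in the files cgroup.
[root@localhost ~]# isula exec -it 01e82fcf97d4 cat /sys/fs/cgroup/files/files.usage
```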
+ +## Constraints + +- If the value of **--files-limit** is too small, the system container may fail to run the **exec** command and the error "open temporary files" is reported. Therefore, you are advised to set the parameter to a large value. +- File handles include common file handles and network sockets. + +## Example + +To use **--files-limit** to limit the number of file handles opened in a container, run the following command to check whether the kernel supports files cgroup: + +```shell +[root@localhost ~]# cat /proc/1/cgroup | grep files +10:files:/ +``` + +If **files** is displayed, files cgroup is supported. + +Start the container, specify the **--files-limit** parameter, and check whether the **files.limit** parameter is successfully written. + +```shell +[root@localhost ~]# isula run -tid --files-limit 1024 --system-container --external-rootfs /tmp/root-fs empty init 01e82fcf97d4937aa1d96eb8067f9f23e4707b92de152328c3fc0ecb5f64e91d +[root@localhost ~]# isula exec -it 01e82fcf97d4 bash +[root@localhost ~]# cat /sys/fs/cgroup/files/files.limit +1024 + +``` + +The preceding information indicates that the number of file handles is successfully limited in the container. diff --git a/docs/en/docs/container_form/system_container/overview.md b/docs/en/docs/container_form/system_container/overview.md new file mode 100644 index 0000000000000000000000000000000000000000..d7b09fa5963a6216c0a8c9b3932c334479878b77 --- /dev/null +++ b/docs/en/docs/container_form/system_container/overview.md @@ -0,0 +1,3 @@ +# System Container + +System containers are used for heavyweight applications and cloud-based services in scenarios with re-computing, high performance, and high concurrency. Compared with the VM technology, system containers can directly inherit physical machine features and has better performance and less overhead. In addition, system containers can be allocated more computing units of limited resources, reducing costs. Therefore, system containers can be used to build differentiated product competitiveness and provide computing unit instances with higher computing density, lower price, and better performance. diff --git a/docs/en/docs/container_form/system_container/reboot_or_shutdown_in_a_container.md b/docs/en/docs/container_form/system_container/reboot_or_shutdown_in_a_container.md new file mode 100644 index 0000000000000000000000000000000000000000..8705e47f68f94c7aefcefced00293b68d60bfe67 --- /dev/null +++ b/docs/en/docs/container_form/system_container/reboot_or_shutdown_in_a_container.md @@ -0,0 +1,75 @@ +# Reboot or Shutdown in a Container + +## Function Description + +The **reboot** and **shutdown** commands can be executed in a system container. You can run the **reboot** command to restart a container, and run the **shutdown** command to stop a container. + +## Parameter Description + + + + + + + + + + + +

| Command | Parameter | Value Description |
|---|---|---|
| isula create/run | --restart | - Variable of the string type.<br>- Supported option is as follows:<br>on-reboot: restarts the system container. |
+ +## Constraints + +- The shutdown function relies on the actual OS of the container running environment. +- When you run the **shutdown -h now** command to shut down the system, do not open multiple consoles. For example, if you run the **isula run -ti** command to open a console and run the **isula attach** command for the container in another host bash, another console is opened. In this case, the **shutdown** command fails to be executed. + +## Example + +- Specify the **--restart on-reboot** parameter when starting a container. For example: + + ```shell + [root@localhost ~]# isula run -tid --restart on-reboot --system-container --external-rootfs /root/myrootfs none init + 106faae22a926e22c828a0f2b63cf5c46e5d5986ea8a5b26de81390d0ed9714f + ``` + +- In the container, run the **reboot** command. + + ```shell + [root@localhost ~]# isula exec -it 10 bash + [root@localhost /]# reboot + ``` + + Check whether the container is restarted. + + ```shell + [root@localhost ~]# isula exec -it 10 ps aux + USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND + root 1 0.1 0.0 21588 9504 ? Ss 12:11 0:00 init + root 14 0.1 0.0 27024 9376 ? Ss 12:11 0:00 /usr/lib/system + root 17 0.0 0.0 18700 5876 ? Ss 12:11 0:00 /usr/lib/system + dbus 22 0.0 0.0 9048 3624 ? Ss 12:11 0:00 /usr/bin/dbus-d + root 26 0.0 0.0 8092 3012 ? Rs+ 12:13 0:00 ps aux + ``` + +- In the container, run the **shutdown** command. + + ```shell + [root@localhost ~]# isula exec -it 10 bash + [root@localhost /]# shutdown -h now + [root@localhost /]# [root@localhost ~]# + ``` + + Check whether the container is stopped. + + ```shell + [root@localhost ~]# isula exec -it 10 bash + Error response from daemon: Exec container error;Container is not running:106faae22a926e22c828a0f2b63cf5c46e5d5986ea8a5b26de81390d0ed9714f + ``` diff --git a/docs/en/docs/container_form/system_container/security_and_isolation.md b/docs/en/docs/container_form/system_container/security_and_isolation.md new file mode 100644 index 0000000000000000000000000000000000000000..d5131522fe932083533bee314c22b5ccf178d952 --- /dev/null +++ b/docs/en/docs/container_form/system_container/security_and_isolation.md @@ -0,0 +1,334 @@ +# Security and Isolation + +## Many-to-Many User Namespaces + +### Function Description + +User namespaces are used to map user **root** of a container to a common user of the host and allow the processes and user in the container \(that are unprivileged on the host\) to have privilege. This can prevent the processes in the container from escaping to the host and performing unauthorized operations. In addition, after user namespaces are used, the container and host use different UIDs and GIDs. This ensures that user resources in the container such as file descriptors are isolated from those on the host. + +In system containers, you can configure the **--user-remap** API parameter to map user namespaces of different containers to different user namespaces on the host, isolating the user namespaces of containers. + +### Parameter Description + + + + + + + + + + + +

| Command | Parameter | Value Description |
|---|---|---|
| isula create/run | --user-remap | The parameter format is uid:gid:offset. The parameter is described as follows:<br>- uid and gid must be integers greater than or equal to 0.<br>- offset must be an integer greater than 0 and less than 65536. The value cannot be too small. Otherwise, the container cannot be started.<br>- Either the sum of uid and offset or the sum of gid and offset must be less than or equal to 2<sup>32</sup> - 1. Otherwise, an error is reported during container startup. |
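Before starting a container with **--user-remap**, the ownership of the rootfs must be shifted by the same offset (see the Usage Guide below). A minimal sketch, assuming every file in the rootfs is owned by root (UID/GID 0); a rootfs that contains files owned by other users would instead need each UID/GID shifted individually:

```shell
# Shift the whole rootfs into the remapped ID range (root-owned files only).
[root@localhost ~]# chown -R 100000:100000 /home/root-fs
```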
+ +### Constraints + +- If **--user-remap** is specified in a system container, the rootfs directory must be accessible to users specified by _uid_ or _gid_ in **--user-remap**. Otherwise, user namespaces of containers cannot access rootfs. As a result, the containers fail to be started. +- All IDs in the container can be mapped to the host rootfs. Some directories or files may be mounted from the host to containers, for example, device files in the **/dev/pts** directory. If _offset_ is too small, the mounting may fail. +- _uid_, _gid_, and _offset_ are controlled by the upper-layer scheduling platform. The container engine only checks the validity of them. +- **--user-remap** is available only in system containers. +- **--user-remap** and **--privileged** cannot be set simultaneously. Otherwise, an error is reported during container startup. +- If _uid_ or _gid_ is set to **0**, **--user-remap** does not take effect. +- If **--user-map** is specified for a system container, ensure that the user corresponding to the specified UID or GID can access the isulad metadata directories (**/var/lib/isulad/**, **/var/lib/isulad/engines/**, and **/var/lib/isulad/engines/lcr**). +- **--user-remap** and **--userns** cannot be specified at the same time. + +### Usage Guide + +> [!NOTE]NOTE +> Before specifying the **--user-remap** parameter, configure an offset value for UIDs and GIDs of all directories and files in rootfs. The offset value should be equal to that for _uid_ and _gid_ in **--user-remap**. +> For example, run the following command to offset UIDs and GIDs of all files in the **dev** directory with 100000: +> chown 100000:100000 dev + +Specify the **--user-remap** parameter when the system container is started. + +```shell +[root@localhost ~]# chmod 751 /var/lib/isulad/ +[root@localhost ~]# chmod 751 /var/lib/isulad/engines/ +[root@localhost ~]# chmod 751 /var/lib/isulad/engines/lcr +[root@localhost ~]# isula run -tid --user-remap 100000:100000:65535 --system-container --external-rootfs /home/root-fs none /sbin/init +eb9605b3b56dfae9e0b696a729d5e1805af900af6ce24428fde63f3b0a443f4a +``` + +Check the /sbin/init process information on the host and in a container. + +```shell +[root@localhost ~]# isula exec eb ps aux | grep /sbin/init +root 1 0.6 0.0 21624 9624 ? Ss 15:47 0:00 /sbin/init +[root@localhost ~]# ps aux | grep /sbin/init +100000 4861 0.5 0.0 21624 9624 ? Ss 15:47 0:00 /sbin/init +root 4948 0.0 0.0 213032 808 pts/0 S+ 15:48 0:00 grep --color=auto /sbin/init +``` + +The owner of the /sbin/init process in the container is user **root**, but the owner of the host is the user whose UID is **100000**. + +Create a file in a container and view the file owner on the host. + +```shell +[root@localhost ~]# isula exec -it eb bash +[root@localhost /]# echo test123 >> /test123 +[root@localhost /]# exit +exit +[root@localhost ~]# ll /home/root-fs/test123 +-rw-------. 1 100000 100000 8 Aug 2 15:52 /home/root-fs/test123 +``` + +The owner of the file that is generated in the container is user **root**, but the file owner displayed on the host is the user whose ID is **100000**. + +## User Permission Control + +### Function Description + +A container engine supports TLS for user identity authentication, which is used to control user permissions. Currently, container engines can connect to the authz plug-in to implement permission control. + +### API Description + +You can configure the startup parameters of the iSulad container engine to specify the permission control plug-in. 
The default daemon configuration file is **/etc/isulad/daemon.json**. + + + + + + + + + + + +

| Parameter | Example | Description |
|---|---|---|
| --authorization-plugin | "authorization-plugin": "authz-broker" | User permission authentication plug-in. Currently, only authz-broker is supported. |
+ +### Constraints + +- User permission policies need to be configured for authz. The default policy file is **/var/lib/authz-broker/policy.json**. This file can be dynamically modified and the modification will take effect immediately without restarting the plug-in service. +- A container engine can be started by user **root**. If some commands used are enabled for by common users, common users may obtain excessive permissions. Therefore, exercise caution when performing such operations. Currently, running the **container\_attach**, **container\_create**, and **container\_exec\_create** commands may cause risks. +- Some compound operations, such as running **isula exec** and **isula inspect** or running and **isula attach** and **isula inspect**, depend on the permission of **isula inspect**. If a user does not have this permission, an error is reported. +- Using SSL/TLS encryption channels hardens security but also reduces performance. For example, the delay increases, more CPU resources are consumed, and encryption and decryption require higher throughput. Therefore, the number of concurrent executions decreases compared with non-TLS communication. According to the test result, when the ARM server \(Cortex-A72 64-core\) is almost unloaded, TLS is used to concurrently start a container. The maximum number of concurrent executions is 200 to 250. +- If **--tlsverify** is specified on the server, the default path where authentication files store is **/etc/isulad**. The default file names are **ca.pem**, **cert.pem**, and **key.pem**. + +### Example + +1. Ensure that the authz plug-in is installed on the host. If the authz plug-in is not installed, run the following command to install and start the authz plug-in service: + + ```shell + [root@localhost ~]# yum install authz + [root@localhost ~]# systemctl start authz + ``` + +2. To enable this function, configure the container engine and TLS certificate. You can use OpenSSL to generate the required certificate. + + ```shell + #SERVERSIDE + + # Generate CA key + openssl genrsa -aes256 -passout "pass:$PASSWORD" -out "ca-key.pem" 4096 + # Generate CA + openssl req -new -x509 -days $VALIDITY -key "ca-key.pem" -sha256 -out "ca.pem" -passin "pass:$PASSWORD" -subj "/C=$COUNTRY/ST=$STATE/L=$CITY/O=$ORGANIZATION/OU=$ORGANIZATIONAL_UNIT/CN=$COMMON_NAME/emailAddress=$EMAIL" + # Generate Server key + openssl genrsa -out "server-key.pem" 4096 + + # Generate Server Certs. + openssl req -subj "/CN=$COMMON_NAME" -sha256 -new -key "server-key.pem" -out server.csr + + echo "subjectAltName = DNS:localhost,IP:127.0.0.1" > extfile.cnf + echo "extendedKeyUsage = serverAuth" >> extfile.cnf + + openssl x509 -req -days $VALIDITY -sha256 -in server.csr -passin "pass:$PASSWORD" -CA "ca.pem" -CAkey "ca-key.pem" -CAcreateserial -out "server-cert.pem" -extfile extfile.cnf + + #CLIENTSIDE + + openssl genrsa -out "key.pem" 4096 + openssl req -subj "/CN=$CLIENT_NAME" -new -key "key.pem" -out client.csr + echo "extendedKeyUsage = clientAuth" > extfile.cnf + openssl x509 -req -days $VALIDITY -sha256 -in client.csr -passin "pass:$PASSWORD" -CA "ca.pem" -CAkey "ca-key.pem" -CAcreateserial -out "cert.pem" -extfile extfile.cnf + ``` + + If you want to use the preceding content as the script, replace the variables with the configured values. If the parameter used for generating the CA is empty, set it to **"**. **PASSWORD**, **COMMON\_NAME**, **CLIENT\_NAME**, and **VALIDITY** are mandatory. + +3. 
When starting the container engine, add parameters related to the TLS and authentication plug-in and ensure that the authentication plug-in is running properly. In addition, to use TLS authentication, the container engine must be started in TCP listening mode instead of the Unix socket mode. The configuration on the container daemon is as follows: + + ```json + { + "tls": true, + "tls-verify": true, + "tls-config": { + "CAFile": "/root/.iSulad/ca.pem", + "CertFile": "/root/.iSulad/server-cert.pem", + "KeyFile":"/root/.iSulad/server-key.pem" + }, + "authorization-plugin": "authz-broker" + } + ``` + +4. Configure policies. For the basic authorization process, all policies are stored in the **/var/lib/authz-broker/policy.json** configuration file. The configuration file can be dynamically modified without restarting the plug-in. Only the SIGHUP signal needs to be sent to the authz process. In the file, a line contains one JSON policy object. The following provides policy configuration examples: + + - All users can run all iSuald commands: **\{"name":"policy\_0","users":\[""\],"actions":\[""\]\}** + - Alice can run all iSulad commands: **\{"name":"policy\_1","users":\["alice"\],"actions":\[""\]\}** + - A blank user can run all iSulad commands: **\{"name":"policy\_2","users":\[""\],"actions":\[""\]\}** + - Alice and Bob can create new containers: **\{"name":"policy\_3","users":\["alice","bob"\],"actions":\["container\_create"\]\}** + - service\_account can read logs and run **docker top**: **\{"name":"policy\_4","users":\["service\_account"\],"actions":\["container\_logs","container\_top"\]\}** + - Alice can perform any container operations: **\{"name":"policy\_5","users":\["alice"\],"actions":\["container"\]\}** + - Alice can perform any container operations, but the request type can only be **get**: **\{"name":"policy\_5","users":\["alice"\],"actions":\["container"\], "readonly":true\}** + + > [!NOTE]NOTE + > - **actions** supports regular expressions. + > - **users** does not support regular expressions. + > - A users cannot be repeatedly specified by **users**. That is, a user cannot match multiple rules. + +5. After updating the configurations, configure TLS parameters on the client to connect to the container engine. That is, access the container engine with restricted permissions. + + ```shell + [root@localhost ~]# isula version --tlsverify --tlscacert=/root/.iSulad/ca.pem --tlscert=/root/.iSulad/cert.pem --tlskey=/root/.iSulad/key.pem -H=tcp://127.0.0.1:2375 + ``` + + If you want to use the TLS authentication for default client connection, move the configuration file to **\~/.iSulad** and set the **ISULAD\_HOST** and **ISULAD\_TLS\_VERIFY** variables \(rather than transferring **-H=tcp://$HOST:2375** and -**-tlsverify** during each call\). + + ```shell + [root@localhost ~]# mkdir -pv ~/.iSulad + [root@localhost ~]# cp -v {ca,cert,key}.pem ~/.iSulad + [root@localhost ~]# export ISULAD_HOST=localhost:2375 ISULAD_TLS_VERIFY=1 + [root@localhost ~]# isula version + ``` + +## proc File System Isolation + +### Application Scenario + +Container virtualization is lightweight and efficient, and can be quickly deployed. However, containers are not strongly isolated, which causes great inconvenience to users. Containers have some defects in isolation because the namespace feature of the Linux kernel is not perfect. For example, you can view the proc information on the host \(such as meminfo, cpuinfo, stat, and uptime\) in the proc file system of a container. 
You can use the lxcfs tool to replace the /proc content of instances in the container with the content in the /proc file system of the host so that services in the container can obtain the correct resource value. + +### API Description + +A system container provides two tool packages: lxcfs and lxcfs-toolkit, which are used together. Lxcfs resides on the host as the daemon process. lxcfs-toolkit mounts the lxcfs file system of the host to containers through the hook mechanism. + +The command line of lxcfs-toolkit is as follows: + +```shell +lxcfs-toolkit [OPTIONS] COMMAND [COMMAND_OPTIONS] +``` + + + + + + + + + + + + + + + + + + + + + + + +

| Command | Function | Parameter |
|---|---|---|
| remount | Remounts lxcfs to containers. | - --all: remounts lxcfs to all containers.<br>- --container-id: remounts lxcfs to a specified container. |
| umount | Unmounts lxcfs from containers. | - --all: unmounts lxcfs from all containers.<br>- --container-id: unmounts lxcfs from a specified container. |
| check-lxcfs | Checks whether the lxcfs service is running properly. | None |
| prestart | Mounts the /var/lib/lxcfs directory to the container before the lxcfs service starts. | None |
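For instance, after the lxcfs service is restarted on the host, the mounts inside running containers can be refreshed as follows; this is a sketch built only from the commands in the table above:

```shell
# Verify that the lxcfs service is running properly.
[root@localhost ~]# lxcfs-toolkit check-lxcfs
# Remount lxcfs into all running containers.
[root@localhost ~]# lxcfs-toolkit remount --all
```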
+ +### Constraints + +- Currently, only the **cpuinfo**, **meminfo**, **stat**, **diskstats**, **partitions**, **swaps**, and **uptime** files in the proc file system are supported. Other files are not isolated from other kernel API file systems \(such as sysfs\). +- After an RPM package is installed, a sample JSON file is generated in **/var/lib/lcrd/hooks/hookspec.json**. To add the log function, you need to add the **--log** configuration during customization. +- The **diskstats** file displays only information about disks that support CFQ scheduling, instead of partition information. Devices in containers are displayed as names in the **/dev** directory. If a device name does not exist, the information is left blank. In addition, the device where the container root directory is located is displayed as **sda**. +- The **slave** parameter is required when lxcfs is mounted. If the **shared** parameter is used, the mount point in containers may be leaked to the host, affecting the host running. +- Lxcfs supports graceful service degradation. If the lxcfs service crashes or becomes unavailable, the **cpuinfo**, **meminfo**, **stat**, **diskstats**, **partitions**, **swaps**and **uptime** files in containers are about host information, and other service functions of containers are not affected. +- Bottom layer of lxcfs depends on the FUSE kernel module and libfuse library. Therefore, the kernel needs to support FUSE. +- Lxcfs supports only the running of 64-bit applications in containers. If a 32-bit application is running in a container, the CPU information \(**cpuinfo**\) read by the application may fail to meet expectations. +- Lxcfs simulates the resource view only of container control groups \(cgroups\). Therefore, system calls \(such as sysconf\) in containers can obtain only host information. Lxcfs cannot implement the kernel isolation. +- The CPU information \(**cpuinfo**\) displayed after lxcfs implements the isolation has the following features: + - **processor**: The value increases from 0. + - **physical id**: The value increases from 0. + - **sibliing**: It has a fixed value of **1**. + - **core id**: It has a fixed value of **0**. + - **cpu cores**: It has a fixed value of **1**. + +### Example + +1. Install the lxcfs and lxcfs-toolkit packages and start the lxcfs service. + + ```shell + [root@localhost ~]# yum install lxcfs lxcfs-toolkit + [root@localhost ~]# systemctl start lxcfs + ``` + +2. After a container is started, check whether the lxcfs mount point exists in the container. 
+ + ```shell + [root@localhost ~]# isula run -tid -v /var/lib/lxc:/var/lib/lxc --hook-spec /var/lib/isulad/hooks/hookspec.json --system-container --external-rootfs /home/root-fs none init + a8acea9fea1337d9fd8270f41c1a3de5bceb77966e03751346576716eefa9782 + [root@localhost ~]# isula exec a8 mount | grep lxcfs + lxcfs on /var/lib/lxc/lxcfs type fuse.lxcfs (rw,nosuid,nodev,relatime,user_id=0,group_id=0,allow_other) + lxcfs on /proc/cpuinfo type fuse.lxcfs (rw,nosuid,nodev,relatime,user_id=0,group_id=0,allow_other) + lxcfs on /proc/diskstats type fuse.lxcfs (rw,nosuid,nodev,relatime,user_id=0,group_id=0,allow_other) + lxcfs on /proc/meminfo type fuse.lxcfs (rw,nosuid,nodev,relatime,user_id=0,group_id=0,allow_other) + lxcfs on /proc/partitions type fuse.lxcfs (rw,nosuid,nodev,relatime,user_id=0,group_id=0,allow_other) + lxcfs on /proc/stat type fuse.lxcfs (rw,nosuid,nodev,relatime,user_id=0,group_id=0,allow_other) + lxcfs on /proc/swaps type fuse.lxcfs (rw,nosuid,nodev,relatime,user_id=0,group_id=0,allow_other) + lxcfs on /proc/uptime type fuse.lxcfs (rw,nosuid,nodev,relatime,user_id=0,group_id=0,allow_other) + ``` + +3. Run the **update** command to update the CPU and memory resource configurations of the container and check the container resources. As shown in the following command output, the container resource view displays the actual container resource data instead of data of the host. + + ```shell + [root@localhost ~]# isula update --cpuset-cpus 0-1 --memory 1G a8 + a8 + [root@localhost ~]# isula exec a8 cat /proc/cpuinfo + processor : 0 + BogoMIPS : 100.00 + cpu MHz : 2400.000 + Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 cpuid + CPU implementer : 0x41 + CPU architecture: 8 + CPU variant : 0x0 + CPU part : 0xd08 + CPU revision : 2 + + processor : 1 + BogoMIPS : 100.00 + cpu MHz : 2400.000 + Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 cpuid + CPU implementer : 0x41 + CPU architecture: 8 + CPU variant : 0x0 + CPU part : 0xd08 + CPU revision : 2 + + [root@localhost ~]# isula exec a8 free -m + total used free shared buff/cache available + Mem: 1024 17 997 7 8 1006 + Swap: 4095 0 4095 + ``` diff --git a/docs/en/docs/container_form/system_container/shared_memory_channels.md b/docs/en/docs/container_form/system_container/shared_memory_channels.md new file mode 100644 index 0000000000000000000000000000000000000000..624fe12515928fbf421e64bab4b05c1d0f1ef03d --- /dev/null +++ b/docs/en/docs/container_form/system_container/shared_memory_channels.md @@ -0,0 +1,55 @@ +# Shared Memory Channels + +## Function Description + +System containers enable the communication between container and host processes through shared memory. You can set the **--host-channel** parameter when creating a container to allow the host to share the same tmpfs with the container so that they can communicate with each other. + +## Parameter Description + + + + + + + + + + + +

| Command | Parameter | Value Description |
|---|---|---|
| isula create/run | --host-channel | - Variable of the string type. Its format is `<host path>:<container path>:<rw/ro>:<size limit>`.<br>- The parameter is described as follows:<br>`<host path>`: path to which tmpfs is mounted on the host, which must be an absolute path.<br>`<container path>`: path to which tmpfs is mounted in a container, which must be an absolute path.<br>`<rw/ro>`: permissions on the file system mounted to the container. The value can only be rw (read and write) or ro (read only). The default value is rw.<br>`<size limit>`: maximum size used by the mounted tmpfs. The minimum value is one 4 KB physical page, and the maximum value is half of the total physical memory in the system. The default value is 64 MB. |
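Since the host and the container share the same tmpfs, a file written on one side is immediately visible on the other. An illustrative sketch, reusing the **/testdir** mount from the example below; the container ID is a placeholder:

```shell
# On the host: write a message into the shared tmpfs.
[root@localhost ~]# echo ping > /testdir/msg
# In the container: read it back through the container-side path.
[root@localhost ~]# isula exec 3b947668eb54 cat /testdir/msg
ping
```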
+ +## Constraints + +- The lifecycle of tmpfs mounted on the host starts from the container startup to the container deletion. After a container is deleted and its occupied space is released, the space is removed. +- When a container is deleted, the path to which tmpfs is mounted on the host is deleted. Therefore, an existing directory on the host cannot be used as the mount path. +- To ensure that processes running by non-root users on the host can communicate with containers, the permission for tmpfs mounted on the host is 1777. + +## Example + +Specify the **--host-channel** parameter when creating a container. + +```shell +[root@localhost ~]# isula run --rm -it --host-channel /testdir:/testdir:rw:32M --system-container --external-rootfs /root/myrootfs none init +root@3b947668eb54:/# dd if=/dev/zero of=/testdir/test.file bs=1024 count=64K +dd: error writing '/testdir/test.file': No space left on device +32769+0 records in +32768+0 records out +33554432 bytes (34 MB, 32 MiB) copied, 0.0766899 s, 438 MB/s +``` + +> [!NOTE]NOTE +> +> - If **--host-channel** is used for size limit, the file size is constrained by the memory limit in the container. \(The OOM error may occur when the memory usage reaches the upper limit.\) +> - If a user creates a shared file on the host, the file size is not constrained by the memory limit in the container. +> - If you need to create a shared file in the container and the service is memory-intensive, you can add the value of **--host-channel** to the original value of the container memory limit, eliminating the impact. diff --git a/docs/en/docs/container_form/system_container/specifying_rootfs_to_create_a_container.md b/docs/en/docs/container_form/system_container/specifying_rootfs_to_create_a_container.md new file mode 100644 index 0000000000000000000000000000000000000000..16861966779f14e1232bfaca06bac5c25f7ee457 --- /dev/null +++ b/docs/en/docs/container_form/system_container/specifying_rootfs_to_create_a_container.md @@ -0,0 +1,45 @@ +# Specifying Rootfs to Create a Container + +## Function Description + +Different from a common container that needs to be started by specifying a container image, a system container is started by specifying a local root file system \(rootfs\) using the **--external-rootfs** parameter. The rootfs contains the operating system environment on which the container depends during running. + +## Parameter Description + + + + + + + + + + + + +

| Command | Parameter | Value Description |
|---|---|---|
| isula create/run | --external-rootfs | - Variable of the string type.<br>- Absolute path of the root file system (rootfs) that the container uses. |
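As noted below, a rootfs is typically obtained by decompressing the TAR package of a container image. A minimal sketch, where **rootfs.tar** is a placeholder for such an archive that ships a complete OS environment including systemd:

```shell
# Unpack a previously exported image file system into the rootfs directory.
[root@localhost ~]# mkdir -p /root/myrootfs
[root@localhost ~]# tar -xf rootfs.tar -C /root/myrootfs
```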
+ +## Constraints + +- The rootfs directory specified using the **--external-rootfs** parameter must be an absolute path. +- The rootfs directory specified using the **--external-rootfs** parameter must be a complete OS environment including **systemd** package. Otherwise, the container fails to be started. +- When a container is deleted, the rootfs directory specified using **--external-rootfs** is not deleted. +- Containers based on an ARM rootfs cannot run in the x86 environment. Containers based on an x86 rootfs cannot run in the ARM environment. +- You are advised not to start multiple container instances in the same rootfs. That is, one rootfs is used by only one container instance that is in the lifecycle. + +## Example + +Assuming the local rootfs path is **/root/myrootfs**, run the following command to start a system container: + +```shell +# isula run -tid --system-container --external-rootfs /root/myrootfs none init +``` + +> [!NOTE]NOTE +> The rootfs is a user-defined file system. Prepare it by yourself. For example, a rootfs is generated after the TAR package of a container image is decompressed. diff --git a/docs/en/docs/container_form/system_container/usage_guide.md b/docs/en/docs/container_form/system_container/usage_guide.md new file mode 100644 index 0000000000000000000000000000000000000000..ddf32a1f4de2ccdc5f17a473b3d05a3e0f4b44fa --- /dev/null +++ b/docs/en/docs/container_form/system_container/usage_guide.md @@ -0,0 +1,20 @@ +# Usage Guide + +System container functions are enhanced based on the iSula container engine. The container management function and the command format of the function provided by system containers are the same as those provided by the iSula container engine. + +The following sections describe how to use the enhanced functions provided by system containers. For details about other command operations, see iSulad container engine documents. + +The system container functions involve only the **isula create/run** command. Unless otherwise specified, this command is used for all functions. The command format is as follows: + +```shell +isula create/run [OPTIONS] [COMMAND] [ARG...] +``` + +In the preceding format: + +- **OPTIONS**: one or more command parameters. For details about supported parameters, see iSulad container engine [appendix](../../container_engine/isula_container_engine/appendix.md#command-line-parameters). +- **COMMAND**: command executed after a system container is started. +- **ARG**: parameter corresponding to the command executed after a system container is started. + +>[!NOTE]Note +> Root privileges are necessary for using system containers. diff --git a/docs/en/docs/container_form/system_container/using_systemd_to_start_a_container.md b/docs/en/docs/container_form/system_container/using_systemd_to_start_a_container.md new file mode 100644 index 0000000000000000000000000000000000000000..05ce9a53e2a05354517530d4e80232fdd8beab13 --- /dev/null +++ b/docs/en/docs/container_form/system_container/using_systemd_to_start_a_container.md @@ -0,0 +1,85 @@ +# Using systemd to Start a Container + +## Function Description + +The init process started in system containers differs from that in common containers. Common containers cannot start system services through systemd. However, system containers have this capability. You can enable the systemd service by specifying the **--system-container** parameter when starting a system container. + +## Parameter Description + + + + + + + + + + + + +

| Command | Parameter | Value Description |
|---|---|---|
| isula create/run | --system-container | - The value is of the Boolean type and can be true or false. The default value is true.<br>- Specifies whether the container is a system container. This function must be enabled. |
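Because the init process ignores SIGTERM (see Constraints below), **isula stop** waits 10 seconds by default before forcibly killing the container. A sketch of shortening that wait; the **-t** timeout flag is an assumption based on common container engine CLIs:

```shell
# Stop the system container, forcing the kill after 2 seconds.
[root@localhost ~]# isula stop -t 2 systest01
```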
+ +## Constraints + +- The systemd service needs to call some special system APIs, including mount, umount2, unshare, reboot, and name\_to\_handle\_at. Therefore, permissions to call the preceding APIs are enabled for system containers when the privileged container tag is disabled. +- All system containers are started by the init process. The init process does not respond to the SIGTERM signal which indicates normal exit. By default, the **stop** command forcibly kills the container 10 seconds later. If you need a quicker stop, you can manually specify the timeout duration of the **stop** command. +- **--system-container** must be used together with **--external-rootfs**. +- Various services can run in a system container. The **systemctl** command is used to manage the service starting and stopping. Services may depend on each other. As a result, when an exception occurs, some service processes are in the D or Z state so that the container cannot exit properly. +- Some service processes in a system container may affect other operation results. For example, if the NetworkManager service is running in the container, adding NICs to the container may be affected \(the NICs are successfully added but then stopped by the NetworkManger\), resulting in unexpected results. +- Currently, system containers and hosts cannot be isolated by using udev events. Therefore, the **fstab** file cannot be configured. +- The systemd service may conflict with the cgconfig service provided by libcgroup. You are advised to delete the libcgroup-related packages from a container or set **Delegate** of the cgconfig service to **no**. + +## Example + +- Specify the **--system-container** and **--external-rootfs** parameters to start a system container. + + ```shell + [root@localhost ~]# isula run -tid -n systest01 --system-container --external-rootfs /root/myrootfs none init + ``` + +- After the preceding commands are executed, the container is running properly. You can run the **exec** command to access the container and view the process information. The command output indicates that the systemd service has been started. + + ```shell + [root@localhost ~]# isula exec -it systest01 bash + [root@localhost /]# ps -ef + UID PID PPID C STIME TTY TIME CMD + root 1 0 2 06:49 ? 00:00:00 init + root 14 1 2 06:49 ? 00:00:00 /usr/lib/systemd/systemd-journal + root 16 1 0 06:49 ? 00:00:00 /usr/lib/systemd/systemd-network + dbus 23 1 0 06:49 ? 00:00:00 /usr/bin/dbus-daemon --system -- + root 25 0 0 06:49 ? 00:00:00 bash + root 59 25 0 06:49 ? 00:00:00 ps -ef + ``` + +- Run the **systemctl** command in the container to check the service status. The command output indicates that the service is managed by systemd. + + ```shell + [root@localhost /]# systemctl status dbus + ● dbus.service - D-Bus System Message Bus + Loaded: loaded (/usr/lib/systemd/system/dbus.service; static; vendor preset: + disabled) + Active: active (running) since Mon 2019-07-22 06:49:38 UTC; 2min 5 + 8s ago + Docs: man:dbus-daemon(1) + Main PID: 23 (dbus-daemon) + CGroup: /system.slice/dbus.service + └─23 /usr/bin/dbus-daemon --system --address=systemd: --nofork --nopidf + ile --systemd-activation --syslog-only + + Jul 22 06:49:38 localhost systemd[1]: Started D-Bus System Message Bus. + ``` + +- Run the **systemctl** command in the container to stop or start the service. The command output indicates that the service is managed by systemd. 
+ + ```shell + [root@localhost /]# systemctl stop dbus + Warning: Stopping dbus.service, but it can still be activated by: + dbus.socket + [root@localhost /]# systemctl start dbus + ``` diff --git a/docs/en/docs/container_form/system_container/writable_namespace_kernel_parameters.md b/docs/en/docs/container_form/system_container/writable_namespace_kernel_parameters.md new file mode 100644 index 0000000000000000000000000000000000000000..deb1ea78772ae1919147ee2df56ce8f1d4da9e36 --- /dev/null +++ b/docs/en/docs/container_form/system_container/writable_namespace_kernel_parameters.md @@ -0,0 +1,86 @@ +# Writable Namespace Kernel Parameters + +## Function Description + +For services running in containers, such as databases, big data, and common applications, some kernel parameters need to be set and adjusted to obtain optimal performance and reliability. The modification permission of all kernel parameters must be disabled or enabled simultaneously \(by using a privileged container\). + +When the modification permission is disabled, only the **--sysctl** external interface is provided and parameters cannot be flexibly modified in a container. + +When the modification permission is enabled, some kernel parameters are globally valid. If some parameters are modified in a container, all programs on the host will be affected, harming security. + +System containers provide the **--ns-change-opt** parameter, which can be used to dynamically set namespace kernel parameters in a container. The parameter value can be **net** or **ipc**. + +## Parameter Description

| Command | Parameter | Value Description |
| ---------------- | --------------- | ------------------------------------------------------------ |
| isula create/run | --ns-change-opt | Variable of the string type. The value can be **net**, **ipc**, or both, separated by commas (,), for example, **--ns-change-opt=net,ipc**.<br>- **net**: all namespace parameters in the **/proc/sys/net** directory are supported.<br>- **ipc**: the supported namespace parameters are **/proc/sys/kernel/msgmax**, **/proc/sys/kernel/msgmnb**, **/proc/sys/kernel/msgmni**, **/proc/sys/kernel/sem**, **/proc/sys/kernel/shmall**, **/proc/sys/kernel/shmmax**, **/proc/sys/kernel/shmmni**, **/proc/sys/kernel/shm_rmid_forced**, **/proc/sys/fs/mqueue/msg_default**, **/proc/sys/fs/mqueue/msg_max**, **/proc/sys/fs/mqueue/msgsize_default**, **/proc/sys/fs/mqueue/msgsize_max**, and **/proc/sys/fs/mqueue/queues_max**. |
+
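As an illustration of what the parameter above enables, a **net** namespace parameter can be written directly from inside a container started with **--ns-change-opt=net**. The following is a minimal sketch; the container name **systest01** and the chosen parameter value are assumptions for illustration only:

```shell
# Succeeds because /proc/sys/net is remounted read-write in the container.
[root@localhost ~]# isula exec -it systest01 sh -c "echo 600 > /proc/sys/net/ipv4/tcp_keepalive_time"
```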
+ +## Constraints + +- If both **--privileged** \(privileged container\) and **--ns-change-opt** are specified during container startup, **--ns-change-opt** does not take effect. + +## Example + +Start a container and set **--ns-change-opt** to **net**. + +```shell +[root@localhost ~]# isula run -tid --ns-change-opt net --system-container --external-rootfs /root/myrootfs none init +4bf44a42b4a14fdaf127616c90defa64b4b532b18efd15b62a71cbf99ebc12d2 +[root@localhost ~]# isula exec -it 4b mount | grep /proc/sys +proc on /proc/sys type proc (ro,nosuid,nodev,noexec,relatime) +proc on /proc/sysrq-trigger type proc (ro,nosuid,nodev,noexec,relatime) +proc on /proc/sys/net type proc (rw,nosuid,nodev,noexec,relatime) +``` + +The mount point **/proc/sys/net** in the container has the **rw** option, indicating that the **net**-related namespace kernel parameters have read and write permissions. + +Start another container and set **--ns-change-opt** to **ipc**. + +```shell +[root@localhost ~]# isula run -tid --ns-change-opt ipc --system-container --external-rootfs /root/myrootfs none init +c62e5e5686d390500dab2fa76b6c44f5f8da383a4cbbeac12cfada1b07d6c47f +[root@localhost ~]# isula exec -it c6 mount | grep /proc/sys +proc on /proc/sys type proc (ro,nosuid,nodev,noexec,relatime) +proc on /proc/sysrq-trigger type proc (ro,nosuid,nodev,noexec,relatime) +proc on /proc/sys/kernel/shmmax type proc (rw,nosuid,nodev,noexec,relatime) +proc on /proc/sys/kernel/shmmni type proc (rw,nosuid,nodev,noexec,relatime) +proc on /proc/sys/kernel/shmall type proc (rw,nosuid,nodev,noexec,relatime) +proc on /proc/sys/kernel/shm_rmid_forced type proc (rw,nosuid,nodev,noexec,relatime) +proc on /proc/sys/kernel/msgmax type proc (rw,nosuid,nodev,noexec,relatime) +proc on /proc/sys/kernel/msgmni type proc (rw,nosuid,nodev,noexec,relatime) +proc on /proc/sys/kernel/msgmnb type proc (rw,nosuid,nodev,noexec,relatime) +proc on /proc/sys/kernel/sem type proc (rw,nosuid,nodev,noexec,relatime) +proc on /proc/sys/fs/mqueue type proc (rw,nosuid,nodev,noexec,relatime) +``` + +The mount point information of **ipc**-related kernel parameters in the container contains the **rw** option, indicating that the **ipc**-related namespace kernel parameters have read and write permissions. diff --git a/docs/en/docs/container_runtime/kuasar/_toc.yaml b/docs/en/docs/container_runtime/kuasar/_toc.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a4c04e860cecc50612e2fc8557082929bba59043 --- /dev/null +++ b/docs/en/docs/container_runtime/kuasar/_toc.yaml @@ -0,0 +1,12 @@ +label: Kuasar Multi-Sandbox Container Runtime +isManual: true +description: Kuasar enables unified management of diverse sandbox types and supports multiple leading sandbox isolation technologies used in the industry.
+sections: +- label: Overview + href: ./overview.md +- label: Installation and Configuration + href: ./kuasar_install_config.md +- label: Usage Instructions + href: ./kuasar_usage.md +- label: Appendix + href: ./kuasar_appendix.md diff --git a/docs/en/docs/container_runtime/kuasar/figures/kuasar_arch.png b/docs/en/docs/container_runtime/kuasar/figures/kuasar_arch.png new file mode 100644 index 0000000000000000000000000000000000000000..69fbf889891c2f678590a929d1f91d5139569c27 Binary files /dev/null and b/docs/en/docs/container_runtime/kuasar/figures/kuasar_arch.png differ diff --git a/docs/en/docs/container_runtime/kuasar/kuasar_appendix.md b/docs/en/docs/container_runtime/kuasar/kuasar_appendix.md new file mode 100644 index 0000000000000000000000000000000000000000..655014ec6a25cc60f4c7cd7c1f9f5a74edb73e25 --- /dev/null +++ b/docs/en/docs/container_runtime/kuasar/kuasar_appendix.md @@ -0,0 +1,24 @@ +# Appendix + +Fields in the **/var/lib/kuasar/config_stratovirt.toml** configuration file: + +```conf +[sandbox] +log_level: Kuasar log level. The default value is info. + +[hypervisor] +path: path of the StratoVirt binary file +machine_type: the processor type to be simulated (virt for the Arm architecture and q35 for the x86 architecture) +kernel_path: execution path of the guest kernel +image_path: execution path of the guest image +initrd_path: execution path of the guest initrd (Configure either initrd_path or image_path.) +kernel_params: guest kernel parameters +vcpus: default number of vCPUs for each sandbox (default: 1) +memory_in_mb: default memory size of each sandbox (default: 1024 MiB) +block_device_driver: block device driver +debug: whether to enable debug mode +enable_mem_prealloc: whether to enable memory pre-allocation + +[hypervisor.virtiofsd_conf] +path: path of vhost_user_fs +``` diff --git a/docs/en/docs/container_runtime/kuasar/kuasar_install_config.md b/docs/en/docs/container_runtime/kuasar/kuasar_install_config.md new file mode 100644 index 0000000000000000000000000000000000000000..05bff40352fda500cca386c41368296ecdb3f4bc --- /dev/null +++ b/docs/en/docs/container_runtime/kuasar/kuasar_install_config.md @@ -0,0 +1,126 @@ +# Installation and Configuration + +## Installation + +### Prerequisites + +- To obtain better performance experience, Kuasar must run on bare metal servers. **Currently, Kuasar cannot run on VMs.** +- The running of Kuasar depends on the following openEuler components. Ensure that the dependent components of the required versions have been installed in the environment. + - iSulad (See [Installation and Configuration](../../container_engine/isula_container_engine/installation_configuration.md) of iSulad.) + - StratoVirt (See [Installing StratoVirt](https://docs.openeuler.org/en/docs/24.03_LTS_SP2/virtualization/virtualization_platform/stratovirt/install_stratovirt.html)) + +### Procedure + +1. The Kuasar deliverables are included in the **kuasar** RPM package. Run the `yum` command to directly install Kuasar. + + ```sh + yum install kuasar + ``` + +2. Install the CRI command line tool crictl required for starting sandboxes and containers. + + ```sh + # Arm environment + $ wget https://github.com/kubernetes-sigs/cri-tools/releases/download/v1.25.0/crictl-v1.25.0-linux-arm64.tar.gz + $ tar -zxvf crictl-v1.25.0-linux-arm64.tar.gz -C /usr/local/bin + # x86 environment + $ wget https://github.com/kubernetes-sigs/cri-tools/releases/download/v1.25.0/crictl-v1.25.0-linux-amd64.tar.gz + $ tar -zxvf crictl-v1.25.0-linux-amd64.tar.gz -C /usr/local/bin + ``` + +3. 
Install the CNI plugins required for CRI to configure the network. + + ```sh + $ mkdir -p /opt/cni/bin && mkdir -p /etc/cni/net.d + + # Arm environment + $ wget https://github.com/containernetworking/plugins/releases/download/v1.3.0/cni-plugins-linux-arm64-v1.3.0.tgz + $ tar -zxvf cni-plugins-linux-arm64-v1.3.0.tgz -C /opt/cni/bin/ + # x86 environment + $ wget https://github.com/containernetworking/plugins/releases/download/v1.3.0/cni-plugins-linux-amd64-v1.3.0.tgz + $ tar -zxvf cni-plugins-linux-amd64-v1.3.0.tgz -C /opt/cni/bin/ + ``` + +## Configuration + +### Configuring iSulad + +Modify the iSulad configuration file **/etc/isulad/daemon.json** so that iSulad can invoke the container runtime of the Kuasar VMM type. Add the following information: + +```sh +$ cat /etc/isulad/daemon.json +... + "cri-sandboxers": { + "vmm": { + "name": "vmm", + "address": "/run/vmm-sandboxer.sock" + } + }, + "cri-runtimes": { + "vmm": "io.containerd.vmm.v1" + }, +... +``` + +Restart iSulad. + +```sh +systemctl restart isulad +``` + +### crictl Configuration + +Modify the crictl configuration file **/etc/crictl.yaml** to connect to iSulad. + +```sh +$ cat /etc/crictl.yaml +runtime-endpoint: unix:///var/run/isulad.sock +image-endpoint: unix:///var/run/isulad.sock +timeout: 10 +``` + +### Kuasar configuration + +Modify the configuration file to connect Kuasar to StratoVirt. (You can use the default configuration. For details about the fields in the configuration file, see [Appendix](./kuasar_appendix.md).) + +```sh +$ cat /var/lib/kuasar/config_stratovirt.toml +[sandbox] +log_level = "info" + +[hypervisor] +path = "/usr/bin/stratovirt" +machine_type = "virt,mem-share=on" +kernel_path = "/var/lib/kuasar/vmlinux.bin" +image_path = "" +initrd_path = "/var/lib/kuasar/kuasar.initrd" +kernel_params = "task.log_level=debug task.sharefs_type=virtiofs" +vcpus = 1 +memory_in_mb = 1024 +block_device_driver = "virtio-blk" +debug = true +enable_mem_prealloc = false + +[hypervisor.virtiofsd_conf] +path = "/usr/bin/vhost_user_fs" +``` + +Start the kuasar-vmm service. + +```sh +systemctl start kuasar-vmm +``` + +Check whether the service is running. + +```sh +$ systemctl status kuasar-vmm +● kuasar-vmm.service - Kuasar microVM type sandboxer daemon process + Loaded: loaded (/usr/lib/systemd/system/kuasar-vmm.service; disabled; vendor preset: disabled) + Active: active (running) since Sat 2023-08-26 14:57:08 CST; 1h 25min ago + Main PID: 1000445 (vmm-sandboxer) + Tasks: 99 (limit: 814372) + Memory: 226.4M + CGroup: /system.slice/kuasar-vmm.service + └─ 1000445 /usr/local/bin/vmm-sandboxer --listen /run/vmm-sandboxer.sock --dir /run/kuasar-vmm +``` diff --git a/docs/en/docs/container_runtime/kuasar/kuasar_usage.md b/docs/en/docs/container_runtime/kuasar/kuasar_usage.md new file mode 100644 index 0000000000000000000000000000000000000000..3b2e08746bd231ab2340c25ec580451b444ac3c2 --- /dev/null +++ b/docs/en/docs/container_runtime/kuasar/kuasar_usage.md @@ -0,0 +1,92 @@ +# Usage Instructions + +Start a Kuasar sandbox. + +1. Ensure that Kuasar and related components have been correctly installed and configured. + +2. Prepare the service container image. Assume that the container image is **busybox**. Use the iSula container engine to download the container image. + + ```sh + isula pull busybox + ``` + +3. Prepare the YAML files for the pod and container. 
The file examples are as follows: + + ```sh + $ cat podsandbox.yaml + metadata: + name: busybox-sandbox + namespace: default + uid: hdishd83djaidwnduwk28bcsc + log_directory: /tmp + linux: + namespaces: + options: {} + + $ cat pod-container.yaml + metadata: + name: busybox + image: + image: docker.io/library/busybox:latest + command: + - top + log_path: busybox.log + ``` + +4. Start a pod. + + ```sh + $ crictl runp --runtime=vmm podsandbox.yaml + 5cbcf744949d8500e7159d6bd1e3894211f475549c0be15d9c60d3c502c7ede3 + ``` + + Check the pod list. The pod is in the **Ready** state. + + ```sh + $ crictl pods + POD ID CREATED STATE NAME NAMESPACE ATTEMPT + 5cbcf744949d8 About a minute ago Ready busybox-sandbox default 1 + ``` + +5. Create a service container in the pod. + + ```sh + $ crictl create 5cbcf744949d8500e7159d6bd1e3894211f475549c0be15d9c60d3c502c7ede3 pod-container.yaml podsandbox.yaml + c11df540f913e57d1e28372334c028fd6550a2ba73208a3991fbcdb421804a50 + ``` + + View the container list. The container is in the **Created** state. + + ```sh + $ crictl ps -a + CONTAINER IMAGE CREATED STATE NAME ATTEMPT POD ID + c11df540f913e docker.io/library/busybox:latest 15 seconds ago Created busybox 0 5cbcf744949d + ``` + +6. Start the service container. + + ```sh + crictl start c11df540f913e57d1e28372334c028fd6550a2ba73208a3991fbcdb421804a50 + ``` + + Check the container list. The container is in the **Running** state. + + ```sh + $ crictl ps + CONTAINER IMAGE CREATED STATE NAME ATTEMPT POD ID + c11df540f913e docker.io/library/busybox:latest 2 minutes ago Running busybox 0 5cbcf744949d8 + ``` + + > [!NOTE]NOTE + > You can also run a `crictl run` command to start a pod with a service container. + + ```sh + crictl run -r vmm --no-pull container-config.yaml podsandbox-config.yaml + ``` + +7. Stop and delete the container and the pod. + + ```sh + crictl rm -f c11df540f913e + crictl rmp -f 5cbcf744949d8 + ``` diff --git a/docs/en/docs/container_runtime/kuasar/overview.md b/docs/en/docs/container_runtime/kuasar/overview.md new file mode 100644 index 0000000000000000000000000000000000000000..0695de3ecb31327a1a12fba3b09e016cce5dc655 --- /dev/null +++ b/docs/en/docs/container_runtime/kuasar/overview.md @@ -0,0 +1,12 @@ +# Kuasar Multi-Sandbox Container Runtime + +## Overview + +Kuasar is a container runtime that supports unified management of multiple types of sandboxes. It supports multiple mainstream sandbox isolation technologies, including the kernel-based native container sandbox, lightweight virtualization technology-based microVM sandbox, application kernel sandbox based on process-level virtualization, and the emerging WebAssembly sandbox. +Based on the Kuasar unified container runtime combined with the iSulad container engine and StratoVirt virtualization engine, openEuler builds lightweight full-stack self-developed secure containers for cloud native scenarios, delivering key competitiveness of ultra-low overhead and ultra-fast startup. + +**Figure 1** Kuasar architecture +![](./figures/kuasar_arch.png) + +> [!NOTE]NOTE +> Root privileges are necessary for installing and using Kuasar. 
diff --git a/docs/en/docs/hybrid_deployment/oncn_bwm/_toc.yaml b/docs/en/docs/hybrid_deployment/oncn_bwm/_toc.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a8b3ae7d564fe72d5233a3b0d0b26a0e57cd2f08 --- /dev/null +++ b/docs/en/docs/hybrid_deployment/oncn_bwm/_toc.yaml @@ -0,0 +1,6 @@ +label: oncn-bwm User Guide +isManual: true +description: Bandwidth management solution for pods in hybrid service environments. +sections: + - label: Overview + href: ./overview.md diff --git a/docs/en/docs/hybrid_deployment/oncn_bwm/overview.md b/docs/en/docs/hybrid_deployment/oncn_bwm/overview.md new file mode 100644 index 0000000000000000000000000000000000000000..777bc9977db77904edb117c5965ad6803c17270d --- /dev/null +++ b/docs/en/docs/hybrid_deployment/oncn_bwm/overview.md @@ -0,0 +1,240 @@ +# oncn-bwm User Guide + +## Introduction + +With the rapid development of technologies such as cloud computing, big data, artificial intelligence, 5G, and the Internet of Things (IoT), data center construction is becoming increasingly important. However, server resource utilization in data centers is very low, resulting in a huge waste of resources. oncn-bwm was developed to improve server resource utilization. + +oncn-bwm is a pod bandwidth management tool applicable to hybrid deployment of online and offline services. It properly schedules network resources for nodes based on QoS levels to ensure online service experience and greatly improve the overall network bandwidth utilization of nodes. + +The oncn-bwm tool supports the following functions: + +- Enabling/Disabling/Querying pod bandwidth management +- Setting the pod network priority +- Setting the offline service bandwidth range and online service waterline +- Querying internal statistics + +## Installation + +### Environment Requirements + +- Operating system: openEuler 24.03 LTS with the Yum repository of openEuler 24.03 LTS + +### Installation Procedure + +Run the following command: + +```shell +yum install oncn-bwm +``` + +## How to Use + +The oncn-bwm tool provides the `bwmcli` command line tool to enable pod bandwidth management or perform related configurations. The overall format of the `bwmcli` command is as follows: + +**bwmcli** \<option(s)\> + +> Note: +> +> The root permission is required for running the `bwmcli` command. +> +> Pod bandwidth management is supported only in the outbound direction of a node (packets are sent from the node to other nodes). +> +> Pod bandwidth management cannot be enabled for NICs for which tc qdisc rules have been configured. +> +> Upgrading the oncn-bwm package does not affect the enabling status before the upgrade. Uninstalling the oncn-bwm package disables pod bandwidth management for all NICs. + +### Command Interfaces + +#### Pod Bandwidth Management + +##### Commands and Functions + +| Command Format | Function | +| --------------------------- | ------------------------------------------------------------ | +| **bwmcli -e** \<NIC name\> | Enables pod bandwidth management for a specified NIC.| +| **bwmcli -d** \<NIC name\> | Disables pod bandwidth management for a specified NIC.| +| **bwmcli -p devs** | Queries pod bandwidth management of all NICs on a node.| + +> Note: +> +> - If no NIC name is specified, the preceding commands take effect for all NICs on a node. +> +> - Enable pod bandwidth management before running other `bwmcli` commands. + +##### Examples + +- Enable pod bandwidth management for NICs eth0 and eth1.
+ + ```shell + # bwmcli -e eth0 -e eth1 + enable eth0 success + enable eth1 success + ``` + +- Disable pod bandwidth management for NICs eth0 and eth1. + + ```shell + # bwmcli -d eth0 -d eth1 + disable eth0 success + disable eth1 success + ``` + +- Query pod bandwidth management of all NICs on a node. + + ```shell + # bwmcli -p devs + eth0 : enabled + eth1 : disabled + eth2 : disabled + docker0 : disabled + lo : disabled + ``` + +#### Pod Network Priority + +##### Commands and Functions + +| Command Format | Function | +| ------------------------------------------------------------ | ------------------------------------------------------------ | +| **bwmcli -s** *path* \<prio\> | Sets the network priority of a pod. *path* indicates the cgroup path corresponding to the pod, and *prio* indicates the priority. The value of *path* can be a relative path or an absolute path. The default value of *prio* is **0**. The optional values are **0** and **-1**. The value **0** indicates online services, and the value **-1** indicates offline services.| +| **bwmcli -p** *path* | Queries the network priority of a pod. | + +> Note: +> +> Online and offline network priorities are supported. The oncn-bwm tool controls the bandwidth of pods in real time based on the network priority. The specific policy is as follows: For online pods, the bandwidth is not limited. For offline pods, the bandwidth is limited within the offline bandwidth range. + +##### Examples + +- Set the priority of the pod whose cgroup path is **/sys/fs/cgroup/net_cls/test_online** to **0**. + + ```shell + # bwmcli -s /sys/fs/cgroup/net_cls/test_online 0 + set prio success + ``` + +- Query the priority of the pod whose cgroup path is **/sys/fs/cgroup/net_cls/test_online**. + + ```shell + # bwmcli -p /sys/fs/cgroup/net_cls/test_online + 0 + ``` + +#### Offline Service Bandwidth Range + +##### Commands and Functions + +| Command Format | Function | +| ---------------------------------- | ------------------------------------------------------------ | +| **bwmcli -s bandwidth** \<low\>,\<high\> | Sets the offline bandwidth for a host or VM. **low** indicates the minimum bandwidth, and **high** indicates the maximum bandwidth. The unit is KB, MB, or GB, and the value range is \[1 MB, 9999 GB].| +| **bwmcli -p bandwidth** | Queries the offline bandwidth of a host or VM. | + +> Note: +> +> - All NICs with pod bandwidth management enabled on a host are considered as a whole, that is, the configured online service waterline and offline service bandwidth range are shared. +> +> - The pod bandwidth configured using `bwmcli` takes effect for all offline services on a node. The total bandwidth of all offline services cannot exceed the bandwidth range configured for the offline services. There is no network bandwidth limit for online services. +> +> - The offline service bandwidth range and online service waterline are used together to limit the offline service bandwidth. When the online service bandwidth is lower than the configured waterline, the offline services can use the configured maximum bandwidth. When the online service bandwidth is higher than the configured waterline, the offline services can use the configured minimum bandwidth. + +##### Examples + +- Set the offline bandwidth to 30 Mbit/s to 100 Mbit/s. + + ```shell + # bwmcli -s bandwidth 30mb,100mb + set bandwidth success + ``` + +- Query the offline bandwidth range.
+ + ```shell + # bwmcli -p bandwidth + bandwidth is 31457280(B),104857600(B) + ``` + +#### Online Service Waterline + +##### Commands and Functions + +| Command Format | Function | +| ---------------------------------------------- | ------------------------------------------------------------ | +| **bwmcli -s waterline** \<val\> | Sets the online service waterline for a host or VM. *val* indicates the waterline value. The unit is KB, MB, or GB, and the value range is \[20 MB, 9999 GB].| +| **bwmcli -p waterline** | Queries the online service waterline of a host or VM. | + +> Note: +> +> - When the total bandwidth of all online services on a host is higher than the waterline, the bandwidth that can be used by offline services is limited. When the total bandwidth of all online services on a host is lower than the waterline, the bandwidth that can be used by offline services is increased. +> - The system determines whether the total bandwidth of online services exceeds or is lower than the configured waterline every 10 ms. Then the system determines the bandwidth limit for offline services based on whether the online bandwidth collected within each 10 ms is higher than the waterline. + +##### Examples + +- Set the online service waterline to 20 MB. + + ```shell + # bwmcli -s waterline 20mb + set waterline success + ``` + +- Query the online service waterline. + + ```shell + # bwmcli -p waterline + waterline is 20971520(B) + ``` + +#### Statistics + +##### Commands and Functions + +| Command Format | Function | +| ------------------- | ------------------ | +| **bwmcli -p stats** | Queries internal statistics.| + +> Note: +> +> - **offline_target_bandwidth**: target bandwidth for offline services. +> +> - **online_pkts**: total number of online service packets after pod bandwidth management is enabled. +> +> - **offline_pkts**: total number of offline service packets after pod bandwidth management is enabled. +> +> - **online_rate**: current online service rate. +> +> - **offline_rate**: current offline service rate. + +##### Examples + +Query internal statistics. + +```shell +# bwmcli -p stats +offline_target_bandwidth: 2097152 +online_pkts: 2949775 +offline_pkts: 0 +online_rate: 602 +offline_rate: 0 +``` + +### Typical Use Case + +To configure pod bandwidth management on a node, perform the following steps: + +```shell +bwmcli -p devs # Query the pod bandwidth management status of the NICs in the system. +bwmcli -e eth0 # Enable pod bandwidth management for the eth0 NIC. +bwmcli -s /sys/fs/cgroup/net_cls/online 0 # Set the network priority of the online service pod to 0. +bwmcli -s /sys/fs/cgroup/net_cls/offline -1 # Set the network priority of the offline service pod to -1. +bwmcli -s bandwidth 20mb,1gb # Set the bandwidth range for offline services. +bwmcli -s waterline 30mb # Set the waterline for online services. +``` + +### Constraints + +1. Only the **root** user is allowed to run the `bwmcli` command. +2. Currently, this feature supports only two network QoS priorities: offline and online. +3. If tc qdisc rules have been configured for a NIC, the network QoS function will fail to be enabled for the NIC. +4. After a NIC is removed and then inserted, the original QoS rules will be lost. In this case, you need to manually reconfigure the network QoS function. +5. When you run one command to enable or disable multiple NICs at the same time, if any NIC fails to be operated, operations on subsequent NICs will be stopped. +6.
When SELinux is enabled in the environment, if the SELinux policy is not configured for the bwmcli program, some commands (such as setting or querying the waterline, bandwidth, and priority) may fail. You can confirm the failure in SELinux logs. To solve this problem, disable SELinux or configure the SELinux policy for the bwmcli program. +7. Upgrading the software package does not change the enabling status before the upgrade. Uninstalling the software package disables the function for all devices. +8. The NIC name can contain only digits, letters, hyphens (-), and underscores (_). NICs whose names contain other characters cannot be identified. +9. In actual scenarios, bandwidth limiting may cause protocol stack memory overstock. In this case, backpressure depends on transport-layer protocols. For protocols that do not have backpressure mechanisms, such as UDP, packet loss, ENOBUFS, and rate limiting deviation may occur. +10. After using bwmcli to enable the network QoS function of a certain network card, the tc command cannot be used to modify the tc rules of the network card. Otherwise, it may affect the network QoS function of the network card, leading to abnormal functionality. diff --git a/docs/en/docs/hybrid_deployment/rubik/_toc.yaml b/docs/en/docs/hybrid_deployment/rubik/_toc.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a71db39df01a0af53e0d68b734b4257402aee179 --- /dev/null +++ b/docs/en/docs/hybrid_deployment/rubik/_toc.yaml @@ -0,0 +1,16 @@ +label: Rubik User Guide +isManual: true +description: QoS-based resource allocation in hybrid service deployment scenarios +sections: + - label: Overview + href: ./overview.md + - label: Installation and Deployment + href: ./installation_and_deployment.md + - label: Feature Introduction + href: ./feature_introduction.md + - label: Configuration + href: configuration.md + - label: Example of Isolation for Hybrid Deployed Services + href: ./example_of_isolation_for_hybrid_deployed_services.md + - label: Appendix + href: ./appendix.md diff --git a/docs/en/docs/hybrid_deployment/rubik/appendix.md b/docs/en/docs/hybrid_deployment/rubik/appendix.md new file mode 100644 index 0000000000000000000000000000000000000000..ddd2e0c7d32ca323ebc3ce12a8a0784f3755197c --- /dev/null +++ b/docs/en/docs/hybrid_deployment/rubik/appendix.md @@ -0,0 +1,252 @@ +# Appendix + +## DaemonSet Configuration Template + +```yaml +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: rubik +rules: + - apiGroups: [""] + resources: ["pods"] + verbs: ["list", "watch"] + - apiGroups: [""] + resources: ["pods/eviction"] + verbs: ["create"] +--- +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: rubik +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: rubik +subjects: + - kind: ServiceAccount + name: rubik + namespace: kube-system +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: rubik + namespace: kube-system +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: rubik-config + namespace: kube-system +data: + config.json: | + { + "agent": { + "logDriver": "stdio", + "logDir": "/var/log/rubik", + "logSize": 1024, + "logLevel": "info", + "cgroupRoot": "/sys/fs/cgroup", + "enabledFeatures": [ + "preemption" + ] + }, + "preemption": { + "resource": [ + "cpu" + ] + } + } +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: rubik-agent + namespace: kube-system + labels: + k8s-app: rubik-agent +spec: + selector: + matchLabels: + name: rubik-agent + 
template: + metadata: + namespace: kube-system + labels: + name: rubik-agent + spec: + serviceAccountName: rubik + hostPID: true + containers: + - name: rubik-agent + image: hub.oepkgs.net/cloudnative/rubik:latest + imagePullPolicy: IfNotPresent + env: + - name: RUBIK_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + securityContext: + capabilities: + add: + - SYS_ADMIN + resources: + limits: + memory: 200Mi + requests: + cpu: 100m + memory: 200Mi + volumeMounts: + - name: rubiklog + mountPath: /var/log/rubik + readOnly: false + - name: runrubik + mountPath: /run/rubik + readOnly: false + - name: sysfs + mountPath: /sys/fs + readOnly: false + - name: devfs + mountPath: /dev + readOnly: false + - name: config-volume + mountPath: /var/lib/rubik + terminationGracePeriodSeconds: 30 + volumes: + - name: rubiklog + hostPath: + path: /var/log/rubik + - name: runrubik + hostPath: + path: /run/rubik + - name: sysfs + hostPath: + path: /sys/fs + - name: devfs + hostPath: + path: /dev + - name: config-volume + configMap: + name: rubik-config + items: + - key: config.json + path: config.json +``` + +## Dockerfile Template + +```dockerfile +FROM scratch +COPY ./build/rubik /rubik +ENTRYPOINT ["/rubik"] +``` + +## Image Build Script + +```bash +#!/bin/bash +set -e + +CURRENT_DIR=$(cd "$(dirname "$0")" && pwd) +BINARY_NAME="rubik" + +RUBIK_FILE="${CURRENT_DIR}/build/rubik" +DOCKERFILE="${CURRENT_DIR}/Dockerfile" +YAML_FILE="${CURRENT_DIR}/rubik-daemonset.yaml" + +# Get version and release number of rubik binary +VERSION=$(${RUBIK_FILE} -v | grep ^Version | awk '{print $NF}') +RELEASE=$(${RUBIK_FILE} -v | grep ^Release | awk '{print $NF}') +IMG_TAG="${VERSION}-${RELEASE}" + +# Get rubik image name and tag +IMG_NAME_AND_TAG="${BINARY_NAME}:${IMG_TAG}" + +# Build container image for rubik +docker build -f "${DOCKERFILE}" -t "${IMG_NAME_AND_TAG}" "${CURRENT_DIR}" + +echo -e "\n" +# Check image existence +docker images | grep -E "REPOSITORY|${BINARY_NAME}" + +# Modify rubik-daemonset.yaml file, set rubik image name +sed -i "/image:/s/:.*/: ${IMG_NAME_AND_TAG}/" "${YAML_FILE}" +``` + +## Communication Matrix + +- The Rubik service process communicates with the Kubernetes API server as a client through the list-watch mechanism to obtain information about Pods. + +|Source IP Address|Source Port|Destination IP Address|Destination Port|Protocol|Port Description|Listening Port Modifiable|Authentication Method| +|----|----|----|----|----|----|----|----| +|Rubik node|32768-61000|api-server node|443|TCP|Kubernetes external resource port |No|Token| + +## File Permissions + +- All Rubik operations require root permissions. + +- Related file permissions are as follows: + +|Path|Permissions|Description| +|----|----|----| +|/var/lib/rubik|750|Directory generated after the RPM package is installed, which stores Rubik-related files| +|/var/lib/rubik/build|550|Directory for storing the Rubik binary file| +|/var/lib/rubik/build/rubik|550|Rubik binary file| +|/var/lib/rubik/rubik-daemonset.yaml|550|Rubik DaemonSet configuration template to be used for Kubernetes deployment| +|/var/lib/rubik/Dockerfile|640|Dockerfile template| +|/var/lib/rubik/build_rubik_image.sh|550|Rubik container image build script.| +|/var/log/rubik|640|Directory for storing Rubik log files (requires logDriver=file)| +|/var/log/rubik/rubik.log*|600|Rubik log files| + +## Constraints + +### Specifications + +- Drive: More than 1 GB + +- Memory: More than 100 MB + +## Runtime + +- Only one Rubik instance can exist on a Kubernetes node. 
+ +- Rubik cannot take any CLI parameters. Rubik will fail to be started if any CLI parameter is specified. + +- When the Rubik process is in the T (TASK_STOPPED or TASK_TRACED) or D (TASK_UNINTERRUPTIBLE) state, the server is unavailable and does not respond. The service becomes available after the process recovers from the abnormal state. + +### Pod Priorities + +- Pod priorities cannot be raised. If the priority of service A is changed from -1 to 0, Rubik will report an error. + +- Adding or modifying annotations or re-applying the Pod YAML configuration file does not trigger Pod rebuild. Rubik senses changes in Pod annotations through the list-watch mechanism. + +- After an online service is moved to the offline group, do not move it back to the online group, otherwise a QoS exception may occur. + +- Do not add important system services and kernel processes to the offline group. Otherwise, they cannot be scheduled in a timely manner, causing system errors. + +- Online and offline configurations for the CPU and memory must be consistent to avoid QoS conflicts between the two subsystems. + +- In the scenario of hybrid service deployment, the original CPU share mechanism is restricted: + - When both online and offline services run on a CPU, the CPU share of the offline service does not take effect. + - If only an online or offline service runs on a CPU, its CPU share takes effect. + - You are advised to set the Pod priority of the offline service to BestEffort. + +- Priority inversion of user-mode processes, SMT, cache, NUMA load balancing, and offline service load balancing are not supported. + +### Other + +To prevent data inconsistency, do not manually modify cgroup or resctrl parameters of the pods, including: + +- CPU cgroup directory, such as **/sys/fs/cgroup/cpu/kubepods/burstable/\<PodUID\>/\<container-id\>** + - cpu.qos_level + - cpu.cfs_burst_us + +- memory cgroup directory, such as **/sys/fs/cgroup/memory/kubepods/burstable/\<PodUID\>/\<container-id\>** + - memory.qos_level + - memory.soft_limit_in_bytes + - memory.force_empty + - memory.limit_in_bytes + - memory.high + +- RDT cgroup directory, such as **/sys/fs/resctrl** diff --git a/docs/en/docs/hybrid_deployment/rubik/configuration.md b/docs/en/docs/hybrid_deployment/rubik/configuration.md new file mode 100644 index 0000000000000000000000000000000000000000..c614d4a4317d23ba172824c59173b379de77daa5 --- /dev/null +++ b/docs/en/docs/hybrid_deployment/rubik/configuration.md @@ -0,0 +1,228 @@ +# Rubik Configuration Description + +The Rubik program is written in Go and compiled into a static executable file to minimize the coupling with the system. + +## Commands + +Besides the `-v` option for querying version information, Rubik does not support other options. The following is an example of version query output: + +```bash +$ ./rubik -v +Version: 2.0.1 +Release: 2.oe2403sp1 +Go Version: go1.22.1 +Git Commit: bcaace8 +Built: 2024-12-10 +OS/Arch: linux/amd64 +``` + +## Configuration + +When the Rubik binary file is executed, Rubik parses the configuration file **/var/lib/rubik/config.json**. + +> [!NOTE]Note +> +> To avoid confusion, custom configuration file paths are currently not supported. +> When Rubik runs as a DaemonSet in a Kubernetes cluster, modify the ConfigMap in the **hack/rubik-daemonset.yaml** file to configure Rubik. + +The configuration file is in JSON format and keys are in lower camel case.
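For orientation, a minimal **config.json** that enables only the preemption feature might look as follows. This is a sketch assembled from the fields described in this document; the full reference example follows:

```json
{
  "agent": {
    "logDriver": "stdio",
    "logLevel": "info",
    "cgroupRoot": "/sys/fs/cgroup",
    "enabledFeatures": ["preemption"]
  },
  "preemption": {
    "resource": ["cpu"]
  }
}
```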
+ +An example configuration file is as follows: + +```json +{ + "agent": { + "logDriver": "stdio", + "logDir": "/var/log/rubik", + "logSize": 2048, + "logLevel": "info", + "cgroupRoot": "/sys/fs/cgroup", + "enabledFeatures": [ + "preemption", + "dynCache", + "ioLimit", + "ioCost", + "quotaBurst", + "quotaTurbo", + "psi", + "cpuevict", + "memoryevict" + ] + }, + "preemption": { + "resource": [ + "cpu", + "memory" + ] + }, + "quotaTurbo": { + "highWaterMark": 50, + "syncInterval": 100 + }, + "dynCache": { + "defaultLimitMode": "static", + "adjustInterval": 1000, + "perfDuration": 1000, + "l3Percent": { + "low": 20, + "mid": 30, + "high": 50 + }, + "memBandPercent": { + "low": 10, + "mid": 30, + "high": 50 + } + }, + "ioCost": [ + { + "nodeName": "k8s-single", + "config": [ + { + "dev": "sdb", + "enable": true, + "model": "linear", + "param": { + "rbps": 10000000, + "rseqiops": 10000000, + "rrandiops": 10000000, + "wbps": 10000000, + "wseqiops": 10000000, + "wrandiops": 10000000 + } + } + ] + } + ], + "psi": { + "interval": 10, + "resource": [ + "cpu", + "memory", + "io" + ], + "avg10Threshold": 5.0 + }, + "cpuevict": { + "threshold": 60, + "interval": 1, + "windows": 2, + "cooldown": 20 + }, + "memoryevict": { + "threshold": 60, + "interval": 1, + "cooldown": 4 + } +} +``` + +Rubik configuration items include common items and feature items. Common items are under the **agent** section and are applied globally. Feature items are applied to sub-features that are enabled in the **enabledFeatures** field under **agent**. + +### agent + +The **agent** section stores common configuration items related to Rubik running, such as log configurations and cgroup mount points. + +| Key\[=Default Value] | Type | Description | Example Value | +| ------------------------- | ---------- | -------------------------------------- | --------------------------- | +| logDriver=stdio | string | Log driver, which can be the standard I/O or file | stdio, file | +| logDir=/var/log/rubik | string | Log directory | Any readable and writable directory | +| logSize=1024 | int | Total size of logs in MB when logDriver=file | \[10, $2^{20}$] | +| logLevel=info | string | Log level | debug,info,warn,error | +| cgroupRoot=/sys/fs/cgroup | string | Mount point of the system cgroup | Mount point of the system cgroup | +| enabledFeatures=\[] | string array | List of Rubik features to be enabled | Rubik features. See [Feature Introduction](./feature_introduction.md) for details. | + +### preemption + +The **preemption** field stores configuration items of the absolute preemption feature, including CPU and memory preemption. You can configure this field to use either or both of CPU and memory preemption. + +| Key\[=Default Value] | Type | Description | Example Value | +| --------------- | ---------- | -------------------------------- | ----------- | +| resource=\[] | string array | Resource types to preempt | cpu, memory | + +### dynCache + +The **dynCache** field stores configuration items related to pod memory bandwidth and last-level cache (LLC) limits. **l3Percent** indicates the watermarks of each LLC level. **memBandPercent** indicates the watermarks of each memory bandwidth level in percent.
+ +| Key\[=Default Value] | Type | Description | Example Value | +| ----------------------- | ------ | ----------------------------------------------------------------- | --------------- | +| defaultLimitMode=static | string | dynCache control mode | static, dynamic | +| adjustInterval=1000 | int | Interval for dynCache control, in milliseconds | \[10, 10000] | +| perfDuration=1000 | int | perf execution duration for dynCache, in milliseconds | \[10, 10000] | +| l3Percent | map | Watermarks of each L3 cache level of dynCache in percents | | +| .low=20 | int | Watermark of the low L3 cache level | \[10, 100] | +| .mid=30 | int | Watermark of the middle L3 cache level | \[low, 100] | +| .high=50 | int | Watermark of the high L3 cache level | \[mid, 100] | +| memBandPercent | map | Watermarks of each memory bandwidth level of dynCache in percents | | +| .low=10 | int | Watermark of the low bandwidth level | \[10, 100] | +| .mid=30 | int | Watermark of the middle bandwidth level | \[low, 100] | +| .high=50 | int | Watermark of the high bandwidth level | \[mid, 100] | + +### quotaTurbo + +The **quotaTurbo** field stores configuration items of the user-mode elastic traffic limiting feature. + +| Key\[=Default Value] | Type | Description | Example Value | +| ----------------- | ------ | -------------------------------- | -------------------- | +| highWaterMark=60 | int | High watermark of CPU load | \[0, alarmWaterMark) | +| alarmWaterMark=80 | int | Alarm watermark of CPU load | (highWaterMark,100\] | +| syncInterval=100 | int | Interval for triggering container quota updates, in milliseconds | \[100,10000] | + +### ioCost + +The **ioCost** field stores configuration items of the iocost-based I/O weight control feature. The field is an array whose elements are names of nodes (**nodeName**) and their device configuration arrays (**config**). + +| Key | Type | Description | Example Value | +| ----------------- | ------ | -------------------------------- | -------------------- | +| nodeName | string | Node name | Kubernetes cluster node name | +| config | array | Configurations of a block device | / | + +**config** parameters of a block device: + +| Key\[=Default Value] | Type | Description | Example Value | +| --------------- | ------ | --------------------------------------------- | -------------- | +| dev | string | Physical block device name | / | +| model | string | iocost model | linear | +| param | / | Device parameters specific to the model | / | + +For the **linear** model, the **param** field supports the following parameters: + +| Key\[=Default Value] | Type | Description | Example Value | +| --------------- | ---- | ---- | ------ | +| rbps | int64 | Maximum read bandwidth | (0, $2^{63}$) | +| rseqiops | int64 | Maximum sequential read IOPS | (0, $2^{63}$) | +| rrandiops | int64 | Maximum random read IOPS | (0, $2^{63}$) | +| wbps | int64 | Maximum write bandwidth | (0, $2^{63}$) | +| wseqiops | int64 | Maximum sequential write IOPS | (0, $2^{63}$) | +| wrandiops | int64 | Maximum random write IOPS | (0, $2^{63}$) | + +### psi + +The **psi** field stores configuration items of the PSI-based interference detection feature. This feature can monitor CPUs, memory, and I/O resources. You can configure this field to monitor the PSI of any or all of the resources.
+ +| Key\[=Default Value] | Type | Description | Example Value | +| --------------- | ---------- | -------------------------------- | ----------- | +| interval=10 |int|Interval for PSI monitoring, in seconds| \[10,30]| +| resource=\[] | string array | Resource type to be accessed | cpu, memory, io | +| avg10Threshold=5.0 | float | Average percentage of blocking time of a job in 10 seconds. If this threshold is reached, offline services are evicted. | \[5.0,100]| + +### CPU Eviction Watermark Control + +The **cpuevict** field is used to configure CPU eviction watermark control. This feature collects the node CPU utilization at specified intervals and calculates the average CPU utilization over a defined window. If the average CPU utilization exceeds the eviction watermark, offline Pods are evicted. Once Rubik evicts offline Pods, no further evictions occur during the cooldown period. + +| Key\[=Default Value] | Type | Description | Example Value | +|----------------------|---------|-----------------------------------------------------------------------------|----------------------| +| `threshold=60` | int | Threshold for average CPU utilization (%). If exceeded, offline Pods are evicted. | \[1, 99] | +| `interval=1` | int | Interval (in seconds) for collecting node CPU utilization. | \[1, 3600] | +| `windows=2` | int | Window period (in seconds) for calculating the average CPU utilization. The window must be greater than the interval. If not set, the window defaults to twice the interval. | \[1, 3600] | +| `cooldown=20` | int | Cooldown period (in seconds). No evictions occur during this period after an eviction. | \[1, 9223372036854775806] | + +### Memory Eviction Watermark Control + +The **memoryevict** field is used to configure memory eviction watermark control. This feature collects the node memory utilization at specified intervals. If the memory utilization exceeds the eviction watermark, offline Pods are evicted. Once Rubik evicts offline Pods, no further evictions occur during the cooldown period. + +| Key\[=Default Value] | Type | Description | Example Value | +|----------------------|---------|-----------------------------------------------------------------------------|----------------------| +| `threshold` | int | Threshold for memory utilization (%). If exceeded, offline Pods are evicted. If not specified, this feature is disabled. | \[1, 99] | +| `interval=1` | int | Interval (in seconds) for collecting node memory utilization. | \[1, 3600] | +| `cooldown=4` | int | Cooldown period (in seconds). No evictions occur during this period after an eviction. | \[1, 9223372036854775806] | diff --git a/docs/en/docs/hybrid_deployment/rubik/example_of_isolation_for_hybrid_deployed_services.md b/docs/en/docs/hybrid_deployment/rubik/example_of_isolation_for_hybrid_deployed_services.md new file mode 100644 index 0000000000000000000000000000000000000000..72d659bbfd5a27b20cb6bf75aa5e1dc57af4331c --- /dev/null +++ b/docs/en/docs/hybrid_deployment/rubik/example_of_isolation_for_hybrid_deployed_services.md @@ -0,0 +1,230 @@ +# Example of Isolation for Hybrid Deployed Services + +## Environment Preparation + +Check whether the kernel supports isolation of hybrid deployed services. + +```bash +# Check whether isolation of hybrid deployed services is enabled in the /boot/config- system configuration. +# If CONFIG_QOS_SCHED=y, the function is enabled. Example: +cat /boot/config-5.10.0-60.18.0.50.oe2203.x86_64 | grep CONFIG_QOS +CONFIG_QOS_SCHED=y +``` + +Install the Docker engine. 
+ +```bash +yum install -y docker-engine +docker version +# The following shows the output of docker version. +Client: + Version: 18.09.0 + EulerVersion: 18.09.0.325 + API version: 1.39 + Go version: go1.17.3 + Git commit: ce4ae23 + Built: Mon Jun 26 12:56:54 2023 + OS/Arch: linux/arm64 + Experimental: false + +Server: + Engine: + Version: 18.09.0 + EulerVersion: 18.09.0.325 + API version: 1.39 (minimum version 1.12) + Go version: go1.17.3 + Git commit: ce4ae23 + Built: Mon Jun 26 12:56:10 2023 + OS/Arch: linux/arm64 + Experimental: false +``` + +## Hybrid Deployed Services + +**Online Service ClickHouse** + +Use the clickhouse-benchmark tool to test the performance and collect statistics on performance metrics such as QPS, P50, P90, and P99. For details, see the clickhouse-benchmark documentation. + +**Offline Service Stress** + +Stress is a CPU-intensive test tool. You can specify the **--cpu** option to start multiple concurrent CPU-intensive tasks to increase the stress on the system. + +## Usage Instructions + +1) Start a ClickHouse container (online service). + +2) Access the container and run the **clickhouse-benchmark** command. Set the number of concurrent queries to **10**, the number of queries to **10000**, and the time limit to **30**. + +3) Start a Stress container (offline service) at the same time and concurrently execute 10 CPU-intensive tasks to increase the stress on the environment. + +4) After the **clickhouse-benchmark** command is executed, a performance test report is generated. + +The **test_demo.sh** script for the isolation test for hybrid deployed services is as follows: + +```bash +#!/bin/bash + +with_offline=${1:-no_offline} +enable_isolation=${2:-no_isolation} +stress_num=${3:-10} +concurrency=10 +timeout=30 +output=/tmp/result.json +online_container= +offline_container= + +exec_sql="echo \"SELECT * FROM system.numbers LIMIT 10000000 OFFSET 10000000\" | clickhouse-benchmark -i 10000 -c $concurrency -t $timeout" + +function prepare() { + echo "Launch clickhouse container." + online_container=$(docker run -itd \ + -v /tmp:/tmp:rw \ + --ulimit nofile=262144:262144 \ + -p 34424:34424 \ + yandex/clickhouse-server) + + sleep 3 + echo "Clickhouse container launched." +} + +function clickhouse() { + echo "Start clickhouse benchmark test." + docker exec $online_container bash -c "$exec_sql --json $output" + echo "Clickhouse benchmark test done." +} + +function stress() { + echo "Launch stress container." + offline_container=$(docker run -itd joedval/stress --cpu $stress_num) + echo "Stress container launched." + + if [ $enable_isolation == "enable_isolation" ]; then + echo "Set stress container qos level to -1." + echo -1 > /sys/fs/cgroup/cpu/docker/$offline_container/cpu.qos_level + fi +} + +function benchmark() { + if [ $with_offline == "with_offline" ]; then + stress + sleep 3 + fi + clickhouse + echo "Remove test containers." + docker rm -f $online_container + docker rm -f $offline_container + echo "Finish benchmark test for clickhouse(online) and stress(offline) colocation." + echo "===============================clickhouse benchmark==================================================" + cat $output + echo "===============================clickhouse benchmark==================================================" +} + +prepare +benchmark +``` + +## Test Results + +Independently execute the online service ClickHouse.
+ +```bash +sh test_demo.sh no_offline no_isolation +``` + +The baseline QoS data (QPS/P50/P90/P99) of the online service is as follows: + +```json +{ + "localhost:9000": { + "statistics": { + "QPS": 1.8853412284364512, + ...... + } + }, + "query_time_percentiles": { + ...... + "50": 0.484905256, + "60": 0.519641313, + "70": 0.570876148, + "80": 0.632544937, + "90": 0.728295525, + "95": 0.808700418, + "99": 0.873945121, + ...... + } +} +``` + +Execute the **test_demo.sh** script to start the offline service Stress and run the test with the isolation function disabled. + +```bash +# **with_offline** indicates that the offline service Stress is enabled. +# **no_isolation** indicates that isolation of hybrid deployed services is disabled. +sh test_demo.sh with_offline no_isolation +``` + +**When isolation of hybrid deployed services is disabled**, the QoS data (QPS/P50/P90/P99) of the ClickHouse service is as follows: + +```json +{ + "localhost:9000": { + "statistics": { + "QPS": 0.9424028693636205, + ...... + } + }, + "query_time_percentiles": { + ...... + "50": 0.840476774, + "60": 1.304607373, + "70": 1.393591017, + "80": 1.41277543, + "90": 1.430316688, + "95": 1.457534764, + "99": 1.555646855, + ...... + } +} +``` + +Execute the **test_demo.sh** script to start the offline service Stress and run the test with the isolation function enabled. + +```bash +# **with_offline** indicates that the offline service Stress is enabled. +# **enable_isolation** indicates that isolation of hybrid deployed services is enabled. +sh test_demo.sh with_offline enable_isolation +``` + +**When isolation of hybrid deployed services is enabled**, the QoS data (QPS/P50/P90/P99) of the ClickHouse service is as follows: + +```json +{ + "localhost:9000": { + "statistics": { + "QPS": 1.8825798759270718, + ...... + } + }, + "query_time_percentiles": { + ...... + "50": 0.485725185, + "60": 0.512629901, + "70": 0.55656488, + "80": 0.636395956, + "90": 0.734695906, + "95": 0.804118275, + "99": 0.887807409, + ...... + } +} +``` + +The following table lists the test results. + +| Service Deployment Mode | QPS | P50 | P90 | P99 | +| -------------------------------------- | ------------- | ------------- | ------------- | ------------- | +| ClickHouse (baseline) | 1.885 | 0.485 | 0.728 | 0.874 | +| ClickHouse + Stress (isolation disabled)| 0.942 (-50%) | 0.840 (-42%) | 1.430 (-49%) | 1.556 (-44%) | +| ClickHouse + Stress (isolation enabled) | 1.883 (-0.11%) | 0.486 (-0.21%) | 0.735 (-0.96%) | 0.888 (-1.58%) | + +When isolation of hybrid deployed services is disabled, the QPS of ClickHouse decreases from approximately 1.9 to 0.9, the service response delay (P90) increases from approximately 0.7s to 1.4s, and the QoS decreases by about 50%. When isolation of hybrid deployed services is enabled, the QPS and response delay (P50/P90/P99) of ClickHouse decrease by less than 2% compared with the baseline, and the QoS remains unchanged. diff --git a/docs/en/docs/hybrid_deployment/rubik/feature_introduction.md b/docs/en/docs/hybrid_deployment/rubik/feature_introduction.md new file mode 100644 index 0000000000000000000000000000000000000000..1bcd6e6d9ad874010a9d22555586c01c3829477d --- /dev/null +++ b/docs/en/docs/hybrid_deployment/rubik/feature_introduction.md @@ -0,0 +1,503 @@ +# Feature Introduction + +## Absolute Preemption + +Rubik allows you to configure priorities of services. In the hybrid deployment of online and offline services, Rubik ensures that online services preempt resources. CPU and memory resources can be preempted.
+ +You can enable preemption using the following configuration: + + ```yaml +... + "agent": { + "enabledFeatures": [ + "preemption" + ] + }, + "preemption": { + "resource": [ + "cpu", + "memory" + ] + } +... +``` + +For details, see [Configuration Description](./configuration.md#preemption). + +In addition, you need to add **volcano.sh/preemptable** to the YAML annotation of the pod to specify service priorities. For example: + +```yaml +annotations: + volcano.sh/preemptable: "true" +``` + +> [!NOTE]NOTE +> +> The **volcano.sh/preemptable** annotation is used by all Rubik features to identify whether the service is online or offline. **true** indicates an online service. **false** indicates an offline service. + +### CPU Absolute Preemption + +**Prerequisites** + +- The kernel supports CPU priority configuration based on control groups (cgroups). The CPU subsystem provides the **cpu.qos_level** interface. The kernel of openEuler 22.03 or later is recommended. + +**Kernel interface** + +- The interface exists in the cgroup of the container in the **/sys/fs/cgroup/cpu** directory, for example, **/sys/fs/cgroup/cpu/kubepods/burstable/\<PodUID\>/\<container-id\>**. + - **cpu.qos_level**: enables the CPU priority configuration. The value can be **0** or **-1**, with **0** being the default. + - **0** indicates an online service. + - **-1** indicates an offline service. + +### Memory Absolute Preemption + +In the hybrid deployment of online and offline services, Rubik ensures that offline services are first terminated in the case of out-of-memory (OOM). + +**Prerequisites** + +- The kernel supports memory priority configuration based on cgroups. The memory subsystem provides the **memory.qos_level** interface. The kernel of openEuler 22.03 or later is recommended. +- To enable the memory priority feature, run `echo 1 > /proc/sys/vm/memcg_qos_enable`. + +**Kernel interface** + +- **/proc/sys/vm/memcg_qos_enable**: enables the memory priority feature. The value can be **0** or **1**, with **0** being the default. You can run `echo 1 > /proc/sys/vm/memcg_qos_enable` to enable the feature. + - **0**: The feature is disabled. + - **1**: The feature is enabled. + +- The interface exists in the cgroup of the container in the **/sys/fs/cgroup/memory** directory, for example, **/sys/fs/cgroup/memory/kubepods/burstable/\<PodUID\>/\<container-id\>**. + - **memory.qos_level**: enables the memory priority configuration. The value can be **0** or **-1**, with **0** being the default. + - **0** indicates an online service. + - **-1** indicates an offline service. + +## dynCache Memory Bandwidth and L3 Cache Access Limit + +Rubik can limit pod memory bandwidth and L3 cache access for offline services to reduce the impact on online services. + +**Prerequisites** + +- The cache access and memory bandwidth limit feature supports only physical machines. + - For x86 physical machines, the CAT and MBA functions of Intel RDT must be enabled in the OS by adding **rdt=l3cat,mba** to the kernel command line parameters (**cmdline**). + - For ARM physical machines, the MPAM function must be enabled in the OS by adding **mpam=acpi** to the kernel command line parameters (**cmdline**). + +- Due to kernel restrictions, RDT does not support the pseudo-locksetup mode. +- You need to manually mount **/sys/fs/resctrl**. Rubik reads and sets files in the **/sys/fs/resctrl** directory. This directory must be mounted before Rubik is started and cannot be unmounted during Rubik running. +- Rubik requires the **SYS_ADMIN** capability.
To set files in the **/sys/fs/resctrl** directory on the host, the **SYS_ADMIN** capability must be assigned to the Rubik container. +- Rubik obtains the PID of the service container process on the host. Therefore, the Rubik container needs to share the PID namespace with the host. + +**Rubik RDT Cgroups** + +Rubik creates five cgroups (**rubik_max**, **rubik_high**, **rubik_middle**, **rubik_low**, and **rubik_dynamic**) in the RDT resctrl directory (**/sys/fs/resctrl** by default). Rubik writes the watermarks to the **schemata** file of each corresponding cgroup upon startup. The low, middle, and high watermarks can be configured in **dynCache**. The max cgroup uses the default maximum value. The initial watermark of the dynamic cgroup is the same as that of the low cgroup. + +**Rubik dynamic Cgroup** + +When offline pods with the **dynamic** cache level exist, Rubik collects the cache miss and LLC miss metrics of online service pods on the current node and adjusts the watermark of the rubik_dynamic cgroup. In this way, Rubik dynamically controls offline service pods in the dynamic cgroup. + +### Memory Bandwidth and LLC Limit of the Pod + +Rubik allows you to configure the memory bandwidth and LLC cgroup for a service pod in either of the following ways: + +- Global annotation + You can set **defaultLimitMode** in the global parameters of Rubik. Rubik automatically configures cgroups for offline service pods (marked by the **volcano.sh/preemptable** annotation in the absolute preemption configuration). + - If the value is **static**, the pod is added to the **rubik_max** cgroup. + - If the value is **dynamic**, the pod is added to the **rubik_dynamic** cgroup. +- Manual annotation + - You can set the cache level for a service pod using the **volcano.sh/cache-limit** annotation and add the pod to the specified cgroup. For example, the pod with the following configuration is added to the **rubik_low** cgroup: + + ```yaml + annotations: + volcano.sh/cache-limit: "low" + ``` + +> [!NOTE]NOTE +> +> Cache limits apply to offline services only. +> +> The manual annotation overrides the global one. If you set **defaultLimitMode** in the global Rubik configuration and specify the cache level in the YAML configuration of a pod, the actual dynCache limit is the one specified in the pod YAML configuration. + +### dynCache Kernel Interface + +- Rubik creates five cgroup directories in **/sys/fs/resctrl** and modifies the **schemata** and **tasks** files of each cgroup. + +### dynCache Configuration + +The dynCache function is configured as follows: + +```json +"agent": { + "enabledFeatures": [ + "dynCache" + ] +}, +"dynCache": { + "defaultLimitMode": "static", + "adjustInterval": 1000, + "perfDuration": 1000, + "l3Percent": { + "low": 20, + "mid": 30, + "high": 50 + }, + "memBandPercent": { + "low": 10, + "mid": 30, + "high": 50 + } +} +``` + +For details, see [Configuration Description](./configuration.md#dyncache). + +- **l3Percent** and **memBandPercent**: + **l3Percent** and **memBandPercent** are used to configure the watermarks of the low, mid, and high cgroups. + + Assume that in the current environment **rdt bitmask=fffff** and **numa=2**.
Based on the **low** value of **l3Percent** (20) and the **low** value of **memBandPercent** (10), Rubik configures **/sys/fs/resctrl/rubik_low** as follows: + + ```text + L3:0=f;1=f + MB:0=10;1=10 + ``` + +- **defaultLimitMode**: + - If the **volcano.sh/cache-limit** annotation is not specified for an offline pod, the **defaultLimitMode** of the **dynCache** configuration determines the cgroup to which the pod is added. +- **adjustInterval**: + - Interval for dynCache to dynamically adjust the **rubik_dynamic** cgroup, in milliseconds. The default value is **1000**. +- **perfDuration**: + - perf execution duration for dynCache, in milliseconds. The default value is **1000**. + +### Precautions for dynCache + +- dynCache takes effect only for offline pods. +- If a service container is manually restarted during running (the container ID remains unchanged but the container process ID changes), dynCache does not take effect for the container. +- After a service container is started and the dynCache level is set, the limit level cannot be changed. +- The sensitivity of adjusting the dynamic cgroup is affected by the **adjustInterval** and **perfDuration** values in the Rubik configuration file and by the number of online service pods on the node. If the impact detection result indicates that adjustment is required, the adjustment interval fluctuates within the range **\[adjustInterval + perfDuration, adjustInterval + perfDuration x number of pods\]**. You can set the configuration items based on your required sensitivity. + +## dynMemory Tiered Memory Reclamation + +Rubik supports multiple memory strategies. You can apply different memory allocation methods to different scenarios. + +### fssr + +fssr is a kernel cgroup-based dynamic watermark control policy. **memory.high** is a memcg-level watermark interface provided by the kernel. Rubik continuously detects memory usage and dynamically adjusts the **memory.high** limit of offline services to suppress the memory usage of offline services, ensuring the quality of online services. + +The core logic of fssr is as follows: + +- Rubik calculates the memory to reserve upon startup. The default value is the smaller of 10% of the total memory or 10 GB. +- Rubik sets the cgroup-level watermark of the offline container. The kernel provides the **memory.high** and **memory.high_async_ratio** interfaces for configuring the soft upper limit and alarm watermark of the cgroup. By default, **memory.high** is 80% of the total memory (**total_memory**). +- Rubik obtains the free memory (**free_memory**). +- When **free_memory** is less than **reserved_memory**, Rubik decreases **memory.high** for the offline container. The amount decreased each time is 10% of **total_memory**. +- If **free_memory** is more than double the amount of **reserved_memory**, Rubik increases **memory.high**. The amount increased each time is 1% of **total_memory**. + +**Kernel interface** + +- **memory.high** + +### dynMemory Configuration + +The strategy and check interval of the dynMemory module can be specified in **dynMemory**: + +```json +"dynMemory": { + "policy": "fssr" +} +``` + +- **policy** indicates the dynMemory policy, which supports **fssr**. + +## Flexible Bandwidth + +To effectively solve the problem of QoS deterioration caused by the CPU bandwidth limit of a service, Rubik provides flexible bandwidth to allow containers to use extra CPU resources, ensuring stable service running. The flexible bandwidth solution is implemented in both kernel mode and user mode. They cannot be used at the same time.
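+Both solutions operate on the CFS bandwidth parameters in the CPU cgroup of the container. As a quick sanity check before enabling either solution, you can inspect the current values (a sketch; **\<pod ID\>** and **\<container ID\>** are placeholders for the actual cgroup directory names): + +```bash +# Replace <pod ID> and <container ID> with the real cgroup directory names. +# A cpu.cfs_quota_us value of -1 means that no CPU quota is set for the container. +cat /sys/fs/cgroup/cpu/kubepods/burstable/<pod ID>/<container ID>/cpu.cfs_quota_us +cat /sys/fs/cgroup/cpu/kubepods/burstable/<pod ID>/<container ID>/cpu.cfs_period_us +```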
+ +The user-mode solution is implemented through the CFS bandwidth control capability provided by the Linux kernel. On the premise that the load watermark of the entire system is secure and stable and does not affect the running of other services, the dual-watermark mechanism allows service containers to adaptively adjust the CPU bandwidth limit, relieving CPU resource bottlenecks and improving service performance. + +The kernel-mode solution is implemented through the CPU burst capability provided by the Linux kernel, which allows containers to temporarily exceed their CPU usage limits. You need to manually configure the kernel-mode solution by setting the burst value for each pod; Rubik does not set the values automatically. + +### quotaTurbo User-Mode Solution + +You need to manually set the **volcano.sh/quota-turbo="true"** annotation for the service pod that requires flexible CPU bandwidth. This annotation takes effect only for pods whose CPU quota is limited, that is, pods for which **CPULimit** is specified in the YAML file. +The user-mode flexible bandwidth policy periodically adjusts the CPU quota of an allowlist container based on the CPU load of the entire system and the container running status, and automatically checks and restores the quota values of all containers when Rubik is started or stopped. (The CPU quota described in this section refers to the **cpu.cfs_quota_us** parameter of the container.) The adjustment policies are as follows: + +1. When the CPU load of the entire system is lower than the alarm threshold, if the allowlist container is suppressed by the CPU in the current period, Rubik slowly increases the CPU quota of the container based on the suppression status. The total container quota increase in a single period cannot exceed 1% of the total CPU quota of the current node. +2. When the CPU load of the entire system is higher than the high watermark, if the allowlist container is not suppressed by the CPU in the current period, Rubik slowly decreases the container quota based on the watermark. +3. When the CPU load of the entire system is higher than the alarm threshold, if the current quota value of the allowlist container exceeds the configured value, Rubik quickly decreases the CPU quotas of all containers to ensure that the load is lower than the alarm watermark. +4. The maximum CPU quota that a container can have cannot exceed twice the configured value (for example, the **CPULimit** parameter specified in the pod YAML file), and cannot be less than the configured value. +5. The overall CPU usage of the container within 60 synchronization periods cannot exceed the configured value. +6. If the overall CPU usage of the node increases by more than 10% within 1 minute, the container quota is not increased in the current period. + +**Kernel interface** + +The interface exists in the cgroup of the container in the **/sys/fs/cgroup/cpu** directory, for example, **/sys/fs/cgroup/cpu,cpuacct/kubepods/burstable/\<pod ID\>/\<container ID\>**. The following files are involved: + +- **cpu.cfs_quota_us** +- **cpu.cfs_period_us** +- **cpu.stat** + +#### quotaTurbo Configuration + +The quotaTurbo function is configured as follows: + +```json +"agent": { + "enabledFeatures": [ + "quotaTurbo" + ] + }, +"quotaTurbo": { + "highWaterMark": 60, + "alarmWaterMark": 80, + "syncInterval": 100 +} +``` + +For details, see [Configuration Description](./configuration.md#quotaturbo). + +- **highWaterMark** is the high watermark of CPU load. +- **alarmWaterMark** is the alarm watermark of CPU load.
+- **syncInterval** is the interval for triggering container quota updates, in milliseconds. + +You need to manually specify the **volcano.sh/quota-turbo="true"** annotation for the service pod. + +```yaml +metadata: + annotations: + # true means to add the pod to the allowlist of quotaTurbo + volcano.sh/quota-turbo: "true" +``` + +### quotaBurst Kernel-Mode Solution + +quotaBurst can be enabled through the **cpu.cfs_burst_us** kernel interface. Rubik allows a container to accumulate CPU resources when its CPU usage is lower than the quota and use the accumulated CPU resources when its CPU usage exceeds the quota. + +**Kernel interface** + +The interface exists in the cgroup of the container in the **/sys/fs/cgroup/cpu** directory, for example, **/sys/fs/cgroup/cpu/kubepods/burstable/\<pod ID\>/\<container ID\>**. The annotation value is written into the following file: + +- **cpu.cfs_burst_us** + +> [!NOTE]Note +> +> The kernel-mode solution is implemented through the **cpu.cfs_burst_us** interface. The **cpu.cfs_burst_us** file must exist in the CPU subsystem directory of the cgroup. The value of **cpu.cfs_burst_us** can be as follows: +> +> - When **cpu.cfs_quota_us** is not -1, the sum of **cfs_burst_us** and **cfs_quota_us** must not be greater than $2^{44}$-1, and **cfs_burst_us** must be less than or equal to **cfs_quota_us**. +> - When **cpu.cfs_quota_us** is -1, the CPU burst function is not enabled, and **cfs_burst_us** is 0. + +#### quotaBurst Configuration + +The quotaBurst function is configured as follows: + +```json +"agent": { + "enabledFeatures": [ + "quotaBurst" + ] +} +``` + +You need to manually specify the **volcano.sh/quota-burst-time** annotation for the service pod or run `kubectl annotate` to dynamically add the annotation. + +- In the YAML file upon pod creation: + + ```yaml + metadata: + annotations: + # The default unit is microseconds. + volcano.sh/quota-burst-time: "2000" + ``` + +- Annotation modification: You can run the `kubectl annotate` command to dynamically modify the annotation. For example: + + ```bash + kubectl annotate --overwrite pods <pod-name> volcano.sh/quota-burst-time='3000' + ``` + +### Constraints + +- The user-mode CPU bandwidth control is implemented through the **cpu.cfs_period_us** (CFS bandwidth control) and **cpu.cfs_quota_us** parameters. The following restrictions apply: + - To avoid unknown errors, other users are not allowed to modify CFS bandwidth control parameters (including but not limited to **cpu.cfs_quota_us** and **cpu.cfs_period_us**). + - Do not use this function together with similar programs that limit CPU resources. Otherwise, the user-mode function cannot be used properly. + - If you monitor the metrics related to CFS bandwidth control, using this feature may affect the consistency of the monitored metrics. +- The following restrictions apply to the kernel-mode solution: + - Use the Kubernetes interface to set the burst value of the pod. Do not manually modify the **cpu.cfs_burst_us** file in the CPU cgroup directory of the container. +- Do not enable both the kernel-mode and user-mode flexible bandwidth solutions at the same time. + +## I/O Weight Control Based on ioCost + +To solve the problem that the QoS of online services deteriorates due to high I/O usage of offline services, Rubik provides the I/O weight control function based on ioCost of cgroup v1. +For more information, see the [ioCost description](https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html#io:~:text=correct%20memory%20ownership.-,IO,-%C2%B6).
+ +**Prerequisites** + +Rubik can control the I/O weight distribution of different pods through ioCost of cgroup v1. Therefore, the kernel must support the following features: + +- cgroup v1 blkcg iocost +- cgroup v1 writeback + +The **blkio.cost.qos** and **blkio.cost.model** file interfaces exist in the **blkcg** root sysfs directory. For details about the implementation and interface description, see the openEuler kernel document. + +### ioCost Implementation Description + +![](./figures/iocost.PNG) + +The procedure of the Rubik implementation is as follows: + +- When Rubik is deployed, Rubik parses the configuration and sets the ioCost parameters. +- Rubik registers the detection event to the Kubernetes API server. +- When a pod is deployed, the pod configuration information is written back to Rubik. +- Rubik parses the pod configuration information and configures the pod ioCost weight based on the QoS level. + +### ioCost Configuration + +```json +"agent": { + "enabledFeatures": [ + "ioCost" + ] +}, +"ioCost": [{ + "nodeName": "k8s-single", + "config": [ + { + "dev": "sdb", + "enable": true, + "model": "linear", + "param": { + "rbps": 10000000, + "rseqiops": 10000000, + "rrandiops": 10000000, + "wbps": 10000000, + "wseqiops": 10000000, + "wrandiops": 10000000 + } + } + ] +}] +``` + +For details, see [Configuration Description](./configuration.md#iocost). + +> [!NOTE]Note +> +> Parameters related to the ioCost linear model can be obtained through [**iocost_coef_gen.py**](https://github.com/torvalds/linux/blob/master/tools/cgroup/iocost_coef_gen.py). + +## Interference Detection Based on Pressure Stall Information Metrics + +Rubik can observe the pressure stall information (PSI) metrics of online pods to determine resource pressure, evict offline pods, and generate log alarms. Rubik uses **some avg10** as the indicator, which represents the average proportion of time within the last 10 seconds during which at least one task was blocked. You can choose to monitor the CPU, memory, and I/O resources as required and set thresholds. If the blocking time proportion exceeds the threshold, Rubik evicts offline pods based on certain policies to release corresponding resources. If the CPU and memory usage of an online pod is high, Rubik evicts the offline service that occupies the most CPU or memory resources. If the I/O of offline services is high, Rubik evicts the offline service that occupies the most CPU resources. + +The **volcano.sh/preemptable="true"/"false"** annotation distinguishes online and offline services. + +```yaml +annotations: + volcano.sh/preemptable: "true" +``` + +**Prerequisites** + +Rubik depends on the PSI feature of cgroup v1. openEuler 22.03 LTS and later versions support the PSI interface of cgroup v1. +You can run the following command to check whether the PSI interface is enabled in the kernel: + +```bash +cat /proc/cmdline | grep "psi=1 psi_v1=1" +``` + +If no results are returned, add the boot parameters to the kernel cmdline: + +```bash +# View the kernel version. +uname -a +# Add the PSI boot parameters to the default kernel. +grubby --update-kernel="$(grubby --default-kernel)" --args="psi=1 psi_v1=1" +# Reboot. +reboot +``` + +**Kernel interface** + +The interface exists in the cgroup of the container in the **/sys/fs/cgroup/cpuacct** directory, for example, **/sys/fs/cgroup/cpu,cpuacct/kubepods/burstable/\<pod ID\>/\<container ID\>**.
The following items are involved: + +- **cpu.pressure** +- **memory.pressure** +- **io.pressure** + +### psi Configuration + +```json +"agent": { + "enabledFeatures": [ + "psi" + ] +}, +"psi": { + "interval": 10, + "resource": [ + "cpu", + "memory", + "io" + ], + "avg10Threshold": 5.0 +} +``` + +For details, see [Configuration Description](./configuration.md#psi). + +## CPU Eviction Watermark Control + +Rubik enables the eviction of offline pods based on node CPU utilization to avoid CPU resource overload. You can set a CPU eviction watermark. Rubik monitors the average CPU utilization of the node over a defined window. If the average CPU utilization exceeds the watermark, Rubik evicts offline pods with high resource usage and shorter run times to free up resources. + +> [!NOTE]Note +> +> The `volcano.sh/preemptable="true"/"false"` annotation distinguishes online and offline services. +> +> ```yaml +> annotations: +> volcano.sh/preemptable: "true" +> ``` + +**Configuration details** + +```json +{ + "agent": { + "enabledFeatures": [ + "cpuevict" + ] + }, + "cpuevict": { + "threshold": 60, + "interval": 1, + "windows": 2, + "cooldown": 20 + } +} +``` + +For more details, see [Configuration Documentation](./configuration.md#cpu-eviction-watermark-control). + +## Memory Eviction Watermark Control + +Rubik allows the eviction of offline pods based on node memory utilization to prevent memory resource overload. You can configure a memory eviction watermark. If the node memory utilization surpasses the watermark, Rubik evicts offline pods with high resource usage and shorter run times to release resources. + +> [!NOTE]Note +> +> The `volcano.sh/preemptable="true"/"false"` annotation distinguishes online and offline services. +> +> ```yaml +> annotations: +> volcano.sh/preemptable: "true" +> ``` + +**Configuration details** + +```json +{ + "agent": { + "enabledFeatures": [ + "memoryevict" + ] + }, + "memoryevict": { + "threshold": 60, + "interval": 1, + "cooldown": 4 + } +} +``` + +For more details, see [Configuration Documentation](./configuration.md#memory-eviction-watermark-control). diff --git a/docs/en/docs/hybrid_deployment/rubik/figures/icon-note.gif b/docs/en/docs/hybrid_deployment/rubik/figures/icon-note.gif new file mode 100644 index 0000000000000000000000000000000000000000..6314297e45c1de184204098efd4814d6dc8b1cda Binary files /dev/null and b/docs/en/docs/hybrid_deployment/rubik/figures/icon-note.gif differ diff --git a/docs/en/docs/hybrid_deployment/rubik/installation_and_deployment.md b/docs/en/docs/hybrid_deployment/rubik/installation_and_deployment.md new file mode 100644 index 0000000000000000000000000000000000000000..78852f2a859d23b0925688d5c10578c0b81a5d41 --- /dev/null +++ b/docs/en/docs/hybrid_deployment/rubik/installation_and_deployment.md @@ -0,0 +1,156 @@ +# Installation and Deployment + +## Overview + +This chapter describes how to install and deploy the Rubik component, using openEuler 24.03 LTS SP1 as an example. + +## Software and Hardware Requirements + +### Hardware + +- Architecture: x86 or AArch64 +- Disk: 1 GB or more +- Memory: 100 MB or more + +### Software + +- OS: openEuler 24.03-LTS-SP1 +- Kernel: openEuler 24.03-LTS-SP1 kernel + +### Environment Preparation + +- Install the openEuler OS. +- Install and deploy Kubernetes. +- Install the Docker or containerd container engine. + +## Installing Rubik + +Rubik is deployed on each Kubernetes node as a DaemonSet. Therefore, you need to perform the following steps to install the Rubik RPM package on each node. + +1.
The Rubik component is available in the EPOL repository. Configure the Yum repositories openEuler 24.03-LTS-SP1 and openEuler 24.03-LTS-SP1:EPOL. + + ```ini + # openEuler 24.03-LTS-SP1 official repository + [openEuler24.03-LTS-SP1] + name=openEuler24.03-LTS-SP1 + baseurl=https://repo.openeuler.org/openEuler-24.03-LTS-SP1/everything/$basearch/ + enabled=1 + gpgcheck=1 + gpgkey=https://repo.openeuler.org/openEuler-24.03-LTS-SP1/everything/$basearch/RPM-GPG-KEY-openEuler + ``` + + ```ini + # openEuler 24.03-LTS-SP1:EPOL official repository + [openEuler24.03-LTS-SP1-Epol] + name=openEuler24.03-LTS-SP1-Epol + baseurl=https://repo.openeuler.org/openEuler-24.03-LTS-SP1/EPOL/$basearch/ + enabled=1 + gpgcheck=1 + gpgkey=https://repo.openeuler.org/openEuler-24.03-LTS-SP1/everything/$basearch/RPM-GPG-KEY-openEuler + ``` + +2. Install Rubik with **root** permissions. + + ```shell + sudo yum install -y rubik + ``` + +> ![](./figures/icon-note.gif)**Note**: +> +> Files related to Rubik are installed in the **/var/lib/rubik** directory. + +## Deploying Rubik + +Rubik runs as a container in a Kubernetes cluster in hybrid deployment scenarios. It is used to isolate and restrict resources for services with different priorities to prevent offline services from interfering with online services, improving the overall resource utilization and ensuring the quality of online services. Currently, Rubik supports isolation and restriction of CPU and memory resources, and must be used together with the openEuler 24.03-LTS-SP1 kernel. To enable or disable the memory priority feature (that is, memory tiering for services with different priorities), you need to set the value in the **/proc/sys/vm/memcg_qos_enable** file. The value can be **0** or **1**. The default value **0** indicates that the feature is disabled, and the value **1** indicates that the feature is enabled. + +```bash +echo 1 | sudo tee /proc/sys/vm/memcg_qos_enable +``` + +### Deploying the Rubik DaemonSet + +1. Build the Rubik image: Use the **/var/lib/rubik/build_rubik_image.sh** script to automatically build the Rubik image or directly use Docker to build it. Since Rubik is deployed as a DaemonSet, the Rubik image must be available on every node. You can build the image on one node and then use the save/load functionality of Docker to load the image onto all nodes in the Kubernetes cluster. Alternatively, you can build the Rubik image on each node individually. For Docker, the build command is: + + ```sh + docker build -f /var/lib/rubik/Dockerfile -t rubik:2.0.1-2 . + ``` + +2. On the Kubernetes master node, update the Rubik image name in the **/var/lib/rubik/rubik-daemonset.yaml** file to match the image name created in the previous step. + + ```yaml + ... + containers: + - name: rubik-agent + image: rubik_image_name_and_tag # Ensure this matches the Rubik image name built earlier. + imagePullPolicy: IfNotPresent + ... + ``` + +3. On the Kubernetes master node, run the **kubectl** command to deploy the Rubik DaemonSet so that Rubik will be automatically deployed on all Kubernetes nodes. + + ```sh + kubectl apply -f /var/lib/rubik/rubik-daemonset.yaml + ``` + +4. Run the **kubectl get pods -A** command to check whether Rubik has been deployed on each node in the cluster. (The number of rubik-agents is the same as the number of nodes and all rubik-agents are in the Running status.) + +```sh +$ kubectl get pods -A +NAMESPACE NAME READY STATUS RESTARTS AGE +... +kube-system rubik-agent-76ft6 1/1 Running 0 4s +...
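+# If a rubik-agent pod is not in the Running status, you can inspect its logs, for example: +# kubectl logs -n kube-system rubik-agent-76ft6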
+``` + +## Common Configuration Description + +The Rubik deployed using the preceding method is started with the default configurations. You can modify the Rubik configurations as required by modifying the **config.json** section in the **rubik-daemonset.yaml** file and then redeploying the Rubik DaemonSet. The following describes some common configurations. For other configurations, see [Rubik Configuration Description](./configuration.md). + +### Absolute Pod Preemption + +If absolute pod preemption is enabled, you only need to specify the priority using annotations in the YAML file when deploying the service pods. Once deployed, Rubik automatically detects the creation and update of the pods on the current node and sets the pod priorities based on the configured annotations. For pods that are already started or whose annotations are modified, Rubik automatically updates the pod priority configurations. + +```yaml +... + "agent": { + "enabledFeatures": [ + "preemption" + ] + }, + "preemption": { + "resource": [ + "cpu", + "memory" + ] + } +... +``` + +> [!NOTE]Note +> +> Priority configurations support only pods switching from online to offline. + +## Configuring Rubik for Online and Offline Services + +After Rubik is successfully deployed, you can modify the YAML file of a service to specify the service type based on the following configuration example. Then Rubik can configure the priority of the service after it is deployed to isolate resources. + +The following is an example of deploying an online Nginx service: + +```yaml +apiVersion: v1 +kind: Pod +metadata: + name: nginx + namespace: qosexample + annotations: + volcano.sh/preemptable: "false" # If volcano.sh/preemptable is set to true, the service is an offline service. If it is set to false, the service is an online service. The default value is false. +spec: + containers: + - name: nginx + image: nginx + resources: + limits: + memory: "200Mi" + cpu: "1" + requests: + memory: "200Mi" + cpu: "1" +``` diff --git a/docs/en/docs/hybrid_deployment/rubik/overview.md b/docs/en/docs/hybrid_deployment/rubik/overview.md new file mode 100644 index 0000000000000000000000000000000000000000..c3b63e8ce895fa4ebc829a9327660e9ce4ee4c61 --- /dev/null +++ b/docs/en/docs/hybrid_deployment/rubik/overview.md @@ -0,0 +1,27 @@ +# Rubik User Guide + +## Overview + +Low server resource utilization has always been a recognized challenge in the industry. With the development of cloud native technologies, hybrid deployment of online (high-priority) and offline (low-priority) services becomes an effective means to improve resource utilization. + +In hybrid service deployment scenarios, Rubik can properly schedule resources based on Quality of Service (QoS) levels to greatly improve resource utilization while ensuring the quality of online services.
+ +Rubik supports the following features: + +- [Absolute Preemption](./feature_introduction.md#absolute-preemption) + - [CPU Absolute Preemption](./feature_introduction.md#cpu-absolute-preemption) + - [Memory Absolute Preemption](./feature_introduction.md#memory-absolute-preemption) +- [dynCache Memory Bandwidth and L3 Cache Access Limit](./feature_introduction.md#dyncache-memory-bandwidth-and-l3-cache-access-limit) +- [dynMemory Tiered Memory Reclamation](./feature_introduction.md#dynmemory-tiered-memory-reclamation) +- [Flexible Bandwidth](./feature_introduction.md#flexible-bandwidth) + - [quotaTurbo User-Mode Solution](./feature_introduction.md#quotaturbo-user-mode-solution) + - [quotaBurst Kernel-Mode Solution](./feature_introduction.md#quotaburst-kernel-mode-solution) +- [I/O Weight Control Based on ioCost](./feature_introduction.md#io-weight-control-based-on-iocost) +- [Interference Detection Based on Pressure Stall Information Metrics](./feature_introduction.md#interference-detection-based-on-pressure-stall-information-metrics) +- [CPU Eviction Watermark Control](./feature_introduction.md#cpu-eviction-watermark-control) +- [Memory Eviction Watermark Control](./feature_introduction.md#memory-eviction-watermark-control) + +This document is intended for community developers, open source enthusiasts, and partners who use the openEuler system and want to learn and use Rubik. Users must: + +- Know basic Linux operations. +- Be familiar with basic operations of Kubernetes and Docker/iSulad. diff --git a/docs/en/docs/image_builder/.DS_Store b/docs/en/docs/image_builder/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..5008ddfcf53c02e82d7eee2e57c38e5672ef89f6 Binary files /dev/null and b/docs/en/docs/image_builder/.DS_Store differ diff --git a/docs/en/docs/image_builder/isula_build/_toc.yaml b/docs/en/docs/image_builder/isula_build/_toc.yaml new file mode 100644 index 0000000000000000000000000000000000000000..61e47093afd87648bb6c5e556d46e38aa804db8c --- /dev/null +++ b/docs/en/docs/image_builder/isula_build/_toc.yaml @@ -0,0 +1,10 @@ +label: Container Image Building +isManual: true +description: Quick container image creation using Dockerfiles +sections: + - label: Overview + href: ./overview.md + - label: User Guide + href: ./isula_build.md + - label: Appendix + href: ./isula_build_appendix.md diff --git a/docs/en/docs/image_builder/isula_build/figures/isula-build_arch.png b/docs/en/docs/image_builder/isula_build/figures/isula-build_arch.png new file mode 100644 index 0000000000000000000000000000000000000000..f92f15085820ce824bc2ca60ff7d6d25e95f1402 Binary files /dev/null and b/docs/en/docs/image_builder/isula_build/figures/isula-build_arch.png differ diff --git a/docs/en/docs/image_builder/isula_build/isula_build.md b/docs/en/docs/image_builder/isula_build/isula_build.md new file mode 100644 index 0000000000000000000000000000000000000000..0090f67233c848837d96387af15a739c8fe53986 --- /dev/null +++ b/docs/en/docs/image_builder/isula_build/isula_build.md @@ -0,0 +1,938 @@ +# Installation + +## Preparations + +To ensure that isula-build can be successfully installed, the following software and hardware requirements must be met: + +- Supported architectures: x86_64 and AArch64 +- Supported OS: openEuler +- You have root user permissions. + +### Installing isula-build + +Before using isula-build to build a container image, you need to install the following software packages: + +#### (Recommended) Method 1: Using Yum + +1. Configure the openEuler Yum source. + +2.
Log in to the target server as the root user and install isula-build. + + ```shell + sudo yum install -y isula-build + ``` + +#### Method 2: Using the RPM Package + +1. Obtain an **isula-build-*.rpm** installation package from the openEuler Yum source, for example, **isula-build-0.9.6-4.oe1.x86_64.rpm**. + +2. Upload the obtained RPM software package to any directory on the target server, for example, **/home/**. + +3. Log in to the target server as the root user and run the following command to install isula-build: + + ```shell + sudo rpm -ivh /home/isula-build-*.rpm + ``` + +> [!NOTE]Note +> +> After the installation is complete, you need to manually start the isula-build service. For details about how to start the service, see [Managing the isula-build Service](#managing-the-isula-build-service). + +# Configuring and Managing the isula-build Service + +## Configuring the isula-build Service + +After the isula-build software package is installed, systemd starts the isula-build service on the isula-build server based on the default configuration contained in the package. If the default configuration files on the isula-build server cannot meet your requirements, customize them as described below. After the default configuration is modified, restart the isula-build server for the new configuration to take effect. For details, see [Managing the isula-build Service](#managing-the-isula-build-service). + +Currently, the isula-build server contains the following configuration files: + +- **/etc/isula-build/configuration.toml**: general isula-builder configuration file, which is used to set the isula-builder log level, persistency directory, runtime directory, and OCI runtime. Parameters in the configuration file are described as follows: + +| Configuration Item | Mandatory or Optional | Description | Value | +| --------- | -------- | --------------------------------- | ----------------------------------------------- | +| debug | Optional | Indicates whether to enable the debug log function. | **true**: Enables the debug log function. **false**: Disables the debug log function. | +| loglevel | Optional | Sets the log level. | debug<br>info<br>warn<br>error |
+| run_root | Mandatory | Sets the root directory of runtime data. | For example, **/var/run/isula-build/** | +| data_root | Mandatory | Sets the local persistency directory. | For example, **/var/lib/isula-build/** | +| runtime | Optional | Sets the runtime type. Currently, only **runc** is supported. | runc | +| group | Optional | Sets the owner group for the local socket file **isula_build.sock** so that non-privileged users in the group can use isula-build. | isula | +| experimental | Optional | Indicates whether to enable experimental features. | **true**: Enables experimental features. **false**: Disables experimental features. | + +- **/etc/isula-build/storage.toml**: configuration file for local persistent storage, including the configuration of the storage driver in use. + +| Configuration Item | Mandatory or Optional | Description | +| ------ | -------- | ------------------------------ | +| driver | Optional | Storage driver type. Currently, **overlay2** is supported. | + + For more settings, see [containers-storage.conf.5](https://github.com/containers/storage/blob/main/docs/containers-storage.conf.5.md). + +- **/etc/isula-build/registries.toml**: configuration file for each image repository. + +| Configuration Item | Mandatory or Optional | Description | +| ------------------- | -------- | ------------------------------------------------------------ | +| registries.search | Optional | Search domain of the image repository. Only listed image repositories can be found. | +| registries.insecure | Optional | Accessible insecure image repositories. Listed image repositories cannot pass the authentication and are not recommended. | + + For more settings, see [containers-registries.conf.5](https://github.com/containers/image/blob/main/docs/containers-registries.conf.5.md). + +- **/etc/isula-build/policy.json**: image pull/push policy file. Currently, this file cannot be configured. + +> [!NOTE]Note +> +> - isula-build supports the preceding configuration files with a maximum size of 1 MB each. +> - The persistent working directory (**dataroot**) cannot be configured on a memory-based file system, for example, tmpfs. +> - Currently, only overlay2 can be used as the underlying storage driver. +> - Before setting the `--group` option, ensure that the corresponding user group has been created on the local OS and non-privileged users have been added to the group. After isula-builder is restarted, non-privileged users in the group can use the isula-build function. In addition, to ensure permission consistency, the owner group of the isula-build configuration file directory **/etc/isula-build** is set to the group specified by `--group`. + +## Managing the isula-build Service + +Currently, openEuler uses systemd to manage the isula-build service. The isula-build software package contains the systemd service files. After installing the isula-build software package, you can use the systemd tool to start or stop the isula-build service. You can also manually start the isula-builder software. + +> [!NOTE]Note +> +> Only one isula-builder process can be started on a node at a time.
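+ +For example, before starting isula-builder manually, you can verify that no instance is already running (a simple sketch using standard tools; it starts the systemd service only when no isula-builder process is found): + +```sh +# Start the service only if no isula-builder process is currently running. +pgrep -x isula-builder || sudo systemctl start isula-build.service +```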
+ +### (Recommended) Using systemd for Management + +You can run the following systemd commands to start, stop, and restart the isula-build service: + +- Run the following command to start the isula-build service: + + ```sh + sudo systemctl start isula-build.service + ``` + +- Run the following command to stop the isula-build service: + + ```sh + sudo systemctl stop isula-build.service + ``` + +- Run the following command to restart the isula-build service: + + ```sh + sudo systemctl restart isula-build.service + ``` + +The systemd service file of the isula-build software installation package is stored at `/usr/lib/systemd/system/isula-build.service`. If you need to modify the systemd configuration of the isula-build service, modify the file and run the following command to reload the configuration, and then restart the isula-build service using the systemd management commands above. + +```sh +sudo systemctl daemon-reload +``` + +### Directly Running isula-builder + +You can also run the `isula-builder` command on the server to start the service. The `isula-builder` command can contain flags for service startup. The following flags are supported: + +- `-D, --debug`: whether to enable the debugging mode. +- `--log-level`: log level. The options are **debug**, **info**, **warn**, and **error**. The default value is **info**. +- `--dataroot`: local persistency directory. The default value is **/var/lib/isula-build/**. +- `--runroot`: runtime directory. The default value is **/var/run/isula-build/**. +- `--storage-driver`: underlying storage driver type. +- `--storage-opt`: underlying storage driver configuration. +- `--group`: sets the owner group for the local socket file **isula_build.sock** so that non-privileged users in the group can use isula-build. The default owner group is **isula**. +- `--experimental`: whether to enable experimental features. + +> [!NOTE]Note +> +> If the command line parameters contain the same configuration items as those in the configuration file, the command line parameters are preferentially used for startup. + +Start the isula-build service. For example, to specify the local persistency directory **/var/lib/isula-build** and disable debugging, run the following command: + +```sh +sudo isula-builder --dataroot "/var/lib/isula-build" --debug=false +``` + +# Usage Guidelines + +## Prerequisites + +isula-build depends on the executable file **runc** to execute the **RUN** instruction in the Dockerfile. Therefore, runc must be pre-installed in the running environment of isula-build. The installation method depends on the application scenario. If you do not need to use the complete docker-engine tool chain, you can install only the docker-runc RPM package. + +```sh +sudo yum install -y docker-runc +``` + +If you need to use a complete docker-engine tool chain, install the docker-engine RPM package, which contains the executable file **runc** by default. + +```sh +sudo yum install -y docker-engine +``` + +> [!NOTE]Note +> +> Ensure the security of the OCI runtime (runc) executable file to prevent malicious replacement. + +## Overview + +The isula-build client provides a series of commands for building and managing container images. Currently, the isula-build client provides the following commands: + +- `ctr-img`: manages container images. The `ctr-img` command contains the following subcommands: + - `build`: builds a container image based on the specified Dockerfile. + - `images`: lists local container images. + - `import`: imports a basic container image.
- `load`: imports a cascade image. + - `rm`: deletes a local container image. + - `save`: exports a cascade image to a local disk. + - `tag`: adds a tag to a local container image. + - `pull`: pulls an image to a local host. + - `push`: pushes a local image to a remote repository. +- `info`: displays the running environment and system information of isula-build. +- `login`: logs in to the remote container image repository. +- `logout`: logs out of the remote container image repository. +- `version`: displays the versions of isula-build and isula-builder. +- `manifest` (experimental): manages the manifest list. + +> [!NOTE]Note +> +> - The `isula-build completion` and `isula-builder completion` commands are used to generate the bash command completion script. These commands are implicitly provided by the command line framework and are not displayed in the help information. +> - The isula-build client does not have any configuration file. To use isula-build experimental features, enable the environment variable **ISULABUILD_CLI_EXPERIMENTAL** on the client using the `export ISULABUILD_CLI_EXPERIMENTAL=enabled` command. + +The following describes how to use these commands in detail. + +## ctr-img: Container Image Management + +The isula-build command groups all container image management commands into the `ctr-img` command. The command format is as follows: + +```shell +isula-build ctr-img [command] +``` + +### build: Container Image Build + +The `build` subcommand of the `ctr-img` command is used to build container images. The command format is as follows: + +```shell +isula-build ctr-img build [flags] +``` + +The `build` command contains the following flags: + +- `--build-arg`: string list containing variables required during the build process. +- `--build-static`: key value, which is used for binary-equivalent builds. Currently, the following key value is included: + - `build-time`: string indicating that a container image is built at a specified timestamp. The timestamp format is *YYYY-MM-DD HH-MM-SS*. +- `-f, --filename`: string indicating the path of the Dockerfiles. If this parameter is not specified, the current path is used. +- `--format`: string indicating the image format, **oci** or **docker** (**ISULABUILD_CLI_EXPERIMENTAL** needs to be enabled). +- `--iidfile`: string indicating a local file to which the ID of the image is output. +- `-o, --output`: string indicating the image export mode and path. +- `--proxy`: boolean, which inherits the proxy environment variables on the host. The default value is **true**. +- `--tag`: string indicating the tag value of the image that is successfully built. +- `--cap-add`: string list containing permissions required by the **RUN** instruction during the build process. + +**The following describes the flags in detail.** + +#### \--build-arg + +Parameters in the Dockerfile are inherited from the command line. The usage is as follows: + +```sh +$ echo "This is bar file" > bar.txt +$ cat Dockerfile_arg +FROM busybox +ARG foo +ADD ${foo}.txt . +RUN cat ${foo}.txt +$ sudo isula-build ctr-img build --build-arg foo=bar -f Dockerfile_arg +STEP 1: FROM busybox +Getting image source signatures +Copying blob sha256:8f52abd3da461b2c0c11fda7a1b53413f1a92320eb96525ddf92c0b5cde781ad +Copying config sha256:e4db68de4ff27c2adfea0c54bbb73a61a42f5b667c326de4d7d5b19ab71c6a3b +Writing manifest to image destination +Storing signatures +STEP 2: ARG foo +STEP 3: ADD ${foo}.txt .
+STEP 4: RUN cat ${foo}.txt +This is bar file +Getting image source signatures +Copying blob sha256:6194458b07fcf01f1483d96cd6c34302ffff7f382bb151a6d023c4e80ba3050a +Copying blob sha256:6bb56e4a46f563b20542171b998cb4556af4745efc9516820eabee7a08b7b869 +Copying config sha256:39b62a3342eed40b41a1bcd9cd455d77466550dfa0f0109af7a708c3e895f9a2 +Writing manifest to image destination +Storing signatures +Build success with image id: 39b62a3342eed40b41a1bcd9cd455d77466550dfa0f0109af7a708c3e895f9a2 +``` + +#### \--build-static + +Specifies a static build. That is, when isula-build is used to build a container image, differences between all timestamps and other build factors (such as the container ID and hostname) are eliminated. Finally, a container image that meets the static requirements is built. + +When isula-build is used to build a container image, assume that a fixed timestamp is given to the build subcommand and the following conditions are met: + +- The build environment is consistent across builds. +- The Dockerfile is consistent before and after the build. +- The intermediate data generated before and after the build is consistent. +- The build commands are the same. +- The versions of the third-party libraries are the same. + +With the same Dockerfile and the same build environment, isula-build generates the same image content and image ID across multiple builds. + +`--build-static` supports the key-value pair option in the *key=value* format. Currently, the following options are supported: + +- build-time: string, which indicates the fixed timestamp for creating a static image. The value is in the format of *YYYY-MM-DD HH-MM-SS*. The timestamp affects the creation and modification time attributes of the files at the diff layer. + + Example: + + ```sh + sudo isula-build ctr-img build -f Dockerfile --build-static='build-time=2020-05-23 10:55:33' . + ``` + + In this way, the container images and image IDs built multiple times in the same environment are identical. + +#### \--format + +This option can be used when the experimental feature is enabled. The default image format is **oci**. You can specify the image format to build. For example, the following commands are used to build an OCI image and a Docker image, respectively. + + ```sh + export ISULABUILD_CLI_EXPERIMENTAL=enabled; sudo isula-build ctr-img build -f Dockerfile --format oci . + ``` + + ```sh + export ISULABUILD_CLI_EXPERIMENTAL=enabled; sudo isula-build ctr-img build -f Dockerfile --format docker . + ``` + +#### \--iidfile + +Run the following command to output the ID of the built image to a file: + +```shell +isula-build ctr-img build --iidfile filename +``` + +For example, to export the container image ID to the **testfile** file, run the following command: + + ```sh +sudo isula-build ctr-img build -f Dockerfile_arg --iidfile testfile + ``` + + Check the container image ID in the **testfile** file. + + ```sh +$ cat testfile +76cbeed38a8e716e22b68988a76410eaf83327963c3b29ff648296d5cd15ce7b + ``` + +#### \-o, --output + +Currently, `-o` and `--output` support the following formats: + +- `isulad:image:tag`: directly pushes the image that is successfully built to iSulad, for example, `-o isulad:busybox:latest`. The following restrictions apply: + + - isula-build and iSulad must be on the same node. + - The tag must be configured.
+ - On the isula-build client, you need to temporarily save the successfully built image as **/var/tmp/isula-build-tmp-%v.tar** and then import it to iSulad. Ensure that the **/var/tmp/** directory has sufficient disk space. + +- `docker-daemon:image:tag`: directly pushes the successfully built image to Docker daemon, for example, `-o docker-daemon:busybox:latest`. The following restrictions apply: + - isula-build and Docker must be on the same node. + - The tag must be configured. + +- `docker://registry.example.com/repository:tag`: directly pushes the successfully built image to the remote image repository in Docker image format, for example, `-o docker://localhost:5000/library/busybox:latest`. + +- `docker-archive:<path>:image:tag`: saves the successfully built image to the local host in Docker image format, for example, `-o docker-archive:/root/image.tar:busybox:latest`. + +When the experimental feature is enabled, you can build images in OCI image format with: + +- `oci://registry.example.com/repository:tag`: directly pushes the successfully built image to the remote image repository in OCI image format (the remote repository must support the OCI image format), for example, `-o oci://localhost:5000/library/busybox:latest`. + +- `oci-archive:<path>:image:tag`: saves the successfully built image to the local host in OCI image format, for example, `-o oci-archive:/root/image.tar:busybox:latest`. + +In addition to the flags, the `build` subcommand also supports a string argument, context, which specifies the context of the Dockerfile build environment. The default value of this argument is the current path where isula-build is executed. This path affects the files retrieved by the **ADD** and **COPY** instructions in the Dockerfile and the scope of the **.dockerignore** file. + +#### \--proxy + +Specifies whether the container started by the **RUN** instruction inherits the proxy-related environment variables **http_proxy**, **https_proxy**, **ftp_proxy**, **no_proxy**, **HTTP_PROXY**, **HTTPS_PROXY**, and **FTP_PROXY**. The default value is **true**. + +When a user configures proxy-related **ARG** or **ENV** in the Dockerfile, the inherited environment variables will be overwritten. + +> [!NOTE]Note +> +> If the client and daemon are running on different terminals, the environment variables of the terminal where the daemon is running are inherited. + +#### \--tag + +Specifies the tag of the image stored on the local disk after the image is successfully built. + +#### \--cap-add + +Run the following command to add the permissions required by the **RUN** instruction during the build process: + +```shell +isula-build ctr-img build --cap-add ${CAP} +``` + +Example: + +```sh +sudo isula-build ctr-img build --cap-add CAP_SYS_ADMIN --cap-add CAP_SYS_PTRACE -f Dockerfile +``` + +> [!NOTE]Note +> +> - A maximum of 100 container images can be concurrently built. +> - isula-build supports Dockerfiles with a maximum size of 1 MB. +> - isula-build supports a .dockerignore file with a maximum size of 1 MB. +> - Ensure that only the current user has the read and write permissions on the Dockerfiles to prevent other users from tampering with the files. +> - During the build, the **RUN** instruction starts a container and performs the build steps inside it. Currently, isula-build supports the host network only. +> - isula-build only supports the tar compression format. +> - isula-build commits once after each image build stage is complete, instead of each time a Dockerfile line is executed.
+> - isula-build does not support cached builds. +> - isula-build starts the build container only when the **RUN** instruction is executed. +> - Currently, the history function of Docker images is not supported. +> - The stage name can start with a digit. +> - The stage name can contain a maximum of 64 characters. +> - isula-build does not support resource restriction on a single Dockerfile build. If resource restriction is required, you can configure a resource limit on isula-builder. +> - Currently, isula-build does not support a remote URL as the data source of the **ADD** instruction in the Dockerfile. +> - The local tar packages exported using the **docker-archive** and **oci-archive** types are not compressed. You can manually compress the files as required. + +### images: Viewing Local Persistent Build Images + +You can run the `images` command to view the images in the local persistent storage. + +```sh +$ sudo isula-build ctr-img images +--------------------------------------- ----------- ----------------- ------------------------ ------------ +REPOSITORY TAG IMAGE ID CREATED SIZE +--------------------------------------- ----------- ----------------- ------------------------ ------------ +localhost:5000/library/alpine latest a24bb4013296 2022-01-17 10:02:19 5.85 MB + 39b62a3342ee 2022-01-17 10:01:12 1.45 MB +--------------------------------------- ----------- ----------------- ------------------------ ------------ +``` + +> [!NOTE]Note +> +> The image size displayed by running the `isula-build ctr-img images` command may be different from that displayed by running the `docker images` command. When calculating the image size, `isula-build` directly calculates the total size of .tar packages at each layer, while `docker` calculates the total size of files by decompressing the .tar packages and traversing the diff directory. Therefore, the statistics are different. + +### import: Importing a Basic Container Image + +A tar file in rootfs form can be imported into isula-build via the `ctr-img import` command. + +The command format is as follows: + +```shell +isula-build ctr-img import [flags] +``` + +Example: + +```sh +$ sudo isula-build ctr-img import busybox.tar mybusybox:latest +Getting image source signatures +Copying blob sha256:7b8667757578df68ec57bfc9fb7754801ec87df7de389a24a26a7bf2ebc04d8d +Copying config sha256:173b3cf612f8e1dc34e78772fcf190559533a3b04743287a32d549e3c7d1c1d1 +Writing manifest to image destination +Storing signatures +Import success with image id: "173b3cf612f8e1dc34e78772fcf190559533a3b04743287a32d549e3c7d1c1d1" +$ sudo isula-build ctr-img images +--------------------------------------- ----------- ----------------- ------------------------ ------------ +REPOSITORY TAG IMAGE ID CREATED SIZE +--------------------------------------- ----------- ----------------- ------------------------ ------------ +mybusybox latest 173b3cf612f8 2022-01-12 16:02:31 1.47 MB +--------------------------------------- ----------- ----------------- ------------------------ ------------ +``` + +> [!NOTE]Note +> +> isula-build supports the import of basic container images with a maximum size of 1 GB. + +### load: Importing Cascade Images + +Cascade images are images that are saved to the local computer by running the `docker save` or `isula-build ctr-img save` command. The compressed image package contains layer-by-layer image packages named **layer.tar**. You can run the `ctr-img load` command to import the image to isula-build.
+ +The command format is as follows: + +```shell +isula-build ctr-img load [flags] +``` + +Currently, the following flags are supported: + +- `-i, --input`: path of the local .tar package. + +Example: + +```sh +$ sudo isula-build ctr-img load -i ubuntu.tar +Getting image source signatures +Copying blob sha256:cf612f747e0fbcc1674f88712b7bc1cd8b91cf0be8f9e9771235169f139d507c +Copying blob sha256:f934e33a54a60630267df295a5c232ceb15b2938ebb0476364192b1537449093 +Copying blob sha256:943edb549a8300092a714190dfe633341c0ffb483784c4fdfe884b9019f6a0b4 +Copying blob sha256:e7ebc6e16708285bee3917ae12bf8d172ee0d7684a7830751ab9a1c070e7a125 +Copying blob sha256:bf6751561805be7d07d66f6acb2a33e99cf0cc0a20f5fd5d94a3c7f8ae55c2a1 +Copying blob sha256:c1bd37d01c89de343d68867518b1155cb297d8e03942066ecb44ae8f46b608a3 +Copying blob sha256:a84e57b779297b72428fc7308e63d13b4df99140f78565be92fc9dbe03fc6e69 +Copying blob sha256:14dd68f4c7e23d6a2363c2320747ab88986dfd43ba0489d139eeac3ac75323b2 +Copying blob sha256:a2092d776649ea2301f60265f378a02405539a2a68093b2612792cc65d00d161 +Copying blob sha256:879119e879f682c04d0784c9ae7bc6f421e206b95d20b32ce1cb8a49bfdef202 +Copying blob sha256:e615448af51b848ecec00caeaffd1e30e8bf5cffd464747d159f80e346b7a150 +Copying blob sha256:f610bd1e9ac6aa9326d61713d552eeefef47d2bd49fc16140aa9bf3db38c30a4 +Copying blob sha256:bfe0a1336d031bf5ff3ce381e354be7b2bf310574cc0cd1949ad94dda020cd27 +Copying blob sha256:f0f15db85788c1260c6aa8ad225823f45c89700781c4c793361ac5fa58d204c7 +Copying config sha256:c07ddb44daa97e9e8d2d68316b296cc9343ab5f3d2babc5e6e03b80cd580478e +Writing manifest to image destination +Storing signatures +Loaded image as c07ddb44daa97e9e8d2d68316b296cc9343ab5f3d2babc5e6e03b80cd580478e +``` + +> [!NOTE]Note +> +> - isula-build allows you to import a container image with a maximum size of 50 GB. +> - isula-build automatically recognizes the image format and loads it from the cascade image file. + +### rm: Deleting a Local Persistent Image + +You can run the `rm` command to delete an image from the local persistent storage. The command format is as follows: + +```shell +isula-build ctr-img rm IMAGE [IMAGE...] [FLAGS] +``` + +Currently, the following flags are supported: + +- `-a, --all`: deletes all images stored locally. +- `-p, --prune`: deletes all images that are stored locally and do not have tags. + +Example: + +```sh +$ sudo isula-build ctr-img rm -p +Deleted: sha256:78731c1dde25361f539555edaf8f0b24132085b7cab6ecb90de63d72fa00c01d +Deleted: sha256:eeba1bfe9fca569a894d525ed291bdaef389d28a88c288914c1a9db7261ad12c +``` + +### save: Exporting Cascade Images + +You can run the `save` command to export the cascade images to the local disk. The command format is as follows: + +```shell +isula-build ctr-img save [REPOSITORY:TAG]|imageID -o xx.tar +``` + +Currently, the following flags are supported: + +- `-f, --format`: indicates the exported image format, **oci** or **docker** (**ISULABUILD_CLI_EXPERIMENTAL** needs to be enabled). +- `-o, --output`: indicates the local path for storing the exported images.
The following example shows how to export an image using *image/tag*: + +```sh +$ sudo isula-build ctr-img save busybox:latest -o busybox.tar +Getting image source signatures +Copying blob sha256:50644c29ef5a27c9a40c393a73ece2479de78325cae7d762ef3cdc19bf42dd0a +Copying blob sha256:824082a6864774d5527bda0d3c7ebd5ddc349daadf2aa8f5f305b7a2e439806f +Copying blob sha256:5f70bf18a086007016e948b04aed3b82103a36bea41755b6cddfaf10ace3c6ef +Copying config sha256:21c3e96ac411242a0e876af269c0cbe9d071626bdfb7cc79bfa2ddb9f7a82db6 +Writing manifest to image destination +Storing signatures +Save success with image: busybox:latest +``` + +The following example shows how to export an image using *ImageID*: + +```sh +$ sudo isula-build ctr-img save 21c3e96ac411 -o busybox.tar +Getting image source signatures +Copying blob sha256:50644c29ef5a27c9a40c393a73ece2479de78325cae7d762ef3cdc19bf42dd0a +Copying blob sha256:824082a6864774d5527bda0d3c7ebd5ddc349daadf2aa8f5f305b7a2e439806f +Copying blob sha256:5f70bf18a086007016e948b04aed3b82103a36bea41755b6cddfaf10ace3c6ef +Copying config sha256:21c3e96ac411242a0e876af269c0cbe9d071626bdfb7cc79bfa2ddb9f7a82db6 +Writing manifest to image destination +Storing signatures +Save success with image: 21c3e96ac411 +``` + +The following example shows how to export multiple images to the same tarball: + +```sh +$ sudo isula-build ctr-img save busybox:latest nginx:latest -o all.tar +Getting image source signatures +Copying blob sha256:eb78099fbf7fdc70c65f286f4edc6659fcda510b3d1cfe1caa6452cc671427bf +Copying blob sha256:29f11c413898c5aad8ed89ad5446e89e439e8cfa217cbb404ef2dbd6e1e8d6a5 +Copying blob sha256:af5bd3938f60ece203cd76358d8bde91968e56491daf3030f6415f103de26820 +Copying config sha256:b8efb18f159bd948486f18bd8940b56fd2298b438229f5bd2bcf4cedcf037448 +Writing manifest to image destination +Storing signatures +Getting image source signatures +Copying blob sha256:e2d6930974a28887b15367769d9666116027c411b7e6c4025f7c850df1e45038 +Copying config sha256:a33de3c85292c9e65681c2e19b8298d12087749b71a504a23c576090891eedd6 +Writing manifest to image destination +Storing signatures +Save success with image: [busybox:latest nginx:latest] +``` + +> [!NOTE]Note +> +> - Save exports an image in .tar format by default. If necessary, you can save the image and then manually compress it. +> - When exporting an image using the image name, specify the entire image name in the *REPOSITORY:TAG* format. + +### tag: Tagging Local Persistent Images + +You can run the `tag` command to add a tag to a local persistent container image.
The command format is as follows: + +```shell +isula-build ctr-img tag <image ID>/<image name> busybox:latest +``` + +Example: + +```sh +$ sudo isula-build ctr-img images +--------------------------------------- ----------- ----------------- -------------------------- ------------ +REPOSITORY TAG IMAGE ID CREATED SIZE +--------------------------------------- ----------- ----------------- -------------------------- ------------ +alpine latest a24bb4013296 2020-05-29 21:19:46 5.85 MB +--------------------------------------- ----------- ----------------- -------------------------- ------------ +$ sudo isula-build ctr-img tag a24bb4013296 alpine:v1 +$ sudo isula-build ctr-img images +--------------------------------------- ----------- ----------------- ------------------------ ------------ +REPOSITORY TAG IMAGE ID CREATED SIZE +--------------------------------------- ----------- ----------------- ------------------------ ------------ +alpine latest a24bb4013296 2020-05-29 21:19:46 5.85 MB +alpine v1 a24bb4013296 2020-05-29 21:19:46 5.85 MB +--------------------------------------- ----------- ----------------- ------------------------ ------------ +``` + +### pull: Pulling an Image to a Local Host + +Run the `pull` command to pull an image from a remote image repository to a local host. Command format: + +```shell +isula-build ctr-img pull REPOSITORY[:TAG] +``` + +Example: + +```sh +$ sudo isula-build ctr-img pull example-registry/library/alpine:latest +Getting image source signatures +Copying blob sha256:8f52abd3da461b2c0c11fda7a1b53413f1a92320eb96525ddf92c0b5cde781ad +Copying config sha256:e4db68de4ff27c2adfea0c54bbb73a61a42f5b667c326de4d7d5b19ab71c6a3b +Writing manifest to image destination +Storing signatures +Pull success with image: example-registry/library/alpine:latest +``` + +### push: Pushing a Local Image to a Remote Repository + +Run the `push` command to push a local image to a remote repository. Command format: + +```shell +isula-build ctr-img push REPOSITORY[:TAG] +``` + +Currently, the following flags are supported: + +- `-f, --format`: indicates the pushed image format, **oci** or **docker** (**ISULABUILD_CLI_EXPERIMENTAL** needs to be enabled). + +Example: + +```sh +$ sudo isula-build ctr-img push example-registry/library/mybusybox:latest +Getting image source signatures +Copying blob sha256:d2421964bad195c959ba147ad21626ccddc73a4f2638664ad1c07bd9df48a675 +Copying config sha256:f0b02e9d092d905d0d87a8455a1ae3e9bb47b4aa3dc125125ca5cd10d6441c9f +Writing manifest to image destination +Storing signatures +Push success with image: example-registry/library/mybusybox:latest +``` + +> [!NOTE]Note +> +> - Before pushing an image, log in to the corresponding image repository. + +## info: Viewing the Operating Environment and System Information + +You can run the `isula-build info` command to view the running environment and system information of isula-build. The command format is as follows: + +```shell + isula-build info [flags] +``` + +The following flags are supported: + +- `-H, --human-readable`: Boolean. Prints the memory information in a human-readable format. Values are calculated using powers of 1000. +- `-V, --verbose`: Boolean. Displays the memory usage during system running.
+ +Example: + +```sh +$ sudo isula-build info -H + General: + MemTotal: 7.63 GB + MemFree: 757 MB + SwapTotal: 8.3 GB + SwapFree: 8.25 GB + OCI Runtime: runc + DataRoot: /var/lib/isula-build/ + RunRoot: /var/run/isula-build/ + Builders: 0 + Goroutines: 12 + Store: + Storage Driver: overlay + Backing Filesystem: extfs + Registry: + Search Registries: + oepkgs.net + Insecure Registries: + localhost:5000 + oepkgs.net + Runtime: + MemSys: 68.4 MB + HeapSys: 63.3 MB + HeapAlloc: 7.41 MB + MemHeapInUse: 8.98 MB + MemHeapIdle: 54.4 MB + MemHeapReleased: 52.1 MB +``` + +## login: Logging In to the Remote Image Repository + +You can run the `login` command to log in to the remote image repository. The command format is as follows: + +```shell + isula-build login SERVER [FLAGS] +``` + +Currently, the following flags are supported: + +```shell + Flags: + -p, --password-stdin Read password from stdin + -u, --username string Username to access registry +``` + +You can enter the password through stdin. In the following example, the password stored in creds.txt is piped to the stdin of isula-build: + +```sh + $ cat creds.txt | sudo isula-build login -u cooper -p mydockerhub.io + Login Succeeded +``` + +Alternatively, enter the password in interactive mode: + +```sh + $ sudo isula-build login mydockerhub.io -u cooper + Password: + Login Succeeded +``` + +## logout: Logging Out of the Remote Image Repository + +You can run the `logout` command to log out of the remote image repository. The command format is as follows: + +```shell + isula-build logout [SERVER] [FLAGS] +``` + +Currently, the following flags are supported: + +```shell + Flags: + -a, --all Logout all registries +``` + +Example: + +```sh + $ sudo isula-build logout -a + Removed authentications +``` + +## version: Querying the isula-build Version + +You can run the `version` command to view the current version information. + +```sh +$ sudo isula-build version +Client: + Version: 0.9.6-4 + Go Version: go1.15.7 + Git Commit: 83274e0 + Built: Wed Jan 12 15:32:55 2022 + OS/Arch: linux/amd64 + +Server: + Version: 0.9.6-4 + Go Version: go1.15.7 + Git Commit: 83274e0 + Built: Wed Jan 12 15:32:55 2022 + OS/Arch: linux/amd64 +``` + +## manifest: Manifest List Management + +A manifest list contains the image information for different system architectures. You can use the same manifest list name (for example, **openeuler:latest**) on different architectures to obtain the image of the corresponding architecture. The `manifest` command supports the create, annotate, inspect, and push subcommands. + +> [!NOTE]NOTE +> +> manifest is an experimental feature. To use it, enable the experimental options on both the client and the server. For details, see Client Overview and Configuring Services. + +### create: Manifest List Creation + +The `create` subcommand of the `manifest` command is used to create a manifest list. The command format is as follows: + +```shell +isula-build manifest create MANIFEST_LIST MANIFEST [MANIFEST...] +``` + +You can specify the name of the manifest list and the remote images to be added to the list. If no remote image is specified, an empty manifest list is created. + +Example: + +```sh +sudo isula-build manifest create openeuler localhost:5000/openeuler_x86:latest localhost:5000/openeuler_aarch64:latest +``` + +### annotate: Manifest List Update + +The `annotate` subcommand of the `manifest` command is used to update the manifest list.
The command format is as follows: + +```shell +isula-build manifest annotate MANIFEST_LIST MANIFEST [flags] +``` + +You can specify the manifest list to be updated and the image in the list to be modified, and use flags to specify the options to be updated. This command can also be used to add new images to the manifest list. + +Currently, the following flags are supported: + +- --arch: Overrides the applicable architecture of the image. The value is a string. +- --os: Overrides the applicable OS of the image. The value is a string. +- --os-features: Specifies the OS features required by the image. The value is a string and is rarely used. +- --variant: Variant of the image recorded in the list. The value is a string. + +Example: + +```sh +sudo isula-build manifest annotate --os linux --arch arm64 openeuler:latest localhost:5000/openeuler_aarch64:latest +``` + +### inspect: Manifest List Inspection + +The `inspect` subcommand of the `manifest` command is used to query the manifest list. The command format is as follows: + +```shell +isula-build manifest inspect MANIFEST_LIST +``` + +Example: + +```sh +$ sudo isula-build manifest inspect openeuler:latest +{ + "schemaVersion": 2, + "mediaType": "application/vnd.docker.distribution.manifest.list.v2+json", + "manifests": [ + { + "mediaType": "application/vnd.docker.distribution.manifest.v2+json", + "size": 527, + "digest": "sha256:bf510723d2cd2d4e3f5ce7e93bf1e52c8fd76831995ac3bd3f90ecc866643aff", + "platform": { + "architecture": "amd64", + "os": "linux" + } + }, + { + "mediaType": "application/vnd.docker.distribution.manifest.v2+json", + "size": 527, + "digest": "sha256:f814888b4bb6149bd39ba8375a1932fb15071b4dbffc7f76c7b602b06abbb820", + "platform": { + "architecture": "arm64", + "os": "linux" + } + } + ] +} +``` + +### push: Manifest List Push to the Remote Repository + +The `push` subcommand of the `manifest` command is used to push the manifest list to the remote repository. The command format is as follows: + +```shell +isula-build manifest push MANIFEST_LIST DESTINATION +``` + +Example: + +```sh +sudo isula-build manifest push openeuler:latest localhost:5000/openeuler:latest +``` + +# Directly Integrating a Container Engine + +isula-build can be integrated with iSulad or Docker to import the built container image to the local storage of the container engine. + +## Integration with iSulad + +Images that are successfully built can be directly exported to iSulad. + +Example: + +```sh +sudo isula-build ctr-img build -f Dockerfile -o isulad:busybox:2.0 +``` + +Specify iSulad in the `-o` parameter to export the built container image to iSulad. You can then query the image by running `isula images`. + +```sh +$ sudo isula images +REPOSITORY TAG IMAGE ID CREATED SIZE +busybox 2.0 2d414a5cad6d 2020-08-01 06:41:36 5.577 MB +``` + +> [!NOTE]Note +> +> - isula-build and iSulad must be on the same node. +> - When an image is directly exported to iSulad, the isula-build client needs to temporarily store the successfully built image as `/var/lib/isula-build/tmp/[build_id]/isula-build-tmp-%v.tar` and then import it to iSulad. Ensure that the `/var/lib/isula-build/tmp/` directory has sufficient disk space. If the isula-build client process is killed or Ctrl+C is pressed during the export, you need to manually clear the `/var/lib/isula-build/tmp/[build_id]/isula-build-tmp-%v.tar` file. + +## Integration with Docker + +Images that are successfully built can be directly exported to the Docker daemon.
+ +Example: + +```sh +sudo isula-build ctr-img build -f Dockerfile -o docker-daemon:busybox:2.0 +``` + +Specify docker-daemon in the `-o` parameter to export the built container image to Docker. You can run the `docker images` command to query the image. + +```sh +$ sudo docker images +REPOSITORY TAG IMAGE ID CREATED SIZE +busybox 2.0 2d414a5cad6d 2 months ago 5.22MB +``` + +> [!NOTE]Note +> +> isula-build and Docker must be on the same node. + +# Precautions + +This chapter describes the constraints and limitations of building images with isula-builder and the differences from `docker build`. + +## Constraints or Limitations + +1. When exporting an image to [iSulad](https://gitee.com/openeuler/iSulad/blob/master/README.md/), a tag is required. +2. Because isula-builder calls the OCI runtime (for example, **runc**) when executing the **RUN** instruction, you must guarantee the integrity of the runtime binary. +3. DataRoot should not be set to **tmpfs**. +4. Currently, **overlay2** is the only storage driver supported by isula-builder. +5. Currently, the Docker image format is the only image format supported by isula-builder. +6. You are advised to set the file permission of the Dockerfile to **0600** to prevent tampering by other users. +7. Currently, the **RUN** instruction supports only the host network. +8. Currently, when exporting an image to a tar package, isula-builder supports only the tar compression format. +9. The base image size is limited to 1 GB when importing a base image using `import`. + +## Differences with "docker build" + +`isula-build` complies with the [Dockerfile specification](https://docs.docker.com/engine/reference/builder), but there are some subtle differences between `isula-builder` and `docker build`: + +1. isula-builder commits after each build stage, not after every line. +2. isula-builder does not support the build cache. +3. Only the **RUN** instruction is executed in the build container. +4. Build history is currently not supported. +5. A stage name can start with a number. +6. In `isula-builder`, the length of a stage name is limited to 64 characters. +7. Currently, the source of the **ADD** instruction cannot be a remote URL. +8. Resource restriction on a single build is not supported. If resource restriction is required, configure a resource limit on isula-builder. +9. `isula-builder` calculates the image size by adding the tarball size of each original layer, whereas Docker uses only the diff content of each layer. Therefore, the image sizes listed by `isula-builder` differ from those listed by Docker. +10. An image name should be in the *NAME:TAG* format, for example, **busybox:latest**, where **latest** must not be omitted. diff --git a/docs/en/docs/image_builder/isula_build/isula_build_appendix.md b/docs/en/docs/image_builder/isula_build/isula_build_appendix.md new file mode 100644 index 0000000000000000000000000000000000000000..31144a191135968fd1c02df34cc3a65743567d93 --- /dev/null +++ b/docs/en/docs/image_builder/isula_build/isula_build_appendix.md @@ -0,0 +1,91 @@ +# Appendix + +## Command Line Parameters + +**Table 1** Parameters of the `ctr-img build` command + +| **Command** | **Parameter** | **Description** | +| ------------- | -------------- | ------------------------------------------------------------ | +| ctr-img build | --build-arg | String list, which contains variables required during the build. | +| | --build-static | Key value, which is used to build binary equivalence.
Currently, the following key values are included: - build-time: string, which indicates that a fixed timestamp is used to build a container image. The timestamp format is YYYY-MM-DD HH-MM-SS. | +| | -f, --filename | String, which indicates the path of the Dockerfile. If this parameter is not specified, the current path is used. | +| | --format | String, which indicates the image format **oci** or **docker** (**ISULABUILD_CLI_EXPERIMENTAL** needs to be enabled). | +| | --iidfile | String, which indicates the local file to which the image ID is written. | +| | -o, --output | String, which indicates the image export mode and path.| +| | --proxy | Boolean, which inherits the proxy environment variable on the host. The default value is true. | +| | --tag | String, which indicates the tag value of the image that is successfully built. | +| | --cap-add | String list, which contains permissions required by the **RUN** instruction during the build process.| + +**Table 2** Parameters of the `ctr-img load` command + +| **Command** | **Parameter** | **Description** | +| ------------ | ----------- | --------------------------------- | +| ctr-img load | -i, --input | String, path of the local .tar package to be imported.| + +**Table 3** Parameters of the `ctr-img push` command + +| **Command** | **Parameter** | **Description** | +| ------------ | ----------- | --------------------------------- | +| ctr-img push | -f, --format | String, which indicates the pushed image format **oci** or **docker** (**ISULABUILD_CLI_EXPERIMENTAL** needs to be enabled).| + +**Table 4** Parameters of the `ctr-img rm` command + +| **Command** | **Parameter** | **Description** | +| ---------- | ----------- | --------------------------------------------- | +| ctr-img rm | -a, --all | Boolean, which is used to delete all local persistent images. | +| | -p, --prune | Boolean, which is used to delete all images that are stored persistently on the local host and do not have tags. | + +**Table 5** Parameters of the `ctr-img save` command + +| **Command** | **Parameter** | **Description** | +| ------------ | ------------ | ---------------------------------- | +| ctr-img save | -o, --output | String, which indicates the local path for storing the exported images.| +| ctr-img save | -f, --format | String, which indicates the exported image format **oci** or **docker** (**ISULABUILD_CLI_EXPERIMENTAL** needs to be enabled).| + +**Table 6** Parameters of the `login` command + +| **Command** | **Parameter** | **Description** | +| -------- | -------------------- | ------------------------------------------------------- | +| login | -p, --password-stdin | Boolean, which indicates whether to read the password through stdin. If this option is not specified, the password is entered in interactive mode. | +| | -u, --username | String, which indicates the username for logging in to the image repository.| + +**Table 7** Parameters of the `logout` command + +| **Command** | **Parameter** | **Description** | +| -------- | --------- | ------------------------------------ | +| logout | -a, --all | Boolean, which indicates whether to log out of all logged-in image repositories.
| + +**Table 8** Parameters of the `manifest annotate` command + +| **Command** | **Parameter** | **Description** | +| ----------------- | ------------- | ---------------------------- | +| manifest annotate | --arch | Sets the applicable architecture of the image. | +| | --os | Sets the applicable OS of the image. | +| | --os-features | Sets the OS features required by the image. | +| | --variant | Sets the architecture variant of the image. | + +## Communication Matrix + +The isula-build component processes communicate with each other through a Unix socket file. No ports are used for communication. + +## Files and Permissions + +- All isula-build operations must be performed by the **root** user. To perform operations as a non-privileged user, you need to configure the `--group` option. + +- The following table lists the file permissions involved in the running of isula-build. + +| **File Path** | **File/Folder Permission** | **Description** | +| ------------------------------------------- | ------------------- | ------------------------------------------------------------ | +| /usr/bin/isula-build | 550 | Binary file of the command line tool. | +| /usr/bin/isula-builder | 550 | Binary file of the isula-builder process. | +| /usr/lib/systemd/system/isula-build.service | 640 | systemd configuration file, which is used to manage the isula-build service. | +| /usr/isula-build | 650 | Root directory of the isula-builder configuration files. | +| /etc/isula-build/configuration.toml | 600 | General isula-builder configuration file, including the settings of the isula-builder log level, persistency directory, runtime directory, and OCI runtime. | +| /etc/isula-build/policy.json | 600 | Syntax file of the signature verification policy file. | +| /etc/isula-build/registries.toml | 600 | Configuration file of each image repository, including the available image repository list and image repository blacklist. | +| /etc/isula-build/storage.toml | 600 | Configuration file of the local persistent storage, including the configuration of the used storage driver. | +| /etc/isula-build/isula-build.pub | 400 | Asymmetric encryption public key file. | +| /var/run/isula_build.sock | 660 | Local socket of isula-builder. | +| /var/lib/isula-build | 700 | Local persistency directory. | +| /var/run/isula-build | 700 | Local runtime directory. | +| /var/lib/isula-build/tmp/\[build_id\]/isula-build-tmp-*.tar | 644 | Local temporary directory for storing the images when they are exported to iSulad. | diff --git a/docs/en/docs/image_builder/isula_build/overview.md b/docs/en/docs/image_builder/isula_build/overview.md new file mode 100644 index 0000000000000000000000000000000000000000..1f4e7e510d5e686a50f93febbda4332cb3d1248f --- /dev/null +++ b/docs/en/docs/image_builder/isula_build/overview.md @@ -0,0 +1,13 @@ +# Container Image Building + +## Overview + +isula-build is a container image build tool developed by the iSula container team. It allows you to quickly build container images using Dockerfiles. + +isula-build uses the client/server (C/S) mode. isula-build functions as the client and provides a group of command line tools for image building and management. isula-builder functions as the server, processes client management requests, and runs as a daemon process in the background.
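+ +Because isula-builder runs as a daemon process, it is typically managed through systemd. The following is a minimal sketch that assumes the `isula-build.service` unit listed in the appendix; the unit name on your system may differ: + +```sh +# Start the isula-builder daemon via its systemd unit (assumed name) and verify that it is running. +sudo systemctl start isula-build.service +sudo systemctl status isula-build.service +```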
+ +![isula-build architecture](./figures/isula-build_arch.png) + +> [!NOTE]Note +> +> Currently, isula-build supports the OCI image format ([OCI Image Format Specification](https://github.com/opencontainers/image-spec/blob/main/spec.md/)) and the Docker image format ([Image Manifest Version 2, Schema 2](https://docs.docker.com/registry/spec/manifest-v2-2/)). Run `export ISULABUILD_CLI_EXPERIMENTAL=enabled` to enable the experimental feature that supports the OCI image format. When the experimental feature is disabled, isula-build uses the Docker image format as the default image format; when it is enabled, isula-build uses the OCI image format as the default image format. diff --git a/docs/en/docs/kmesh/kmesh/_toc.yaml b/docs/en/docs/kmesh/kmesh/_toc.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c001010f69338744a83ba15c64a524cc4ec6986e --- /dev/null +++ b/docs/en/docs/kmesh/kmesh/_toc.yaml @@ -0,0 +1,14 @@ +label: Kmesh User Guide +isManual: true +description: High-performance service mesh data plane software for openEuler +sections: + - label: Overview + href: ./overview.md + - label: Introduction to Kmesh + href: ./getting_to_know_kmesh.md + - label: Installation and Deployment + href: ./installation_and_deployment.md + - label: Usage + href: ./usage.md + - label: Appendix + href: ./appendixes.md diff --git a/docs/en/docs/kmesh/kmesh/appendixes.md b/docs/en/docs/kmesh/kmesh/appendixes.md new file mode 100644 index 0000000000000000000000000000000000000000..57d7e6004166b4311557540a1821860a69aac575 --- /dev/null +++ b/docs/en/docs/kmesh/kmesh/appendixes.md @@ -0,0 +1,3 @@ +# Appendix + +Learn more about [Kmesh](https://gitee.com/openeuler/Kmesh#kmesh). diff --git a/docs/en/docs/kmesh/kmesh/figures/kmesh-arch.png b/docs/en/docs/kmesh/kmesh/figures/kmesh-arch.png new file mode 100644 index 0000000000000000000000000000000000000000..000ec80ff35556199caef6ce78953599c1c52312 Binary files /dev/null and b/docs/en/docs/kmesh/kmesh/figures/kmesh-arch.png differ diff --git a/docs/en/docs/kmesh/kmesh/getting_to_know_kmesh.md b/docs/en/docs/kmesh/kmesh/getting_to_know_kmesh.md new file mode 100644 index 0000000000000000000000000000000000000000..c9032667f952944e5c0a8182c6a4b4cf202288f8 --- /dev/null +++ b/docs/en/docs/kmesh/kmesh/getting_to_know_kmesh.md @@ -0,0 +1,38 @@ +# Introduction to Kmesh + +## Introduction + +As cloud-native applications surge in number, the scale of cloud applications and their SLAs impose high requirements on cloud infrastructure. + +The Kubernetes-based cloud infrastructure can help implement agile deployment and management of applications. However, it does not support application traffic orchestration. The emergence of service mesh makes up for the lack of traffic orchestration in Kubernetes and complements Kubernetes to implement agile cloud application development and O&M. However, as service mesh applications develop, it has become an industry consensus that the current sidecar-based mesh architecture has obvious performance defects on the data plane. + +* Long delay + Take the typical service mesh Istio as an example. After meshing, the single-hop delay of service access increases by 2.65 ms, which cannot meet the requirements of delay-sensitive applications. + +* High overhead + In Istio, each sidecar configuration occupies more than 50 MB of memory, and two CPU cores are exclusively occupied by default. For large-scale clusters, the overhead is high, reducing the deployment density of service containers.
+ +Based on the programmable kernel, Kmesh offloads mesh traffic governance to the OS and shortens the data path from 3 hops to 1 hop, greatly reducing the data plane delay and accelerating service innovation. + +## Architecture + +The following figure shows the overall architecture of Kmesh. + +![](./figures/kmesh-arch.png) + +Kmesh consists of the following components: + +* kmesh-controller + Kmesh management program, which is responsible for Kmesh lifecycle management, xDS protocol interconnection, and O&M monitoring. + +* kmesh-api + API layer provided by Kmesh for external systems, including orchestration APIs converted from xDS and O&M monitoring channels. + +* kmesh-runtime + Runtime that supports L3 to L7 traffic orchestration, implemented in the kernel. + +* kmesh-orchestration + L3 to L7 traffic orchestration implemented based on eBPF, such as routing, gray release, and load balancing. + +* kmesh-probe + O&M monitoring probe, providing E2E monitoring capabilities. diff --git a/docs/en/docs/kmesh/kmesh/installation_and_deployment.md b/docs/en/docs/kmesh/kmesh/installation_and_deployment.md new file mode 100644 index 0000000000000000000000000000000000000000..32b8da79e5a18072c976764420475a5a8ec73193 --- /dev/null +++ b/docs/en/docs/kmesh/kmesh/installation_and_deployment.md @@ -0,0 +1,99 @@ +# Installation and Deployment + +## Software + +* OS: openEuler 24.03 LTS SP1 + +## Hardware + +* x86_64 + +## Preparing the Environment + +* Install the openEuler OS by referring to the [*openEuler Installation Guide*](https://docs.openeuler.openatom.cn/en/docs/24.03_LTS_SP2/server/installation_upgrade/installation/installation_on_servers.html). + +* Root permissions are required for installing Kmesh. + +## Installing Kmesh + +* Install the Kmesh software package. + +```shell +yum install Kmesh +``` + +* Check whether the installation is successful. If the command output contains the name of the software package, the installation is successful. + +```shell +rpm -q Kmesh +``` + +## Deploying Kmesh + +### Cluster Mode + +Before starting Kmesh, configure the IP address of the control plane program (for example, the Istiod IP address) in the cluster. + +```json + "clusters": [ + { + "name": "xds-grpc", + "type" : "STATIC", + "connect_timeout": "1s", + "lb_policy": "ROUND_ROBIN", + "load_assignment": { + "cluster_name": "xds-grpc", + "endpoints": [{ + "lb_endpoints": [{ + "endpoint": { + "address":{ + "socket_address": { + "protocol": "TCP", + "address": "192.168.0.1", # Configure the control plane IP address (for example, Istiod IP address). + "port_value": 15010 + } + } + } + }] + }] + } + } + ] +``` + +Currently, only the traffic orchestration function is supported in the cluster mode. + +### Local Mode + +Before starting Kmesh, modify `kmesh.service` to enable or disable the required functions. + +```shell +# Choose -enable-kmesh and disable ADS. +$ vim /usr/lib/systemd/system/kmesh.service +ExecStart=/usr/bin/kmesh-daemon -enable-kmesh -enable-ads=false +$ systemctl daemon-reload +``` + +To enable mesh acceleration, run the following commands: + +```shell +# Choose -enable-mda and disable ADS. +$ vim /usr/lib/systemd/system/kmesh.service +ExecStart=/usr/bin/kmesh-daemon -enable-mda -enable-ads=false +$ systemctl daemon-reload +``` + +When the Kmesh service is started, the kmesh-daemon program is invoked. For details about how to use the kmesh-daemon program, see [Using kmesh-daemon](./usage.md). + +### Starting Kmesh + +```shell +# Start the Kmesh service.
+$ systemctl start kmesh.service +# Check the Kmesh running status. +$ systemctl status kmesh.service +``` + +### Stopping Kmesh + +```shell +# Stop the Kmesh service. +$ systemctl stop kmesh.service +``` diff --git a/docs/en/docs/kmesh/kmesh/overview.md b/docs/en/docs/kmesh/kmesh/overview.md new file mode 100644 index 0000000000000000000000000000000000000000..74476ad1ef86e7a2998256bafbdd50d08b136ed7 --- /dev/null +++ b/docs/en/docs/kmesh/kmesh/overview.md @@ -0,0 +1,5 @@ +# Kmesh User Guide + +This document describes how to install, deploy, and use Kmesh, a high-performance service mesh data plane software provided by openEuler. + +This document is intended for developers, open source enthusiasts, and partners who use the openEuler operating system (OS) and want to learn and use Kmesh. Users must have basic knowledge of the Linux OS. diff --git a/docs/en/docs/kmesh/kmesh/usage.md b/docs/en/docs/kmesh/kmesh/usage.md new file mode 100644 index 0000000000000000000000000000000000000000..54ae86e327651587a2fa32a22fa36c9898316d3f --- /dev/null +++ b/docs/en/docs/kmesh/kmesh/usage.md @@ -0,0 +1,69 @@ +# Usage + +## Using kmesh-daemon + +```shell +# Display help information. +[root@openEuler ~]# kmesh-daemon -h +Usage of kmesh-daemon: + -bpf-fs-path string + bpf fs path (default "/sys/fs/bpf") + -cgroup2-path string + cgroup2 path (default "/mnt/kmesh_cgroup2") + -config-file string + [if -enable-kmesh] deploy in kube cluster (default "/etc/kmesh/kmesh.json") + -enable-ads + [if -enable-kmesh] enable control-plane from ads (default true) + -enable-kmesh + enable bpf kmesh + -service-cluster string + [if -enable-kmesh] TODO (default "TODO") + -service-node string + [if -enable-kmesh] TODO (default "TODO") + +# Enable ADS by default. +[root@openEuler ~]# kmesh-daemon -enable-kmesh + +# Enable ADS and specify the path of the configuration file. +[root@openEuler ~]# kmesh-daemon -enable-kmesh -enable-ads=true -config-file=/examples/kmesh.json + +# Disable ADS. +[root@openEuler ~]# kmesh-daemon -enable-kmesh -enable-ads=false +``` + +## Using kmesh-cmd + +```shell +# Display help information. +[root@openEuler ~]# kmesh-cmd -h +Usage of kmesh-cmd: + -config-file string + input config-resources to bpf maps (default "./config-resources.json") + +# Manually load configurations. +[root@openEuler ~]# kmesh-cmd -config-file=/examples/config-resources.json +``` + +## Using O&M Commands + +```shell +# Display help information. +[root@openEuler ~]# curl http://localhost:15200/help + /help: print list of commands + /options: print config options + /bpf/kmesh/maps: print bpf kmesh maps in kernel + /controller/envoy: print control-plane in envoy cache + /controller/kubernetes: print control-plane in kubernetes cache + +# Read the loaded configurations. +[root@openEuler ~]# curl http://localhost:15200/bpf/kmesh/maps +[root@openEuler ~]# curl http://localhost:15200/options +``` + +## Precautions + +* If `-enable-ads=true` is configured, Kmesh automatically receives orchestration rules from the service mesh control plane. In this case, do not run the `kmesh-cmd` command to deliver rules to avoid duplicated configurations. + +* The `-bpf-fs-path` option specifies the BPF directory of the OS. Data related to the Kmesh BPF program will be stored in this directory. The default directory is `/sys/fs/bpf`. + +* The `-cgroup2-path` option specifies the cgroup directory of the OS. The default directory is `/mnt/kmesh_cgroup2`. 
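+ +For example, the defaults above can be stated explicitly at startup. The following is a minimal sketch that combines only the flags shown in the `kmesh-daemon -h` output; the path values are the documented defaults: + +```shell +# Run Kmesh in local mode (ADS disabled) with the default BPF and cgroup2 paths passed explicitly. +[root@openEuler ~]# kmesh-daemon -enable-kmesh -enable-ads=false -bpf-fs-path=/sys/fs/bpf -cgroup2-path=/mnt/kmesh_cgroup2 +```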
diff --git a/docs/en/docs/kubeos/.DS_Store b/docs/en/docs/kubeos/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..5008ddfcf53c02e82d7eee2e57c38e5672ef89f6 Binary files /dev/null and b/docs/en/docs/kubeos/.DS_Store differ diff --git a/docs/en/docs/kubeos/kubeos/_toc.yaml b/docs/en/docs/kubeos/kubeos/_toc.yaml new file mode 100644 index 0000000000000000000000000000000000000000..aa8cb626346fda16c02f394de9fe0bb59fbf42af --- /dev/null +++ b/docs/en/docs/kubeos/kubeos/_toc.yaml @@ -0,0 +1,14 @@ +label: KubeOS User Guide +isManual: true +description: KubeOS is a lightweight OS tailored for containerized workloads. It enables atomic updates, maintains version uniformity, and simplifies O&M. +sections: + - label: Overview + href: ./overview.md + - label: About KubeOS + href: ./about_kubeos.md + - label: Installation and Deployment + href: ./installation_and_deployment.md + - label: Usage Instructions + href: ./usage_instructions.md + - label: KubeOS Image Creation + href: ./kubeos_image_creation.md diff --git a/docs/en/docs/kubeos/kubeos/about_kubeos.md b/docs/en/docs/kubeos/kubeos/about_kubeos.md new file mode 100644 index 0000000000000000000000000000000000000000..4a90c881ea56099c2a200c718ed09f3a3fb92764 --- /dev/null +++ b/docs/en/docs/kubeos/kubeos/about_kubeos.md @@ -0,0 +1,39 @@ +# About KubeOS + +## Introduction + +Containers and Kubernetes are widely used in cloud scenarios. However, the current approach of managing containers and OSs separately often leads to function redundancy and difficult collaboration between scheduling systems. In addition, it is difficult to manage OS versions: software packages are installed, updated, and deleted separately in OSs of the same version, so after a period of time the OS versions become inconsistent, causing version fragmentation. The OSs may also be tightly coupled with services, making major version upgrades difficult. To solve these problems, openEuler provides KubeOS, a container OS upgrade tool based on openEuler. + +Container OSs are lightweight OSs designed for scenarios where services run in containers. KubeOS connects container OSs as components to Kubernetes, so that the container OSs have the same status as services. With KubeOS, a Kubernetes cluster manages containers and container OSs in a unified system. + +KubeOS is a Kubernetes operator for controlling the container OS upgrade process and upgrading the container OSs as a whole to implement collaboration between the OS managers and services. Before the container OSs are upgraded, services are migrated to other nodes to reduce the impact on services during OS upgrade and configuration. In this upgrade pattern, the container OSs are upgraded atomically so that the OSs remain synchronized with the expected status. This ensures that the OS versions in the cluster are consistent, preventing version fragmentation. + +## Architecture + +### KubeOS Architecture + +**Figure 1** KubeOS architecture + +![](./figures/kubeos-architecture.png) + +As shown in the preceding figure, KubeOS consists of three components: os-operator, os-proxy, and os-agent. The os-operator and os-proxy components run in containers and are deployed in the Kubernetes cluster. os-agent is not considered a cluster component. Its instances run on worker nodes as processes.
+ +- os-operator: global container OS manager, which continuously checks the container OS versions of all nodes, controls the number of nodes to be upgraded concurrently based on the configured information, and marks the nodes to be upgraded. + +- os-proxy: OS manager of a single node, which continuously checks the container OS version of the node. If a node is marked by os-operator as a node to be upgraded, the node is locked, its pods are evicted, and the upgrade information is forwarded to os-agent. + +- os-agent: receives information from os-proxy, downloads the container OS image used for upgrade from the OS image server, upgrades the container OS, and restarts the node. + +### File System of a Container OS + +**Figure 2** File system layout of a container OS + +![](./figures/file-system-layout-of-a-container-os.png) + +As shown in the figure, a container OS comprises four partitions: + +- boot partition: GRUB2 file partition. +- Persist partition: stores persistent user data. When the container OS is upgraded, the data in this partition is retained. +- Two root partitions: Container OSs use the dual-partition mode with two root partitions, rootA and rootB. Assume that the container runs the OS stored in the rootA partition after initialization. When the system is upgraded, the new system is downloaded to the rootB partition. GRUB has two boot options: A and B. The default boot option of GRUB is set to B and the node is restarted. After the node is started, the container runs the upgraded OS in the rootB partition. + +The root file system of a container OS is read-only. Users' persistent data is stored in the Persist partition. diff --git a/docs/en/docs/kubeos/kubeos/figures/file-system-layout-of-a-container-os.png b/docs/en/docs/kubeos/kubeos/figures/file-system-layout-of-a-container-os.png new file mode 100644 index 0000000000000000000000000000000000000000..add62e72f85b103b7dd5780d2e360049f5f712df Binary files /dev/null and b/docs/en/docs/kubeos/kubeos/figures/file-system-layout-of-a-container-os.png differ diff --git a/docs/en/docs/kubeos/kubeos/figures/kubeos-architecture.png b/docs/en/docs/kubeos/kubeos/figures/kubeos-architecture.png new file mode 100644 index 0000000000000000000000000000000000000000..7834a3793b73c49ddd046502c65335a08f576c30 Binary files /dev/null and b/docs/en/docs/kubeos/kubeos/figures/kubeos-architecture.png differ diff --git a/docs/en/docs/kubeos/kubeos/installation_and_deployment.md b/docs/en/docs/kubeos/kubeos/installation_and_deployment.md new file mode 100644 index 0000000000000000000000000000000000000000..3fdbdb2749bb9c29078cb2bdf5017cef91f6b5ec --- /dev/null +++ b/docs/en/docs/kubeos/kubeos/installation_and_deployment.md @@ -0,0 +1,183 @@ +# Installation and Deployment + +This chapter describes how to install and deploy the KubeOS tool. + +## Software and Hardware Requirements + +### Hardware Requirements + +- Currently, only the x86 and AArch64 architectures are supported. + +### Software Requirements + +- OS: openEuler 24.03-LTS-SP1 + +### Environment Preparation + +- Install the openEuler system. For details, see the [*openEuler Installation Guide*](https://docs.openeuler.openatom.cn/en/docs/24.03_LTS_SP2/server/installation_upgrade/installation/installation_on_servers.html). +- Install qemu-img, bc, parted, tar, yum, docker, and dosfstools. + +## KubeOS Installation + +To install KubeOS, perform the following steps: + +1.
Configure the Yum sources openEuler 24.03-LTS-SP1 and openEuler 24.03-LTS-SP1:EPOL: + + ```conf + [openEuler24.03-LTS-SP1] # openEuler 24.03-LTS-SP1 official source + name=openEuler24.03-LTS-SP1 + baseurl=http://repo.openeuler.org/openEuler-24.03-LTS-SP1/everything/$basearch/ + enabled=1 + gpgcheck=1 + gpgkey=http://repo.openeuler.org/openEuler-24.03-LTS-SP1/everything/$basearch/RPM-GPG-KEY-openEuler + ``` + +2. Install KubeOS as the **root** user. + + ```shell + # yum install KubeOS KubeOS-scripts -y + ``` + +> ![Note](./public_sys-resources/icon-note.gif)**Note**: +> +> KubeOS is installed in the **/opt/kubeOS** directory, including the os-operator, os-proxy, and os-agent binary files, KubeOS image build tools, and corresponding configuration files. + +## KubeOS Deployment + +After KubeOS is installed, you need to configure and deploy it. This section describes how to configure and deploy KubeOS. + +### Building the os-operator and os-proxy Images + +#### Environment Preparation + +Before using Docker to create a container image, ensure that Docker has been installed and configured. + +#### Procedure + +1. Go to the working directory. + + ```shell + cd /opt/kubeOS + ``` + +2. Specify the image repository, name, and version for os-proxy. + + ```shell + export IMG_PROXY=your_imageRepository/os-proxy_imageName:version + ``` + +3. Specify the image repository, name, and version for os-operator. + + ```shell + export IMG_OPERATOR=your_imageRepository/os-operator_imageName:version + ``` + +4. Compile a Dockerfile to build an image. Pay attention to the following points when compiling a Dockerfile: + + - The os-operator and os-proxy images must be built based on a base image. Ensure that the base image is secure. + - Copy the os-operator and os-proxy binary files to the corresponding images. + - Ensure that the owner and owner group of the os-proxy binary file in the os-proxy image are **root**, and the file permission is **500**. + - Ensure that the owner and owner group of the os-operator binary file in the os-operator image are the user who runs the os-operator process in the container, and the file permission is **500**. + - The locations of the os-operator and os-proxy binary files in the image and the commands run during container startup must correspond to the parameters specified in the YAML file used for deployment. + + An example Dockerfile is as follows: + + ```text + FROM your_baseimage + COPY ./bin/proxy /proxy + ENTRYPOINT ["/proxy"] + ``` + + ```text + FROM your_baseimage + COPY --chown=6552:6552 ./bin/operator /operator + ENTRYPOINT ["/operator"] + ``` + + Alternatively, you can use multi-stage builds in the Dockerfile. + +5. Build the os-operator and os-proxy images. + + ```shell + # Specify the Dockerfile path of os-proxy. + export DOCKERFILE_PROXY=your_dockerfile_proxy + # Specify the Dockerfile path of os-operator. + export DOCKERFILE_OPERATOR=your_dockerfile_operator + # Build images. + docker build -t ${IMG_OPERATOR} -f ${DOCKERFILE_OPERATOR} . + docker build -t ${IMG_PROXY} -f ${DOCKERFILE_PROXY} . + ``` + +6. Push the images to the image repository. + + ```shell + docker push ${IMG_OPERATOR} + docker push ${IMG_PROXY} + ``` + +### Creating a KubeOS VM Image + +#### Precautions + +- The VM image is used as an example. For details about how to create a physical machine image, see **KubeOS Image Creation**. +- Root permission is required for creating a KubeOS image.
+- The RPM sources of kbimg are the **everything** and **EPOL** repositories of openEuler of a specific version. In the repo file provided during image creation, you are advised to configure the **everything** and **EPOL** repositories of a specific openEuler version for the Yum source. +- By default, the KubeOS VM image built using the default RPM list is stored in the same path as the kbimg tool. This partition must have at least 25 GiB of free drive space. +- When creating a KubeOS image, you cannot customize the file system to be mounted. + +#### Procedure + +Use the **kbimg.sh** script to create a KubeOS VM image. For details about the commands, see **KubeOS Image Creation**. + +To create a KubeOS VM image, perform the following steps: + +1. Go to the working directory. + + ```shell + cd /opt/kubeOS/scripts + ``` + +2. Run `kbimg.sh` to create a KubeOS image. The following is an example command: + + ```shell + bash kbimg.sh create vm-image -p xxx.repo -v v1 -b ../bin/os-agent -e '''$1$xyz$RdLyKTL32WEvK3lg8CXID0''' + ``` + + In the command, **xxx.repo** indicates the actual Yum source file used for creating the image. You are advised to configure both the **everything** and **EPOL** repositories as Yum sources. + + After the KubeOS image is created, the following files are generated in the **/opt/kubeOS/scripts** directory: + + - **system.img**: system image in raw format. The default size is 20 GB. The size of the root file system partition is less than 2,560 MiB, and the size of the Persist partition is less than 14 GiB. + - **system.qcow2**: system image in QCOW2 format. + - **update.img**: partition image of the root file system that is used for upgrade. + + The created KubeOS VM image can be used only in a VM of the x86 or AArch64 architecture. KubeOS does not support legacy boot in an x86 VM. + +### Deploying CRD, os-operator, and os-proxy + +#### Precautions + +- The Kubernetes cluster must be deployed first. For details, see the *openEuler 24.03-LTS-SP1 Kubernetes Cluster Deployment Guide*. + +- The OS of the worker nodes to be upgraded in the cluster must be the KubeOS built using the method described in the previous section. If it is not, use **system.qcow2** to deploy the VM again. For details about how to deploy a VM, see the *openEuler 24.03-LTS-SP1 Virtualization User Guide*. Currently, KubeOS does not support master nodes; deploy master nodes on openEuler 24.03-LTS-SP1. +- The YAML files for deploying CustomResourceDefinition (CRD), os-operator, os-proxy, and role-based access control (RBAC) of the OS need to be compiled. +- The os-operator and os-proxy components are deployed in the Kubernetes cluster. os-operator must be deployed as a Deployment, and os-proxy as a DaemonSet. +- Kubernetes security mechanisms, such as the RBAC, pod service account, and security policies, must be deployed. + +#### Procedure + +1. Prepare YAML files used for deploying CRD, RBAC, os-operator, and os-proxy of the OS. For details, see [YAML examples](https://gitee.com/openeuler/KubeOS/tree/master/docs/example/config). The following uses **crd.yaml**, **rbac.yaml**, and **manager.yaml** as examples. + +2. Deploy CRD, RBAC, os-operator, and os-proxy. Assume that the **crd.yaml**, **rbac.yaml**, and **manager.yaml** files are stored in the **config/crd**, **config/rbac**, and **config/manager** directories, respectively. Run the following commands: + + ```shell + kubectl apply -f config/crd + kubectl apply -f config/rbac + kubectl apply -f config/manager + ``` + +3.
After the deployment is complete, run the following command to check whether each component is started properly. If **STATUS** of all components is **Running**, the components are started properly. + + ```shell + kubectl get pods -A + ``` diff --git a/docs/en/docs/kubeos/kubeos/kubeos_image_creation.md b/docs/en/docs/kubeos/kubeos/kubeos_image_creation.md new file mode 100644 index 0000000000000000000000000000000000000000..6aa6a636c20c8415061e4ed6b9ea789b34f093e7 --- /dev/null +++ b/docs/en/docs/kubeos/kubeos/kubeos_image_creation.md @@ -0,0 +1,169 @@ +# KubeOS Image Creation + +## Introduction + +kbimg is an image creation tool required for KubeOS deployment and upgrade. You can use kbimg to create KubeOS Docker, VM, and physical machine images. + +## Commands + +### Command Format + +**bash kbimg.sh** \[ --help \| -h \] create \[ COMMANDS \] \[ OPTIONS \] + +### Parameter Description + +* COMMANDS + + | Parameter | Description | + | ------------- | ---------------------------------------------- | + | upgrade-image | Generates an OCI image for installation and upgrade.| + | vm-image | Generates a VM image for installation and upgrade. | + | pxe-image | Generates the images and files required for physical machine installation. | + +* OPTIONS + + | Option | Description | + | ------------ | ------------------------------------------------------------ | + | -p | Path of the repo file. The Yum source required for creating an image is configured in the repo file. | + | -v | Version of the created KubeOS image. | + | -b | Path of the os-agent binary file. | + | -e | Password of the **root** user of the KubeOS image, which is an encrypted password with a salt value. You can run the OpenSSL or KIWI command to generate the password.| + | -d | Generated or used Docker image. | + | -h, --help | Help information. | + +## Usage Description + +### Precautions + +* Root permission is required for executing **kbimg.sh**. +* Currently, only the x86 and AArch64 architectures are supported. +* The RPM sources of kbimg are the **everything** and **EPOL** repositories of openEuler of a specific version. In the repo file provided during image creation, you are advised to configure the **everything** and **EPOL** repositories of a specific openEuler version for the Yum source. + +### Creating a KubeOS OCI Image + +#### Precautions + +* The created OCI image can be used only for subsequent VM or physical machine image creation or upgrade. It cannot be used to start containers. +* If the default RPM list is used to create a KubeOS image, at least 6 GB of drive space is required. If the RPM list is customized, the occupied drive space may exceed 6 GB. + +#### Example + +* To configure the DNS, customize the `resolv.conf` file in the `scripts` directory. + +```shell + cd /opt/kubeOS/scripts + touch resolv.conf + vim resolv.conf +``` + +* Create a KubeOS image. + +``` shell +cd /opt/kubeOS/scripts +bash kbimg.sh create upgrade-image -p xxx.repo -v v1 -b ../bin/os-agent -e '''$1$xyz$RdLyKTL32WEvK3lg8CXID0''' -d your_imageRepository/imageName:version +``` + +* After the creation is complete, view the created KubeOS image. + +``` shell +docker images +``` + +### Creating a KubeOS VM Image + +#### Precautions + +* To use a Docker image to create a KubeOS VM image, pull the corresponding image or create a Docker image first and ensure the security of the Docker image. +* The created KubeOS VM image can be used only in a VM of the x86 or AArch64 architecture. +* Currently, KubeOS does not support legacy boot in an x86 VM.
+* If the default RPM list is used to create a KubeOS image, at least 25 GB of drive space is required. If the RPM list is customized, the occupied drive space may exceed 25 GB. + +#### Example + +* Using the Repo Source + * To configure the DNS, customize the `resolv.conf` file in the `scripts` directory. + + ```shell + cd /opt/kubeOS/scripts + touch resolv.conf + vim resolv.conf + ``` + + * Create a KubeOS VM image. + + ``` shell + cd /opt/kubeOS/scripts + bash kbimg.sh create vm-image -p xxx.repo -v v1 -b ../bin/os-agent -e '''$1$xyz$RdLyKTL32WEvK3lg8CXID0''' + ``` + +* Using a Docker Image + + ``` shell + cd /opt/kubeOS/scripts + bash kbimg.sh create vm-image -d your_imageRepository/imageName:version + ``` + +* Result Description + After the KubeOS image is created, the following files are generated in the **/opt/kubeOS/scripts** directory: + * **system.qcow2**: system image in QCOW2 format. The default size is 20 GiB. The size of the root file system partition is less than 2,020 MiB, and the size of the Persist partition is less than 16 GiB. + * **update.img**: partition image of the root file system used for upgrade. + +### Creating Images and Files Required for Installing KubeOS on Physical Machines + +#### Precautions + +* To use a Docker image to create the images and files for physical machine installation, pull the corresponding image or create a Docker image first and ensure the security of the Docker image. +* The created image can only be used to install KubeOS on a physical machine of the x86 or AArch64 architecture. +* The IP address specified in the **Global.cfg** file is a temporary IP address used during installation. After the system is installed and started, configure the network by referring to **openEuler 22.09 Administrator Guide** > **Configuring the Network**. +* KubeOS cannot be installed on multiple drives at the same time. Otherwise, the startup may fail or the mount points may be disordered. +* Currently, KubeOS does not support legacy boot on an x86 physical machine. +* If the default RPM list is used to create a KubeOS image, at least 5 GB of drive space is required. If the RPM list is customized, the occupied drive space may exceed 5 GB. + +#### Example + +* Modify the `00bootup/Global.cfg` file. All parameters are mandatory. Currently, only IPv4 addresses are supported. The following is a configuration example: + + ```shell + # rootfs file name + rootfs_name=kubeos.tar + # select the target disk to install kubeOS + disk=/dev/sda + # pxe server ip address where the rootfs is stored on the http server + server_ip=192.168.1.50 + # target machine temporary ip + local_ip=192.168.1.100 + # target machine temporary route + route_ip=192.168.1.1 + # target machine temporary netmask + netmask=255.255.255.0 + # target machine netDevice name + net_name=eth0 + ``` + +* Using the Repo Source + * To configure the DNS, customize the `resolv.conf` file in the `scripts` directory. + + ```shell + cd /opt/kubeOS/scripts + touch resolv.conf + vim resolv.conf + ``` + + * Create an image required for installing KubeOS on a physical machine. + + ```shell + cd /opt/kubeOS/scripts + bash kbimg.sh create pxe-image -p xxx.repo -v v1 -b ../bin/os-agent -e '''$1$xyz$RdLyKTL32WEvK3lg8CXID0''' + ``` + +* Using a Docker Image + + ``` shell + cd /opt/kubeOS/scripts + bash kbimg.sh create pxe-image -d your_imageRepository/imageName:version + ``` + +* Result Description + + * **initramfs.img**: initramfs image used for boot from PXE. + * **kubeos.tar**: OS used for installation from PXE.
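+ +The comments in **Global.cfg** above indicate that the rootfs (**kubeos.tar**) must be reachable over HTTP from the PXE server (`server_ip`). The following is a minimal sketch of staging the artifact, assuming the generated files are in **/opt/kubeOS/scripts** as in the VM image example and that the HTTP root on the PXE server is the hypothetical path `/var/www/html`: + +```shell +# Copy the rootfs to the (assumed) HTTP root of the PXE server configured as server_ip in Global.cfg. +scp /opt/kubeOS/scripts/kubeos.tar root@192.168.1.50:/var/www/html/ +```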
diff --git a/docs/en/docs/kubeos/kubeos/overview.md b/docs/en/docs/kubeos/kubeos/overview.md new file mode 100644 index 0000000000000000000000000000000000000000..643381a93e5bca97a4c0a1a44493c7cd7f10e7ce --- /dev/null +++ b/docs/en/docs/kubeos/kubeos/overview.md @@ -0,0 +1,8 @@ +# KubeOS User Guide + +This document describes how to install, deploy, and use KubeOS in the openEuler system. KubeOS connects the container OS to the scheduling system in a standard extension pattern and manages the OS upgrade of nodes in the cluster through the scheduling system. + +This document is intended for community developers, open source enthusiasts, and partners who use the openEuler system and want to learn about and use container OSs. Users must: + +* Know basic Linux operations. +* Understand Kubernetes and Docker. diff --git a/docs/en/docs/kubeos/kubeos/public_sys-resources/icon-note.gif b/docs/en/docs/kubeos/kubeos/public_sys-resources/icon-note.gif new file mode 100644 index 0000000000000000000000000000000000000000..6314297e45c1de184204098efd4814d6dc8b1cda Binary files /dev/null and b/docs/en/docs/kubeos/kubeos/public_sys-resources/icon-note.gif differ diff --git a/docs/en/docs/kubeos/kubeos/usage_instructions.md b/docs/en/docs/kubeos/kubeos/usage_instructions.md new file mode 100644 index 0000000000000000000000000000000000000000..cbbe752fffbb3be64288643e5602934923c92d13 --- /dev/null +++ b/docs/en/docs/kubeos/kubeos/usage_instructions.md @@ -0,0 +1,503 @@ +# Usage Instructions + +## Precautions + +- General precautions + - KubeOS currently only supports virtual machines (VMs) and physical machines that use UEFI on the x86 and AArch64 architectures. + - When creating or updating the OS CustomResource (CR) using `kubectl apply` with a YAML file, avoid concurrent `apply` operations. Excessive concurrent requests may overwhelm the kube-apiserver, leading to failures. + - If you configure certificates or keys for the container image registry, ensure that the permissions on these files are set to the minimum necessary. +- Upgrade precautions + - Upgrades are performed as atomic upgrades of all packages. Individual package upgrades are not supported. + - Upgrades use a dual-partition upgrade strategy. Configurations with more than two partitions are not supported. + - Cross-major-version upgrades are not currently supported. + - Logs for the upgrade process on a single node can be found in the **/var/log/messages** file on that node. + - Strictly adhere to the provided upgrade and rollback procedures. Deviation from the prescribed order of operations may result in upgrade or rollback failures. + - If you need to configure private images for `ctr` (used by `containerd`) on a node, place the **host.toml** configuration file in the `/etc/containerd/certs.d` directory, following the `ctr` guidelines. + - Upgrades using OCI images and mutual TLS (mTLS) authentication are only supported on openEuler 22.09 and later. + - The `nodeselector`, `executionmode`, `timewindow`, and `timeinterval` features are only supported on openEuler 24.09 and later. + - KubeOS 24.09 is not compatible with previous versions. + +- Configuration Precautions + - Users are responsible for the security and reliability of any custom configurations, particularly persistent configurations such as `kernel.sysctl.persist`, `grub.cmdline.current`, and `grub.cmdline.next`. KubeOS does not validate the effectiveness of these parameters.
+ - When `opstype` is set to `config`, configurations will not be applied if the specified `osversion` does not match the OS version of the target nodes in the cluster. + - Currently, only temporary kernel parameter configuration (`kernel.sysctl`), persistent kernel parameter configuration (`kernel.sysctl.persist`), and GRUB command line configuration (`grub.cmdline.current` and `grub.cmdline.next`) are supported. + - Persistent configurations are written to the persistent partition and will be retained after upgrades and reboots. Temporary kernel parameter configurations will not be retained after a reboot. + - When configuring `grub.cmdline.current` or `grub.cmdline.next`, if a single parameter is provided (not in the `key=value` format), specify the parameter as the key and leave the value empty. + - When deleting a configuration (`operation=delete`), ensure that the key and value in the `key=value` format match the actual configuration. + - Configuration changes cannot be rolled back. If a rollback is required, modify the configuration version and content and reapply the configuration. + - If a configuration error occurs and a node enters the `config` state, revert the configuration version to the previous version and reapply it. This should return the node to the `idle` state. However, note that parameters successfully configured before the error occurred cannot be reverted. + - When configuring `grub.cmdline.current` or `grub.cmdline.next`, if you need to update an existing parameter in the format of `key=value` to a format with only key and no value, for example, updating `rd.info=0` to `rd.info`, you need to delete `key=value` first, and then add the key in the next configuration. Direct updates or updates and deletions in the same operation are not supported. + +## OS CR Parameters + +Create a custom object of the OS type in the cluster and set the corresponding fields. The OS type comes from the CRD object created in the installation and deployment sections. The following describes the fields. + +- The `imageurl` field specifies the location of the operating system image. This URL must use either the `http` or `https` protocol. For `https`, the image transfer is secure. For `http`, you must set the `flagSafe` parameter to `true`. This explicitly signals that you trust the source and allows the image download to proceed. If `imageurl` uses `http` and `flagSafe` is not `true`, the URL is considered unsafe, the image will not be downloaded, and an error message will appear in the node upgrade log. +- You are advised to use the `https` protocol for security. When using `https`, ensure that the target machines being upgraded have the necessary certificates installed. If you maintain the image server yourself, you must sign the images to guarantee their authenticity and ensure the nodes being upgraded trust your certificate. Place the certificate file in the `/etc/KubeOS/certs` directory. The administrator provides the `imageurl` and is responsible for ensuring the security and validity of this URL. An internal network address is recommended for enhanced security. +- The provider of the container OS image is responsible for its integrity. Verify that you obtain images from a trustworthy source. +- When your cluster uses multiple OS versions (meaning that there are multiple OS instances), each OS must have a distinct `nodeselector`. This ensures that a group of nodes identified by a specific label corresponds to only one OS instance. 
- If an OS instance has `nodeselector` set to `all-label`, it will be the only valid instance in the cluster (only nodes matching its criteria will be managed). + - Similarly, only one OS instance can have an unconfigured `nodeselector`. This is because an absent `nodeselector` is interpreted as targeting nodes without any labels. +- `timeinterval` parameter + - When not set, the default value is 15 seconds. + - Setting this parameter to `0` will cause the task dispatch interval of the operator to gradually increase until it reaches 1000 seconds. This behavior is due to rate limiting imposed by the Kubernetes `controller-runtime`. + - In parallel execution mode, `timeinterval` defines the delay between the operator dispatching upgrade/configuration tasks for each batch of nodes. + - In serial execution mode, `timeinterval` represents the delay between the completion of one batch of nodes (upgraded/configured serially) and the dispatch of the next upgrade/configuration task. Within a batch, the interval between individual nodes remains 15 seconds. + - Any update to the fields of an OS instance immediately triggers the operator. + + | Parameter | Type | Description | Usage Notes | Mandatory | + | ---------------- | ------ | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------- | + | `imagetype` | string | Type of image used for the upgrade | The value can be `docker`, `containerd`, or `disk` and is valid only for upgrades. **Note:** When the value is `containerd`, the agent prioritizes the `crictl` tool for pulling images. If `crictl` is unavailable, it uses the `ctr` command. When `ctr` is used to pull images from a private repository, configure the repository host information in the **/etc/containerd/certs.d** directory according to the [containerd official documentation](https://github.com/containerd/containerd/blob/main/docs/hosts.md). | Yes | + | `opstype` | string | Operation type (upgrade, configuration, or rollback) | The value can be `upgrade`, `config`, or `rollback`. | Yes | + | `osversion` | string | Target version for the upgrade or rollback | `osversion` must match the target OS version of the nodes (specified in the `PRETTY_NAME` field in the **/etc/os-release** file or the OS version detected by Kubernetes). For example: `KubeOS 1.0.0`. | Yes | + | `maxunavailable` | int | Maximum number of nodes undergoing upgrade/configuration/rollback concurrently | If `maxunavailable` exceeds the actual number of nodes, the operation proceeds with the actual number of nodes. | Yes | + | `containerimage` | string | Container image used for the upgrade | This parameter is only applicable when `imagetype` is a container type.
The value can be one of the three container image address formats: `repository/name`, `repository/name@sha256:xxxx`, and `repository/name:tag`. | Yes | + | `imageurl` | string | URL of the drive image used for the upgrade | `imageurl` must include the protocol and supports only `http` or `https`. Example: `https://192.168.122.15/update.img`. Valid only for upgrades using drive images. | Yes | + | `checksum` | string | Checksum (SHA-256) of the drive image used for the upgrade or the digests of the container image | This parameter is valid only for upgrades. | Yes | + | `flagSafe` | bool | Whether the address specified by `imageurl` is safe when the `http` protocol is used | The value must be `true` or `false`. This parameter is valid only when `imageurl` uses the `http` protocol. | Yes | + | `mtls` | bool | Whether the connection to `imageurl` uses two-way HTTPS authentication | The value must be `true` or `false`. This parameter is valid only when `imageurl` uses the `https` protocol. | Yes | + | `cacert` | string | Root certificate file used for HTTPS or two-way HTTPS authentication | This parameter is valid only when `imageurl` uses the `https` protocol. | Required when `imageurl` uses `https` | + | `clientcert` | string | Client certificate file used for two-way HTTPS authentication | This parameter is valid only when two-way HTTPS authentication is used. | Required when `mtls` is `true` | + | `clientkey` | string | Client private key file used for two-way HTTPS authentication | This parameter is valid only when two-way HTTPS authentication is used. | Required when `mtls` is `true` | + | `evictpodforce` | bool | Whether to forcibly evict pods during upgrade/rollback | Must be `true` or `false`. This parameter is valid only for upgrades or rollbacks. | Yes | + | `sysconfigs` | / | Configuration settings | 1. When `opstype` is `config`, only configuration is performed.
2. When `opstype` is `upgrade/rollback`, it indicates post-upgrade/rollback configuration, meaning it takes effect after the upgrade/rollback and subsequent reboot. For detailed field descriptions, see the [Settings](#settings). | Required when `opstype` is `config` | + | `upgradeconfigs` | / | Configuration settings to apply before an upgrade. | This parameter is valid for upgrades or rollbacks and takes effect before the upgrade or rollback operation. For detailed field descriptions, see the [Settings](#settings). | Optional | + | `nodeselector` | string | Label of the nodes targeted for the upgrade/configuration/rollback | This parameter is used to perform operations on nodes with specific labels, rather than all worker nodes in the cluster. The nodes targeted for the operation need to have a label with the `upgrade.openeuler.org/node-selector` key. The `nodeselector` parameter should be set to the value of this label. **Notes:** 1. When this parameter is not set or is set to `no-label`, operations are performed on nodes that do not have the `upgrade.openeuler.org/node-selector` label.
2. When this parameter is set to `""` (an empty string), operations are performed on nodes that have the `upgrade.openeuler.org/node-selector=""` label.
3. To ignore labels and perform operations on all nodes, set this parameter to `all-label`. | Optional | + | `timewindow` | / | Time window during which the upgrade/configuration/rollback can take place. | 1. When specifying a time window, both `starttime` and `endtime` must be specified. That is, they should either both be empty or both be non-empty.
Both `starttime` and `endtime` are strings in the `YYYY-MM-DD HH:MM:SS` or `HH:MM:SS` format, and both must use the same format.
2. When the `HH:MM:SS` format is used, if `endtime` is earlier than `starttime`, `endtime` is taken to mean that time on the next day.
3. When `timewindow` is not specified, it defaults to no time window restrictions. | Optional | + | `timeinterval` | int | The time interval between each batch of tasks for the upgrade/configuration/rollback operation. | This parameter is in seconds and defines the time interval between the operator dispatching tasks. If the Kubernetes cluster is busy and cannot immediately respond to the operator's request, the actual interval may be longer than the specified time. | Optional | + | `executionmode` | string | The mode in which the upgrade/configuration/rollback operation is executed. | The value can be `serial` or `parallel`. If this parameter is not set, the operation defaults to parallel mode. | Optional | + +## Upgrade + +1. Create a YAML file and deploy an instance of the OS Custom Resource (CR) in the cluster. This YAML file defines the upgrade process. The following example assumes you save the YAML content to **upgrade_v1alpha1_os.yaml**. + + - Upgrade using a drive image + + ```yaml + apiVersion: upgrade.openeuler.org/v1alpha1 + kind: OS + metadata: + name: os-sample + spec: + imagetype: disk + opstype: upgrade + osversion: edit.os.version + maxunavailable: edit.node.upgrade.number + containerimage: "" + evictpodforce: true/false + imageurl: edit.image.url + checksum: image.checksum + flagSafe: imageurl.safety + mtls: imageurl use mtls or not + cacert: ca certificate + clientcert: client certificate + clientkey: client certificate key + ``` + + - Upgrade using a container image + - Before you can upgrade using a container image, you need to create a container image specifically for the upgrade process. For detailed instructions on how to create this image, see [Creating a KubeOS OCI Image](./kubeos_image_creation.md#creating-a-kubeos-oci-image) in [KubeOS Image Creation](./kubeos_image_creation.md). + + ``` yaml + apiVersion: upgrade.openeuler.org/v1alpha1 + kind: OS + metadata: + name: os-sample + spec: + imagetype: docker + opstype: upgrade + osversion: edit.os.version + maxunavailable: edit.node.upgrade.number + containerimage: container image like repository/name:tag + evictpodforce: true/false + imageurl: "" + checksum: container image digests + flagSafe: false + mtls: true + ``` + + - Using containerd as the container engine + + ```yaml + apiVersion: upgrade.openeuler.org/v1alpha1 + kind: OS + metadata: + name: os-sample + spec: + imagetype: containerd + opstype: upgrade + osversion: edit.os.version + maxunavailable: edit.node.upgrade.number + containerimage: container image like repository/name:tag + evictpodforce: true/false + imageurl: "" + checksum: container image digests + flagSafe: false + mtls: true + ``` + + - Example of upgrading and applying configurations + - This example uses containerd as the container engine. The upgrade method does not affect the configuration process. `upgradeconfigs` are applied before the upgrade. `sysconfigs` are applied after the machine reboots from the upgrade. See [Settings](#settings) for detailed information about the configuration parameters. + - When upgrading and configuring, set the `opstype` field to `upgrade`. 
+
+    ```yaml
+    apiVersion: upgrade.openeuler.org/v1alpha1
+    kind: OS
+    metadata:
+      name: os-sample
+    spec:
+      imagetype: ""
+      opstype: upgrade
+      osversion: edit.os.version
+      maxunavailable: edit.node.upgrade.number
+      containerimage: ""
+      evictpodforce: true/false
+      imageurl: ""
+      checksum: container image digests
+      flagSafe: false
+      mtls: false
+      sysconfigs:
+        version: edit.os.version
+        configs:
+          - model: kernel.sysctl
+            contents:
+              - key: kernel param key1
+                value: kernel param value1
+              - key: kernel param key2
+                value: kernel param value2
+          - model: kernel.sysctl.persist
+            configpath: persist file path
+            contents:
+              - key: kernel param key3
+                value: kernel param value3
+      upgradeconfigs:
+        version: 1.0.0
+        configs:
+          - model: kernel.sysctl
+            contents:
+              - key: kernel param key4
+                value: kernel param value4
+    ```
+
+  - Example of upgrading specific nodes using `nodeselector`, `timewindow`, `timeinterval`, and `executionmode`
+    - This example uses containerd as the container engine. The upgrade method does not affect node selection.
+    - Nodes targeted for upgrade must include the `upgrade.openeuler.org/node-selector` label. The value of `nodeselector` in the YAML file should match the value of this label on the desired nodes. For example, if `nodeselector` is set to `kubeos`, only worker nodes with the `upgrade.openeuler.org/node-selector=kubeos` label will be upgraded.
+    - `nodeselector`, `timewindow`, `timeinterval`, and `executionmode` are also applicable to configuration and rollback operations.
+    - Example commands for managing node labels:
+
+      ```shell
+      # Add a label to node kubeos-node1
+      kubectl label nodes kubeos-node1 upgrade.openeuler.org/node-selector=kubeos-v1
+      # Modify the label of node kubeos-node1
+      kubectl label --overwrite nodes kubeos-node1 upgrade.openeuler.org/node-selector=kubeos-v2
+      # Delete the label from node kubeos-node1
+      kubectl label nodes kubeos-node1 upgrade.openeuler.org/node-selector-
+      # View the labels of all nodes
+      kubectl get nodes --show-labels
+      ```
+
+    - Example YAML file:
+
+      ```yaml
+      apiVersion: upgrade.openeuler.org/v1alpha1
+      kind: OS
+      metadata:
+        name: os-sample
+      spec:
+        imagetype: containerd
+        opstype: upgrade
+        osversion: edit.os.version
+        maxunavailable: edit.node.upgrade.number
+        containerimage: container image like repository/name:tag
+        evictpodforce: true/false
+        imageurl: ""
+        checksum: container image digests
+        flagSafe: false
+        mtls: true
+        nodeselector: edit.node.label.value
+        timewindow:
+          starttime: "HH:MM:SS/YYYY-MM-DD HH:MM:SS"
+          endtime: "HH:MM:SS/YYYY-MM-DD HH:MM:SS"
+        timeinterval: time interval like 30
+        executionmode: serial/parallel
+      ```
+
+2. Check the OS version of nodes that have not been upgraded.
+
+    ```shell
+    kubectl get nodes -o custom-columns='NAME:.metadata.name,OS:.status.nodeInfo.osImage'
+    ```
+
+3. Deploy the CR instance in the cluster. Nodes will be upgraded based on the parameters specified in the YAML file.
+
+    ```shell
+    kubectl apply -f upgrade_v1alpha1_os.yaml
+    ```
+
+4. Check the OS version of the nodes again to confirm whether the upgrade is complete.
+
+    ```shell
+    kubectl get nodes -o custom-columns='NAME:.metadata.name,OS:.status.nodeInfo.osImage'
+    ```
+
+5. If you need to perform the upgrade again, modify the corresponding fields in **upgrade_v1alpha1_os.yaml**.
+
+> [!NOTE]Note
+>
+> To perform the upgrade again, modify the `imageurl`, `osversion`, `checksum`, `maxunavailable`, `flagSafe`, or `containerimage` parameters in **upgrade_v1alpha1_os.yaml**.
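+
+The `checksum` value must correspond to the image actually being served. A minimal sketch for generating it with standard tools (the file and image names below are illustrative only, not part of this guide):
+
+```shell
+# SHA-256 checksum of a drive image (for upgrades via imageurl).
+sha256sum update.img
+
+# Digest of a container image (for upgrades via containerimage).
+docker images --digests | grep kubeos
+```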
+ +## Settings + +- Settings parameters + + This section describes the configuration parameters using an example YAML file. Your configuration should follow the same indentation as the example: + + ```yaml + apiVersion: upgrade.openeuler.org/v1alpha1 + kind: OS + metadata: + name: os-sample + spec: + imagetype: "" + opstype: config + osversion: edit.os.version + maxunavailable: edit.node.config.number + containerimage: "" + evictpodforce: false + checksum: "" + sysconfigs: + version: edit.sysconfigs.version + configs: + - model: kernel.sysctl + contents: + - key: kernel param key1 + value: kernel param value1 + - key: kernel param key2 + value: kernel param value2 + operation: delete + - model: kernel.sysctl.persist + configpath: persist file path + contents: + - key: kernel param key3 + value: kernel param value3 + - model: grub.cmdline.current + contents: + - key: boot param key1 + - key: boot param key2 + value: boot param value2 + - key: boot param key3 + value: boot param value3 + operation: delete + - model: grub.cmdline.next + contents: + - key: boot param key4 + - key: boot param key5 + value: boot param value5 + - key: boot param key6 + value: boot param value6 + operation: delete + ``` + + Configuration parameters + + | Parameter | Type | Description | Usage Note | Mandatory | + | ---------- | -------- | --------------------------- | ------------------------------------------------------------ | ----------------------- | + | `version` | string | Configuration version | This parameter determines if the configuration should be applied by comparing versions. If `version` is empty (`""` or not set), the comparison will still be performed. Therefore, if `sysconfigs` or `upgradeconfigs` is not configured, the existing `version` will be cleared, triggering the configuration. | Yes | + | `configs` | / | Specific configuration content | This parameter contains a list of specific configuration items. | Yes | + | `model` | string | Configuration type | See the [Settings List](#settings-list) in the appendix for supported configuration types. | Yes | + | `configpath` | string | Configuration file path | This parameter is only effective for the `kernel.sysctl.persist` configuration type. See the [Settings List](#settings-list) in the appendix for the description of the configuration file path. | No | + | `contents` | / | Specific key/value pairs and operation type | This parameter contains a list of specific configuration items. | Yes | + | `key` | string | Parameter name | `key` cannot be empty or contain `=`. You are advised not to configure strings containing spaces or tabs. For specific usage of `key` for each configuration type, see the [Settings List](#settings-list) in the appendix. | Yes | + | `value` | string | Parameter value | `value` cannot be empty for parameters in the `key=value` format. You are advised not to configure strings containing spaces or tabs. For specific usage of `value` for each configuration type, see the [Settings List](#settings-list) in the appendix. | Required for parameters in the `key=value` format | + | `operation` | string | Operation to be performed on the parameter | This parameter is only effective for `kernel.sysctl.persist`, `grub.cmdline.current`, and `grub.cmdline.next` parameter types. The default behavior is to add or update. The value can only be `delete`, which means deleting the existing parameter (the `key=value` must match exactly for deletion). | No | + + - `upgradeconfigs` has the same parameters as `sysconfigs`. 
`upgradeconfigs` is for configuration before upgrade/rollback and only takes effect in upgrade/rollback scenarios. `sysconfigs` supports both configuration only and configuration after upgrade/rollback reboot. + +- Usage + + 1. Create a YAML file like the **upgrade_v1alpha1_os.yaml** example above and deploy the OS CR instance in the cluster. + + 2. Check the configuration version and node status before applying the configuration (`NODESTATUS` should be `idle`). + + ```shell + kubectl get osinstances -o custom-columns='NAME:.metadata.name,NODESTATUS:.spec.nodestatus,SYSCONFIG:status.sysconfigs.version,UPGRADECONFIG:status.upgradeconfigs.version' + ``` + + 3. Apply the configuration, then check the node status again (`NODESTATUS` should change to `config`). + + ```shell + kubectl apply -f upgrade_v1alpha1_os.yaml + kubectl get osinstances -o custom-columns='NAME:.metadata.name,NODESTATUS:.spec.nodestatus,SYSCONFIG:status.sysconfigs.version,UPGRADECONFIG:status.upgradeconfigs.version' + ``` + + 4. Check the node configuration version again to confirm whether the configuration is complete (`NODESTATUS` should return to `idle`): + + ```shell + kubectl get osinstances -o custom-columns='NAME:.metadata.name,NODESTATUS:.spec.nodestatus,SYSCONFIG:status.sysconfigs.version,UPGRADECONFIG:status.upgradeconfigs.version' + ``` + +- If you need to perform the configuration again, modify the corresponding fields in **upgrade_v1alpha1_os.yaml**. + +## Rollback + +- Scenarios + - When a VM fails to start, you can manually select the previous version from the GRUB boot menu. This method only supports rollback to the previous version. + - When a VM starts successfully and you can access the system, you can use the rollback tool (recommended) or manually select the previous version from the GRUB boot menu. + - You can use the rollback tool in two ways: + 1. Rollback mode: reverts to the previous version. + 2. Upgrade mode: re-upgrades to the previous version. + +- Manual rollback instructions + - Restart the VM and select the second boot option in the GRUB boot menu to roll back to the previous version. + +- Rollback tool instructions + - Rolling back to any version + 1. Modify the YAML configuration file of the OS CR instance (for example, **upgrade_v1alpha1_os.yaml**). Set the relevant fields to the image information of the desired version. The OS category originates from the CRD object created in the installation and deployment document. Refer to the upgrade instructions in the previous section for field descriptions and examples. + 2. After modifying the YAML file, execute the update command. Nodes will then roll back according to the configured field information. + + ```shell + kubectl apply -f upgrade_v1alpha1_os.yaml + ``` + + - Rolling back to the previous version + - To roll back to the previous OS version, modify the **upgrade_v1alpha1_os.yaml** file. Set `osversion` to the previous version and `opstype` to `rollback` to roll back to the previous version (that is, switch to the previous partition). Example YAML file: + + ```yaml + apiVersion: upgrade.openeuler.org/v1alpha1 + kind: OS + metadata: + name: os-sample + spec: + imagetype: "" + opstype: rollback + osversion: KubeOS previous version + maxunavailable: 2 + containerimage: "" + evictpodforce: true/false + imageurl: "" + checksum: "" + flagSafe: false + mtls: true + ``` + + - To roll back to the previous configuration version (note that already configured parameters cannot be rolled back), modify the `upgrade_v1alpha1_os.yaml` file. 
Set `version` of `sysconfigs/upgradeconfigs` to the previous version. Example YAML file:
+
+    ```yaml
+    apiVersion: upgrade.openeuler.org/v1alpha1
+    kind: OS
+    metadata:
+      name: os-sample
+    spec:
+      imagetype: ""
+      opstype: config
+      osversion: edit.os.version
+      maxunavailable: edit.node.config.number
+      containerimage: ""
+      evictpodforce: true/false
+      imageurl: ""
+      checksum: ""
+      flagSafe: false
+      mtls: false
+      sysconfigs:
+        version: previous config version
+        configs:
+          - model: kernel.sysctl
+            contents:
+              - key: kernel param key1
+                value: kernel param value1
+              - key: kernel param key2
+                value: kernel param value2
+          - model: kernel.sysctl.persist
+            configpath: persist file path
+            contents:
+              - key: kernel param key3
+                value: kernel param value3
+    ```
+
+  - After modifying the YAML file, execute the update command. The nodes will roll back based on the configured information.
+
+    ```shell
+    kubectl apply -f upgrade_v1alpha1_os.yaml
+    ```
+
+  - Verify that the rollback was successful.
+    - For an OS version rollback, verify the container OS version of the node. For a configuration rollback, verify the node configuration version and confirm that the node status is `idle`.
+
+      ```shell
+      kubectl get osinstances -o custom-columns='NAME:.metadata.name,NODESTATUS:.spec.nodestatus,SYSCONFIG:status.sysconfigs.version,UPGRADECONFIG:status.upgradeconfigs.version'
+      ```
+
+## Appendixes
+
+### Settings List
+
+#### kernel Settings
+
+- `kernel.sysctl`: temporarily sets kernel parameters. These settings are lost after a reboot. The key/value pairs represent kernel parameter names and values. Neither keys nor values can be empty, and keys cannot contain the `=` character. The `operation` field cannot be `delete`. Example:
+
+    ```yaml
+    configs:
+      - model: kernel.sysctl
+        contents:
+          - key: user.max_user_namespaces
+            value: 16384
+          - key: net.ipv4.tcp_tw_recycle
+            value: 0
+    ```
+
+- `kernel.sysctl.persist`: sets persistent kernel parameters that are retained after a reboot. The key/value pairs represent kernel parameter names and values. Neither keys nor values can be empty, and keys cannot contain the `=` character. `configpath` specifies the path to the configuration file, which can be a new file (provided that its parent directory exists). If not specified, it defaults to **/etc/sysctl.conf**. Example:
+
+    ```yaml
+    configs:
+      - model: kernel.sysctl.persist
+        configpath: /etc/persist.conf
+        contents:
+          - key: user.max_user_namespaces
+            value: 16384
+          - key: net.ipv4.tcp_tw_recycle
+            value: 0
+            operation: delete
+    ```
+
+#### GRUB Settings
+
+- `grub.cmdline.current/next`: sets the kernel boot parameters in the **grub.cfg** file. These parameters appear on the line resembling the following example in **grub.cfg**:
+
+    ```text
+    linux /boot/vmlinuz root=/dev/sda2 ro rootfstype=ext4 nomodeset quiet oops=panic softlockup_panic=1 nmi_watchdog=1 rd.shell=0 selinux=0 crashkernel=256M panic=3
+    ```
+
+  - The `grub.cmdline.current/next` settings allow configuration for either the current or the next partition:
+
+    - `grub.cmdline.current`: Configures the boot parameters for the current partition.
+    - `grub.cmdline.next`: Configures the boot parameters for the next partition.
+
+  - Note: During upgrades/rollbacks, the `current` and `next` partition designations in the configuration (`sysconfigs`) are determined at the time the upgrade/rollback operation is initiated.
For instance, if the current partition is `A` and an upgrade is initiated with `grub.cmdline.current` configured in `sysconfigs`, the configuration will still be applied to partition `A` after the reboot, even though it may no longer be the `current` partition.
+
+  - `grub.cmdline.current/next` supports both the `key=value` format (where `value` cannot be empty) and the single-`key` format. If `value` contains an equal sign (for example, `root=UUID=some-uuid`), set `key` to all characters before the first `=` and `value` to all characters after the first `=`. Example:
+
+    ```yaml
+    configs:
+      - model: grub.cmdline.current
+        contents:
+          - key: selinux
+            value: "0"
+          - key: root
+            value: UUID=e4f1b0a0-590e-4c5f-9d8a-3a2c7b8e2d94
+          - key: panic
+            value: "3"
+            operation: delete
+          - key: crash_kexec_post_notifiers
+      - model: grub.cmdline.next
+        contents:
+          - key: selinux
+            value: "0"
+          - key: root
+            value: UUID=e4f1b0a0-590e-4c5f-9d8a-3a2c7b8e2d94
+          - key: panic
+            value: "3"
+            operation: delete
+          - key: crash_kexec_post_notifiers
+    ```
diff --git a/docs/en/docs/nestos/nestos/_toc.yaml b/docs/en/docs/nestos/nestos/_toc.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b858ac603684d406f8be44e5dbae6db4c2100653
--- /dev/null
+++ b/docs/en/docs/nestos/nestos/_toc.yaml
@@ -0,0 +1,10 @@
+label: NestOS User Guide
+isManual: true
+description: NestOS is a lightweight OS optimized for containerized environments. It employs dual-partition atomic updates to maintain security and reliability.
+sections:
+  - label: Overview
+    href: ./overview.md
+  - label: NestOS for Container User Guide
+    href: ./nestos_for_container.md
+  - label: Feature Description
+    href: ./feature_description.md
diff --git a/docs/en/docs/nestos/nestos/feature_description.md b/docs/en/docs/nestos/nestos/feature_description.md
new file mode 100644
index 0000000000000000000000000000000000000000..8e0861098d4e0abd2cfc293dc1e90cb8f5809cdb
--- /dev/null
+++ b/docs/en/docs/nestos/nestos/feature_description.md
@@ -0,0 +1,100 @@
+# Feature Description
+
+## Container Technology
+
+NestOS provides computing resources for applications using a containerized computing environment. Applications share a system kernel and resources but remain isolated from each other. This means that applications are no longer installed directly in the OS. Instead, they run in containers through container engines (for example, Docker, Podman, and iSula). This greatly reduces the coupling among the OS, applications, and running environment. Compared with the traditional application deployment mode, the NestOS cluster provides more flexible and convenient application deployment, less interference between application running environments, and easier OS maintenance.
+
+## rpm-ostree
+
+### System Upgrade
+
+rpm-ostree is a hybrid image/package system that combines RPM and OSTree. It provides RPM-based software package installation and management, and OSTree-based OS update and upgrade. rpm-ostree treats both operations as updates to the OS. Each update to the system is applied like a transaction submitted by rpm-ostree. This ensures that the update either completely succeeds or completely fails and allows the system to be rolled back to the state before the update.
+
+When updating the OS, rpm-ostree keeps two bootable deployments: the active deployment and the standby deployment. The update is applied to the standby deployment and takes effect only after the OS is restarted and the active and standby deployments are switched over.
If an error occurs during software installation or upgrade, rpm-ostree rollback allows NestOS to revert to the previous deployment. The **/ostree/** and **/boot/** directories of NestOS form the OSTree repository environment and show which OSTree deployment is booted into.
+
+### Read-Only File System
+
+In the rpm-ostree file system layout, only the **/etc** and **/var** directories are writable. Any data in the **/var** directory is not touched and is shared across upgrades. During a system upgrade, rpm-ostree takes the new default **/etc** and applies your changes on top. This means that upgrades will receive new default files in **/etc**, which is a critical feature.
+
+In a common OS, some files in the **/var** directory adopt the processing policy "/var to tmpfiles.d". That is, the system reads the configuration files in the **/usr/lib/tmpfiles.d/** directory through systemd-tmpfiles-setup.service and creates folders and blank files in the **/var** directory. All configuration files in the **/usr/lib/tmpfiles.d/** directory are provided by related RPM packages. In NestOS, the **/var** directory does not belong to any rpm-ostree commit layer; every commit layer shares one **/var** directory. However, files in the **/var** directory conflict with the OSTree transaction update model, so rpm-ostree deletes them when a software package is installed and regenerates the content of **/var** according to "/var to tmpfiles.d". Therefore, in the **/usr/lib/tmpfiles.d/** directory of NestOS, besides the configuration files provided by some RPM packages, there is a **pkg-XXX.conf** file generated by rpm-ostree during the installation of the XXX software package (even if XXX already provides a configuration file). **pkg-XXX.conf** records the folders in the **/var** directory provided by the XXX software package, but not the files, because those are deleted when rpm-ostree installs the package. When you need to operate on a folder in the **/var** directory provided by an RPM package, for example, to delete it, running the `rm` command only deletes it temporarily; after the system is restarted, the folder exists again. To permanently delete the folder, modify the **pkg-XXX.conf** file.
+
+OSTree is designed to parallel-install multiple versions of multiple independent operating systems. OSTree relies on a new top-level **ostree** directory; it can in fact parallel-install inside an existing OS or distribution occupying the physical **/** root. On each client machine, there is an OSTree repository stored in **/ostree/repo**, and a set of deployments stored in **/ostree/deploy/$STATEROOT/$CHECKSUM**. Each deployment is primarily composed of a set of hard links into the repository. This means each version is deduplicated; an upgrade process only costs disk space proportional to the new files, plus some constant overhead.
+
+The model OSTree emphasizes is that the OS read-only content is kept in **/usr**; it comes with code to create a Linux read-only bind mount to prevent inadvertent corruption. There is exactly one writable **/var** directory shared between the deployments of a given OS. The OSTree core code does not touch content in this directory; how that state is managed and upgraded is up to the code of each operating system.
+
+### OS Extensions
+
+NestOS keeps the base image as simple and small as possible for security and maintainability reasons.
However, in some cases it is necessary to add software to the base OS itself. For example, drivers or VPN software are potential candidates because they are harder to containerize. These software packages extend the functionality of the base OS rather than providing runtimes for user applications. For this reason, rpm-ostree treats these packages as extensions. That said, there are no restrictions on which packages you can actually install. By default, packages are downloaded from the openEuler repositories.
+
+To layer a software package, write a systemd unit that executes the `rpm-ostree` command to install the desired package. The changes are added to a new deployment, which takes effect after a restart.
+
+## nestos-installer
+
+nestos-installer helps with NestOS installation. It provides the following functions:
+
+(1) Installing the OS to a target disk, optionally customizing it with an Ignition configuration or first-boot kernel parameters (`nestos-installer install`)
+
+(2) Downloading and verifying an OS image for various cloud, virtualization, or bare metal platforms (`nestos-installer download`)
+
+(3) Listing NestOS images available for download (`nestos-installer list-stream`)
+
+(4) Embedding an Ignition configuration in a live ISO image to customize the running system that boots from it (`nestos-installer iso ignition`)
+
+(5) Wrapping an Ignition configuration in an initrd image that can be appended to the live PXE initramfs to customize the running system that boots from it (`nestos-installer pxe ignition`)
+
+## Zincati
+
+Zincati is an auto-update agent for NestOS hosts. It works as a client for the Cincinnati service, monitoring NestOS version changes and automatically updating machines through rpm-ostree. Zincati has the following features:
+
+(1) Agent for continuous automatic updates, with support for phased rollouts
+
+(2) Runtime customization via TOML dropins, allowing users to override the default configuration
+
+(3) Multiple update strategies
+
+(4) Local maintenance windows on a weekly schedule for planned upgrades
+
+(5) Internal metrics exposed to Prometheus to ease monitoring tasks across a large fleet of nodes
+
+(6) Logging with configurable priority levels
+
+(7) Support for complex update graphs via the Cincinnati protocol
+
+(8) Support for cluster-wide reboot orchestration via an external lock manager
+
+## System Initialization (Ignition)
+
+Ignition is a distribution-agnostic provisioning utility that reads configuration files (in JSON format) and initializes NestOS accordingly. Configurable components include storage and file systems, systemd units, and users.
+
+Ignition runs only once, during the first boot of the system (while in the initramfs). Because Ignition runs so early in the boot process, it can re-partition disks, format file systems, create users, and write files before the userspace begins to boot. As a result, systemd services are already written to disk when systemd starts, reducing boot time.
+
+(1) Ignition runs only on the first boot
+Ignition is designed to be used as a provisioning tool, not as a configuration management tool. Ignition encourages immutable infrastructure, in which machine modification requires that users discard the old node and re-provision the machine.
+
+(2) Ignition produces the machine specified or no machine at all
+Ignition does what it needs to make the system match the state described in the Ignition configuration.
If for any reason Ignition cannot deliver the exact machine that the configuration asked for, Ignition prevents the machine from booting successfully. For example, if the user wanted to fetch the document hosted at **** and write it to disk, Ignition would prevent the machine from booting if it were unable to resolve the given URL. + +(3) Ignition configurations are declarative +Ignition configurations describe the state of a system. Ignition configurations do not list a series of steps that Ignition should take. +Ignition configurations do not allow users to provide arbitrary logic (including scripts for Ignition to run). Users describe which file systems must exist, which files must be created, which users must exist, and more. Any further customization must use systemd services, created by Ignition. + +(4) Ignition configurations should not be written by hand +Ignition configurations were designed to be human readable, but difficult to write, to discourage users from attempting to write configs by hand. Use Butane, or a similar tool, to generate Ignition configurations. + +## Afterburn + +Afterburn is a one-shot agent for cloud-like platforms which interacts with provider-specific metadata endpoints. It is typically used in conjunction with Ignition. + +Afterburn comprises several modules which may run at different times during the lifecycle of an instance. Depending on the specific platform, the following services may run in the initramfs on first boot: + +- setting local hostname + +- injecting network command-line arguments + +The following features are conditionally available on some platforms as systemd service units: + +- installing public SSH keys for local system users + +- retrieving attributes from instance metadata + +- checking in to the provider in order to report a successful boot or instance provisioning diff --git a/docs/en/docs/nestos/nestos/figures/figure1.png b/docs/en/docs/nestos/nestos/figures/figure1.png new file mode 100644 index 0000000000000000000000000000000000000000..b4eb9017ed202e854c076802492d8561942dfc88 Binary files /dev/null and b/docs/en/docs/nestos/nestos/figures/figure1.png differ diff --git a/docs/en/docs/nestos/nestos/figures/figure2.png b/docs/en/docs/nestos/nestos/figures/figure2.png new file mode 100644 index 0000000000000000000000000000000000000000..90049769c04e2bd494533da1613e38a5199da3d7 Binary files /dev/null and b/docs/en/docs/nestos/nestos/figures/figure2.png differ diff --git a/docs/en/docs/nestos/nestos/nestos_for_container.md b/docs/en/docs/nestos/nestos/nestos_for_container.md new file mode 100644 index 0000000000000000000000000000000000000000..3be61b81c1b29933d96a4df8e7d0ba292018e05c --- /dev/null +++ b/docs/en/docs/nestos/nestos/nestos_for_container.md @@ -0,0 +1,985 @@ +# NestOS for Container User Guide + +## 1. Introduction to NestOS + +### 1.1 Overview + +NestOS, developed by KylinSoft and incubated in the openEuler community, is a cloud-native OS designed for modern infrastructure. It incorporates advanced technologies like rpm-ostree support and Ignition configuration, featuring a dual-root file system with mutual backup and atomic update capabilities. The system also includes the nestos-assembler tool for streamlined integration and building. Optimized for Kubernetes and OpenStack platforms, NestOS minimizes container runtime overhead, enabling efficient cluster formation and secure operation of large-scale containerized workloads. 
+
+This guide provides a comprehensive walkthrough of NestOS, covering its building, installation, deployment, and usage. It aims to help users maximize the system benefits for rapid and efficient configuration and deployment.
+
+### 1.2 Application Scenarios and Advantages
+
+NestOS serves as an ideal foundation for cloud environments centered around containerized applications. It resolves challenges such as fragmented operation and maintenance (O&M) practices and redundant platform development, which arise from the decoupling of container and orchestration technologies from the underlying infrastructure. By ensuring alignment between application services and the base OS, NestOS delivers consistent and streamlined O&M.
+
+![figure1](./figures/figure1.png)
+
+## 2. Environment Preparation
+
+### 2.1 Build Environment Requirements
+
+#### 2.1.1 Requirements for Building the nestos-assembler Tool
+
+- Use openEuler for optimal results.
+- Ensure at least 5 GB of available drive space.
+
+#### 2.1.2 Requirements for Building NestOS
+
+| Category | Requirements |
+| :----------: | :---------------------: |
+| CPU | 4 vCPUs |
+| Memory | 4 GB |
+| Drive | Available space > 10 GB |
+| Architecture | x86_64 or AArch64 |
+| Others | Support for KVM |
+
+### 2.2 Deployment Configuration Requirements
+
+| Category | Recommended Configuration | Minimum Configuration |
+| :----------: | :-----------------------: | :-------------------: |
+| CPU | > 4 vCPUs | 1 vCPU |
+| Memory | > 4 GB | 512 MB |
+| Drive | > 20 GB | 10 GB |
+| Architecture | x86_64 or AArch64 | / |
+
+## 3. Quick Start
+
+### 3.1 Quick Build
+
+(1) Obtain the nestos-assembler container image.
+
+You are advised to use the openEuler-based base image. For additional details, see [Section 6.1](#61-nestos-assembler-container-image-creation).
+
+```shell
+docker pull hub.oepkgs.net/nestos/nestos-assembler:24.03-LTS.20240903.0-aarch64
+```
+
+(2) Create a script named `nosa`, save it to `/usr/local/bin`, and make it executable (a minimal installation sketch follows at the end of this section).
+
+```shell
+#!/bin/bash
+
+sudo docker run --rm -it --security-opt label=disable --privileged --user=root \
+    -v ${PWD}:/srv/ --device /dev/kvm --device /dev/fuse --network=host \
+    --tmpfs /tmp -v /var/tmp:/var/tmp -v /root/.ssh/:/root/.ssh/ -v /etc/pki/ca-trust/:/etc/pki/ca-trust/ \
+    ${COREOS_ASSEMBLER_CONFIG_GIT:+-v $COREOS_ASSEMBLER_CONFIG_GIT:/srv/src/config/:ro} \
+    ${COREOS_ASSEMBLER_GIT:+-v $COREOS_ASSEMBLER_GIT/src/:/usr/lib/coreos-assembler/:ro} \
+    ${COREOS_ASSEMBLER_CONTAINER_RUNTIME_ARGS} \
+    ${COREOS_ASSEMBLER_CONTAINER:-nestos-assembler:your_tag} "$@"
+```
+
+Note: Replace the value of `COREOS_ASSEMBLER_CONTAINER` with the actual nestos-assembler container image in your environment.
+
+(3) Obtain nestos-config.
+
+Create a build directory (for example, **nestos-build**) and, inside it, use `nosa init` to initialize the build workspace and pull the build configuration:
+
+```shell
+nosa init https://gitee.com/openeuler/nestos-config
+```
+
+(4) Adjust build configurations.
+
+nestos-config provides default build configurations, so no additional steps are required. For customization, refer to [Section 5](#5-build-configuration-nestos-config).
+
+(5) Build NestOS images.
+
+```shell
+# Pull build configurations and update the cache.
+nosa fetch
+# Generate the root file system, qcow2, and OCI images.
+nosa build
+# Generate live ISO and PXE images.
+nosa buildextend-metal
+nosa buildextend-metal4k
+nosa buildextend-live
+```
+
+For detailed build and deployment steps, refer to [Section 6](#6-build-process).
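+
+As referenced in step (2), a minimal installation sketch, assuming the script text was saved as a file named `nosa` in the current directory:
+
+```shell
+# Install the wrapper script to /usr/local/bin and make it executable.
+sudo install -m 0755 nosa /usr/local/bin/nosa
+# Subsequent `nosa <command>` invocations then run inside the nestos-assembler container.
+```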
+
+### 3.2 Quick Deployment
+
+Using the NestOS ISO image as an example, boot into the live environment and execute the following command to complete the installation by following the wizard:
+
+```shell
+sudo installnestos
+```
+
+For alternative deployment methods, see [Section 8](#8-deployment-process).
+
+## 4. Default Configuration
+
+| Item | Default Configuration |
+| :-------------------------: | :----------------------------------------------: |
+| Docker service | Disabled by default, requires manual activation. |
+| SSH service security policy | Supports only key-based login by default. |
+
+## 5. Build Configuration: nestos-config
+
+### 5.1 Obtaining Configuration
+
+The repository for nestos-config is located at <https://gitee.com/openeuler/nestos-config>.
+
+### 5.2 Directory Structure Explanation
+
+| Directory/File | Description |
+| :---------------: | :------------------------------------: |
+| live/* | Boot configuration for live ISO builds |
+| overlay.d/* | Custom file configurations |
+| tests/* | User-defined test case configurations |
+| *.repo | Repository configurations |
+| .yaml, manifests/ | Main build configurations |
+
+### 5.3 Key Files
+
+#### 5.3.1 .repo Files
+
+The .repo files in the configuration directory specify the software repositories used for building NestOS.
+
+#### 5.3.2 YAML Configuration Files
+
+The YAML files in the configuration directory provide various configurations for NestOS builds. For details, refer to [Section 5.4](#54-key-fields).
+
+### 5.4 Key Fields
+
+| Field | Purpose |
+| :------------------------------------------ | --------------------------------------------------------------------------------------------------------------------------------- |
+| packages-aarch64, packages-x86_64, packages | Scope of software package integration |
+| exclude-packages | Blocklist for software package integration |
+| remove-from-packages | Files/folders to remove from specified packages |
+| remove-files | Files/folders to remove |
+| extra-kargs | Additional kernel boot parameters |
+| initramfs-args | Initramfs build parameters |
+| postprocess | Post-build scripts for the file system |
+| default-target | Default target, such as **multi-user.target** |
+| rojig.name, releasever | Image-related information (name and version) |
+| lockfile-repos | List of repository names available for builds, which must match the repository names in the .repo files described in [Section 5.3.1](#531-repo-files) |
+
+### 5.5 Configurable Items
+
+#### 5.5.1 Repository Configuration
+
+(1) Edit the .repo file in the configuration directory and set its content to the desired software repositories.
+
+```shell
+$ vim nestos-pool.repo
+[repo_name_1]
+name = xxx
+baseurl = https://ip.address/1
+enabled = 1
+
+[repo_name_2]
+name = xxx
+baseurl = https://ip.address/2
+enabled = 1
+```
+
+(2) Modify the `lockfile-repos` field in the YAML configuration file to include the corresponding repository names.
+
+Note: The repository name is the content inside `[]` in the .repo file, not the `name` field.
+
+```shell
+$ vim manifests/rpmlist.yaml
+# Modify the `lockfile-repos` field as follows:
+lockfile-repos:
+- repo_name_1
+- repo_name_2
+```
+
+#### 5.5.2 Software Package Customization
+
+Modify the `packages`, `packages-aarch64`, and `packages-x86_64` fields to add or remove software packages.
+
+For example, adding `nano` to the `packages` field ensures that the system includes `nano` after installation.
+
+```shell
+$ vim manifests/rpmlist.yaml
+packages:
+- bootupd
+...
+- authselect
+- nano
+...
+packages-aarch64:
+- grub2-efi-aa64
+packages-x86_64:
+- microcode_ctl
+- grub2-efi-x64
+```
+
+#### 5.5.3 Image Name and Version Customization
+
+Modify the `releasever` and `rojig.name` fields in the YAML file to control the image version and name.
+
+```shell
+$ vim manifest.yaml
+
+releasever: "1.0"
+rojig:
+  license: MIT
+  name: nestos
+  summary: NestOS stable
+```
+
+With the above configuration, the built image name follows the format **nestos-1.0.$(date "+%Y%m%d").$build_num.$type**, where **build_num** is the build count and **type** is the type suffix.
+
+#### 5.5.4 Image Release Information Customization
+
+Normally, release information is provided by the integrated release package (for example, `openeuler-release`). However, you can rewrite the **/etc/os-release** file by adding a **postprocess** script.
+
+```shell
+$ vim manifests/system-configuration.yaml
+# Add the following content to postprocess. If the content already exists, simply modify the corresponding release information.
+postprocess:
+  - |
+    #!/usr/bin/env bash
+    set -xeuo pipefail
+    export OSTREE_VERSION="$(tail -1 /etc/os-release)"
+    date_now=$(date "+%Y%m%d")
+    echo -e 'NAME="openEuler NestOS"\nVERSION="24.03-LTS"\nID="openeuler"\nVERSION_ID="24.03-LTS"\nPRETTY_NAME="NestOS"\nANSI_COLOR="0;31"\nBUILDID="'${date_now}'"\nVARIANT="NestOS"\nVARIANT_ID="nestos"\n' > /usr/lib/os-release
+    echo -e $OSTREE_VERSION >> /usr/lib/os-release
+    cp -f /usr/lib/os-release /etc/os-release
+```
+
+#### 5.5.5 Custom File Creation
+
+Add or modify custom files in the **overlay.d** directory to customize the image content.
+
+```shell
+mkdir -p overlay.d/15nestos/etc/test
+echo "This is a test message !" > overlay.d/15nestos/etc/test/test.txt
+```
+
+Build the image with the above configuration. After the image boots, the content of the corresponding file in the system matches the custom content added above.
+
+```shell
+[root@nosa-devsh ~]# cat /etc/test/test.txt
+This is a test message !
+```
+
+## 6. Build Process
+
+NestOS employs a containerized method to bundle the build toolchain into a comprehensive container image called nestos-assembler.
+
+NestOS enables users to create the nestos-assembler container image, simplifying the process of building various NestOS image formats in any Linux distribution environment, such as within existing CI/CD pipelines. Additionally, users can manage, debug, and automate testing of build artifacts using this image.
+
+### 6.1 nestos-assembler Container Image Creation
+
+#### 6.1.1 Prerequisites
+
+1. Prepare the base container image.
+
+    The nestos-assembler container image must be based on a base image that supports the Yum or DNF package manager. Although it can be created from any distribution base image, using an openEuler base image is recommended to reduce software compatibility issues.
+
+2. Install required software packages.
+
+    Install Docker, the essential dependency:
+
+    ```shell
+    dnf install -y docker
+    ```
+
+3. Clone the nestos-assembler source code repository.
+
+```shell
+git clone --depth=1 --single-branch https://gitee.com/openeuler/nestos-assembler.git
+```
+
+#### 6.1.2 Building the nestos-assembler Container Image
+
+Using the openEuler container image as the base, build the image with the following command:
+
+```shell
+cd nestos-assembler/
+docker build -f Dockerfile . -t nestos-assembler:your_tag
+```
+
+### 6.2 nestos-assembler Container Image Usage
+
+#### 6.2.1 Prerequisites
+
+1. Prepare the nestos-assembler container image.
+ + Once the nestos-assembler container image is built following [Section 6.1](#61-nestos-assembler-container-image-creation), it can be managed and distributed via a privately hosted container image registry. Ensure the correct version of the nestos-assembler container image is pulled before initiating the NestOS build. + +2. Create the nosa script. + + To streamline user operations, you can write a `nosa` command script. This is particularly useful as the NestOS build process involves multiple calls to the nestos-assembler container image for executing various commands and configuring numerous parameters. For quick build details, see [Section 3.1](#31-quick-build). + +#### 6.2.2 Usage Instructions + +nestos-assembler commands + +| Command | Description | +| :-------------------: | :-------------------------------------------------------------------------------------: | +| init | Initialize the build environment and configuration. See [Section 6.3](#63-build-environment-preparation) for details. | +| fetch | Fetch the latest software packages to the local cache based on the build configuration. | +| build | Build the ostree commit, which is the core command for building NestOS. | +| run | Directly start a QEMU instance, using the latest build version by default. | +| prune | Clean up historical build versions, retaining the latest three versions by default. | +| clean | Delete all build artifacts. Use the `--all` parameter to also clean the local cache. | +| list | List the versions and artifacts present in the current build environment. | +| build-fast | Quickly build a new version based on the previous build record. | +| push-container | Push the container image artifact to the container image registry. | +| buildextend-live | Build ISO artifacts and PXE images that support the live environment. | +| buildextend-metal | Build raw artifacts for bare metal. | +| buildextend-metal4k | Build raw artifacts for bare metal in native 4K mode. | +| buildextend-openstack | Build QCOW2 artifacts for the OpenStack platform. | +| buildextend-qemu | Build QCOW2 artifacts for QEMU. | +| basearch | Retrieve the current architecture information. | +| compress | Compress artifacts. | +| kola | Automated testing framework | +| kola-run | A wrapper for automated testing that outputs summarized results | +| runc | Mount the current build root file system in a container. | +| tag | Manage build project tags. | +| virt-install | Create an instance for the specified build version. | +| meta | Manage build project metadata. | +| shell | Enter the nestos-assembler container image. | + +### 6.3 Build Environment Preparation + +The NestOS build environment requires a dedicated empty folder as the working directory, supporting multiple builds while preserving and managing historical versions. Before setting up the build environment, ensure the build configuration is prepared (see [Section 5](#5-build-configuration-nestos-config)). + +You are advised to maintain a separate build configuration for each independent build environment. If you plan to build NestOS for various purposes, maintain multiple build configurations and their corresponding directories. This approach allows independent evolution of configurations and clearer version management. 
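+
+A minimal sketch of this layout (the directory names are illustrative), with each workspace initialized from its own configuration source:
+
+```shell
+# One independent workspace per build purpose.
+mkdir -p ~/nestos-builds/general ~/nestos-builds/k8s
+# Initialize each workspace from its own build configuration
+# (for example, different branches or forks of nestos-config).
+cd ~/nestos-builds/general
+nosa init https://gitee.com/openeuler/nestos-config
+```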
+
+#### 6.3.1 Initializing the Build Environment
+
+Navigate to the target working directory and run the following command to initialize the build environment:
+
+```shell
+nosa init https://gitee.com/openeuler/nestos-config
+```
+
+Initialization is only required for the first build. Subsequent builds can reuse the same environment unless significant changes are made to the build configuration.
+
+#### 6.3.2 Build Environment Structure
+
+After initialization, the following folders are created in the working directory:
+
+**builds**: stores build artifacts and metadata. The **latest** subdirectory is a symbolic link to the most recent build version.
+
+**cache**: contains cached data pulled from software sources and package lists specified in the build configuration. Historical NestOS ostree repositories are also stored here.
+
+**overrides**: used to place files or RPM packages that should be added to the rootfs of the final artifact during the build process.
+
+**src**: holds the build configuration, including nestos-config-related content.
+
+**tmp**: used during builds and automated testing. In case of errors, you can inspect VM CLI outputs, journal logs, and other debugging information here.
+
+### 6.4 Build Steps
+
+The primary steps and reference commands for building NestOS are outlined below.
+
+![figure2](./figures/figure2.png)
+
+#### 6.4.1 Initial Build
+
+For the initial build, the build environment must be initialized. Refer to [Section 6.3](#63-build-environment-preparation) for detailed instructions.
+
+For subsequent builds, the existing build environment can be reused. Use `nosa list` to check the current versions and corresponding artifacts in the build environment.
+
+#### 6.4.2 Updating Build Configuration and Cache
+
+After initializing the build environment, run the following command to update the build configuration and cache:
+
+```shell
+nosa fetch
+```
+
+This step validates the build configuration and pulls software packages from the configured sources to the local cache. When the build configuration changes or you want to update to the latest software versions, repeat this step. Otherwise, the build may fail or produce unexpected results.
+
+If significant changes are made to the build configuration and you want to clear the local cache and re-fetch, use:
+
+```shell
+nosa clean --all
+```
+
+#### 6.4.3 Building the Immutable Root File System
+
+The core of NestOS, an immutable OS, is its immutable root file system based on ostree technology. Run the following command to build the ostree file system:
+
+```shell
+nosa build
+```
+
+By default, the `build` command generates the ostree file system and an OCI archive. You can also include `qemu`, `metal`, or `metal4k` to simultaneously build the corresponding artifacts, equivalent to running `buildextend-qemu`, `buildextend-metal`, and `buildextend-metal4k` afterward.
+
+```shell
+nosa build qemu metal metal4k
+```
+
+To add custom files or RPM packages during the NestOS build, place them in the **rootfs/** or **rpm/** folders under the **overrides** directory before running the `build` command.
+
+#### 6.4.4 Building Various Artifacts
+
+After running the `build` command, you can use `buildextend` commands to build different types of artifacts. Details are as follows.
+
+- Building QCOW2 images:
+
+```shell
+nosa buildextend-qemu
+```
+
+- Building ISO images with a live environment or PXE boot components:
+
+```shell
+nosa buildextend-metal
+nosa buildextend-metal4k
+nosa buildextend-live
+```
+
+- Building QCOW2 images for the OpenStack environment:
+
+```shell
+nosa buildextend-openstack
+```
+
+- Building container images for container-based updates:
+
+When the `nosa build` command is executed, an OCI archive format image is also generated. This image can be pushed to a local or remote image registry directly.
+
+```shell
+nosa push-container [container-image-name]
+```
+
+The container image name must include the remote image registry address as a prefix, and the only `:` allowed is the one before the tag. If no `:` is detected, the command generates a tag in the format `{latest_build}-{arch}`. Example:
+
+```shell
+nosa push-container registry.example.com/nestos:1.0.20240903.0-x86_64
+```
+
+This command supports the following options:
+
+`--authfile`: specifies the authentication file for logging into the remote image registry.
+
+`--insecure`: bypasses SSL/TLS verification for self-signed certificates.
+
+`--transport`: specifies the target image push protocol. The default is `docker`. Supported options:
+
+- `containers-storage`: pushes to the local storage directory of container engines like Podman and CRI-O.
+- `dir`: pushes to a specified local directory.
+- `docker`: pushes to a private or remote container image registry using the Docker API.
+- `docker-archive`: exports an archive file for use with `docker load`.
+- `docker-daemon`: pushes to the local storage directory of the Docker container engine.
+
+### 6.5 Artifacts Acquisition
+
+Once the build process is complete, the artifacts are stored in the following directory within the build environment:
+
+```text
+builds/{version}/{arch}/
+```
+
+For convenience, if you are only interested in the latest build version or are using CI/CD, a **latest** directory symbolic link points to the most recent version directory:
+
+```text
+builds/latest/{arch}/
+```
+
+To reduce the size of the artifacts for easier transfer, you can compress them using the following command:
+
+```shell
+nosa compress
+```
+
+Note that compression removes the original files, which may disable some debugging commands. To restore the original files, use the decompression command:
+
+```shell
+nosa uncompress
+```
+
+### 6.6 Build Environment Maintenance
+
+While maintaining a NestOS build environment, you may need to perform the following housekeeping tasks. The commands below are recommended for them.
+
+#### 6.6.1 Cleaning Up Historical or Invalid Build Versions to Free Drive Space
+
+To clean up historical build versions, run:
+
+```shell
+nosa prune
+```
+
+To delete all artifacts in the current build environment, run:
+
+```shell
+nosa clean
+```
+
+If the build configuration has changed software repositories or historical caches are no longer needed, you can completely clear the current build environment cache:
+
+```shell
+nosa clean --all
+```
+
+#### 6.6.2 Temporarily Running a Build Version Instance for Debugging or Verification
+
+```shell
+nosa run
+```
+
+Use `--qemu-image` or `--qemu-iso` to specify the boot image address. For additional parameters, refer to `nosa run --help`.
+
+Once the instance starts, the build environment directory is mounted to **/var/mnt/workdir**, allowing access to the build environment.
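+
+For instance, a sketch of booting a specific artifact rather than the latest build (the qcow2 path below is illustrative):
+
+```shell
+# Boot a temporary QEMU instance from an explicitly chosen build artifact.
+nosa run --qemu-image builds/latest/x86_64/nestos-qemu.x86_64.qcow2
+```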
+
+#### 6.6.3 Running Automated Tests
+
+```shell
+nosa kola run
+```
+
+This command runs predefined test cases. You can also append a specific test case name to execute it individually.
+
+```shell
+nosa kola testiso
+```
+
+This command performs installation and deployment tests for ISO or PXE live environments, acting as a smoke test for the build process.
+
+#### 6.6.4 Debugging and Verifying nestos-assembler
+
+```shell
+nosa shell
+```
+
+This command launches a shell environment within the build toolchain container, enabling you to verify the functionality of the build toolchain environment.
+
+## 7. Deployment Configuration
+
+### 7.1 Introduction
+
+Before you deploy NestOS, it is essential to understand and prepare the necessary configurations. NestOS offers flexible configuration options through Ignition files, which can be managed using Butane. This simplifies automated deployment and environment setup for users.
+
+This section provides a detailed overview of Butane functionality and usage, along with configuration examples for various scenarios. These configurations will help you quickly set up and run NestOS, ensuring system security and reliability while meeting application needs. Additionally, we will explore how to customize images by pre-integrating Ignition files, enabling efficient configuration and deployment for specific use cases.
+
+### 7.2 Introduction to Butane
+
+Butane is a tool that converts human-readable YAML configuration files into NestOS Ignition files. It simplifies the process of writing complex configurations by allowing users to create configuration files in a more readable format, which are then converted into JSON format suitable for NestOS.
+
+NestOS has adapted Butane by adding support for the `nestos` variant and configuration specification version `v1.0.0`, corresponding to the Ignition configuration specification `v3.3.0`. This ensures configuration stability and compatibility.
+
+### 7.3 Butane Usage
+
+To install the Butane package, use the following command:
+
+```shell
+dnf install butane
+```
+
+Edit **example.yaml** and execute the following command to convert it into an Ignition file **example.ign**. The process of writing YAML files will be explained in detail later:
+
+```shell
+butane example.yaml -o example.ign -p
+```
+
+### 7.4 Supported Functional Scenarios
+
+The following configuration examples (**example.yaml**) briefly describe the main functional scenarios and advanced usage methods supported by NestOS.
+
+#### 7.4.1 Configuring Users, Groups, Passwords, and SSH Keys
+
+```YAML
+variant: nestos
+version: 1.0.0
+passwd:
+  users:
+    - name: nest
+      ssh_authorized_keys:
+        - ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDHn2eh...
+    - name: jlebon
+      groups:
+        - wheel
+      ssh_authorized_keys:
+        - ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQDC5QFS...
+        - ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIIveEaMRW...
+    - name: miabbott
+      groups:
+        - docker
+        - wheel
+      password_hash: $y$j9T$aUmgEDoFIDPhGxEe2FUjc/$C5A...
+      ssh_authorized_keys:
+        - ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQDTey7R...
+
+#### 7.4.2 File Operations: Configuring Network Interfaces
+
+```YAML
+variant: nestos
+version: 1.0.0
+storage:
+  files:
+    - path: /etc/NetworkManager/system-connections/ens2.nmconnection
+      mode: 0600
+      contents:
+        inline: |
+          [connection]
+          id=ens2
+          type=ethernet
+          interface-name=ens2
+          [ipv4]
+          address1=10.10.10.10/24,10.10.10.1
+          dns=8.8.8.8;
+          dns-search=
+          may-fail=false
+          method=manual
+```
+
+#### 7.4.3 Creating Directories, Files, and Symbolic Links with Permissions
+
+```YAML
+variant: nestos
+version: 1.0.0
+storage:
+  directories:
+    - path: /opt/tools
+      overwrite: true
+  files:
+    - path: /var/helloworld
+      overwrite: true
+      contents:
+        inline: Hello, world!
+      mode: 0644
+      user:
+        name: dnsmasq
+      group:
+        name: dnsmasq
+    - path: /opt/tools/transmogrifier
+      overwrite: true
+      contents:
+        source: https://mytools.example.com/path/to/archive.gz
+        compression: gzip
+        verification:
+          hash: sha512-00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
+      mode: 0555
+  links:
+    - path: /usr/local/bin/transmogrifier
+      overwrite: true
+      target: /opt/tools/transmogrifier
+      hard: false
+```
+
+#### 7.4.4 Writing systemd Services: Starting and Stopping Containers
+
+```YAML
+variant: nestos
+version: 1.0.0
+systemd:
+  units:
+    - name: hello.service
+      enabled: true
+      contents: |
+        [Unit]
+        Description=MyApp
+        After=network-online.target
+        Wants=network-online.target
+
+        [Service]
+        TimeoutStartSec=0
+        ExecStartPre=-/bin/podman kill busybox1
+        ExecStartPre=-/bin/podman rm busybox1
+        ExecStartPre=/bin/podman pull busybox
+        ExecStart=/bin/podman run --name busybox1 busybox /bin/sh -c "trap 'exit 0' INT TERM; while true; do echo Hello World; sleep 1; done"
+
+        [Install]
+        WantedBy=multi-user.target
+```
+
+### 7.5 Pre-Integration of Ignition Files
+
+The NestOS build toolchain enables users to customize images based on specific use cases and requirements. After creating the image, nestos-installer offers various features for customizing image deployment and application, such as pre-integrating Ignition files, pre-allocating installation locations, and modifying kernel parameters. The main functionalities are introduced below.
+
+#### 7.5.1 Pre-Integration of Ignition Files into ISO Images
+
+Prepare the NestOS ISO image locally and install the nestos-installer package. Edit **example.yaml** and use the Butane tool to convert it into an Ignition file. In this example, we configure a simple username and password (the password must be provided as a hash; the hash below corresponds to `qwer1234`), as shown here:
+
+```YAML
+variant: nestos
+version: 1.0.0
+passwd:
+  users:
+    - name: root
+      password_hash: "$1$root$CPjzNGH.NqmQ7rh26EeXv1"
+```
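+
+One possible way to produce such a hash is the `openssl passwd` command; the `-1` option selects the MD5-crypt (`$1$`) format used above. Treat this as a sketch: stronger algorithms (for example, `-6` for SHA-512) are preferable where your environment supports them:
+
+```shell
+openssl passwd -1 -salt root qwer1234
+```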
+
+After converting the YAML file into an Ignition file, execute the following command to embed the Ignition file and specify the target drive location. Replace `xxx.iso` with the local NestOS ISO image:
+
+```shell
+nestos-installer iso customize --dest-device /dev/sda --dest-ignition example.ign xxx.iso
+```
+
+When installing using the ISO image with the embedded Ignition file, NestOS will automatically read the Ignition file and install it to the target drive. Once the progress bar reaches 100%, the system will automatically boot into the installed NestOS environment. Users can log in using the username and password configured in the Ignition file.
+
+#### 7.5.2 Pre-Integration of Ignition Files into PXE Images
+
+Prepare the NestOS PXE image locally. See [Section 6.5](#65-artifacts-acquisition) for details on obtaining the components. The remaining steps are the same as above.
+
+To simplify the process for users, nestos-installer also supports extracting PXE components from an ISO image. Execute the following command, replacing `xxx.iso` with the local NestOS ISO image:
+
+```shell
+nestos-installer iso extract pxe xxx.iso
+```
+
+This will generate the following output files:
+
+```text
+xxx-initrd.img
+xxx-rootfs.img
+xxx-vmlinuz
+```
+
+Execute the following command to pre-integrate the Ignition file and specify the target drive location:
+
+```shell
+nestos-installer pxe customize --dest-device /dev/sda --dest-ignition example.ign xxx-initrd.img --output custom-initrd.img
+```
+
+When following the PXE installation method for NestOS, use `custom-initrd.img` in place of `xxx-initrd.img`. After booting, NestOS will automatically read the Ignition file and install the OS to the target drive. Once the progress bar reaches 100%, the system will automatically boot into the installed NestOS environment. Users can log in using the username and password configured in the Ignition file.
+
+## 8. Deployment Process
+
+### 8.1 Introduction
+
+NestOS supports multiple deployment platforms and common deployment methods, currently focusing on QCOW2, ISO, and PXE. Compared to general-purpose OS deployments, the main difference lies in how custom deployment configurations are passed in the form of Ignition files. The following sections introduce these methods in detail.
+
+### 8.2 Installation Using QCOW2 Images
+
+#### 8.2.1 Creating a QCOW2 Instance with QEMU
+
+Prepare the NestOS QCOW2 image and the corresponding Ignition file (see [Section 7](#7-deployment-configuration) for details). Execute the following commands in the terminal:
+
+```shell
+IGNITION_CONFIG="/path/to/example.ign"
+IMAGE="/path/to/image.qcow2"
+IGNITION_DEVICE_ARG="-fw_cfg name=opt/com.coreos/config,file=${IGNITION_CONFIG}"
+
+qemu-img create -f qcow2 -F qcow2 -b ${IMAGE} my-nestos-vm.qcow2
+```
+
+For the AArch64 environment, execute the following command:
+
+```shell
+qemu-kvm -m 2048 -M virt -cpu host -nographic -drive if=virtio,file=my-nestos-vm.qcow2 ${IGNITION_DEVICE_ARG} -nic user,model=virtio,hostfwd=tcp::2222-:22 -bios /usr/share/edk2/aarch64/QEMU_EFI-pflash.raw
+```
+
+For the x86_64 environment, execute the following command:
+
+```shell
+qemu-kvm -m 2048 -M pc -cpu host -nographic -drive if=virtio,file=my-nestos-vm.qcow2 ${IGNITION_DEVICE_ARG} -nic user,model=virtio,hostfwd=tcp::2222-:22
+```
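+
+Because both commands above forward host port 2222 to the guest's SSH port, you can then log in to the running instance from the host. The user name here assumes the `nest` user from the earlier Butane example in [Section 7.4.1](#741-configuring-users-groups-passwords-and-ssh-keys):
+
+```shell
+ssh -p 2222 nest@localhost
+```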
+
+#### 8.2.2 Creating a QCOW2 Instance with virt-install
+
+Assuming the libvirt service is running normally and the network uses the default subnet bound to the `virbr0` bridge, you can follow these steps to create a NestOS instance.
+
+Prepare the NestOS QCOW2 image and the corresponding Ignition file (see [Section 7](#7-deployment-configuration) for details). Execute the following commands in the terminal:
+
+```shell
+IGNITION_CONFIG="/path/to/example.ign"
+IMAGE="/path/to/image.qcow2"
+VM_NAME="nestos"
+VCPUS="4"
+RAM_MB="4096"
+DISK_GB="10"
+IGNITION_DEVICE_ARG=(--qemu-commandline="-fw_cfg name=opt/com.coreos/config,file=${IGNITION_CONFIG}")
+```
+
+**Note: When using virt-install, the QCOW2 image and Ignition file must be specified with absolute paths.**
+
+Execute the following command to create the instance:
+
+```shell
+virt-install --connect="qemu:///system" --name="${VM_NAME}" --vcpus="${VCPUS}" --memory="${RAM_MB}" --os-variant="kylin-hostos10.0" --import --graphics=none --disk="size=${DISK_GB},backing_store=${IMAGE}" --network bridge=virbr0 "${IGNITION_DEVICE_ARG[@]}"
+```
+
+### 8.3 Installation Using ISO Images
+
+Prepare the NestOS ISO image and boot it. The first boot of the NestOS ISO image defaults to the live environment, a volatile environment that runs entirely in memory.
+
+#### 8.3.1 Installing the OS to the Target Drive Using the nestos-installer Wizard Script
+
+1. In the NestOS live environment, follow the printed instructions upon first entry. Enter the following command to automatically generate a simple Ignition file and proceed with the installation and reboot:
+
+    ```shell
+    sudo installnestos
+    ```
+
+2. Follow the terminal prompts to enter the username and password.
+
+3. Select the target drive installation location. Press **Enter** to use the default option **/dev/sda**.
+
+4. After completing the above steps, nestos-installer will begin installing NestOS to the target drive based on the provided configuration. Once the progress bar reaches 100%, the system will automatically reboot.
+
+5. After rebooting, the system will automatically enter NestOS. Press **Enter** at the GRUB menu or wait 5 seconds to boot the system. Log in using the previously configured username and password. The installation is now complete.
+
+#### 8.3.2 Manually Installing the OS to the Target Drive Using the nestos-installer Command
+
+1. Prepare the Ignition file **example.ign** (see [Section 7](#7-deployment-configuration) for details).
+
+2. Follow the printed instructions upon first entry into the NestOS live environment. Enter the following command to begin the installation:
+
+    ```shell
+    sudo nestos-installer install /dev/sda --ignition-file example.ign
+    ```
+
+    If network access is available, the Ignition file can also be retrieved via a URL (a simple way to serve the file is shown after these steps), for example:
+
+    ```shell
+    sudo nestos-installer install /dev/sda --ignition-url http://www.example.com/example.ign
+    ```
+
+3. After executing the above command, nestos-installer will begin installing NestOS to the target drive based on the provided configuration. Once the progress bar reaches 100%, the system will automatically reboot.
+
+4. After rebooting, the system will automatically enter NestOS. Press **Enter** at the GRUB menu or wait 5 seconds to boot the system. Log in using the previously configured username and password. The installation is now complete.
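+
+As mentioned in step 2, the URL variant requires the Ignition file to be reachable over the network. A minimal sketch for serving it during installation, assuming Python 3 is available on the serving host (the file would then be reachable at `http://<host>:8000/example.ign`):
+
+```shell
+cd /path/to/ignition && python3 -m http.server 8000
+```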
+
+### 8.4 PXE Deployment
+
+The PXE installation components for NestOS include the kernel, **initramfs.img**, and **rootfs.img**. These components are generated using the `nosa buildextend-live` command (see [Section 6](#6-build-process) for details). A consolidated configuration example follows the steps below.
+
+1. Use the PXELINUX `KERNEL` command to specify the kernel. A simple example is as follows:
+
+    ```shell
+    KERNEL nestos-live-kernel-x86_64
+    ```
+
+2. Use the PXELINUX `APPEND` command to specify the initrd and rootfs. A simple example is as follows:
+
+    ```shell
+    APPEND initrd=nestos-live-initramfs.x86_64.img,nestos-live-rootfs.x86_64.img
+    ```
+
+    **Note: If you have pre-integrated the Ignition file into the PXE components as described in [Section 7.5](#75-pre-integration-of-ignition-files), you only need to substitute the customized initrd here and can skip the subsequent steps.**
+
+3. Specify the installation location. For example, to use **/dev/sda**, append the following to the `APPEND` command:
+
+    ```ini
+    nestos.inst.install_dev=/dev/sda
+    ```
+
+4. Specify the Ignition file, which must be retrieved over the network. Append the corresponding URL to the `APPEND` command, for example:
+
+    ```ini
+    nestos.inst.ignition_url=http://www.example.com/example.ign
+    ```
+
+5. After booting, NestOS will automatically read the Ignition file and install the OS to the target drive. Once the progress bar reaches 100%, the system will automatically boot into the installed NestOS environment. Users can log in using the username and password configured in the Ignition file.
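+
+Putting the fragments above together, a complete PXELINUX menu entry might look like the following sketch; the file names, target drive, and URL are the illustrative values used in the steps:
+
+```text
+DEFAULT nestos
+LABEL nestos
+    KERNEL nestos-live-kernel-x86_64
+    APPEND initrd=nestos-live-initramfs.x86_64.img,nestos-live-rootfs.x86_64.img nestos.inst.install_dev=/dev/sda nestos.inst.ignition_url=http://www.example.com/example.ign
+```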
+
+## 9. Basic Usage
+
+### 9.1 Introduction
+
+NestOS employs an OS packaging solution based on ostree and rpm-ostree technologies, setting critical directories to read-only mode to prevent accidental modifications to core system files and configurations. Leveraging the overlay layering concept, it allows users to manage RPM packages on top of the base ostree filesystem without disrupting the initial system architecture. Additionally, it supports building OCI-format images, enabling OS version switching at the granularity of images.
+
+### 9.2 SSH Connection
+
+For security reasons, NestOS does not support password-based SSH login by default and only allows key-based authentication. This design enhances system security by mitigating the risks associated with password leaks and weak-password attacks.
+
+The method for establishing an SSH connection using keys in NestOS is the same as in openEuler. If users need to temporarily enable password-based login, they can follow these steps:
+
+1. Edit the additional configuration file of the SSH service:
+
+    ```shell
+    vi /etc/ssh/sshd_config.d/40-disable-passwords.conf
+    ```
+
+2. Modify the default `PasswordAuthentication` setting as follows:
+
+    ```shell
+    PasswordAuthentication yes
+    ```
+
+3. Restart the sshd service as shown below to temporarily enable password-based SSH login.
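+
+For reference, the restart mentioned in step 3 is the standard systemd operation:
+
+```shell
+sudo systemctl restart sshd
+```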
+
+### 9.3 RPM Package Installation
+
+**Note: An immutable OS discourages installing software packages in the runtime environment. This method is provided only for temporary debugging scenarios. For service requirements that necessitate changes to the integrated package list, rebuild the OS by updating the build configuration.**
+
+NestOS does not support conventional package managers like Yum or DNF. Instead, it uses rpm-ostree to manage system updates and package installations. rpm-ostree combines the advantages of image-based and package-based management, allowing users to layer and manage RPM packages on top of the base OS without disrupting its initial structure. Use the following command to install an RPM package:
+
+```shell
+rpm-ostree install [package-name]
+```
+
+After installation, reboot the OS. The bootloader menu will display two branches, with the first branch being the latest by default:
+
+```shell
+systemctl reboot
+```
+
+After rebooting, check the system package layering status to confirm that the package has been installed in the current version:
+
+```shell
+rpm-ostree status -v
+```
+
+### 9.4 Version Rollback
+
+After an update or RPM package installation, the previous version of the OS deployment remains on the drive. If the update causes issues, users can manually roll back to a previous version using rpm-ostree. The specific process is as follows:
+
+#### 9.4.1 Temporary Rollback
+
+To temporarily roll back to a previous OS deployment, hold down the **Shift** key during system boot. When the bootloader menu appears, select the corresponding branch (by default, there are two branches; choose the other one). Before doing this, you can use the following command to view the two existing version branches in the current environment:
+
+```shell
+rpm-ostree status
+```
+
+#### 9.4.2 Permanent Rollback
+
+To permanently roll back to a previous OS deployment, run the following command in the current version. This operation sets the previous version's deployment as the default deployment.
+
+```shell
+rpm-ostree rollback
+```
+
+Reboot to apply the changes. The default deployment option in the bootloader menu will have changed, eliminating the need for manual switching:
+
+```shell
+systemctl reboot
+```
+
+## 10. Container Image-Based Updates
+
+### 10.1 Use Case Description
+
+NestOS, as a container cloud base OS based on the immutable infrastructure concept, distributes and updates the file system as a whole. This approach brings significant convenience in terms of operations and security. However, in real-world production environments, the officially released versions often fail to meet user requirements. For example, users may want to integrate self-maintained critical foundational components by default, or further trim software packages to reduce system runtime overhead in specific scenarios. Therefore, compared with general-purpose OSs, users have stronger and more frequent customization needs for NestOS.
+
+nestos-assembler can provide OCI-compliant container images. Beyond simply packaging and distributing the root file system, leveraging the ostree native container feature allows container cloud users to utilize a familiar technology stack. By writing a single Containerfile (Dockerfile), users can easily build customized images for integrating custom components or for subsequent upgrade and maintenance tasks.
+
+### 10.2 Usage
+
+#### 10.2.1 Customizing Images
+
+- Basic steps
+
+1. Refer to [Section 6](#6-build-process) to build the NestOS container image, and use the `nosa push-container` command to push it to a public or private container image registry.
+2. Write a Containerfile (Dockerfile) as shown in the following example:
+
+    ```dockerfile
+    FROM registry.example.com/nestos:1.0.20240603.0-x86_64
+
+    # Perform custom build steps, such as installing software or copying self-built components.
+    # Here, installing the strace package is used as an example.
+    RUN rpm-ostree install strace && rm -rf /var/cache && ostree container commit
+    ```
+
+3. Run `docker build` or integrate it into CI/CD to build the corresponding image.
+
+    > Note:
+    > 1. NestOS does not have the yum/dnf package manager. If software packages need to be installed, use the `rpm-ostree install` command to install local RPM packages or software provided in the repository.
+    > 2. If needed, you can also modify the software source configurations in the `/etc/yum.repos.d/` directory.
+    > 3. Each meaningful build command should end with `&& ostree container commit`. From the perspective of container image build best practices, it is recommended to minimize the number of `RUN` layers.
+    > 4. During the build process, contents outside the **/usr** and **/etc** directories are cleaned up. Therefore, customization via container images is primarily suitable for software package or component updates. Do not use this method for system maintenance or configuration changes (for example, adding users with `useradd`).
+
+#### 10.2.2 Deploying/Upgrading Images
+
+Assume that the container image built in the above steps is pushed as `registry.example.com/nestos:1.0.20240903.0-x86_64`.
+
+In an environment where NestOS is already deployed, execute the following command:
+
+```shell
+sudo rpm-ostree rebase ostree-unverified-registry:registry.example.com/nestos:1.0.20240903.0-x86_64
+```
+
+Reboot to complete the deployment of the customized version.
+
+After a deployment from a container image, the update source of `rpm-ostree upgrade` switches by default from the ostree update source to the container image address. You can then publish updated container images under the same tag; `rpm-ostree upgrade` checks whether the remote image has been updated and, if changes are detected, pulls the latest image and completes the deployment.
diff --git a/docs/en/docs/nestos/nestos/overview.md b/docs/en/docs/nestos/nestos/overview.md
new file mode 100644
index 0000000000000000000000000000000000000000..f5402dbccf922eb34b0d7e5f8d9bd57ffeee0472
--- /dev/null
+++ b/docs/en/docs/nestos/nestos/overview.md
@@ -0,0 +1,3 @@
+# NestOS User Guide
+
+This document describes the installation, deployment, features, and usage of the NestOS cloud-based operating system. NestOS runs common container engines, such as Docker, iSula, Podman, and CRI-O, and integrates technologies such as Ignition, rpm-ostree, OCI runtime, and SELinux. NestOS adopts the design principles of dual-system partitions, container technology, and cluster architecture, and can adapt to multiple basic running environments in cloud scenarios. In addition, NestOS optimizes Kubernetes and provides support for platforms such as OpenStack and oVirt for IaaS ecosystem construction. For PaaS ecosystem construction, platforms such as OKD and Rancher are supported for easy deployment of clusters and secure running of large-scale containerized workloads. To download NestOS images, see [NestOS](https://nestos.openeuler.org/).
diff --git a/docs/zh/docs/hybrid_deployment/rubik/figures/icon-note.gif b/docs/zh/docs/hybrid_deployment/rubik/figures/icon-note.gif index eebb838c275843dfaf5b402c550e64eb887c1035..6314297e45c1de184204098efd4814d6dc8b1cda 100644 Binary files a/docs/zh/docs/hybrid_deployment/rubik/figures/icon-note.gif and b/docs/zh/docs/hybrid_deployment/rubik/figures/icon-note.gif differ diff --git a/docs/zh/docs/hybrid_deployment/rubik/figures/iocost.PNG b/docs/zh/docs/hybrid_deployment/rubik/figures/iocost.PNG deleted file mode 100644 index c3eae863ad15d79d7e36c44799fc4dc946e8ca26..0000000000000000000000000000000000000000 Binary files a/docs/zh/docs/hybrid_deployment/rubik/figures/iocost.PNG and /dev/null differ diff --git a/docs/zh/docs/kmesh/kmesh/installation_and_deployment.md b/docs/zh/docs/kmesh/kmesh/installation_and_deployment.md index 1d0c92527965a37d48357f0f7936bceb2a6184f9..c44c9564ee891019600403aa2d5764c1c2dfa07a 100644 --- a/docs/zh/docs/kmesh/kmesh/installation_and_deployment.md +++ b/docs/zh/docs/kmesh/kmesh/installation_and_deployment.md @@ -10,7 +10,7 @@ ## 环境准备 -* 安装openEuler系统,安装方法参考 《[安装指南](https://docs.openeuler.org/zh/docs/24.03_LTS_SP2/server/installation_upgrade/installation/installation-on-servers.html)》。 +* 安装openEuler系统,安装方法参考 《[安装指南](https://docs.openeuler.openatom.cn/zh/docs/24.03_LTS_SP2/server/installation_upgrade/installation/installation_on_servers.html)》。 * 安装Kmesh需要使用root权限。 ## 安装Kmesh diff --git a/docs/zh/docs/kubeos/kubeos/installation_and_deployment.md b/docs/zh/docs/kubeos/kubeos/installation_and_deployment.md index da390b756061cb052efd97d209d0e070c9a6fe9c..84bc7835f157e3f78097b789f4be407ed312e2bf 100644 --- a/docs/zh/docs/kubeos/kubeos/installation_and_deployment.md +++ b/docs/zh/docs/kubeos/kubeos/installation_and_deployment.md @@ -14,7 +14,7 @@ ### 环境准备 -* 安装 openEuler 系统,安装方法参考《[安装指南](https://docs.openeuler.org/zh/docs/24.03_LTS_SP2/server/installation_upgrade/installation/installation-on-servers.html)》 +* 安装 openEuler 系统,安装方法参考《[安装指南](https://docs.openeuler.openatom.cn/zh/docs/24.03_LTS_SP2/server/installation_upgrade/installation/installation_on_servers.html)》 * 安装 qemu-img,bc,parted,tar,yum,docker,dosfstools diff --git a/docs/zh/docs/nestos/nestos/figures/figure1.png b/docs/zh/docs/nestos/nestos/figures/figure1.png index 388a038273b5bb428c2f961d4241754fc2edc982..b4eb9017ed202e854c076802492d8561942dfc88 100644 Binary files a/docs/zh/docs/nestos/nestos/figures/figure1.png and b/docs/zh/docs/nestos/nestos/figures/figure1.png differ diff --git a/docs/zh/docs/nestos/nestos/figures/figure2.png b/docs/zh/docs/nestos/nestos/figures/figure2.png index 8e1bbf940a46234f75229611f706cf9ffd54b73a..90049769c04e2bd494533da1613e38a5199da3d7 100644 Binary files a/docs/zh/docs/nestos/nestos/figures/figure2.png and b/docs/zh/docs/nestos/nestos/figures/figure2.png differ