diff --git a/.gitignore b/.gitignore
index 13ae2767ce2844c65a5d716a9763cecc36326501..cd9c9df66dd229886eab6961adebadc003e8b61b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -38,10 +38,11 @@ test/st/functions/pkg
 test/st/functions/**/*.so
 test/st/functions/**/*.zip
 test/st/functions/**/*.xml
-functionsystem
-datasystem
-metrics
+# A leading "/" anchors a pattern to the directory of this .gitignore; a "./" prefix never matches.
+/functionsystem
+/datasystem
+/metrics
 #thirdparty
-thirdparty
+/thirdparty
 go/pkg/mod/
 cmake-build-debug
diff --git a/build.sh b/build.sh
index 67767cfa46fd1d533a114cf316192b919e8e212d..8e1c14fd48166287da77a261abd6dfadd6ee56c8 100644
--- a/build.sh
+++ b/build.sh
@@ -12,18 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+
 set -e
 source /etc/profile.d/*.sh
diff --git a/deploy/k8s/build/functionsystem/build.sh b/deploy/k8s/build/functionsystem/build.sh
new file mode 100644
index 0000000000000000000000000000000000000000..6490d076dd64a9f9c9c60381f23d6da74053fa77
--- /dev/null
+++ b/deploy/k8s/build/functionsystem/build.sh
@@ -0,0 +1,66 @@
+#!/bin/bash
+# Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+cur=$(dirname "$(readlink -f "$0")")
+
+set -e
+BUILD_LOCAL="${BUILD_LOCAL:-"true"}"
+
+SN_ID=1002
+SNUSER_ID=1003
+
+# common.sh takes $1 as the app name and defines get_version / write_buildInfo / write_buildImage.
+source "${cur}/common.sh" "$1"
+# Three directory levels above this script (the separator after ${cur} is required).
+project_dir="${cur}/../../../"
+read -r var_pipeline_version var_image_version < <(get_version)
+
+echo "build with image_version='${var_image_version}' pipeline_version='${var_pipeline_version}'"
+
+if [[ $app == "aaa.bbb" ]]; then
+    cd "${cur}/../"
+    bash compile.sh
+    cd -
+fi
+
+var_image_prefix=""
+var_service_image_prefix=""
+
+var_base_image_full=""  # NOTE(review): empty placeholder; docker pull/build below need a real image reference
+var_service_image_full="${var_service_image_prefix}/${var_service_image_name}:${var_image_version}"
+
+docker pull "${var_base_image_full}"
+echo "pulled base image base_image_full='${var_base_image_full}'"
+
+# Build the image
+if [ "${BUILD_LOCAL}" != "true" ]; then
+    # ":?" aborts instead of expanding to "rm -rf /yuanrong" when codeRootDir is unset or empty.
+    rm -rf "${codeRootDir:?}/yuanrong"
+    tar -xf "${codeRootDir}/yuanrong_${architecture}"/*Software_EulerOS_yuanrong.tar.gz -C "${cur}"
+fi
+cd "${cur}"
+
+docker build --no-cache --build-arg docker_image="${var_base_image_full}" \
+    --build-arg app="${app}" \
+    --build-arg SNUSER_ID="${SNUSER_ID}" \
+    --build-arg SN_ID="${SN_ID}" \
+    -t "${var_service_image_full}" -f "${cur}/dockerfile/Dockerfile-${app}" .
+
+echo "show docker images after build"
+# Best effort: failures of the three commands below do not abort the script.
+docker images "${var_service_image_full}" || true
+docker inspect "${var_service_image_full}" || true
+docker push "${var_service_image_full}" || true
+
+write_buildInfo "${var_pipeline_version}" "${var_service_image_full}"
+write_buildImage "${cur}" "${var_base_image_full}"
diff --git a/deploy/k8s/build/functionsystem/common.sh b/deploy/k8s/build/functionsystem/common.sh
new file mode 100644
index 0000000000000000000000000000000000000000..65514f031a00e4197c51b06e8576f47a811f16de
--- /dev/null
+++ b/deploy/k8s/build/functionsystem/common.sh
@@ -0,0 +1,104 @@
+#!/bin/bash
+# Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+cur=$(dirname "$(readlink -f "$0")")
+
+# Name of the app to build; also used as the service image name.
+app=$1
+
+microServiceName=YuanRong
+
+var_service_image_name=${app}
+echo "build app is ${app}"
+echo "build microServiceName is ${microServiceName}"
+
+arch_x86="x86"
+arch_aarch64="aarch64"
+
+# Derive "x86" or "arm" from `uname -a`, falling back to `arch`.
+result=$(uname -a)
+
+if [[ $result =~ $arch_x86 ]]; then
+    architecture="x86"
+elif [[ $result =~ $arch_aarch64 ]]; then
+    architecture="arm"
+else
+    result=$(arch)
+    if [[ $result =~ $arch_x86 ]]; then
+        architecture="x86"
+    elif [[ $result =~ $arch_aarch64 ]]; then
+        architecture="arm"
+    fi
+fi
+echo "build architecture is $architecture"
+
+# Print the contents of ../version.txt with everything up to the first "=" removed.
+read_version_file() {
+    local filename="${cur}/../version.txt"
+    version=$(cat "${filename}")
+    version_number=${version#*=}
+    echo "${version_number}"
+}
+
+datetime=$(date +"%Y%m%d%H%M%S")
+# Print pipelineStartTime when it is set, otherwise the timestamp taken when this file was sourced.
+get_timestamp() {
+    if [[ ${pipelineStartTime} == "" ]]; then
+        echo "${datetime}"
+    else
+        echo "${pipelineStartTime}"
+    fi
+}
+
+# Print "<pipeline_version> <image_version>" on one line.
+get_version() {
+    local pipeline_version="" image_version=""
+    # Declared separately from the assignments so `local` does not mask the exit status.
+    local timestamp version_timestamp
+    timestamp=$(get_timestamp)
+    version_timestamp="$(read_version_file).${timestamp}"
+
+    if [[ ${BuildType} == "" ]]; then
+        pipeline_version="${version_timestamp}"
+        image_version="${version_timestamp}"
+    else
+        # dailybuild case
+        pipeline_version="${BuildType}.${timestamp}"
+        image_version="${BuildType}"
+    fi
+
+    echo "${pipeline_version} ${image_version}"
+}
+
+write_buildInfo() {
+    # Write the build metadata to ${WORKSPACE}/buildInfo.properties.
+    # Rule 1: the two versions may differ.
+    #   buildVersion ($1) is consumed by the pipeline
+    #   imageName ($2) is used when pushing the image
+    #   so the versions written in these two fields need not match.
+    # Rule 2: if imageInfos='' is written, build.yml must be paired with the xxx_batch_xx.yml template.
+    cat <<EOF >"${WORKSPACE}/buildInfo.properties"
+buildVersion=$1
+imageName=$2
+orgName=yuanrong
+scopeName=yuanrong
+EOF
+    echo "show file ${WORKSPACE}/buildInfo.properties"
+    cat "${WORKSPACE}/buildInfo.properties"
+}
+
+write_buildImage() {
+    # Record the Dockerfile ($1 is its parent directory) and base image ($2) so the image can be traced.
+    cat <<EOF >"${WORKSPACE}/buildImage.properties"
+dockerfile=$1/dockerfile/Dockerfile-${app}
+docker_var=$2
+EOF
+    echo "show file ${WORKSPACE}/buildImage.properties"
+    cat "${WORKSPACE}/buildImage.properties"
+}
diff --git a/deploy/k8s/build/functionsystem/dockerfile/Dockerfile-function-agent b/deploy/k8s/build/functionsystem/dockerfile/Dockerfile-function-agent
new file mode 100644
index 0000000000000000000000000000000000000000..30dd8602e26dee5d10ba840aea4706c2088b4a6a
--- /dev/null
+++ b/deploy/k8s/build/functionsystem/dockerfile/Dockerfile-function-agent
@@ -0,0 +1,41 @@
+# Base image and the uid/gid of the "sn" account are supplied as build arguments.
+ARG docker_image
+FROM ${docker_image}
+ARG SN_ID
+ENV USER_UID=${SN_ID} \
+    USER_NAME=sn
+
+ENV GROUP_ID=${USER_UID} \
+    GROUP_NAME=${USER_NAME} \
+    HOME=/home/${USER_NAME} \
+    FUNCTIONSYSTEM_OUTPUT_DIR=openyuanrong
+
+ENV FUNCTIONAGENT=${HOME}/bin/function_agent
+
+RUN mkdir -p ${HOME}/bin/ && \
+    mkdir -p ${HOME}/lib/ && \
+    groupadd -g ${GROUP_ID} ${GROUP_NAME} && \
+    useradd -K MAIL_DIR=/dev/null -u ${USER_UID} -g ${GROUP_ID} -s /sbin/nologin ${USER_NAME} && \
+    chown -R ${USER_UID}:${GROUP_ID} ${HOME}
+
+RUN chmod u+s $(which ip) && \
+    chmod u+s $(which ping) && \
+    chmod u+s $(which iptables) && \
+    chmod u+s $(which ipset)
+
+# install operator binary
+# NOTE(review): the scripts in this change are added under "entrypoints/", while these paths use "entrypoint/" - confirm the package layout.
+COPY --chown=sn:sn ${FUNCTIONSYSTEM_OUTPUT_DIR}/functionsystem/bin/function_agent ${HOME}/bin
+COPY --chown=sn:sn ${FUNCTIONSYSTEM_OUTPUT_DIR}/deploy/k8s/build/functionsystem/entrypoint/entrypoint-function-agent ${HOME}/bin
+COPY --chown=sn:sn ${FUNCTIONSYSTEM_OUTPUT_DIR}/deploy/k8s/build/functionsystem/entrypoint/health-check ${HOME}/bin
+# Libraries come from the same "functionsystem" tree as the binary above.
+COPY --chown=sn:sn ${FUNCTIONSYSTEM_OUTPUT_DIR}/functionsystem/lib/* ${HOME}/lib/
+
+RUN chmod -R 700 ${HOME} && \
+    chmod -R 500 ${HOME}/bin && \
+    chmod -R 500 ${HOME}/lib && \
+    setcap 'cap_net_admin,cap_net_raw+ep-i' ${FUNCTIONAGENT} && \
+    echo "${HOME}/lib" >> /etc/ld.so.conf.d/snlib.conf && ldconfig
+
+USER ${USER_UID}
+
+WORKDIR ${HOME}
+
+# ENTRYPOINT ["/home/sn/bin/entrypoint"]
diff --git a/deploy/k8s/build/functionsystem/dockerfile/Dockerfile-function-agent-init b/deploy/k8s/build/functionsystem/dockerfile/Dockerfile-function-agent-init
new file mode 100644
index
0000000000000000000000000000000000000000..a42cd751fa8650856f9e6ef54c80c2ceebb91bef
--- /dev/null
+++ b/deploy/k8s/build/functionsystem/dockerfile/Dockerfile-function-agent-init
@@ -0,0 +1,36 @@
+# Creates the sn and snuser accounts, makes iptables/ipset setuid and capability-enabled,
+# and installs entrypoint-function-agent-init into ${HOME}/bin.
+ARG docker_image
+FROM ${docker_image}
+
+ARG SNUSER_ID
+ARG SN_ID
+
+ENV USER_UID=${SN_ID} \
+    USER_NAME=sn \
+    SNUSER_NAME=snuser \
+    SNUSER_ID=${SNUSER_ID} \
+    FUNCTIONSYSTEM_OUTPUT_DIR=openyuanrong
+
+ENV GROUP_ID=${USER_UID} \
+    GROUP_NAME=${USER_NAME} \
+    SNUSER_GROUP_NAME=${SNUSER_NAME} \
+    HOME=/home/${USER_NAME} \
+    SNUSER_GID=${SNUSER_ID}
+
+RUN chmod u+s $(which iptables) && \
+    chmod u+s $(which ipset)
+
+RUN mkdir -p ${HOME}/bin/ && \
+    groupadd -g ${SNUSER_GID} ${SNUSER_GROUP_NAME} && \
+    useradd -K MAIL_DIR=/dev/null -u ${SNUSER_ID} -g ${SNUSER_GID} -s /sbin/nologin ${SNUSER_NAME} && \
+    groupadd -g ${GROUP_ID} ${GROUP_NAME} && \
+    useradd -K MAIL_DIR=/dev/null -u ${USER_UID} -g ${GROUP_ID} -s /sbin/nologin ${USER_NAME} && \
+    chown -R ${USER_UID}:${GROUP_ID} ${HOME}
+
+# Capabilities are set on the resolved binaries (readlink -f), not on the symlinks that `which` may return.
+RUN setcap 'cap_net_admin,cap_net_raw,cap_sys_admin+ep' $(readlink -f $(which iptables)) && \
+    setcap 'cap_net_admin+ep' $(readlink -f $(which ipset))
+
+COPY --chown=sn:sn ${FUNCTIONSYSTEM_OUTPUT_DIR}/deploy/k8s/build/functionsystem/entrypoint/entrypoint-function-agent-init ${HOME}/bin
+
+RUN chmod -R 500 ${HOME}/bin
+
+USER ${USER_UID}
diff --git a/deploy/k8s/build/functionsystem/dockerfile/Dockerfile-functionsystem b/deploy/k8s/build/functionsystem/dockerfile/Dockerfile-functionsystem
new file mode 100644
index 0000000000000000000000000000000000000000..707308f6c7df236da21789bd0aeaa55093b1abf1
--- /dev/null
+++ b/deploy/k8s/build/functionsystem/dockerfile/Dockerfile-functionsystem
@@ -0,0 +1,84 @@
+# Lays out function-master, iam-adaptor, function-proxy, scheduler, frontend and manager
+# in separate directories under /home/sn.
+ARG docker_image
+FROM ${docker_image}
+# NOTE(review): SNUSER_ID is declared but not referenced anywhere in this Dockerfile.
+ARG SNUSER_ID
+ARG SN_ID
+ENV USER_UID=${SN_ID} \
+    USER_NAME=sn \
+    OPT=/opt/yuanrong/app
+
+ENV GROUP_ID=${USER_UID} \
+    GROUP_NAME=${USER_NAME} \
+    HOME=/home/${USER_NAME} \
+    FUNCTIONSYSTEM_OUTPUT_DIR=openyuanrong
+
+ENV HOME_FUNCTION_MASTER=${HOME}/function-master \
+    HOME_IAM_ADAPTOR=${HOME}/iam-adaptor \
+    HOME_FUNCTION_PROXY=${HOME}/function-proxy \
+    HOME_SCHEDULER=${HOME}/scheduler \
+    HOME_FRONTEND=${HOME}/frontend \
+    HOME_MANAGER=${HOME}/manager
+
+ENV FUNCTION_PROXY=${HOME}/bin/function_proxy
+ENV FUNCTION_MASTER=${HOME_FUNCTION_MASTER}/bin/function_master
+ENV IAM_SERVER=${HOME_IAM_ADAPTOR}/bin/iam_server
+
+# The account is registered by appending to /etc/passwd and /etc/group directly (no useradd/groupadd here).
+RUN mkdir -p ${HOME} && \
+    echo "${USER_NAME}:x:${USER_UID}:${GROUP_ID}:${USER_NAME}:${HOME}:/bin/bash" >> /etc/passwd && \
+    echo "${GROUP_NAME}:x:${GROUP_ID}:" >> /etc/group
+
+RUN mkdir -p ${HOME}/bin/ && \
+    mkdir -p ${HOME}/lib/ && \
+    mkdir -p ${HOME}/bin/alias && \
+    mkdir -p ${HOME}/function-metas && \
+    mkdir -p ${HOME}/config && \
+    mkdir -p ${HOME}/iam-config && \
+    cp /etc/skel/.bashrc ${HOME}/.bashrc && \
+    chown sn:sn ${HOME}/.bashrc && \
+    chmod -R 700 ${HOME}/.bashrc
+
+RUN mkdir -p ${HOME_FUNCTION_MASTER} && \
+    mkdir -p ${HOME_FUNCTION_PROXY} && \
+    mkdir -p ${HOME_IAM_ADAPTOR} && \
+    mkdir -p ${HOME_SCHEDULER}&& \
+    mkdir -p ${HOME_FRONTEND}&& \
+    mkdir -p ${HOME_MANAGER}
+
+# Common lib and configuration files
+COPY --chown=sn:sn ${FUNCTIONSYSTEM_OUTPUT_DIR}/deploy/k8s/build/functionsystem/entrypoint/health-check ${HOME}/bin/
+COPY --chown=sn:sn ${FUNCTIONSYSTEM_OUTPUT_DIR}/pattern/pattern_faas/alias/control_plane_alias.sh ${HOME}/bin/alias/
+COPY --chown=sn:sn ${FUNCTIONSYSTEM_OUTPUT_DIR}/functionsystem/lib/* ${HOME}/lib/
+COPY --chown=sn:sn ${FUNCTIONSYSTEM_OUTPUT_DIR}/pattern/pattern_faas/executor-meta/* ${HOME}/function-metas/
+COPY --chown=sn:sn ${FUNCTIONSYSTEM_OUTPUT_DIR}/functionsystem/bin/function_proxy ${HOME}/bin/
+
+# Component function-master
+COPY --chown=sn:sn ${FUNCTIONSYSTEM_OUTPUT_DIR}/functionsystem/bin/function_master ${HOME_FUNCTION_MASTER}/bin/
+COPY --chown=sn:sn ${FUNCTIONSYSTEM_OUTPUT_DIR}/deploy/k8s/build/functionsystem/entrypoints/function-master/* ${HOME_FUNCTION_MASTER}/bin/
+
+# Component iam-adaptor
+COPY --chown=sn:sn ${FUNCTIONSYSTEM_OUTPUT_DIR}/functionsystem/bin/iam_server ${HOME_IAM_ADAPTOR}/bin/
+COPY --chown=sn:sn ${FUNCTIONSYSTEM_OUTPUT_DIR}/deploy/k8s/build/functionsystem/entrypoints/iam-server/* ${HOME_IAM_ADAPTOR}/bin/
+
+# Component function-proxy
+COPY --chown=sn:sn ${FUNCTIONSYSTEM_OUTPUT_DIR}/functionsystem/bin/function_proxy ${HOME_FUNCTION_PROXY}/bin/
+COPY --chown=sn:sn ${FUNCTIONSYSTEM_OUTPUT_DIR}/deploy/k8s/build/functionsystem/entrypoints/function-proxy/* ${HOME_FUNCTION_PROXY}/bin/
+
+# Component scheduler
+COPY --chown=sn:sn ${FUNCTIONSYSTEM_OUTPUT_DIR}/runtime/service/go/bin/* ${HOME_SCHEDULER}/bin/
+COPY --chown=sn:sn ${FUNCTIONSYSTEM_OUTPUT_DIR}/pattern/pattern_faas/faasscheduler/* ${HOME_SCHEDULER}/bin/
+COPY --chown=sn:sn ${FUNCTIONSYSTEM_OUTPUT_DIR}/deploy/k8s/build/functionsystem/entrypoints/scheduler/* ${HOME_SCHEDULER}/bin/
+
+# Component frontend
+COPY --chown=sn:sn ${FUNCTIONSYSTEM_OUTPUT_DIR}/runtime/service/go/bin/* ${HOME_FRONTEND}/bin/
+COPY --chown=sn:sn ${FUNCTIONSYSTEM_OUTPUT_DIR}/pattern/pattern_faas/faasfrontend/* ${HOME_FRONTEND}/bin/
+COPY --chown=sn:sn ${FUNCTIONSYSTEM_OUTPUT_DIR}/deploy/k8s/build/functionsystem/entrypoints/frontend/* ${HOME_FRONTEND}/bin/
+
+# Component manager
+COPY --chown=sn:sn ${FUNCTIONSYSTEM_OUTPUT_DIR}/runtime/service/go/bin/* ${HOME_MANAGER}/bin/
+COPY --chown=sn:sn ${FUNCTIONSYSTEM_OUTPUT_DIR}/pattern/pattern_faas/faasmanager/* ${HOME_MANAGER}/bin/
+COPY --chown=sn:sn ${FUNCTIONSYSTEM_OUTPUT_DIR}/deploy/k8s/build/functionsystem/entrypoints/manager/* ${HOME_MANAGER}/bin/
+
+# NOTE(review): this makes everything under ${HOME} world-writable - confirm that is intended.
+RUN chmod -R 777 ${HOME}
+
+USER ${USER_UID}
+
+WORKDIR /opt/yuanrong/logs
diff --git a/deploy/k8s/build/functionsystem/dockerfile/Dockerfile-runtime-manager b/deploy/k8s/build/functionsystem/dockerfile/Dockerfile-runtime-manager
new file mode 100644
index 0000000000000000000000000000000000000000..2994ab5160daf30c516e9757b817929e80e5fc52
--- /dev/null
+++ b/deploy/k8s/build/functionsystem/dockerfile/Dockerfile-runtime-manager
@@ -0,0 +1,130 @@
+ARG docker_image
+FROM ${docker_image}
+
+RUN if [ -f /etc/bashrc ]; then sed -i "s/umask .*/umask 0027/g" /etc/bashrc && source /etc/bashrc; fi
+
+# runtime env
+ENV USER_UID=1003 \
+    USER_NAME=snuser \
+    GROUP_ID=1003 \
+    GROUP_NAME=snuser \
+    HOME=/home/snuser \
+    PYTHON3_BIN_PATH=/opt/python3.9 \
+    RUNTIME_LANGUAGE=python3.9 \
+    JAVA8_BIN_PATH=/opt/function/runtime/java8/rtsp/jre \
+    JAVA11_BIN_PATH=/opt/function/runtime/java11/rtsp/jre \
+    JAVA17_BIN_PATH=/opt/function/runtime/java17/rtsp/jre \
+    JAVA21_BIN_PATH=/opt/function/runtime/java21/rtsp/jre \
+    FUNCTION_AGENT_UID=1002 \
+    FUNCTION_AGENT_NAME=sn \
+    FUNCTION_AGENT_GROUP_ID=1002 \
+    FUNCTION_AGENT_GROUP_NAME=sn \
+    FUNCTIONSYSTEM_OUTPUT_DIR=openyuanrong \
+    SNHOME=/home/sn \
+    RUNTIMEMANAGER=/home/sn/bin/runtime_manager
+
+ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/lib64 \
+    SNLIB=/home/snuser/snlib
+
+# the second digit is set to 5, which gives the custom UID and GID permission to run a new runtime process
+RUN mkdir -m 750 ${HOME} && \
+    mkdir -m 750 ${HOME}/.datasystem && \
+    mkdir -m 750 ${HOME}/datasystem && \
+    mkdir -m 550 ${HOME}/bin && \
+    mkdir -m 750 ${HOME}/runtime && \
+    # runtime cpp need 755 permission
+    mkdir -m 750 ${HOME}/.local && \
+    groupadd -g ${GROUP_ID} ${GROUP_NAME} && \
+    useradd -K MAIL_DIR=/dev/null -u ${USER_UID} -g ${GROUP_ID} -s /sbin/nologin ${USER_NAME} && \
+    chown -R ${USER_UID}:${GROUP_ID} ${HOME} && \
+    # runtime-manager
+    mkdir -m 750 ${SNHOME} && \
+    mkdir -m 750 -p ${SNHOME}/bin/alias && \
+    groupadd -g ${FUNCTION_AGENT_GROUP_ID} ${FUNCTION_AGENT_GROUP_NAME} && \
+    useradd -K MAIL_DIR=/dev/null -u ${FUNCTION_AGENT_UID} -g ${FUNCTION_AGENT_GROUP_ID} -s /sbin/nologin ${FUNCTION_AGENT_NAME} && \
+    usermod -aG ${GROUP_NAME} ${FUNCTION_AGENT_NAME} && \
+    chown -R ${FUNCTION_AGENT_UID}:${FUNCTION_AGENT_GROUP_ID} ${SNHOME} && \
+    cp /etc/skel/.bashrc ${HOME}/.bashrc && \
+    chown ${FUNCTION_AGENT_UID}:${FUNCTION_AGENT_GROUP_ID} ${HOME}/.bashrc && \
+    chmod -R 700 ${HOME}/.bashrc && \
+    cp -a ${HOME}/.bashrc ${SNHOME}/.bashrc && \
+    rm -rf /tmp/CFF && \
+    mkdir -m 750 ${HOME}/serve && \
+    # conda
+    if [ -d "/opt/conda/envs" ]; then \
+    mkdir -m 750 ${HOME}/.cache && \
+    mkdir -m 750 ${HOME}/.conda && \
+    chown ${FUNCTION_AGENT_UID}:${FUNCTION_AGENT_GROUP_ID} ${HOME}/.cache && \
+    chown ${FUNCTION_AGENT_UID}:${FUNCTION_AGENT_GROUP_ID} ${HOME}/.conda && \
+    chown ${FUNCTION_AGENT_UID}:${FUNCTION_AGENT_GROUP_ID} /opt/conda/envs; fi
+COPY --chown=snuser:snuser ${FUNCTIONSYSTEM_OUTPUT_DIR}/pattern/pattern_serve ${HOME}/serve/
+
+RUN yum install -y python3-pip.noarch
+USER ${USER_NAME}
+# USER_UID GROUP_ID 1003
+
+RUN echo "install python wheel package"; \
+    pip3.9 install -i http://mirrors.tools.huawei.com/pypi/simple --trusted-host=mirrors.tools.huawei.com/pypi/simple --user \
+    protobuf==4.25.5 \
+    cloudpickle==2.2.1 \
+    urllib3==1.26.5 \
+    msgpack==1.0.5 \
+    grpcio==1.41.0 \
+    redis==3.5.3 \
+    numpy==1.24.3 \
+    opentelemetry-api==1.14.0 \
+    opentelemetry-sdk==1.14.0 \
+    opentelemetry-exporter-otlp-proto-grpc==1.14.0
+# runtime
+# USER_UID GROUP_ID 1003
+COPY --chown=snuser:snuser ${FUNCTIONSYSTEM_OUTPUT_DIR}/datasystem/sdk/*.whl ${HOME}/datasystem/python/
+COPY --chown=snuser:snuser ${FUNCTIONSYSTEM_OUTPUT_DIR}/runtime/sdk/cpp/lib ${SNLIB}
+COPY --chown=snuser:snuser ${FUNCTIONSYSTEM_OUTPUT_DIR}/runtime/service ${HOME}/runtime
+
+RUN chmod 550 ${HOME}/serve; chown ${USER_UID}:${GROUP_ID} ${HOME}/serve && \
+    chown -R ${USER_UID}:${GROUP_ID} ${HOME}/serve && \
+    chmod 550 ${SNLIB};chown -R ${USER_UID}:${GROUP_ID} ${SNLIB} && \
+    chmod 550 ${HOME}/datasystem; chown ${USER_UID}:${GROUP_ID} ${HOME}/datasystem; chown -R ${USER_UID}:${GROUP_ID} ${HOME}/datasystem
+# for litebus.so
+COPY --chown=snuser:snuser ${FUNCTIONSYSTEM_OUTPUT_DIR}/functionsystem/lib/ ${SNLIB}/
+
+USER ${USER_NAME}
+# USER_UID GROUP_ID 1003
+
+RUN if [ -f ${HOME}/runtime/go/bin/goruntime ]; then \
+    chmod 550 ${HOME}/runtime/go/bin/goruntime; fi && \
+    if [ -f ${HOME}/runtime/cpp/bin/runtime ]; then \
+    chmod 550 ${HOME}/runtime/cpp/bin/runtime; fi && \
+    chmod -R 770 ${HOME}/.local && \
+    chmod -R 550 ${HOME}/datasystem && \
+    chmod -R 550 ${SNLIB} && \
+    chmod 750 ${HOME}/.datasystem
+
+USER root
+
+# runtime-manager
+COPY --chown=sn:sn "${FUNCTIONSYSTEM_OUTPUT_DIR}"/deploy/k8s/build/functionsystem/entrypoint/entrypoint-runtime-manager ${SNHOME}/bin/entrypoint-runtime-manager
+COPY --chown=sn:sn ${FUNCTIONSYSTEM_OUTPUT_DIR}/deploy/k8s/build/functionsystem/entrypoint/health-check ${SNHOME}/bin
+COPY --chown=sn:sn ${FUNCTIONSYSTEM_OUTPUT_DIR}/functionsystem/bin/runtime_manager ${SNHOME}/bin/runtime_manager
+
+# give runtime manager capabilities, tidy ${HOME}/serve and register the library paths (one RUN: every line but the last ends in "&& \")
+RUN setcap 'cap_dac_override,cap_sys_admin,cap_kill,cap_setgid,cap_setuid+ep' ${RUNTIMEMANAGER} && \
+    setcap 'cap_dac_override+ei' $(which du) && \
+    if [ -f ${HOME}/runtime/go/bin/goruntime ]; then \
+    setcap 'cap_dac_override+ep' ${HOME}/runtime/go/bin/goruntime; fi && \
+    echo "sn ALL=NOPASSWD:/bin/bash -c /usr/bin/dmesg -T | tail -100" >> /etc/sudoers && \
+    chmod -R 550 ${SNHOME}/bin && \
+    rm -rf ${HOME}/serve/*.whl && \
+    rm -rf ${HOME}/serve/Ascend*.run && \
+    chmod 550 ${HOME}/serve && \
+    chmod -R 550 ${HOME}/serve && \
+    chown ${USER_UID}:${GROUP_ID} ${HOME}/serve && \
+    chown -R ${USER_UID}:${GROUP_ID} ${HOME}/serve && \
+    echo "/home/snuser/snlib" >> /etc/ld.so.conf.d/snlib.conf && \
+    echo "/home/snuser/runtime/go/bin" >> /etc/ld.so.conf.d/snlib.conf && ldconfig
+
+USER ${FUNCTION_AGENT_UID}
+
+EXPOSE 8080
+
+WORKDIR ${SNHOME}
\ No newline at end of file
diff --git a/deploy/k8s/build/functionsystem/entrypoints/agent-init/bootstrap b/deploy/k8s/build/functionsystem/entrypoints/agent-init/bootstrap
new file mode 100644
index 0000000000000000000000000000000000000000..2647148f94b02e02d4c1f5f7b2c7ccfe74af1c72
--- /dev/null
+++ b/deploy/k8s/build/functionsystem/entrypoints/agent-init/bootstrap
@@ -0,0 +1,10 @@
+#!/bin/sh -e
+
+echo "start init"
+
+wget ftp:// -O /home/package.zip  # NOTE(review): the URL has no host or path; supply the real package location
+chmod 777 /opt/yuanrong/logs
+mkdir -p "/opt/yuanrong/logs/$RUN_POD_NAME"  # -p: do not fail when the directory already exists
+chmod 777 "/opt/yuanrong/logs/$RUN_POD_NAME"
+
+echo "end init"
\ No newline at end of file
diff --git a/deploy/k8s/build/functionsystem/entrypoints/entrypoint-function-agent b/deploy/k8s/build/functionsystem/entrypoints/entrypoint-function-agent
new file mode 100644
index 0000000000000000000000000000000000000000..089ef644f3a94aed81331ab89e7b730b42f766f3
--- /dev/null
+++ b/deploy/k8s/build/functionsystem/entrypoints/entrypoint-function-agent
@@ -0,0 +1,71 @@
+#!/bin/bash
+# Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -e
+
+if [ "$(whoami 2>/dev/null)" != "${USER_NAME}" ]; then  # compare the actual user name, not the literal word "whoami"
+    if [ -w /etc/passwd ]; then
+        echo "${USER_NAME}:x:$(id -u):$(id -g):${USER_NAME} user:${HOME}:/sbin/nologin" >> /etc/passwd
+    fi
+fi
+
+# prevent from creating files that have incorrect permission
+umask 0027
+
+export LD_LIBRARY_PATH=${HOME}/lib:${LD_LIBRARY_PATH}
+
+[ ! -d "${LOG_PATH}" ] && mkdir -p "${LOG_PATH}"
+
+FS_LOG_CONFIG="{\"filepath\": \"{{logConfigPath}}\",\"level\": \"{{logLevel}}\""
+if [ -z "${LOG_PATTERN}" ]; then
+    FS_LOG_CONFIG="${FS_LOG_CONFIG}, \"pattern\": \"\""
+else
+    FS_LOG_CONFIG="${FS_LOG_CONFIG}, \"pattern\": {{logPattern}}"
+fi
+FS_LOG_CONFIG="${FS_LOG_CONFIG}, \"compress\": {{logCompressEnable}}"
+FS_LOG_CONFIG="${FS_LOG_CONFIG}, \"rolling\": {\"maxsize\": {{logRollingMaxSize}},\"maxfiles\": {{logRollingMaxFiles}}}"
+FS_LOG_CONFIG="${FS_LOG_CONFIG}, \"async\": {\"logBufSecs\": {{logAsyncBufSecs}},\"maxQueueSize\": {{logAsyncMaxQueueSize}},\"threadCount\": {{logAsyncThreadCount}}}"
+FS_LOG_CONFIG="${FS_LOG_CONFIG}, \"alsologtostderr\": {{logAlsologtostderr}}}"
+
+FS_LOG_CONFIG="${FS_LOG_CONFIG//\{\{logConfigPath\}\}/$LOG_PATH}"
+FS_LOG_CONFIG="${FS_LOG_CONFIG//\{\{logLevel\}\}/$LOG_LEVEL}"
+FS_LOG_CONFIG="${FS_LOG_CONFIG//\{\{logPattern\}\}/$LOG_PATTERN}"
+FS_LOG_CONFIG="${FS_LOG_CONFIG//\{\{logCompressEnable\}\}/$LOG_COMPRESS_ENABLE}"
+FS_LOG_CONFIG="${FS_LOG_CONFIG//\{\{logRollingMaxSize\}\}/$LOG_ROLLING_MAXSIZE}"
+FS_LOG_CONFIG="${FS_LOG_CONFIG//\{\{logRollingMaxFiles\}\}/$LOG_ROLLING_MAXFILES}"
+FS_LOG_CONFIG="${FS_LOG_CONFIG//\{\{logAsyncBufSecs\}\}/$LOG_ASYNC_LOGBUFSECS}"
+FS_LOG_CONFIG="${FS_LOG_CONFIG//\{\{logAsyncMaxQueueSize\}\}/$LOG_ASYNC_MAXQUEUESIZE}"
+FS_LOG_CONFIG="${FS_LOG_CONFIG//\{\{logAsyncThreadCount\}\}/$LOG_ASYNC_THREADCOUNT}"
+FS_LOG_CONFIG="${FS_LOG_CONFIG//\{\{logAlsologtostderr\}\}/$LOG_ALSOLOGTOSTDERR}"
+
+exec "${FUNCTIONAGENT}" --ip="${POD_IP}" --node_id="${NODE_ID}" --local_node_id="${NODE_ID}" --log_config="${FS_LOG_CONFIG}" \
+    --local_scheduler_address="${HOST_IP}:${FSPROXY_PORT}" --agent_listen_port="${FUNCTION_AGENT_PORT}" \
+    --resource_path="${RESOURCE_PATH}" --access_key="${S3_ACCESS_KEY}" --secret_key="${S3_SECRET_KEY}" \
+    --s3_protocol="${S3_PROTOCOL}" --credential_type="${S3_CREDENTIAL_TYPE}" \
+    --s3_endpoint="${S3_ADDR}" --decrypt_algorithm="${DECRYPT_ALGORITHM}" --system_timeout="${SYSTEM_TIMEOUT}" \
+    --ssl_enable="${SSL_ENABLE}" --metrics_ssl_enable="${METRICS_SSL_ENABLE}" --ssl_base_path="${SSL_BASE_PATH}" \
+    --ssl_root_file="${SSL_ROOT_FILE}" --ssl_cert_file="${SSL_CERT_FILE}" --ssl_key_file="${SSL_KEY_FILE}" \
+    --ssl_pwd_file="${SSL_PWD_FILE}" --ssl_decrypt_tool="${SSL_DECRYPT_TOOL}" --agent_uid="${POD_NAME}" \
+    --scc_base_path="${SCC_BASE_PATH}" --scc_log_path="${SCC_LOG_PATH}" --etcd_ssl_base_path="${ETCD_SSL_BASE_PATH}" \
+    --enable_metrics="${ENABLE_METRICS}" --metrics_config="${METRICS_CONFIG}" --metrics_config_file="${METRICS_CONFIG_FILE}" \
+    --prometheus_pushgateway_port="${PROMETHEUS_PUSH_GATEWAY_PORT}" \
+    --prometheus_pushgateway_ip="${PROMETHEUS_PUSH_GATEWAY_IP}" \
+    --enable_trace="${ENABLE_TRACE}" --trace_config="${TRACE_CONFIG}" \
+    --file_count_max="${S3_DOWNLOAD_MAXFILECOUNT}" --zip_file_size_max_MB="${S3_DOWNLOAD_MAXZIPSIZE}" \
+    --unzip_file_size_max_MB="${S3_DOWNLOAD_MAXUNZIPSIZE}" --dir_depth_max="${S3_DOWNLOAD_MAXDIRDEPTH}" \
+    --scc_enable="${SCC_ENABLE}" --scc_algorithm="${SCC_ALGORITHM}" --scc_primary_file="${SCC_PRIMARY_FILE}" --scc_standby_file="${SCC_STANDBY_FILE}" \
+    --signature_validation="${ENABLE_SIGNATURE_VALIDATION}" --code_aging_time="${CODE_AGING_TIME}" \
+    --system_auth_mode="${SYSTEM_AUTH_MODE}" \
+    --custom_resources="${CUSTOM_RESOURCES}"
diff --git a/deploy/k8s/build/functionsystem/entrypoints/entrypoint-function-agent-init b/deploy/k8s/build/functionsystem/entrypoints/entrypoint-function-agent-init
new file mode 100644
index 0000000000000000000000000000000000000000..e1550fc48792984c5e10846c651c116033e975ea
--- /dev/null
+++ b/deploy/k8s/build/functionsystem/entrypoints/entrypoint-function-agent-init
@@ -0,0 +1,255 @@
+#!/bin/bash
+# Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Init entrypoint of the function agent: fixes ownership and permissions of
+# the log, cache, code, alarm and metrics directories and, when
+# ENABLE_IPV4_TENANT_ISOLATION is "true", installs the ipset/iptables rules
+# that restrict outbound traffic.
+
+set -e
+
+# Every path below is derived from these two names; abort instead of touching
+# paths such as "/home//log" when one of them is missing.
+: "${USER_NAME:?USER_NAME must be set}"
+: "${SNUSER_NAME:?SNUSER_NAME must be set}"
+
+user_home="/home/${USER_NAME}"
+snuser_home="/home/${SNUSER_NAME}"
+
+chown "${USER_NAME}:${USER_NAME}" "${user_home}/log"
+chmod 750 "${user_home}/log"
+chown "${USER_NAME}:${USER_NAME}" /dcache
+chmod 750 /dcache
+mkdir -p -m 750 "${snuser_home}/log/instances"
+mkdir -p -m 750 "${snuser_home}/log/exception"
+# "mkdir -m" does not change the mode of directories that already exist.
+chmod 750 "${snuser_home}/log"
+chmod 750 "${snuser_home}/log/instances"
+chown "${SNUSER_NAME}:${SNUSER_NAME}" "${snuser_home}/log"
+chown "${SNUSER_NAME}:${SNUSER_NAME}" "${snuser_home}/log/exception"
+chown "${USER_NAME}:${USER_NAME}" "${snuser_home}/log/instances"
+chown "${USER_NAME}:${USER_NAME}" /opt/function/code
+chmod 750 /opt/function/code
+mkdir -p "${snuser_home}/alarms"
+chmod -R 750 "${snuser_home}/alarms"
+chown -R "${SNUSER_NAME}:${USER_NAME}" "${snuser_home}/alarms"
+mkdir -p "${user_home}/metrics"
+chmod -R 750 "${user_home}/metrics"
+chown "${USER_NAME}:${USER_NAME}" "${user_home}/metrics"
+mkdir -p "${snuser_home}/metrics"
+chmod -R 750 "${snuser_home}/metrics"
+chown "${SNUSER_NAME}:${USER_NAME}" "${snuser_home}/metrics"
+mkdir -p -m 770 "${snuser_home}/var/log/"
+mkdir -p -m 770 "${snuser_home}/var/log/ascend-dmi"
+if [ -d "${snuser_home}/resource/scc" ]; then
+    mkdir -p -m 750 "${snuser_home}/secret"
+    cp -r "${snuser_home}/resource/scc" "${snuser_home}/secret/"
+    chown -R "${SNUSER_NAME}:${SNUSER_NAME}" "${snuser_home}/secret"
+fi
+
+# Print the IPv4 address(es) configured on eth0, without the prefix length.
+function get_pod_ip() {
+    # Note that "eth0" may not be the interface name used in all environments,
+    # so it may need to be adjusted based on different actual situation.
+    local pod_ip
+    pod_ip=$(ip addr show eth0 | awk '$1 == "inet" {split($2, cidr, "/"); print cidr[1]}')
+    echo "${pod_ip}"
+}
+
+# Append the rules that stop the pod from opening new connections to
+# destination $1: ICMP echo requests, UDP and TCP SYN packets are dropped,
+# any other traffic to that destination is accepted.
+function deny_new_outbound() {
+    local dest=$1
+    iptables -A OUTPUT -d "${dest}" -p icmp --icmp-type 8 -j DROP
+    iptables -A OUTPUT -d "${dest}" -p udp -j DROP
+    iptables -A OUTPUT -d "${dest}" -p tcp -m tcp --tcp-flags SYN SYN -m state --state NEW -j DROP
+    iptables -A OUTPUT -d "${dest}" -j ACCEPT
+}
+
+# Parse the whitelist in $1 and add every entry to the thirdparty ipsets.
+# Format: entries separated by ";", each "domain,ip,port/PROTO[,port/PROTO...]"
+# where ip may be "None" to request a DNS lookup of the domain and PROTO is
+# TCP or UDP.
+# NOTE(review): ip and port are passed to system() unvalidated; the whitelist
+# is assumed to be operator-controlled configuration.
+function parse_whitelist_and_set_ipset_rule() {
+    echo "$1" | awk -F'[;]' '
+    # Add the "port/PROTO" element parts[j] for ip to the matching ipset.
+    # Names after the wide gap in the parameter list are awk locals.
+    function process_port_protocol(parts, ip, j,    port_proto, port, proto)
+    {
+        if (parts[j] != "") {
+            split(parts[j], port_proto, "/")
+            port = port_proto[1]
+            proto = port_proto[2]
+            printf " Port: %s, Protocol: %s\n", port, proto
+            # Network isolation configuration
+            if (proto == "TCP") {
+                printf "ipset -exist add thirdparty-whitelist-tcp %s,%s\n", ip, port
+                system("ipset -exist add thirdparty-whitelist-tcp " ip "," port)
+            } else if (proto == "UDP") {
+                printf "ipset -exist add thirdparty-whitelist-udp %s,%s\n", ip, port
+                system("ipset -exist add thirdparty-whitelist-udp " ip "," port)
+            }
+        }
+    }
+    # Resolve domain with nslookup, trying max_attempts times with a doubling
+    # pause. Returns the first answer address, or "" when nothing resolved.
+    function resolve_domain(domain, max_attempts,    attempt, timeout, resolve_result, command)
+    {
+        printf "Lookup domain: %s\n", domain
+
+        # The name is interpolated into a shell command line below, so only
+        # plain host name characters are accepted.
+        if (domain !~ /^[A-Za-z0-9._-]+$/) {
+            print "invalid domain name, lookup skipped: " domain > "/dev/stderr"
+            return ""
+        }
+
+        attempt = 0
+        timeout = 1
+        resolve_result = ""
+        # The backslash keeps $2 away from the shell so that the inner awk
+        # prints the address column rather than the whole "Address: x" line.
+        command = sprintf("nslookup %s | grep Address | grep -v \"#53\" | awk \"{print \\$2}\"", domain)
+        while (attempt < max_attempts) {
+            printf "command: %s\n", command
+            command | getline resolve_result
+            close(command)
+
+            if (resolve_result != "") {
+                break
+            }
+
+            print "failure! retrying in after " timeout "s..." > "/dev/stderr"
+            system("sleep " timeout)
+            timeout *= 2
+            attempt++
+        }
+        if (resolve_result != "") {
+            printf "after dns resolve, ip: %s\n", resolve_result
+        }
+        return resolve_result
+    }
+    {
+        max_attempts = 1
+        for (i = 1; i <= NF; i++) {
+            if ($i == "") {
+                continue
+            }
+            n = split($i, parts, ",")
+            domain = parts[1]
+            ip = parts[2]
+            printf "Domain: %s, IP: %s\n", domain, ip
+            if (ip == "None") {
+                # Resolve the domain name to an IP address
+                ip = resolve_domain(domain, max_attempts)
+                if (ip == "") {
+                    printf "Domain resolved failed: %s\n", domain
+                    continue
+                }
+            }
+            # Process port/protocol pairs starting from the third element
+            for (j = 3; j <= n; j++) {
+                process_port_protocol(parts, ip, j)
+            }
+            print ""
+        }
+    }'
+}
+
+# iptables rules
+if [ "${ENABLE_IPV4_TENANT_ISOLATION}" == "true" ]; then
+    for tool in ipset nslookup; do
+        if ! command -v "${tool}" >/dev/null 2>&1; then
+            echo "Error: ${tool} command not found." 1>&2
+            exit 1
+        fi
+    done
+
+    # Flush all chains
+    iptables -F
+    # Delete all non-default chains
+    iptables -X
+    # Set default policies for each of the built-in chains
+    iptables -P INPUT ACCEPT
+    iptables -P FORWARD ACCEPT
+    iptables -P OUTPUT ACCEPT
+
+    # "-exist" makes creation idempotent and, unlike grepping the output of
+    # "ipset list", cannot be fooled by a set whose name contains this one.
+    ipset -exist create thirdparty-whitelist-tcp hash:ip,port
+    ipset -exist create thirdparty-whitelist-udp hash:ip,port
+    ipset -exist create tenant-podip-whitelist hash:ip
+
+    # 0. Infrastructure
+    DEFAULT_ROUTE_IP=$(ip route | awk '/default/ {print $3}')
+
+    # Container communication within POD
+    POD_IP=$(get_pod_ip)
+    echo "Debug: POD_IP: ${POD_IP}." 1>&2
+    if [ -z "${POD_IP}" ]; then
+        echo "Error: no IPv4 address found on eth0." 1>&2
+        exit 1
+    fi
+    ip route replace "${POD_IP}" via "${DEFAULT_ROUTE_IP}" dev eth0 onlink
+    iptables -A OUTPUT -d "${POD_IP}" -j ACCEPT
+
+    # 1. Communication with host
+    if [ -n "${HOST_IP}" ]; then
+        iptables -A OUTPUT -d "${HOST_IP}" -j ACCEPT
+    else
+        echo "Warn: HOST_IP environment variable is not set." 1>&2
+    fi
+
+    # 2. dns_servers
+    # Only real "nameserver" entries are used; commented-out lines are skipped.
+    while IFS= read -r dns_server; do
+        iptables -A OUTPUT -d "${dns_server}" -p udp --dport 53 -j ACCEPT
+        iptables -A OUTPUT -d "${dns_server}" -p tcp --dport 53 -j ACCEPT
+    done < <(awk '$1 == "nameserver" {print $2}' /etc/resolv.conf)
+
+    # 3. thirdparty-whitelist-tcp
+    # Inserted (-I) so that these ACCEPT rules sit ahead of the appended ones.
+    iptables -I OUTPUT -m set --match-set thirdparty-whitelist-tcp dst,dst -p tcp -j ACCEPT
+    iptables -I OUTPUT -m set --match-set thirdparty-whitelist-udp dst,dst -p udp -j ACCEPT
+
+    # tenant-podip-whitelist
+    iptables -I OUTPUT -m set --match-set tenant-podip-whitelist dst -p tcp -j ACCEPT
+    iptables -I OUTPUT -m set --match-set tenant-podip-whitelist dst -p udp -j ACCEPT
+
+    # 4. Parse and set the third-party whitelist
+    if [ -n "${THIRD_PARTY_WHITELIST}" ]; then
+        parse_whitelist_and_set_ipset_rule "${THIRD_PARTY_WHITELIST}"
+    fi
+
+    # 5. tcp port Whitelist and udp port Whitelist
+    # The lists are comma separated; the expansions are left unquoted on
+    # purpose so that word splitting yields one port per iteration.
+    for port in ${TCP_PORT_WHITELIST//,/ }; do
+        iptables -A OUTPUT -p tcp --dport "${port}" -j ACCEPT
+    done
+    for port in ${UDP_PORT_WHITELIST//,/ }; do
+        iptables -A OUTPUT -p udp --dport "${port}" -j ACCEPT
+    done
+
+    # 6. Outbound traffic from the current POD to other K8S container network services is prohibited.
+    if [ -n "${SVC_CIDR}" ]; then
+        deny_new_outbound "${SVC_CIDR}"
+    fi
+
+    # 7. Outbound traffic from the current POD to the POD network is prohibited.
+    for value in ${POD_CIDR//,/ }; do
+        deny_new_outbound "${value}"
+    done
+
+    # 8. Outbound traffic from the current POD to other host networks is prohibited.
+    for value in ${HOST_CIDR//,/ }; do
+        deny_new_outbound "${value}"
+    done
+
+    # show rules in log
+    ip route
+    iptables -L -v -n
+    ipset list
+fi
\ No newline at end of file
diff --git a/deploy/k8s/build/functionsystem/entrypoints/entrypoint-runtime-manager b/deploy/k8s/build/functionsystem/entrypoints/entrypoint-runtime-manager
new file mode 100644
index 0000000000000000000000000000000000000000..7893a1242eeb3800e44f590e3461db51ada8b954
--- /dev/null
+++ b/deploy/k8s/build/functionsystem/entrypoints/entrypoint-runtime-manager
@@ -0,0 +1,138 @@
+#!/bin/bash
+# Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -e
+
+# When the current uid has no passwd entry under the expected name, register
+# one (only possible if /etc/passwd is writable). The name has to come from
+# running whoami: comparing the bare word "whoami" would always differ and
+# append a duplicate entry even for a user that is already registered.
+if [ "$(whoami 2>/dev/null)" != "${USER_NAME}" ]; then
+    if [ -w /etc/passwd ]; then
+        echo "${USER_NAME}:x:$(id -u):$(id -g):${USER_NAME} user:${HOME}:/sbin/nologin" >> /etc/passwd
+    fi
+fi
+
+# prevent from creating files that have incorrect permission
+umask 0027
+
+if [ -f "${SNHOME}"/bin/alias/runtime_manager_alias.sh ]; then
+    source "${SNHOME}"/bin/alias/runtime_manager_alias.sh
+fi
+
+# mkdir -p succeeds when the directory already exists.
+mkdir -p "${LOG_PATH}"
+
+# Defaults used when the variables are unset or empty.
+: "${NPU_COLLECTION_MODE:=all}"
+: "${GPU_COLLECTION_ENABLE:=false}"
+
+# Build the JSON log configuration. The {{...}} placeholders are replaced by
+# the LOG_* values below. A non-empty LOG_PATTERN is inserted verbatim, so it
+# has to be a valid JSON value itself.
+FS_LOG_CONFIG="{\"filepath\": \"{{logConfigPath}}\",\"level\": \"{{logLevel}}\""
+if [ -z "${LOG_PATTERN}" ]; then
+    FS_LOG_CONFIG="${FS_LOG_CONFIG}, \"pattern\": \"\""
+else
+    FS_LOG_CONFIG="${FS_LOG_CONFIG}, \"pattern\": {{logPattern}}"
+fi
+FS_LOG_CONFIG="${FS_LOG_CONFIG}, \"compress\": {{logCompressEnable}}"
+FS_LOG_CONFIG="${FS_LOG_CONFIG}, \"rolling\": {\"maxsize\": {{logRollingMaxSize}},\"maxfiles\": {{logRollingMaxFiles}}}"
+FS_LOG_CONFIG="${FS_LOG_CONFIG}, \"async\": {\"logBufSecs\": {{logAsyncBufSecs}},\"maxQueueSize\": {{logAsyncMaxQueueSize}},\"threadCount\": {{logAsyncThreadCount}}}"
+FS_LOG_CONFIG="${FS_LOG_CONFIG}, \"alsologtostderr\": {{logAlsologtostderr}}}"
+
+FS_LOG_CONFIG="${FS_LOG_CONFIG//\{\{logConfigPath\}\}/$LOG_PATH}"
+FS_LOG_CONFIG="${FS_LOG_CONFIG//\{\{logLevel\}\}/$LOG_LEVEL}"
+FS_LOG_CONFIG="${FS_LOG_CONFIG//\{\{logPattern\}\}/$LOG_PATTERN}"
+FS_LOG_CONFIG="${FS_LOG_CONFIG//\{\{logCompressEnable\}\}/$LOG_COMPRESS_ENABLE}"
+FS_LOG_CONFIG="${FS_LOG_CONFIG//\{\{logRollingMaxSize\}\}/$LOG_ROLLING_MAXSIZE}"
+FS_LOG_CONFIG="${FS_LOG_CONFIG//\{\{logRollingMaxFiles\}\}/$LOG_ROLLING_MAXFILES}"
+FS_LOG_CONFIG="${FS_LOG_CONFIG//\{\{logAsyncBufSecs\}\}/$LOG_ASYNC_LOGBUFSECS}"
+FS_LOG_CONFIG="${FS_LOG_CONFIG//\{\{logAsyncMaxQueueSize\}\}/$LOG_ASYNC_MAXQUEUESIZE}"
+FS_LOG_CONFIG="${FS_LOG_CONFIG//\{\{logAsyncThreadCount\}\}/$LOG_ASYNC_THREADCOUNT}"
+FS_LOG_CONFIG="${FS_LOG_CONFIG//\{\{logAlsologtostderr\}\}/$LOG_ALSOLOGTOSTDERR}"
+
+# Per-runtime prestart settings as single-line JSON (the trailing backslashes
+# join the pieces, so the value contains no newline). Placeholders are filled
+# with bash pattern substitution, as done for FS_LOG_CONFIG, instead of
+# "echo | sed": an unquoted echo re-splits and globs the JSON, and sed
+# misreads "|", "&" and "\" inside the substituted values.
+RUNTIME_PRESTART_CONFIG="{\"java1.8\": {\"prestartCount\": {{javaPrestartCount}}, \"customArgs\": {{jvmCustomArgs}}}, \
+\"java11\": {\"prestartCount\": {{java11PrestartCount}}, \"customArgs\": {{jvmCustomArgs}}}, \
+\"python3.6\": {\"prestartCount\": {{python36PrestartCount}}}, \
+\"python3.7\": {\"prestartCount\": {{python37PrestartCount}}}, \
+\"python3.8\": {\"prestartCount\": {{python38PrestartCount}}}, \
+\"python3.9\": {\"prestartCount\": {{python39PrestartCount}}}, \
+\"python3.10\": {\"prestartCount\": {{python310PrestartCount}}}, \
+\"python3.11\": {\"prestartCount\": {{python311PrestartCount}}}, \
+\"cpp11\": {\"prestartCount\": {{cppPrestartCount}}}}"
+RUNTIME_PRESTART_CONFIG="${RUNTIME_PRESTART_CONFIG//\{\{javaPrestartCount\}\}/$JAVA_PRESTART_COUNT}"
+RUNTIME_PRESTART_CONFIG="${RUNTIME_PRESTART_CONFIG//\{\{java11PrestartCount\}\}/$JAVA11_PRESTART_COUNT}"
+RUNTIME_PRESTART_CONFIG="${RUNTIME_PRESTART_CONFIG//\{\{python36PrestartCount\}\}/$PYTHON36_PRESTART_COUNT}"
+RUNTIME_PRESTART_CONFIG="${RUNTIME_PRESTART_CONFIG//\{\{python37PrestartCount\}\}/$PYTHON37_PRESTART_COUNT}"
+RUNTIME_PRESTART_CONFIG="${RUNTIME_PRESTART_CONFIG//\{\{python38PrestartCount\}\}/$PYTHON38_PRESTART_COUNT}"
+RUNTIME_PRESTART_CONFIG="${RUNTIME_PRESTART_CONFIG//\{\{python39PrestartCount\}\}/$PYTHON39_PRESTART_COUNT}"
+RUNTIME_PRESTART_CONFIG="${RUNTIME_PRESTART_CONFIG//\{\{python310PrestartCount\}\}/$PYTHON310_PRESTART_COUNT}"
+RUNTIME_PRESTART_CONFIG="${RUNTIME_PRESTART_CONFIG//\{\{python311PrestartCount\}\}/$PYTHON311_PRESTART_COUNT}"
+RUNTIME_PRESTART_CONFIG="${RUNTIME_PRESTART_CONFIG//\{\{cppPrestartCount\}\}/$CPP_PRESTART_COUNT}"
+RUNTIME_PRESTART_CONFIG="${RUNTIME_PRESTART_CONFIG//\{\{jvmCustomArgs\}\}/$JVM_CUSTOM_ARGS}"
+
+# Default arguments per Java version. {{javaOpts}} is replaced last so that a
+# placeholder carried inside one of the JAVA*_DEFAULT_ARGS values is expanded
+# as well.
+RUNTIME_DEFAULT_CONFIG="{\"java1.8\": {{java8DefaultArgs}}, \"java11\": {{java11DefaultArgs}}, \"java17\": {{java17DefaultArgs}}, \"java21\": {{java21DefaultArgs}}}"
+RUNTIME_DEFAULT_CONFIG="${RUNTIME_DEFAULT_CONFIG//\{\{java8DefaultArgs\}\}/$JAVA8_DEFAULT_ARGS}"
+RUNTIME_DEFAULT_CONFIG="${RUNTIME_DEFAULT_CONFIG//\{\{java11DefaultArgs\}\}/$JAVA11_DEFAULT_ARGS}"
+RUNTIME_DEFAULT_CONFIG="${RUNTIME_DEFAULT_CONFIG//\{\{java17DefaultArgs\}\}/$JAVA17_DEFAULT_ARGS}"
+RUNTIME_DEFAULT_CONFIG="${RUNTIME_DEFAULT_CONFIG//\{\{java21DefaultArgs\}\}/$JAVA21_DEFAULT_ARGS}"
+RUNTIME_DEFAULT_CONFIG="${RUNTIME_DEFAULT_CONFIG//\{\{javaOpts\}\}/$JAVA_OPTS}"
+echo "RUNTIME_DEFAULT_CONFIG=$RUNTIME_DEFAULT_CONFIG"
+
+WRAPPER="/home/${USER_NAME}/runtime/fnruntime/server.py"
+CLASSPATH="/dcache/layer"
+ADDRESS="${POD_IP}:31530"
+RPC_ADDRESS="${POD_IP}:31531"
+HandlerFilePath=""
+HandlerName=""
+RUNTIMECONFIG_PATH="/home/${USER_NAME}/config/runtime.json"
+JAVA_LIBRUNTIME_LIBRARY_PATH="/home/${USER_NAME}/runtime/java/lib"
+
+# Print the value stored under key $1 in ${RUNTIMECONFIG_PATH}. This is a
+# plain text scan (split on "," and then on ":"), not a JSON parser, so it is
+# only suitable for flat scalar entries.
+function getCfgValByKey() {
+    local value
+    value=$(sed 's/,/\n/g' "${RUNTIMECONFIG_PATH}" | grep -- "$1" | sed 's/:/\n/g' | sed '1d' | sed 's/}//g')
+    # Unquoted on purpose: word splitting trims the surrounding whitespace.
+    echo ${value}
+}
+
+# limiting the number of file handles
+# Without a value "ulimit -n" would only print the current limit, so the
+# missing key is reported instead.
+max_handler_num=$(getCfgValByKey "maxHandlerNum")
+if [ -n "${max_handler_num}" ]; then
+    ulimit -n "${max_handler_num}"
+else
+    echo "maxHandlerNum not found in ${RUNTIMECONFIG_PATH}, file handle limit left unchanged" 1>&2
+fi
+
+export LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:${SNLIB}"
+exec "${RUNTIMEMANAGER}" --runtime_dir="${HOME}/runtime" --runtime_logs_dir="${RUNTIME_LOG_DIR}" \
+--runtime_ld_library_path="${LD_LIBRARY_PATH}" \
+--snuser_lib_dir="${SNUSER_LIB_PATH}" \
+--virtual_env_idle_time_limit="${VIRTUAL_ENV_IDLE_TIME_LIMIT}" \
+--runtime_log_level="${RUNTIME_LOG_LEVEL}" --setCmdCred=true --python_dependency_path="${PYTHONPATH}:${HOME}/runtime/python" \
+--java_system_property="${HOME}/runtime/java/log4j2.xml" --log_config="${FS_LOG_CONFIG}" \
+--node_id="${NODE_ID}" --ip="${POD_IP}" --host_ip="${HOST_IP}" --proxy_ip="${PROXY_IP}" --port="${RUNTIME_MGR_PORT}" --agent_address="${POD_IP}:${FUNCTION_AGENT_PORT}" \
+--data_system_port="${DS_WORKER_PORT}" --driver_server_port="${FUNCTION_PROXY_GRPC_PORT}" \
+--runtime_initial_port="${RUNTIME_INIT_PORT}" --port_num="${RUNTIME_PORT_NUM}" \
+--metrics_collector_type="${METRICS_COLLECTOR_TYPE}" --proc_metrics_cpu="${CPU4COMP}" --proc_metrics_memory="${MEM4COMP}" \
+--runtime_prestart_config="${RUNTIME_PRESTART_CONFIG}" --runtime_default_config="${RUNTIME_DEFAULT_CONFIG}" \
+--disk_usage_monitor_notify_failure_enable="${DISK_USAGE_MONITOR_NOTIFY_FAILURE_ENABLE}" --disk_usage_monitor_path="${DISK_USAGE_MONITOR_PATH}" \
+--disk_usage_limit="${DISK_USAGE_LIMIT}" --disk_usage_monitor_duration="${DISK_USAGE_MONITOR_DURATION}" \
+--system_timeout="${SYSTEM_TIMEOUT}" --runtime_std_log_dir="instances" \
+--npu_collection_mode="${NPU_COLLECTION_MODE}" --gpu_collection_enable="${GPU_COLLECTION_ENABLE}" \
+--proxy_grpc_server_port="${FUNCTION_PROXY_GRPC_PORT}" --cluster_id="${CLUSTER_ID}" \
+--enable_metrics="${ENABLE_METRICS}" --metrics_config="${METRICS_CONFIG}" --metrics_config_file="${METRICS_CONFIG_FILE}" \
+--prometheus_pushgateway_port="${PROMETHEUS_PUSH_GATEWAY_PORT}" \
+--prometheus_pushgateway_ip="${PROMETHEUS_PUSH_GATEWAY_IP}" --runtime_uid="${RUNTIME_UID}" --runtime_gid="${RUNTIME_GID}" \
+--enable_trace="${ENABLE_TRACE}" --trace_config="${TRACE_CONFIG}" \
+--java_system_library_path="${JAVA_LIBRUNTIME_LIBRARY_PATH}" --is_protomsg_to_runtime="${IS_PROTOMSG_TO_RUNTIME}" --massif_enable="${MASSIF_ENABLE}" \
+--snuser_disk_usage_limit="${SNUSER_DIR_DISK_USAGE_LIMIT}" --tmp_disk_usage_limit="${TMP_DIR_DISK_USAGE_LIMIT}" \
+--log_expiration_enable="${LOG_EXPIRATION_ENABLE}" --log_expiration_cleanup_interval="${LOG_EXPIRATION_CLEANUP_INTERVAL}" \
+--log_expiration_time_threshold="${LOG_EXPIRATION_TIME_THRESHOLD}" --log_expiration_max_file_count="${LOG_EXPIRATION_MAX_FILE_COUNT}" \
+--log_reuse_enable="${LOG_REUSE_ENABLE}" \
+--runtime_direct_connection_enable="${RUNTIME_DIRECT_CONNECTION_ENABLE}" --memory_detection_interval="${MEMORY_DETECTION_INTERVAL}" --enable_inherit_env="${ENABLE_INHERIT_ENV}" \
+--oom_kill_enable="${OOM_KILL_ENABLE}" --oom_kill_control_limit="${OOM_KILL_CONTROL_LIMIT}" --oom_consecutive_detection_count="${OOM_CONSECUTIVE_DETECTION_COUNT}" \
+--runtime_home_dir="${RUNTIME_HOME_DIR}" --nodejs_entry="${NODEJS_ENTRY}" --resource_label_path="${RESOURCE_LABEL_PATH}" --npu_device_info_path="${NPU_DEVICE_INFO_PATH}" \
+--runtime_ds_connect_timeout="${RUNTIME_DS_CONNECT_TIMEOUT}" --kill_process_timeout_seconds="${KILL_PROCESS_TIMEOUT_SECONDS}" \
+--enable_clean_stream_producer="${ENABLE_CLEAN_STREAM_PRODUCER}" \
+--runtime_instance_debug_enable="${RUNTIME_INSTANCE_DEBUG_ENABLE}" --user_log_export_mode="${USER_LOG_EXPORT_MODE}"
diff --git a/deploy/k8s/build/functionsystem/entrypoints/frontend/bootstrap b/deploy/k8s/build/functionsystem/entrypoints/frontend/bootstrap
new file mode 100644
index 0000000000000000000000000000000000000000..2934ea59ef9e043bef9a7a247ca78263f5cc7d7c
--- /dev/null
+++ b/deploy/k8s/build/functionsystem/entrypoints/frontend/bootstrap
@@ -0,0 +1,49 @@
+#!/bin/sh
+# Launches the frontend goruntime in the background, then blocks until it
+# exits; SIGTERM is forwarded to the goruntime process.
+set -e
+
+export POD_IP="${RUNTIME_POD_IP}"
+export NODE_IP="${RUNTIME_HOST_IP}"
+export HOST_IP="${RUNTIME_HOST_IP}"
+export POD_NAME="${RUNTIME_POD_NAME}"
+export DATASYSTEM_ADDR="${RUNTIME_HOST_IP}:31501"
+export ENABLE_SERVER_MODE="${ENABLE_SERVER_MODE:-true}"
+export YR_FUNCTION_LIB_PATH="${YR_FUNCTION_LIB_PATH:-./}"
+export LD_LIBRARY_PATH="${HOME}/bin:${HOME}/lib:${HOME}/snlib:${HOME_FRONTEND}/bin"
+export INSTANCE_ID="frontend-${RUNTIME_POD_NAME}"
+export FS_ADDRESS="${RUNTIME_POD_IP}:32568"
+
+JOB_ID=runtime-$(< /dev/urandom tr -dc '0-9' | head -c8 )
+RUNTIME_ID=$(< /dev/urandom tr -dc '0-9' | head -c4)-$(date +%Y%m%d%H%M%S)
+
+FUNCTION_LIB_PATH="${HOME}/lib:${HOME_FRONTEND}/bin" \
+nohup "${HOME_FRONTEND}/bin/goruntime" -runtimeConfigPath="${INIT_ARGS_FILE_PATH}" -jobId "${JOB_ID}" \
+    -runtimeId="${RUNTIME_ID}" -instanceId="${INSTANCE_ID}" -functionName='0/0-system-faasfrontend/$latest' -logLevel="${LOG_LEVEL}" \
+    -logPath="${LOG_PATH}" -functionSystemAddress="${NODE_IP}:32568" -driverMode=true &
+
+FRONTEND_PID="$!"
+echo "check frontend pid $FRONTEND_PID"
+
+# Block until the process with pid $1 is gone. kill -0 is the loop condition
+# so that its failure ends the loop instead of tripping "set -e".
+wait_pid_exit() {
+    echo "wait for pid $1 to exit"
+    while kill -0 "$1" 2>/dev/null; do
+        sleep 1
+    done
+    echo "pid $1 already exited"
+}
+
+# SIGTERM handler: ask the frontend to stop and wait until it has finished.
+sigterm_handler() {
+    echo "start to kill frontend pid $FRONTEND_PID"
+    kill -15 "$FRONTEND_PID" 2>/dev/null || true
+    wait_pid_exit "$FRONTEND_PID"
+}
+
+trap sigterm_handler TERM
+
+sleep 5
+wait_pid_exit "$FRONTEND_PID"
+echo "boostrap exit"
diff --git a/deploy/k8s/build/functionsystem/entrypoints/frontend/health-check b/deploy/k8s/build/functionsystem/entrypoints/frontend/health-check
new file mode 100644
index 0000000000000000000000000000000000000000..bc5c64252494abffbf766dc47b536d82fc3cbfe3
--- /dev/null
+++ b/deploy/k8s/build/functionsystem/entrypoints/frontend/health-check
@@ -0,0 +1,27 @@
+#!/bin/bash
+set -e
+
+HEALTH_LOG=/opt/yuanrong/logs/healthcheck.log
+
+# Succeeds when a TCP socket is listening on local port $1. The port is
+# matched against the end of the netstat "Local Address" column so that e.g.
+# port 18888 or a PID containing 8888 is not mistaken for port 8888.
+function is_port_listening() {
+    netstat -plnut | awk -v port="$1" '$1 ~ /^tcp/ && $4 ~ (":" port "$") { found = 1 } END { exit !found }'
+}
+
+# Probe entry point: logs the reason and exits 1 unless both port 8888
+# (reported as "frontend") and port 22423 (reported as "proxy") are listening.
+function health_check() {
+    if ! is_port_listening 8888; then
+        echo "frontend failed" >> "${HEALTH_LOG}"
+        exit 1
+    fi
+
+    if ! is_port_listening 22423; then
+        echo "proxy failed" >> "${HEALTH_LOG}"
+        exit 1
+    fi
+}
+
+health_check
\ No newline at end of file
diff --git a/deploy/k8s/build/functionsystem/entrypoints/function-master/bootstrap b/deploy/k8s/build/functionsystem/entrypoints/function-master/bootstrap
new file mode 100644
index 0000000000000000000000000000000000000000..7fde617168885052dd32211ad0eb2163eeece0fd
--- /dev/null
+++ b/deploy/k8s/build/functionsystem/entrypoints/function-master/bootstrap
@@ -0,0 +1,68 @@
+#!/bin/bash
+# Bootstrap for function-master: renders the JSON log configuration from the
+# LOG_* environment variables and execs the ${FUNCTION_MASTER} binary.
+# bash (not sh) is required for the ${var//pattern/replacement} expansions.
+set -e
+
+umask 0027
+export LD_LIBRARY_PATH="${HOME}/lib:${LD_LIBRARY_PATH}"
+if [ -f "${HOME}"/bin/alias/control_plane_alias.sh ]; then
+    bash "${HOME}"/bin/alias/control_plane_alias.sh "function_master"
+fi
+
+mkdir -p "${LOG_PATH}"
+
+export POD_IP="${RUNTIME_POD_IP}"
+export HOST_IP="${RUNTIME_HOST_IP}"
+
+# JSON template; the {{...}} placeholders are filled in below.
+FS_LOG_CONFIG="{\"filepath\": \"{{logConfigPath}}\",\"level\": \"{{logLevel}}\""
+if [ -z "${LOG_PATTERN}" ]; then
+    FS_LOG_CONFIG="${FS_LOG_CONFIG}, \"pattern\": \"\""
+else
+    FS_LOG_CONFIG="${FS_LOG_CONFIG}, \"pattern\": {{logPattern}}"
+fi
+FS_LOG_CONFIG="${FS_LOG_CONFIG}, \"compress\": {{logCompressEnable}}"
+FS_LOG_CONFIG="${FS_LOG_CONFIG}, \"rolling\": {\"maxsize\": {{logRollingMaxSize}},\"maxfiles\": {{logRollingMaxFiles}}}"
+FS_LOG_CONFIG="${FS_LOG_CONFIG}, \"async\": {\"logBufSecs\": {{logAsyncBufSecs}},\"maxQueueSize\": {{logAsyncMaxQueueSize}},\"threadCount\": {{logAsyncThreadCount}}}"
+FS_LOG_CONFIG="${FS_LOG_CONFIG}, \"alsologtostderr\": {{logAlsologtostderr}}}"
+
+FS_LOG_CONFIG="${FS_LOG_CONFIG//\{\{logConfigPath\}\}/$LOG_PATH}"
+FS_LOG_CONFIG="${FS_LOG_CONFIG//\{\{logLevel\}\}/$LOG_LEVEL}"
+FS_LOG_CONFIG="${FS_LOG_CONFIG//\{\{logPattern\}\}/$LOG_PATTERN}"
+FS_LOG_CONFIG="${FS_LOG_CONFIG//\{\{logCompressEnable\}\}/$LOG_COMPRESS_ENABLE}"
+FS_LOG_CONFIG="${FS_LOG_CONFIG//\{\{logRollingMaxSize\}\}/$LOG_ROLLING_MAXSIZE}"
+FS_LOG_CONFIG="${FS_LOG_CONFIG//\{\{logRollingMaxFiles\}\}/$LOG_ROLLING_MAXFILES}"
+FS_LOG_CONFIG="${FS_LOG_CONFIG//\{\{logAsyncBufSecs\}\}/$LOG_ASYNC_LOGBUFSECS}"
+FS_LOG_CONFIG="${FS_LOG_CONFIG//\{\{logAsyncMaxQueueSize\}\}/$LOG_ASYNC_MAXQUEUESIZE}"
+FS_LOG_CONFIG="${FS_LOG_CONFIG//\{\{logAsyncThreadCount\}\}/$LOG_ASYNC_THREADCOUNT}"
+FS_LOG_CONFIG="${FS_LOG_CONFIG//\{\{logAlsologtostderr\}\}/$LOG_ALSOLOGTOSTDERR}"
+
+exec "${FUNCTION_MASTER}" --ip="${POD_IP}:${GLOBAL_SCHEDULER_PORT}" --meta_store_address="${META_STORE_ADDRESS}" \
+--log_config="${FS_LOG_CONFIG}" --node_id="${RUNTIME_POD_NAME}" --sys_func_retry_period="${SYSTEM_FUNCTION_RETRY_PERIOD}" \
+--k8s_base_path="${K8S_BASE_URL}" \
+--runtime_recover_enable="${RUNTIME_RECOVER_ENABLE}" --decrypt_algorithm="${DECRYPT_ALGORITHM}" \
+--election_mode="${ELECTION_MODE}" --k8s_namespace="${K8S_NAMESPACE}" --system_timeout="${SYSTEM_TIMEOUT}" \
+--ssl_enable="${SSL_ENABLE}" --metrics_ssl_enable="${METRICS_SSL_ENABLE}" --ssl_base_path="${SSL_BASE_PATH}" \
+--ssl_root_file="${SSL_ROOT_FILE}" --ssl_cert_file="${SSL_CERT_FILE}" --ssl_key_file="${SSL_KEY_FILE}" \
+--enable_print_resource_view="${ENABLE_PRINT_RESOURCE_VIEW}" \
+--enable_metrics="${ENABLE_METRICS}" --metrics_config="${METRICS_CONFIG}" --metrics_config_file="${METRICS_CONFIG_FILE}" \
+--migrate_prefix="${MIGRATE_PREFIX}" --taint_tolerance_list="${TAINT_TOLERANCE_LIST}" \
+--worker_taint_exclude_labels="${WORKER_TAINT_EXCLUDES}" --migrate_enable="${MIGRATE_ENABLE}" \
+--system_upgrade_watch_enable="${SYSTEM_UPGRADE_WATCH_ENABLE}" --az_id="${AZ_ID}" --system_upgrade_key="${SYSTEM_UPGRADE_KEY}" \
+--system_upgrade_address="${SYSTEM_UPGRADE_ADDRESS}" --grace_period_seconds="${GRACE_PERIOD_SECONDS}" \
+--etcd_auth_type="${ETCD_AUTH_TYPE}" --etcd_root_ca_file="${ETCD_ROOT_CA_FILE}" --etcd_cert_file="${ETCD_CERT_FILE}" --etcd_key_file="${ETCD_KEY_FILE}" \
+--etcd_decrypt_tool="${ETCD_DECRYPT_TOOL}" --etcd_secret_name="${ETCD_SECRET_NAME}" --etcd_target_name_override="${ETCD_TARGET_NAME_OVERRIDE}" \
+--schedule_plugins="${SCHEDULE_PLUGINS}" --cluster_id="${CLUSTER_ID}" \
+--max_tolerate_metastore_healthcheck_failed_times="${MAX_TOLERATE_META_STORE_FAILED_TIMES}" \
+--metastore_healthcheck_interval="${META_HEALTH_CHECK_INTERVAL_MS}" \
+--metastore_healthcheck_timeout="${META_HEALTH_CHECK_TIMEOUTS}" \
+--function_meta_path="${FUNCTION_META_PATH}" \
+--etcd_ssl_base_path="${ETCD_SSL_BASE_PATH}" \
+--pool_config_path="${POOL_CONFIG_PATH}" --agent_template_path="${AGENT_TEMPLATE_PATH}" \
+--kube_client_retry_times="${KUBE_CLIENT_RETRY_TIMES}" --kube_api_retry_cycle="${KUBE_API_RETRY_CYCLE}" \
+--health_monitor_max_failure="${HEALTH_MONITOR_MAX_FAILURE}" --health_Monitor_retry_interval="${HEALTH_MONITOR_RETRY_INTERVAL}" \
+--evicted_taint_key="${EVICTED_TAINT_KEY}" --local_scheduler_port="${FSPROXY_PORT}" --self_taint_prefix="${SELF_TAINT_PREFIX}" \
+--max_priority="${MAX_PRIORITY}" --enable_preemption="${ENABLE_PREEMPTION}" --schedule_relaxed="${SCHEDULE_RELAXED}" --lib_path="${LIB_PATH}" --services_path="${SERVICES_PATH}" \
+--system_auth_mode="${SYSTEM_AUTH_MODE}" \
+--enable_meta_store="false" --meta_store_mode="local" --etcd_address="${ETCD_CLUSTER_ADDRESS}"
\ No newline at end of file
diff --git a/deploy/k8s/build/functionsystem/entrypoints/function-proxy/bootstrap b/deploy/k8s/build/functionsystem/entrypoints/function-proxy/bootstrap
new file mode 100644
index 0000000000000000000000000000000000000000..7e40a513b60586242b86cb9bed08d426c05f9da6
--- /dev/null
+++ b/deploy/k8s/build/functionsystem/entrypoints/function-proxy/bootstrap
@@ -0,0 +1,79 @@
+#!/bin/bash
+
+set -e
+# prevent from creating files that have incorrect permission
+umask 0027
+
+export LD_LIBRARY_PATH="${HOME}/lib:${LD_LIBRARY_PATH}"
+
+FS_LOG_CONFIG="{\"filepath\": \"{{logConfigPath}}\",\"level\": \"{{logLevel}}\""
+if [ -z "${LOG_PATTERN}" ]; then
+    FS_LOG_CONFIG="${FS_LOG_CONFIG}, \"pattern\": \"\""
+else
+    FS_LOG_CONFIG="${FS_LOG_CONFIG}, \"pattern\": {{logPattern}}"
+fi
+FS_LOG_CONFIG="${FS_LOG_CONFIG}, \"compress\": {{logCompressEnable}}"
+FS_LOG_CONFIG="${FS_LOG_CONFIG}, \"rolling\": {\"maxsize\": {{logRollingMaxSize}},\"maxfiles\": {{logRollingMaxFiles}}}"
+FS_LOG_CONFIG="${FS_LOG_CONFIG}, \"async\": {\"logBufSecs\": {{logAsyncBufSecs}},\"maxQueueSize\": {{logAsyncMaxQueueSize}},\"threadCount\": {{logAsyncThreadCount}}}"
+FS_LOG_CONFIG="${FS_LOG_CONFIG}, \"alsologtostderr\": {{logAlsologtostderr}}}"
+
+FS_LOG_CONFIG="${FS_LOG_CONFIG//\{\{logConfigPath\}\}/$LOG_PATH}"
+FS_LOG_CONFIG="${FS_LOG_CONFIG//\{\{logLevel\}\}/$LOG_LEVEL}"
+FS_LOG_CONFIG="${FS_LOG_CONFIG//\{\{logPattern\}\}/$LOG_PATTERN}"
+FS_LOG_CONFIG="${FS_LOG_CONFIG//\{\{logCompressEnable\}\}/$LOG_COMPRESS_ENABLE}"
+FS_LOG_CONFIG="${FS_LOG_CONFIG//\{\{logRollingMaxSize\}\}/$LOG_ROLLING_MAXSIZE}"
+FS_LOG_CONFIG="${FS_LOG_CONFIG//\{\{logRollingMaxFiles\}\}/$LOG_ROLLING_MAXFILES}"
+FS_LOG_CONFIG="${FS_LOG_CONFIG//\{\{logAsyncBufSecs\}\}/$LOG_ASYNC_LOGBUFSECS}"
+FS_LOG_CONFIG="${FS_LOG_CONFIG//\{\{logAsyncMaxQueueSize\}\}/$LOG_ASYNC_MAXQUEUESIZE}"
+FS_LOG_CONFIG="${FS_LOG_CONFIG//\{\{logAsyncThreadCount\}\}/$LOG_ASYNC_THREADCOUNT}"
+FS_LOG_CONFIG="${FS_LOG_CONFIG//\{\{logAlsologtostderr\}\}/$LOG_ALSOLOGTOSTDERR}"
+
+# Replace this shell with the function proxy. Every flag value is quoted so
+# that an empty value or one containing whitespace still reaches the binary
+# as exactly one argument.
+# NOTE(review): --cache_storage_auth_ak/--cache_storage_auth_sk are passed on
+# the command line, which makes them visible in the process list.
+exec "${FUNCTION_PROXY}" --address="${HOST_IP}:${FUNCTION_PROXY_PORT}" --meta_store_address="${META_STORE_ADDRESS}" \
+--node_id="${NODE_ID}" --unregister_while_stop=true --log_config="${FS_LOG_CONFIG}" \
+--runtime_heartbeat_enable="${RUNTIME_HEARTBEAT_ENABLE}" --runtime_max_heartbeat_timeout_times="${RUNTIME_MAX_HEARTBEAT_TIMEOUT_TIMES}" \
+--runtime_heartbeat_timeout_ms="${RUNTIME_HEARTBEAT_TIMEOUT_MS}" --cache_storage_host="${HOST_IP}" --cache_storage_port="${DS_WORKER_PORT}" \
+--ip="${HOST_IP}" --grpc_listen_port="${FUNCTION_PROXY_GRPC_PORT}" --enable_driver="${DRIVER_ENABLE}" \
+--max_grpc_size="${MAX_GRPC_SIZE}" --runtime_recover_enable="${RUNTIME_RECOVER_ENABLE}" --observability_agent_grpc_port="4317" --enable_trace="${ENABLE_TRACE}" \
+--state_storage_type="${STATE_STORAGE_TYPE}" --decrypt_algorithm="${DECRYPT_ALGORITHM}" --cache_storage_auth_enable="${CACHE_STORAGE_AUTH_ENABLE}" \
+--cache_storage_auth_type="${CACHE_STORAGE_AUTH_TYPE}" --cache_storage_auth_ak="${CACHE_STORAGE_AUTH_AK}" --cache_storage_auth_sk="${CACHE_STORAGE_AUTH_SK}" \
+--cache_storage_info_prefix="${CACHE_STORAGE_INFO_PREFIX}" \
+--lib_path="${LIB_PATH}" --services_path="${SERVICES_PATH}" --observability_prometheus_port="9392" --prometheus_pushgateway_port="${PROMETHEUS_PUSH_GATEWAY_PORT}" \
+--ds_health_check_path="${DS_HEALTH_CHECK_PATH}" \
+--ds_health_check_interval="${DS_HEALTH_CHECK_INTERVAL}" --max_ds_health_check_times="${MAX_DS_HEALTH_CHECK_TIMES}" \
+--election_mode="${ELECTION_MODE}" --system_timeout="${SYSTEM_TIMEOUT}" --runtime_shutdown_timeout_seconds="${RUNTIME_SHUTDOWN_TIMEOUT_SECONDS}" \
+--ssl_enable="${SSL_ENABLE}" --metrics_ssl_enable="${METRICS_SSL_ENABLE}" --ssl_base_path="${SSL_BASE_PATH}" \
+--ssl_root_file="${SSL_ROOT_FILE}" --ssl_cert_file="${SSL_CERT_FILE}" --ssl_key_file="${SSL_KEY_FILE}" --enable_iam="${ENABLE_IAM}" \
+--iam_base_path="${IAM_BASE_PATH}" --iam_policy_file="${IAM_POLICY_CONFIG_PATH}" --iam_credential_type="${IAM_CREDENTIAL_TYPE}" \
+--min_instance_memory_size="${MIN_INSTANCE_MEMORY_SIZE}" --min_instance_cpu_size="${MIN_INSTANCE_CPU_SIZE}" \
+--max_instance_memory_size="${MAX_INSTANCE_MEMORY_SIZE}" --max_instance_cpu_size="${MAX_INSTANCE_CPU_SIZE}" \
+--enable_server_mode="${ENABLE_SERVER_MODE}" --enable_print_resource_view="${ENABLE_PRINT_RESOURCE_VIEW}" \
+--enable_metrics="${ENABLE_METRICS}" --metrics_config="${METRICS_CONFIG}" --metrics_config_file="${METRICS_CONFIG_FILE}" \
+--service_ttl="${SERVICE_TTL}" --prometheus_pushgateway_ip="${PROMETHEUS_PUSH_GATEWAY_IP}" \
+--invoke_limitation_enable="${INVOKE_LIMITATION_ENABLE}" --low_memory_threshold="${LOW_MEMORY_THRESHOLD}" \
+--high_memory_threshold="${HIGH_MEMORY_THRESHOLD}" --message_size_threshold="${MESSAGE_SIZE_THRESHOLD}" \
+--create_limitation_enable="${CREATE_LIMITATION_ENABLE}" --token_bucket_capacity="${TOKEN_BUCKET_CAPACITY}" \
+--etcd_auth_type="${ETCD_AUTH_TYPE}" --etcd_root_ca_file="${ETCD_ROOT_CA_FILE}" --etcd_cert_file="${ETCD_CERT_FILE}" --etcd_key_file="${ETCD_KEY_FILE}" \
+--etcd_target_name_override="${ETCD_TARGET_NAME_OVERRIDE}" \
+--schedule_plugins="${SCHEDULE_PLUGINS}" --cluster_id="${CLUSTER_ID}" \
+--enable_tenant_affinity="${ENABLE_TENANT_AFFINITY}" \
+--tenant_pod_reuse_time_window="${TENANT_POD_REUSE_TIME_WINDOW}" \
+--enable_ipv4_tenant_isolation="${ENABLE_IPV4_TENANT_ISOLATION}" --runtime_ds_encrypt_enable="${RUNTIME_DS_ENCRYPT_ENABLE}" \
+--runtime_ds_auth_enable="${RUNTIME_DS_AUTH_ENABLE}" \
+--iam_meta_store_address="${IAM_META_STORE_ADDRESS}" \
+--k8s_base_path="${K8S_BASE_URL}" --k8s_namespace="${K8S_NAMESPACE}" \
+--fc_agent_mgr_retry_times="${FC_AGENT_MGR_RETRY_TIMES}" --fc_agent_mgr_retry_cycle="${FC_AGENT_MGR_RETRY_CYCLE}" \
+--max_tolerate_metastore_healthcheck_failed_times="${MAX_TOLERATE_META_STORE_FAILED_TIMES}" \
+--metastore_healthcheck_interval="${META_HEALTH_CHECK_INTERVAL_MS}" \
+--metastore_healthcheck_timeout="${META_HEALTH_CHECK_TIMEOUTS}" \
+--s3_credential_type="${S3_CREDENTIAL_TYPE}" \
+--oidc_workload_identity="${OIDC_WORKLOAD_IDENTITY}" \
+--oidc_audience="${OIDC_AUDIENCE}" \
+--oidc_project_id="${OIDC_PROJECT_ID}" \
+--oidc_project_name="${OIDC_PROJECT_NAME}" \
+--external_iam_endpoint="${EXTERNAL_IAM_ENDPOINT}" \
+--temporary_accessKey_expiration_seconds="${TEMPORARY_ACCESSKEY_EXPIRATION_SECONDS}" \
+--redis_conf_path="${REDIS_CONF_PATH}" --forward_compatibility="${FORWARD_COMPATIBILITY}" \
+--etcd_ssl_base_path="${ETCD_SSL_BASE_PATH}" \
+--function_meta_path="${FUNCTION_META_PATH}" --curve_key_path="${CURVE_KEY_PATH}" --is_partial_watch_instances="${IS_PARTIAL_WATCH_INSTANCES}" \
+--max_priority="${MAX_PRIORITY}" --enable_preemption="${ENABLE_PREEMPTION}" --system_auth_mode="${SYSTEM_AUTH_MODE}" \
+--enable_meta_store="false" --meta_store_mode="local"
diff --git a/deploy/k8s/build/functionsystem/entrypoints/health-check b/deploy/k8s/build/functionsystem/entrypoints/health-check
new file mode 100644
index 0000000000000000000000000000000000000000..f904cad18d61e724c6ccd64c27133da6d4a3ec09
--- /dev/null
+++ b/deploy/k8s/build/functionsystem/entrypoints/health-check
@@ -0,0 +1,27 @@
+#!/bin/bash
+# Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +function health_check() { + local port=$1 + local dest=$2 + local addr="${POD_IP}:${port}" + local node_id=${NODE_ID} + local ret_code=$(LD_LIBRARY_PATH="" timeout 3 curl -s -m 3 -H "Node-ID:${NODE_ID}" -H "PID:1" "http://${addr}/${dest}/healthy" -w %{http_code};echo $?) + if [ "x${ret_code:0:3}" != "x200" ]; then + exit 1 + fi +} + +health_check "$@" \ No newline at end of file diff --git a/deploy/k8s/build/functionsystem/entrypoints/iam-policy-config.json b/deploy/k8s/build/functionsystem/entrypoints/iam-policy-config.json new file mode 100644 index 0000000000000000000000000000000000000000..bc537f20ce287944582d3f9bd03ee83c1bd3f63a --- /dev/null +++ b/deploy/k8s/build/functionsystem/entrypoints/iam-policy-config.json @@ -0,0 +1,49 @@ +{ + "tenant_group": { + "system": { + "0": [] + }, + "external": { + } + }, + + "white_list": { + }, + + "policy": { + "allow": { + "create": { + "system": { + "external": [ "*" ], + "system": [ "*" ] + }, + "external": { + "external": [ "*" ] + } + }, + "invoke": { + "system": { + "system": [ "*" ], + "external": [ "*" ] + }, + "external": { + "system": [ "*" ], + "external": [ "*" ] + } + }, + "kill": { + "system": { + "system": [ "*" ], + "external": [ "*" ] + }, + "external": { + "external": [ "*" ] + } + } + }, + "deny": { + "tenant_list": [], + "user_list": [] + } + } +} \ No newline at end of file diff --git a/deploy/k8s/build/functionsystem/entrypoints/iam-server/bootstrap b/deploy/k8s/build/functionsystem/entrypoints/iam-server/bootstrap new file mode 100644 index 
0000000000000000000000000000000000000000..2eba3c9a83df110c82ad9bdb44097d7ab1c1a1fc
--- /dev/null
+++ b/deploy/k8s/build/functionsystem/entrypoints/iam-server/bootstrap
@@ -0,0 +1,45 @@
+#!/bin/sh -e
+# Entry point of the iam-server container: creates the log directory, renders
+# the JSON log configuration from the LOG_* variables and execs ${IAM_SERVER}.
+
+# Repeat -e here: an option on the shebang line is lost under `sh bootstrap`.
+set -e
+umask 0027
+
+export POD_IP="${RUNTIME_POD_IP}"
+export NODE_ID="${RUNTIME_HOST_IP}"
+export LD_LIBRARY_PATH="${HOME}/lib:${LD_LIBRARY_PATH}"
+
+mkdir -p "${LOG_PATH}"
+
+# LOG_PATTERN is inserted verbatim (unquoted), so it must already be a JSON
+# value; an empty one becomes "" to keep the document well-formed.
+if [ -z "${LOG_PATTERN}" ]; then
+    log_pattern_json='""'
+else
+    log_pattern_json="${LOG_PATTERN}"
+fi
+
+# Build the document directly from the variables instead of substituting
+# {{placeholders}} with ${var//pattern/value}, which plain /bin/sh may lack.
+FS_LOG_CONFIG="{\"filepath\": \"${LOG_PATH}\",\"level\": \"${LOG_LEVEL}\""
+FS_LOG_CONFIG="${FS_LOG_CONFIG}, \"pattern\": ${log_pattern_json}"
+FS_LOG_CONFIG="${FS_LOG_CONFIG}, \"compress\": ${LOG_COMPRESS_ENABLE}"
+FS_LOG_CONFIG="${FS_LOG_CONFIG}, \"rolling\": {\"maxsize\": ${LOG_ROLLING_MAXSIZE},\"maxfiles\": ${LOG_ROLLING_MAXFILES}}"
+FS_LOG_CONFIG="${FS_LOG_CONFIG}, \"async\": {\"logBufSecs\": ${LOG_ASYNC_LOGBUFSECS},\"maxQueueSize\": ${LOG_ASYNC_MAXQUEUESIZE},\"threadCount\": ${LOG_ASYNC_THREADCOUNT}}"
+FS_LOG_CONFIG="${FS_LOG_CONFIG}, \"alsologtostderr\": ${LOG_ALSOLOGTOSTDERR}}"
+
+exec "${IAM_SERVER}" --ip="${POD_IP}" --http_listen_port="${IAM_LISTEN_PORT}" --meta_store_address="${META_STORE_ADDRESS}" \
+--log_config="${FS_LOG_CONFIG}" --enable_iam="${ENABLE_IAM}" \
+--node_id="${RUNTIME_POD_NAME}" --token_expired_time_span="${TOKEN_EXPIRED_TIME_SPAN}" \
+--resource_path="${RESOURCE_PATH}" --decrypt_algorithm="${DECRYPT_ALGORITHM}" \
+--ssl_enable="${SSL_ENABLE}" --ssl_base_path="${SSL_BASE_PATH}" \
+--ssl_root_file="${SSL_ROOT_FILE}" --ssl_cert_file="${SSL_CERT_FILE}" --ssl_key_file="${SSL_KEY_FILE}" \
+--cluster_id="${CLUSTER_ID}" \
+--etcd_auth_type="${ETCD_AUTH_TYPE}" --etcd_root_ca_file="${ETCD_ROOT_CA_FILE}" --etcd_cert_file="${ETCD_CERT_FILE}" --etcd_key_file="${ETCD_KEY_FILE}" \
+--etcd_target_name_override="${ETCD_TARGET_NAME_OVERRIDE}" \
+--max_tolerate_metastore_healthcheck_failed_times="${MAX_TOLERATE_META_STORE_FAILED_TIMES}" \
+--metastore_healthcheck_interval="${META_HEALTH_CHECK_INTERVAL_MS}" \
+--metastore_healthcheck_timeout="${META_HEALTH_CHECK_TIMEOUTS}" \
+--election_mode="${ELECTION_MODE}" --credential_host_address="${CREDENTIAL_HOST_ADDRESS}" --k8s_namespace="${K8S_NAMESPACE}" --k8s_base_path="${K8S_BASE_URL}" \
+--system_auth_mode="${SYSTEM_AUTH_MODE}" --iam_credential_type="${IAM_CREDENTIAL_TYPE}" --permanent_cred_conf_path="${PERMANENT_CREDENTIAL_CONFIG_PATH}"
diff --git a/deploy/k8s/build/functionsystem/entrypoints/manager/bootstrap b/deploy/k8s/build/functionsystem/entrypoints/manager/bootstrap
new file mode 100644
index 0000000000000000000000000000000000000000..3da994d2497cd63d660be4078e809dc59cdfa942
--- /dev/null
+++ b/deploy/k8s/build/functionsystem/entrypoints/manager/bootstrap
@@ -0,0 +1,51 @@
+#!/bin/sh -e
+
+export POD_IP="${RUNTIME_POD_IP}"
+export NODE_IP="${RUNTIME_HOST_IP}"
+export HOST_IP="${RUNTIME_HOST_IP}"
+export POD_NAME="${RUNTIME_POD_NAME}"
+export ENABLE_SERVER_MODE="${ENABLE_SERVER_MODE:-true}"
+export YR_FUNCTION_LIB_PATH="${YR_FUNCTION_LIB_PATH:-./}"
+export LD_LIBRARY_PATH="${HOME}/bin:${HOME}/lib:${HOME}/snlib:${HOME_MANAGER}/bin"
+export INSTANCE_ID="manager-${RUNTIME_POD_NAME}"
+export FS_ADDRESS="${RUNTIME_POD_IP}:32568"
+
+
+JOB_ID=runtime-$(< /dev/urandom tr -dc '0-9' | head -c8 )
+RUNTIME_ID=$(< /dev/urandom tr -dc '0-9' | head -c4)-$(date +%Y%m%d%H%M%S)
+
+FUNCTION_LIB_PATH="${HOME_MANAGER}/bin" \
+nohup "${HOME_MANAGER}/bin/goruntime" -runtimeConfigPath="${INIT_ARGS_FILE_PATH}" -jobId "${JOB_ID}" \
+    -runtimeId="${RUNTIME_ID}" -instanceId="${INSTANCE_ID}" -functionName='0/0-system-faasmanager/$latest' -logLevel="${LOG_LEVEL}" \
+    -logPath="${LOG_PATH}" -functionSystemAddress="${NODE_IP}:32568" -driverMode=true &
+MANAGER_PID="$!"
+echo "check manager pid $MANAGER_PID"
+
+# Block until process $1 no longer exists, polling once per second.
+# kill -0 is the loop condition so its expected failure cannot trip `sh -e`.
+wait_pid_exit() {
+    local pid="$1"
+    echo "wait for pid $pid to exit"
+    while kill -0 "$pid" 2>/dev/null; do
+        sleep 1
+    done
+    echo "pid $pid already exited"
+}
+
+# SIGTERM handler: forward the signal to the manager and wait for it to stop.
+sigterm_handler() {
+    echo "start to kill manager pid $MANAGER_PID"
+    # The process may already be gone; that must not abort the handler.
+    kill -15 "$MANAGER_PID" 2>/dev/null || true
+    wait_pid_exit "$MANAGER_PID"
+}
+
+# Use the POSIX signal name; not every /bin/sh accepts the SIG prefix.
+trap sigterm_handler TERM
+
+# After a fixed 5 s delay, stay in the foreground until the manager exits.
+sleep 5
+wait_pid_exit "$MANAGER_PID"
+echo "boostrap exit"
+
+
diff --git a/deploy/k8s/build/functionsystem/entrypoints/manager/health-check b/deploy/k8s/build/functionsystem/entrypoints/manager/health-check
new file mode 100644
index 0000000000000000000000000000000000000000..1aa860e9e1066b8a724516a53e074785ba8b47a6
--- /dev/null
+++ b/deploy/k8s/build/functionsystem/entrypoints/manager/health-check
@@ -0,0 +1,27 @@
+#!/bin/bash
+set -e
+
+readonly HEALTH_LOG=/opt/yuanrong/logs/healthcheck.log
+
+# Succeed when a TCP socket is listening on local port $1.
+# The port is matched against the end of netstat's local-address column, so it
+# cannot match a longer port number or any other column by accident.
+function port_listening() {
+    netstat -lnt \
+        | awk -v port="$1" '$1 ~ /^tcp/ && $4 ~ (":" port "$") { found = 1 } END { exit !found }'
+}
+
+# Exit 1, recording the failed component, unless 9994 and 22423 both listen.
+function health_check() {
+    if ! port_listening 9994; then
+        echo "frontend failed" >> "${HEALTH_LOG}"
+        exit 1
+    fi
+
+    if ! port_listening 22423; then
+        echo "proxy failed" >> "${HEALTH_LOG}"
+        exit 1
+    fi
+}
+
+health_check
\ No newline at end of file
diff --git a/deploy/k8s/build/functionsystem/entrypoints/scheduler/bootstrap
b/deploy/k8s/build/functionsystem/entrypoints/scheduler/bootstrap
new file mode 100644
index 0000000000000000000000000000000000000000..cc5fd79f4e6bed972b86db8b24c240fc6233dfcf
--- /dev/null
+++ b/deploy/k8s/build/functionsystem/entrypoints/scheduler/bootstrap
@@ -0,0 +1,51 @@
+#!/bin/sh -e
+
+export POD_IP="${RUNTIME_POD_IP}"
+export NODE_IP="${RUNTIME_HOST_IP}"
+export HOST_IP="${RUNTIME_HOST_IP}"
+export POD_NAME="${RUNTIME_POD_NAME}"
+export ENABLE_SERVER_MODE="${ENABLE_SERVER_MODE:-true}"
+export YR_FUNCTION_LIB_PATH="${YR_FUNCTION_LIB_PATH:-./}"
+export LD_LIBRARY_PATH="${HOME}/bin:${HOME}/lib:${HOME}/snlib:${HOME_SCHEDULER}/bin"
+export INSTANCE_ID="scheduler-${RUNTIME_POD_NAME}"
+export FS_ADDRESS="${RUNTIME_POD_IP}:32568"
+
+
+JOB_ID=runtime-$(< /dev/urandom tr -dc '0-9' | head -c8 )
+RUNTIME_ID=$(< /dev/urandom tr -dc '0-9' | head -c4)-$(date +%Y%m%d%H%M%S)
+
+FUNCTION_LIB_PATH="${HOME}/lib:${HOME_SCHEDULER}/bin" \
+nohup "${HOME_SCHEDULER}/bin/goruntime" -runtimeConfigPath="${INIT_ARGS_FILE_PATH}" -jobId "${JOB_ID}" \
+    -runtimeId="${RUNTIME_ID}" -instanceId="${INSTANCE_ID}" -functionName='0/0-system-faasscheduler/$latest' -logLevel="${LOG_LEVEL}" \
+    -logPath="${LOG_PATH}" -functionSystemAddress="${NODE_IP}:32568" -driverMode=true &
+SCHEDULER_PID="$!"
+echo "check scheduler pid $SCHEDULER_PID"
+
+# Block until process $1 no longer exists, polling once per second.
+# kill -0 is the loop condition so its expected failure cannot trip `sh -e`.
+wait_pid_exit() {
+    local pid="$1"
+    echo "wait for pid $pid to exit"
+    while kill -0 "$pid" 2>/dev/null; do
+        sleep 1
+    done
+    echo "pid $pid already exited"
+}
+
+# SIGTERM handler: forward the signal to the scheduler and wait for it to stop.
+sigterm_handler() {
+    echo "start to kill scheduler pid $SCHEDULER_PID"
+    # The process may already be gone; that must not abort the handler.
+    kill -15 "$SCHEDULER_PID" 2>/dev/null || true
+    wait_pid_exit "$SCHEDULER_PID"
+}
+
+# Use the POSIX signal name; not every /bin/sh accepts the SIG prefix.
+trap sigterm_handler TERM
+
+# After a fixed 5 s delay, stay in the foreground until the scheduler exits.
+sleep 5
+wait_pid_exit "$SCHEDULER_PID"
+echo "boostrap exit"
+
+
diff --git a/deploy/k8s/build/functionsystem/entrypoints/scheduler/health-check b/deploy/k8s/build/functionsystem/entrypoints/scheduler/health-check
new file mode 100644
index 0000000000000000000000000000000000000000..1aa860e9e1066b8a724516a53e074785ba8b47a6
--- /dev/null
+++ b/deploy/k8s/build/functionsystem/entrypoints/scheduler/health-check
@@ -0,0 +1,27 @@
+#!/bin/bash
+set -e
+
+readonly HEALTH_LOG=/opt/yuanrong/logs/healthcheck.log
+
+# Succeed when a TCP socket is listening on local port $1.
+# The port is matched against the end of netstat's local-address column, so it
+# cannot match a longer port number or any other column by accident.
+function port_listening() {
+    netstat -lnt \
+        | awk -v port="$1" '$1 ~ /^tcp/ && $4 ~ (":" port "$") { found = 1 } END { exit !found }'
+}
+
+# Exit 1, recording the failed component, unless 9994 and 22423 both listen.
+function health_check() {
+    if ! port_listening 9994; then
+        echo "frontend failed" >> "${HEALTH_LOG}"
+        exit 1
+    fi
+
+    if ! port_listening 22423; then
+        echo "proxy failed" >> "${HEALTH_LOG}"
+        exit 1
+    fi
+}
+
+health_check
\ No newline at end of file
diff --git a/deploy/k8s/charts/Chart.yaml b/deploy/k8s/charts/Chart.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8eb2ac478fce56bc819ce661e797b30bb0d97441
--- /dev/null
+++ b/deploy/k8s/charts/Chart.yaml
@@ -0,0 +1,23 @@
+apiVersion: v2
+name: YuanRong
+description: A Helm chart for Kubernetes
+
+# A chart can be either an 'application' or a 'library' chart.
+#
+# Application charts are a collection of templates that can be packaged into versioned archives
+# to be deployed.
+#
+# Library charts provide useful utilities or functions for the chart developer. They're included as
+# a dependency of application charts to inject those utilities and functions into the rendering
+# pipeline.
Library charts do not define any templates and therefore cannot be deployed. +type: application + +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. +# Versions are expected to follow Semantic Versioning (https://semver.org/) +version: 202.3.1 + +# This is the version number of the application being deployed. This version number should be +# incremented each time you make changes to the application. Versions are not expected to +# follow Semantic Versioning. They should reflect the version the application is using. +appVersion: 1.16.0 diff --git a/deploy/k8s/charts/templates/frontend/frontend-configmap.yaml b/deploy/k8s/charts/templates/frontend/frontend-configmap.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ee7b3ae62df62e49b8377d5f202fd538c4664f20 --- /dev/null +++ b/deploy/k8s/charts/templates/frontend/frontend-configmap.yaml @@ -0,0 +1,213 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: frontend-config + namespace: default +data: + config.json: |- + { + "instanceNum": 2, + "azID": "5", + "cpu": 5000, + "memory": 10240, + "clusterID": "cluster001", + "clusterName": "ers-cceturbo-caas-gy-arm-az4", + "regionName": "gy", + "defaultTenantId": "244177614494719500", + "alarmConfig": { + "enableAlarm": true, + "minInsStartInterval": 15, + "minInsCheckInterval": 15, + "alarmLogConfig": { + "enableAlarm": true, + "filepath": "/home/snuser/alarms", + "level": "Info", + "tick": 0, + "first": 0, + "thereafter": 0, + "singlesize": 500, + "threshold": 3, + "disable": false + }, + "xiangYunFourConfig": { + "site": "aaa.bbb", + "tenantID": "T014", + "applicationID": "aaa.bbb", + "serviceID": "aaa.bbb" + } + }, + "runtime": { + "systemAuthConfig": { + "enable": true, + "accessKey": "", + "secretKey": "" + }, + "logConfig": { + "filepath": "/opt/yuanrong/logs", + "level": "INFO", + "tick": 0, + "first": 0, + "thereafter": 0, + 
"singlesize": 100, + "threshold": 10, + "disable": false + }, + "enableServerMode": true, + "enableSigaction": false + }, + "nodeSelector": { + "node-role": "edge" + }, + "http": { + "resptimeout": 5, + "workerInstanceReadTimeOut": 600, + "maxRequestBodySize": 6 + }, + "httpsConfig": { + "httpsEnable": false, + "tlsProtocol": "TLSv1.2", + "tlsCiphers": "TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384" + }, + "localAuth": { + "aKey": "", + "sKey": "" + }, + "businessType": 0, + "systemAuth": "", + "diskMonitorEnable": false, + "authenticationEnable": false, + "authConfig": { + "saConfig": { + "serviceId": "aaa.bbb", + "publicKeyUrl": "", + "acceptLeeWayInSeconds": 5, + "clientIdWhiteList": [ + "2010000192" + ] + }, + "clientTokenAuthDomain": "", + "oauthConfig": { + "domain": "", + "clientId": "101489783", + "clientSecret": "", + "dmqConfig": { + "messageUrl": "", + "timeout": 10, + "getMessageSleepMills": 500, + "receiptMessageUrl": "", + "dmqPullMessageRoutineNum": 0, + "dmqRoutineNum": 0, + "topic": "oauth2.atinvalid" + } + }, + "authUserServiceIds": [ + "EyeAdjustment", + "WallPaperAIGC" + ], + "policyAuthConfig": { + "tenant_group": "{\"system\":{\"0\":[\"func0\"]},\"external\":{}}", + "whitelist": "{}", + "policy": { + "allow": "{\"invoke\":{\"external\":{\"external\":[\"white_list\",\"=\"]}}}", + "deny": "{\"tenant_list\":[]}" + } + }, + "crossClusterAuthConfig": { + "enable": false, + "encryptedSecret": { + "" + } + } + }, + "shareKeys": { + "accessKey": "" + }, + "trafficLimitDisable": true, + "functionCapability": 1, + "urpcConfig": { + "enabled": false, + "port": 19996, + "workerNum": 10, + "pollingNum": 0, + "poolSize": 500000 + }, + "smsConfig": { + "accessKey": "", + "secretKey": "", + "masterKeySk": "", + "urpcEnable": false, + "skExpireTime": 600, + "authServiceSaId": "clouddevelopproxy", + "saIdPermissions": 
"{\"media_cloud_enhance_service_Camera\": {\"appId\":\"102190845\", \"projectId\":\"244177614494719500\"}, \"media_cloud_enhance_service_WallPaperAIGC\": {\"appId\":\"102190845\", \"projectId\":\"244177614494719500\"}, \"media_cloud_enhance_service_EyeAdjustment\": {\"appId\":\"102190845\", \"projectId\":\"244177614494719500\"}}", + "delaySessionTTL": 0 + }, + "taskServerAuthConfig": { + "enable": false, + "accessKey": "", + "secretKey": "" + }, + "dataSystemConfig": { + "isAuthEnable": false, + "uploadWriteMode": "NoneL2Cache", + "executeWriteMode": "NoneL2Cache", + "uploadTTLSec": 86400, + "executeTTLSec": 1800, + "timeoutMs": 60000, + "cluster": "" + }, + "dataEncryptConfig": { + "enable": true, + "tenantMap": { + } + }, + "autoScaleConfig": { + "slaQuota": 1000, + "scaleDownTime": 60000, + "burstScaleNum": 1000 + }, + "leaseSpan": 5000, + "functionLimitRate": 5000, + "dockerRootPath": "/var/lib/docker", + "routerEtcd": { + "servers": ["{{ .Values.global.etcdManagement.detcd }}"], + {{- if eq .Values.global.etcdManagement.authType "TLS" }} + "sslEnable": true, + {{- else }} + "sslEnable": false, + {{- end}} + "user":"", + "password":"", + "authType": {{ quote .Values.global.etcdManagement.authType }}, + "useSecret": {{ .Values.global.etcdManagement.useSecret }}, + "secretName": {{ quote .Values.global.etcdManagement.secretName }} + }, + "metaEtcd": { + "servers": ["{{ .Values.global.etcdManagement.detcd }}"], + {{- if eq .Values.global.etcdManagement.authType "TLS" }} + "sslEnable": true, + {{- else }} + "sslEnable": false, + {{- end}} + "user":"", + "password":"", + "authType": {{ quote .Values.global.etcdManagement.authType }}, + "useSecret": {{ .Values.global.etcdManagement.useSecret }}, + "secretName": {{ quote .Values.global.etcdManagement.secretName }} + }, + "tlsConfig": { + "caContent": "${CA_CONTENT}", + "keyContent": "${KEY_CONTENT}", + "certContent": "${CERT_CONTENT}" + }, + "defaultTenantLimitQuota": 1800, + "memoryEvaluatorConfig": { + 
"requestMemoryEvaluator": 2 + }, + "memoryControlConfig": { + "lowerMemoryPercent": 0.6, + "highMemoryPercent": 0.8, + "statefulHighMemoryPercent": 0.85, + "bodyThreshold": 40000, + "memDetectIntervalMs": 20 + } + } \ No newline at end of file diff --git a/deploy/k8s/charts/templates/frontend/frontend-lb.yaml b/deploy/k8s/charts/templates/frontend/frontend-lb.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0f6130dcdc006f843f3a4d6a81a20204ee3b037a --- /dev/null +++ b/deploy/k8s/charts/templates/frontend/frontend-lb.yaml @@ -0,0 +1,20 @@ +apiVersion: v1 +kind: Service +metadata: + annotations: + kubernetes.io/elb.id: {{ quote .Values.global.network.elbId }} + labels: + app: faas-frontend + name: faas-frontend-lb + namespace: {{ .Values.global.namespace }} +spec: + externalTrafficPolicy: Cluster + type: LoadBalancer + selector: + app: faas-frontend + ports: + - name: faas-frontend-lb + protocol: TCP + port: {{ .Values.global.port.faasFrontendNodePort }} + targetPort: {{ .Values.global.port.fassFrontendPort }} + nodePort: {{ .Values.global.port.faasFrontendNodePort }} \ No newline at end of file diff --git a/deploy/k8s/charts/templates/frontend/frontend-service.yaml b/deploy/k8s/charts/templates/frontend/frontend-service.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3de812cd9c8071c0dbd57060a3f12fc2d100d816 --- /dev/null +++ b/deploy/k8s/charts/templates/frontend/frontend-service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + name: faas-frontend + labels: + app: faas-frontend + namespace: {{ .Values.global.namespace }} +spec: + type: ClusterIP + ports: + - port: {{ .Values.global.port.fassFrontendPort }} + protocol: TCP + targetPort: {{ .Values.global.port.fassFrontendPort }} + selector: + app: faas-frontend diff --git a/deploy/k8s/charts/templates/frontend/frontend.yaml b/deploy/k8s/charts/templates/frontend/frontend.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..2dfa5e97a14b9d2a30ac01c8eab05dbba49edfea --- /dev/null +++ b/deploy/k8s/charts/templates/frontend/frontend.yaml @@ -0,0 +1,456 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: faas-frontend + namespace: {{ .Values.global.namespace }} +spec: + replicas: {{ .Values.global.replicas.faasFrontend }} + selector: + matchLabels: + app: faas-frontend + template: + metadata: + labels: + app: faas-frontend + spec: + volumes: + - name: iam-policy-config-volume + configMap: + name: iam-policy-config + items: + - key: iam-policy-config.json + path: iam-policy-config.json + defaultMode: 416 + - name: volume-config + configMap: + name: frontend-config + items: + - key: config.json + path: config.json + defaultMode: 420 + - name: log-volume + hostPath: + path: "{{ .Values.global.log.hostPath.componentLog }}" + type: DirectoryOrCreate + - name: vol-home-uds + hostPath: + path: /home/uds + type: "" + - name: data-volume + emptyDir: + sizeLimit: 5Gi + initContainers: + - name: agent-init-container + image: {{ .Values.global.imageRegistry }}{{ .Values.global.images.agentInit }} + command: ["/bin/sh", "-c", "chown 1002:1002 /opt/yuanrong/logs && chmod 777 /opt/yuanrong/logs"] + resources: + limits: + cpu: {{ .Values.global.resources.functionAgentInit.limits.cpu }} + memory: {{ .Values.global.resources.functionAgentInit.limits.memory }} + requests: + cpu: {{ .Values.global.resources.functionAgentInit.requests.cpu }} + memory: {{ .Values.global.resources.functionAgentInit.requests.memory }} + volumeMounts: + - name: log-volume + mountPath: /opt/yuanrong/logs + subPathExpr: $(RUNTIME_POD_NAME) + env: + - name: RUNTIME_POD_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.name + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + imagePullPolicy: IfNotPresent + securityContext: + capabilities: + add: + - CHOWN + - NET_RAW + - NET_ADMIN + - SYS_ADMIN + - CHOWN + - SETGID + - SETUID + - 
DAC_OVERRIDE + - FOWNER + - FSETID + drop: + - ALL + runAsUser: 0 + containers: + - name: service-container + image: {{ .Values.global.imageRegistry }}{{ .Values.global.images.common }} + command: ["/bin/sh", "-l", "/home/sn/frontend/bin/bootstrap"] + ports: + - containerPort: {{ .Values.global.port.fassFrontendPort }} + protocol: TCP + env: + - name: RUNTIME_HOST_IP + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: status.hostIP + - name: RUNTIME_NODE_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: spec.nodeName + - name: RUNTIME_POD_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.name + - name: RUNTIME_POD_NAMESPACE + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.namespace + - name: RUNTIME_POD_IP + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: status.podIP + - name: RUNTIME_POD_UID + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.uid + - name: X_WISECLOUD_SITE + value: aaa.bbb + - name: X_WISECLOUD_TENANT_ID + value: aaa.bbb + - name: X_WISECLOUD_APPLICATION_ID + value: aaa.bbb + - name: X_WISECLOUD_SERVICE_ID + value: aaa.bbb + - name: X_WISECLOUD_ENVIRONMENT_ID + value: a506e551a0834d1daccd7d93cb202ae4 + - name: RUNTIME_MICROSERVICE_NAME + value: aaa.bbb + - name: RUNTIME_MICROSERVICE_CLUSTER + value: cn-dev-scheduler-green + - name: LANG + value: en_US.UTF-8 + - name: RUNTIME_MICROSERVICE_ENVIRONMENT + value: aaa.bbb + - name: RUNTIME_MICROSERVICE_SERVICE_NAME + value: aaa.bbb + - name: RUNTIME_MICROSERVICE_BUSINESS + value: aaa.bbb + - name: RUNTIME_MICROSERVICE_REGION + value: cn-north-4 + - name: RUNTIME_MICROSERVICE_AZ + value: cn-north-4g + - name: X_WISECLOUD_CLOUDMAP_ID + value: aaa.bbb + - name: NUWA_CLOUDMAP_NAMESPACENAME + value: aaa.bbb + - name: NUWA_CLOUDMAP_SERVERADDR + value: http:// + - name: NUWA_CLOUDMAP_DUAL_SERVERADDR + value: http:// + - name: TZ + value: Asia/Shanghai + - name: WISECLOUD_ACMS_ENDPOINT + value: "" + - name: CURRENT_VERSION + value: green + - name: 
YR_LOG_LEVEL + value: DEBUG + - name: RUNTIME_MGR_PORT + value: "{{ .Values.global.port.runtimeMgrPort }}" + - name: RUNTIME_INIT_PORT + value: "{{ .Values.global.port.runtimeInitPort }}" + - name: RUNTIME_PORT_NUM + value: "{{ .Values.global.port.runtimePortNum }}" + - name: FUNCTION_AGENT_PORT + value: "{{ .Values.global.port.functionAgentPort }}" + - name: METRICS_COLLECTOR_TYPE + value: {{ .Values.global.runtime.metricsCollectorType }} + - name: DISK_USAGE_MONITOR_PATH + value: {{ quote .Values.global.runtime.diskUsageMonitor.path }} + - name: DISK_USAGE_LIMIT + value: "{{ .Values.global.runtime.diskUsageMonitor.limit }}" + - name: DISK_USAGE_MONITOR_DURATION + value: "{{ .Values.global.runtime.diskUsageMonitor.duration }}" + - name: CPU4COMP + value: "5000" + - name: MEM4COMP + value: "10240" + - name: INIT_LABELS + value: '{"resource.owner":"30450000-0000-4000-8069-949f37caf04c"}' + - name: RUNTIME_LOG_DIR + value: /opt/yuanrong/logs + - name: RUNTIME_LOG_LEVEL + value: INFO + - name: IS_NEW_RUNTIME_PATH + value: "true" + - name: JAVA_PRESTART_COUNT + value: "{{ .Values.global.runtime.prestartCount.java8 }}" + - name: JAVA11_PRESTART_COUNT + value: "{{ .Values.global.runtime.prestartCount.java11 }}" + - name: PYTHON36_PRESTART_COUNT + value: "{{ .Values.global.runtime.prestartCount.python36 }}" + - name: PYTHON38_PRESTART_COUNT + value: "{{ .Values.global.runtime.prestartCount.python38 }}" + - name: PYTHON39_PRESTART_COUNT + value: "{{ .Values.global.runtime.prestartCount.python39 }}" + - name: CPP_PRESTART_COUNT + value: "{{ .Values.global.runtime.prestartCount.cpp }}" + - name: JVM_CUSTOM_ARGS + value: "{{ .Values.global.runtime.jvmCustomArgs }}" + - name: RUNTIME_GID + value: "1002" + - name: RUNTIME_UID + value: "1002" + - name: INIT_HANDLER + value: faasfrontend.InitHandler + - name: CALL_HANDLER + value: faasfrontend.CallHandler + - name: CHECKPOINT_HANDLER + value: faasfrontend.CheckpointHandler + - name: SHUTDOWN_HANDLER + value: 
faasfrontend.ShutdownHandler + - name: SIGNAL_HANDLER + value: faasfrontend.SignalHandler + - name: YR_FUNCTION_LIB_PATH + value: /home/sn/frontend/bin + - name: INIT_ARGS_FILE_PATH + value: /home/sn/config/config.json + - name: GLOG_log_dir + value: /opt/yuanrong/logs + - name: LOG_PATTERN + value: |- + { + "separator": " | ", + "placeholders": [ + {"flags": "%Y-%m-%d %H:%M:%S.%e"}, + {"flags": "%l"}, + {"flags": "%s:%#"}, + {"env": "POD_NAME"}, + {"env": "CLUSTER_ID"}, + {"flags": ""} + ] + } + - name: IS_PROTOMSG_TO_RUNTIME + value: "{{ .Values.global.runtime.isProtoMsgToRuntime }}" + - name: MAX_PRIORITY + value: "{{ .Values.global.common.prioritySchedule.maxPriority }}" + - name: CLUSTER_NAME + value: "cn-dev" + - name: FUNCTION_PROXY_PORT + value: "{{ .Values.global.port.functionProxyPort }}" + - name: FUNCTION_PROXY_GRPC_PORT + value: "{{ .Values.global.port.functionProxyGrpcPort }}" + - name: META_STORE_ADDRESS + {{- if .Values.global.metaStore.enable }} + value: "{{ .Values.global.metaStore.address }}" + {{- else }} + value: "{{ .Values.global.etcdManagement.detcd }}" + {{- end }} + - name: IAM_META_STORE_ADDRESS + {{- if .Values.global.metaStore.enable }} + value: "{{ .Values.global.metaStore.address }}" + {{- else }} + value: "{{ .Values.global.etcdManagement.metcd }}" + {{- end }} + - name: SslTargetName + value: "aaa.bbb" + - name: DS_WORKER_PORT + value: "{{ .Values.global.port.worker }}" + - name: ENABLE_TRACE + value: "{{ .Values.global.observer.enableTrace }}" + - name: LOG_PATH + value: "/opt/yuanrong/logs" + - name: LOG_LEVEL + value: "DEBUG" + - name: LOG_ROLLING_MAXSIZE + value: "1000" + - name: LOG_ROLLING_MAXFILES + value: "3" + - name: LOG_ASYNC_LOGBUFSECS + value: "30" + - name: LOG_ASYNC_MAXQUEUESIZE + value: "51200" + - name: LOG_ASYNC_THREADCOUNT + value: "1" + - name: LOG_ALSOLOGTOSTDERR + value: "false" + - name: ENABLE_METRICS + value: "{{ .Values.global.observer.metrics.enable }}" + - name: STS_CONFIG + value: "{}" + - name: 
METRICS_CONFIG_FILE + value: '{{ quote .Values.global.observer.metrics.metricsConfigFile }}' + - name: MEM_THRESHOLD_PERCENTAGE + value: "90" + - name: ResourcePath + value: "/home/wisfunction/resource" + - name: RUNTIME_HEARTBEAT_ENABLE + value: "true" + - name: RUNTIME_MAX_HEARTBEAT_TIMEOUT_TIMES + value: "{{ .Values.global.runtime.runtimeMaxHeartbeatTimeoutTimes }}" + - name: MAX_STORAGE_OPERATE_RETRY_TIMES + value: "60" + - name: RUNTIME_HEARTBEAT_TIMEOUT_MS + value: "{{ .Values.global.runtime.runtimeHeartbeatTimeoutMS }}" + - name: RUNTIME_RECOVER_ENABLE + value: "false" + - name: DRIVER_ENABLE + value: "{{ .Values.global.common.driverEnable }}" + - name: STATE_STORAGE_TYPE + value: "{{ .Values.global.common.stateStorageType }}" + - name: ELECTION_MODE + value: "{{ .Values.global.common.electionMode }}" + - name: MAX_GRPC_SIZE + value: "{{ .Values.global.common.maxGrpcSize }}" + - name: DS_HEALTH_CHECK_INTERVAL + value: "1000" + - name: MAX_DS_HEALTH_CHECK_TIMES + value: "12" + - name: DS_HEALTH_CHECK_PATH + value: "/home/sn/datasystem/health" + - name: SERVICES_PATH + value: "/home/sn/service-config/services.yaml" + - name: SYSTEM_TIMEOUT + value: "{{ .Values.global.common.systemTimeout }}" + - name: SERVICE_TTL + value: "60000" + - name: RUNTIME_SHUTDOWN_TIMEOUT_SECONDS + value: "{{ .Values.global.runtime.runtimeShutdownTimeoutSeconds }}" + - name: CACHE_STORAGE_AUTH_ENABLE + value: "{{ .Values.global.dataSystem.authEnabled }}" + - name: SSL_ENABLE + value: "{{ .Values.global.mutualSSLConfig.sslEnable }}" + - name: DECRYPT_ALGORITHM + value: "{{ .Values.global.common.decryptAlgorithm }}" + - name: MIN_INSTANCE_CPU_SIZE + value: "{{ .Values.global.runtime.minInstanceCpuSize }}" + - name: MIN_INSTANCE_MEMORY_SIZE + value: "{{ .Values.global.runtime.minInstanceMemorySize }}" + - name: MAX_INSTANCE_CPU_SIZE + value: "{{ .Values.global.runtime.maxInstanceCpuSize }}" + - name: MAX_INSTANCE_MEMORY_SIZE + value: "{{ .Values.global.runtime.maxInstanceMemorySize }}" + 
- name: ENABLE_SERVER_MODE + value: "{{ .Values.global.runtime.serverModeEnable }}" + - name: LOG_COMPRESS_ENABLE + value: "true" + - name: ENABLE_PRINT_RESOURCE_VIEW + value: "{{ .Values.global.common.enablePrintResourceView }}" + - name: PROMETHEUS_PUSH_GATEWAY_IP + value: "{{ .Values.global.observer.proGatewayIP }}" + - name: PROMETHEUS_PUSH_GATEWAY_PORT + value: "{{ .Values.global.observer.gatewayPort }}" + - name: CLUSTER_ID + value: {{ quote .Values.global.clusterId }} + - name: INVOKE_LIMITATION_ENABLE + value: "{{ .Values.global.rateLimit.invokeRateLimit.enable }}" + - name: LOW_MEMORY_THRESHOLD + value: "{{ .Values.global.rateLimit.invokeRateLimit.lowThreshold }}" + - name: HIGH_MEMORY_THRESHOLD + value: "{{ .Values.global.rateLimit.invokeRateLimit.highThreshold }}" + - name: MESSAGE_SIZE_THRESHOLD + value: "{{ .Values.global.rateLimit.invokeRateLimit.msgSize }}" + - name: RUNTIME_DS_AUTH_ENABLE + value: "{{ .Values.global.runtime.dataSystem.authEnable }}" + - name: RUNTIME_DS_ENCRYPT_ENABLE + value: {{ quote .Values.global.runtime.dataSystem.encryptEnable }} + - name: RUNTIME_DS_CLIENT_PUBLICKEY + value: "" + - name: RUNTIME_DS_CLIENT_PRIVATEKEY + value: "" + - name: RUNTIME_DS_SERVER_PUBLICKEY + value: "" + - name: ETCD_AUTH_TYPE + value: "{{ .Values.global.etcdManagement.authType }}" + - name: SCC_ENABLE + value: "{{ .Values.global.scc.enable }}" + - name: IAM_POLICY_CONFIG_PATH + value: "/home/sn/iam-config/iam-policy-config.json" + - name: K8S_BASE_URL + value: "{{ .Values.global.kubernetes.kubeApiBaseUrl }}" + - name: K8S_NAMESPACE + value: "default" + - name: MAX_TOLERATE_META_STORE_FAILED_TIMES + value: "{{ .Values.global.etcdManagement.maxTolerateMetaStoreFailedTimes }}" + - name: META_HEALTH_CHECK_INTERVAL_MS + value: "{{ .Values.global.etcdManagement.metaStoreCheckHealthIntervalMs }}" + - name: META_HEALTH_CHECK_TIMEOUTS + value: "{{ .Values.global.etcdManagement.metaStoreTimeoutMs }}" + - name: ENABLE_IAM + value: "{{ .Values.global.iam.enable 
}}" + - name: IAM_CREDENTIAL_TYPE + value: "{{ .Values.global.iam.credentialType }}" + - name: IAM_BASE_PATH + value: "http://iam-adaptor.{{ .Values.global.namespace }}.svc.cluster.local:{{ .Values.global.port.iamAdapterHttpPort }}" + - name: SYSTEM_AUTH_MODE + value: "{{ .Values.global.common.systemAuthMode }}" + - name: ETCD_TARGET_NAME_OVERRIDE + value: "aaa.bbb" + - name: CACHE_STORAGE_AUTH_TYPE + value: "" + - name: FUNCTION_META_PATH + value: "/home/sn/function-metas" + - name: RESOURCE_PATH + value: "/home/sn/resource" + - name: LIB_PATH + value: "{{ .Values.global.runtime.libPath }}" + resources: + limits: + cpu: {{ .Values.global.resources.faasFrontend.limits.cpu }} + memory: {{ .Values.global.resources.faasFrontend.limits.memory }} + requests: + cpu: {{ .Values.global.resources.faasFrontend.requests.cpu }} + memory: {{ .Values.global.resources.faasFrontend.requests.memory }} + volumeMounts: + - name: volume-config + mountPath: /home/sn/config + - name: vol-home-uds + mountPath: /home/uds + - name: log-volume + mountPath: /opt/yuanrong/logs + subPathExpr: $(RUNTIME_POD_NAME) + - name: data-volume + mountPath: /opt/yuanrong/data + - name: iam-policy-config-volume + mountPath: /home/sn/iam-config + livenessProbe: + tcpSocket: + port: {{ .Values.global.port.fassFrontendPort }} + initialDelaySeconds: 6 + timeoutSeconds: 5 + periodSeconds: 5 + successThreshold: 1 + failureThreshold: 3 + readinessProbe: + tcpSocket: + port: {{ .Values.global.port.fassFrontendPort }} + initialDelaySeconds: 6 + timeoutSeconds: 5 + periodSeconds: 5 + successThreshold: 1 + failureThreshold: 10 + lifecycle: + preStop: + exec: + command: + - /bin/sh + - -c + - kill -15 $(ps aux | grep -i function_proxy | awk '{print $2}') 2>/dev/null + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + imagePullPolicy: IfNotPresent + securityContext: + runAsUser: 1002 + runAsNonRoot: true + restartPolicy: Always + terminationGracePeriodSeconds: 300 + dnsPolicy: 
ClusterFirst + securityContext: + fsGroup: {{ .Values.global.runtime.fsGroup }} + imagePullSecrets: + - name: default-secret + revisionHistoryLimit: 5 \ No newline at end of file diff --git a/deploy/k8s/charts/templates/function_proxy/curve-secret.yaml b/deploy/k8s/charts/templates/function_proxy/curve-secret.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d73b5bd46dc0b000587549bdcdd7a12326991f19 --- /dev/null +++ b/deploy/k8s/charts/templates/function_proxy/curve-secret.yaml @@ -0,0 +1,10 @@ +apiVersion: v1 +kind: Secret +metadata: + namespace: {{ .Values.global.namespace }} + name: curve-secret +type: Opaque +data: + client.key_secret: "{{ .Values.global.curve.clientPrivateKey }}" + client.key: "{{ .Values.global.curve.clientPublicKey }}" + worker.key: "{{ .Values.global.curve.workerPublicKey }}" \ No newline at end of file diff --git a/deploy/k8s/charts/templates/function_proxy/function-proxy-configmap.yaml b/deploy/k8s/charts/templates/function_proxy/function-proxy-configmap.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2b09d6e03d085f6e350050c0cfd282a2f1c2b356 --- /dev/null +++ b/deploy/k8s/charts/templates/function_proxy/function-proxy-configmap.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +metadata: + name: function-proxy-config + namespace: {{ .Values.global.namespace }} +kind: ConfigMap +data: + redisConfig.json: |- + { + "serverAddr": "{{ .Values.global.redisManagement.serverAddr }}", + "password": "{{ .Values.global.redisManagement.password }}", + "port": {{ .Values.global.redisManagement.port }}, + "connectTimeout": {{ .Values.global.redisManagement.connectTimeout }}, + "commandTimeout": {{ .Values.global.redisManagement.commandTimeout }}, + "needAuth": {{ .Values.global.redisManagement.needAuth }} + } diff --git a/deploy/k8s/charts/templates/function_proxy/function-proxy-daemonset.yaml b/deploy/k8s/charts/templates/function_proxy/function-proxy-daemonset.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..62c992fd2cac078fea1edf9c477390c4c67192d8 --- /dev/null +++ b/deploy/k8s/charts/templates/function_proxy/function-proxy-daemonset.yaml @@ -0,0 +1,413 @@ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + labels: + app: function-proxy + name: function-proxy + namespace: {{ .Values.global.namespace }} +spec: + selector: + matchLabels: + app: function-proxy + updateStrategy: + rollingUpdate: + maxUnavailable: 1 + type: RollingUpdate + template: + metadata: + labels: + app: function-proxy + spec: + dnsPolicy: ClusterFirstWithHostNet + dnsConfig: + options: + - name: single-request-reopen + hostNetwork: true + restartPolicy: Always + imagePullSecrets: + - name: default-secret + securityContext: + fsGroup: 1002 + serviceAccountName: function-proxy + {{- if .Values.global.enableNonPreemptive }} + priorityClassName: proxy-custom-priority-nonpreempting-{{ .Values.global.namespace }} + {{- else }} + priorityClassName: system-cluster-critical + {{- end }} + {{- if .Values.global.functionProxy.nodeSelector }} + {{- with .Values.global.functionProxy.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + {{- if .Values.global.functionProxy.nodeAffinity }} + {{- with .Values.global.functionProxy.nodeAffinity }} + affinity: + nodeAffinity: + {{- toYaml . | nindent 10 }} + {{- end }} + {{- end }} + {{- if .Values.global.controlPlane.tolerations }} + {{- with .Values.global.controlPlane.tolerations }} + tolerations: + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} + volumes: + - name: local-time + hostPath: + path: /etc/localtime + - emptyDir: { } + name: metrics-parent-dir + - emptyDir: { } + name: metrics-dir + - name: varlog + hostPath: + path: "{{ .Values.global.log.hostPath.componentLog }}" + type: DirectoryOrCreate + - configMap: + defaultMode: 0440 + items: + - key: redisConfig.json + path: conf.json + name: function-proxy-config + name: function-proxy-config + {{- if .Values.global.iam.enable }} + - configMap: + defaultMode: 0440 + items: + - key: iam-policy-config.json + path: iam-policy-config.json + name: iam-policy-config + name: iam-policy-config + {{- end }} + - name: log-volume + hostPath: + path: "{{ .Values.global.log.hostPath.componentLog }}" + type: DirectoryOrCreate + initContainers: + - name: function-proxy-init + image: {{ .Values.global.imageRegistry }}{{ .Values.global.images.agentInit }} + command: [ "/bin/sh", "-c", "chown 1002:1002 /opt/yuanrong/logs && chmod 777 /opt/yuanrong/logs" ] + env: + - name: RUNTIME_POD_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.name + resources: + limits: + cpu: {{ .Values.global.resources.functionAgentInit.limits.cpu }} + memory: {{ .Values.global.resources.functionAgentInit.limits.memory }} + requests: + cpu: {{ .Values.global.resources.functionAgentInit.requests.cpu }} + memory: {{ .Values.global.resources.functionAgentInit.requests.memory }} + volumeMounts: + - name: log-volume + mountPath: /opt/yuanrong/logs + subPathExpr: $(RUNTIME_POD_NAME) + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + imagePullPolicy: IfNotPresent + securityContext: + capabilities: + add: + - CHOWN + - NET_RAW + - NET_ADMIN + - SYS_ADMIN + - CHOWN + - SETGID + - SETUID + - DAC_OVERRIDE + - FOWNER + - FSETID + drop: + - ALL + runAsUser: 0 + containers: + - name: function-proxy + image: {{ .Values.global.imageRegistry }}{{ .Values.global.images.common }} + imagePullPolicy: IfNotPresent + command: 
["/bin/sh", "-l", "/home/sn/function-proxy/bin/bootstrap"] + env: + - name: NODE_ID + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: spec.nodeName + - name: HOST_IP + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: status.hostIP + - name: POD_IP + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: status.podIP + - name: POD_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.name + - name: POD_NAMESPACE + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.namespace + - name: MEM_LIMIT + valueFrom: + resourceFieldRef: + containerName: function-proxy + resource: limits.memory + - name: FUNCTION_PROXY_PORT + value: "{{ .Values.global.port.functionProxyPort }}" + - name: FUNCTION_PROXY_GRPC_PORT + value: "{{ .Values.global.port.functionProxyGrpcPort }}" + - name: META_STORE_ADDRESS + {{- if .Values.global.metaStore.enable }} + value: "{{ .Values.global.metaStore.address }}" + {{- else }} + value: "{{ .Values.global.etcdManagement.detcd }}" + {{- end }} + - name: IAM_META_STORE_ADDRESS + {{- if .Values.global.metaStore.enable }} + value: "{{ .Values.global.metaStore.address }}" + {{- else }} + value: "{{ .Values.global.etcdManagement.metcd }}" + {{- end }} + - name: CLUSTER_ID + value: {{ quote .Values.global.clusterId }} + - name: DS_WORKER_PORT + value: "{{ .Values.global.port.worker }}" + - name: CACHE_STORAGE_AUTH_AK + value: {{ quote .Values.global.dataSystem.ak}} + - name: CACHE_STORAGE_AUTH_SK + value: {{ quote .Values.global.dataSystem.sk}} + - name: CACHE_STORAGE_AUTH_TYPE + value: {{ quote .Values.global.dataSystem.authType}} + - name: PROMETHEUS_PUSH_GATEWAY_IP + value: {{ quote .Values.global.observer.proGatewayIP }} + - name: PROMETHEUS_PUSH_GATEWAY_PORT + value: {{ quote .Values.global.observer.gatewayPort }} + - name: LOG_PATH + value: {{ quote .Values.global.log.functionSystem.path }} # quoted so the env value always renders as a YAML string + - name: LOG_LEVEL + value: {{ quote .Values.global.log.functionSystem.level }} # quoted so the env value always renders as a YAML string + - name: LOG_PATTERN + value: {{ quote
.Values.global.log.functionSystem.pattern }} + - name: LOG_COMPRESS_ENABLE + value: {{ quote .Values.global.log.functionSystem.compress }} + - name: LOG_ROLLING_MAXSIZE + value: {{ quote .Values.global.log.functionSystem.rolling.maxSize }} + - name: LOG_ROLLING_MAXFILES + value: {{ quote .Values.global.log.functionSystem.rolling.maxfiles }} + - name: LOG_ASYNC_LOGBUFSECS + value: {{ quote .Values.global.log.functionSystem.async.logBufSecs }} + - name: LOG_ASYNC_MAXQUEUESIZE + value: "51200" + - name: LOG_ASYNC_THREADCOUNT + value: "1" + - name: LOG_ALSOLOGTOSTDERR + value: "false" + - name: RUNTIME_INSTANCE_DEBUG_ENABLE + value: "false" + - name: ENABLE_METRICS + value: {{ quote .Values.global.observer.metrics.enable }} + - name: METRICS_CONFIG + value: {{ quote .Values.global.observer.metrics.metricsConfig }} + - name: METRICS_CONFIG_FILE + value: {{ quote .Values.global.observer.metrics.metricsConfigFile }} + - name: ENABLE_TRACE + value: {{ quote .Values.global.observer.trace.enable }} + - name: TRACE_CONFIG + value: {{ quote .Values.global.observer.trace.traceConfig }} + - name: MEM_THRESHOLD_PERCENTAGE + value: "90" + - name: HOME + value: /home/sn + - name: ResourcePath + value: /home/sn/resource + - name: RUNTIME_HEARTBEAT_ENABLE + value: "true" + - name: RUNTIME_MAX_HEARTBEAT_TIMEOUT_TIMES + value: {{ quote .Values.global.runtime.runtimeMaxHeartbeatTimeoutTimes }} + - name: RUNTIME_HEARTBEAT_TIMEOUT_MS + value: {{ quote .Values.global.runtime.runtimeHeartbeatTimeoutMS }} + - name: DRIVER_ENABLE + value: {{ quote .Values.global.common.driverEnable }} + - name: STATE_STORAGE_TYPE + value: {{ quote .Values.global.common.stateStorageType }} + - name: ELECTION_MODE + value: {{ quote .Values.global.common.electionMode }} + - name: MAX_GRPC_SIZE + value: {{ quote .Values.global.common.maxGrpcSize }} + - name: SERVICES_PATH + value: /home/sn/service-config/services.yaml + - name: ENABLE_IAM + value: "{{ .Values.global.iam.enable }}" + - name: ENABLE_SERVER_MODE + 
value: "{{ .Values.global.runtime.serverModeEnable }}" + - name: ENABLE_PRINT_RESOURCE_VIEW + value: {{ quote .Values.global.common.enablePrintResourceView }} + - name: SYSTEM_TIMEOUT + value: {{ quote .Values.global.common.systemTimeout }} + - name: RUNTIME_SHUTDOWN_TIMEOUT_SECONDS + value: {{ quote .Values.global.runtime.runtimeShutdownTimeoutSeconds }} + - name: DECRYPT_ALGORITHM + value: {{ quote .Values.global.common.decryptAlgorithm }} + - name: CACHE_STORAGE_AUTH_ENABLE + value: {{ quote .Values.global.dataSystem.authEnabled }} + - name: MIN_INSTANCE_CPU_SIZE + value: {{ quote .Values.global.runtime.minInstanceCpuSize }} + - name: MIN_INSTANCE_MEMORY_SIZE + value: {{ quote .Values.global.runtime.minInstanceMemorySize }} + - name: MAX_INSTANCE_CPU_SIZE + value: {{ quote .Values.global.runtime.maxInstanceCpuSize }} + - name: MAX_INSTANCE_MEMORY_SIZE + value: {{ quote .Values.global.runtime.maxInstanceMemorySize }} + - name: SSL_ROOT_FILE + value: "ca.crt" + - name: SSL_CERT_FILE + value: "module.crt" + - name: SSL_KEY_FILE + value: "module.key" + - name: SSL_PWD_FILE + value: "cert_pwd" + - name: SCC_ENABLE + value: {{ quote .Values.global.scc.enable }} + - name: SCC_ALGORITHM + value: {{ quote .Values.global.scc.algorithm }} + - name: SCC_PRIMARY_FILE + value: "primary.ks" + - name: SCC_STANDBY_FILE + value: "standby.ks" + - name: ETCD_AUTH_TYPE + value: {{ quote .Values.global.etcdManagement.authType }} + - name: ETCD_ROOT_CA_FILE + value: "ca.crt" + - name: ETCD_CERT_FILE + value: "client.crt" + - name: ETCD_KEY_FILE + value: "client.key" + - name: ETCD_PWD_FILE + value: "passphrase" + - name: ETCD_TARGET_NAME_OVERRIDE + value: {{ quote .Values.global.etcdManagement.targetNameOverride }} + - name: MAX_TOLERATE_META_STORE_FAILED_TIMES + value: {{ quote .Values.global.etcdManagement.maxTolerateMetaStoreFailedTimes }} + - name: META_HEALTH_CHECK_INTERVAL_MS + value: {{ quote .Values.global.etcdManagement.metaStoreCheckHealthIntervalMs }} + - name: 
META_HEALTH_CHECK_TIMEOUTS + value: {{ quote .Values.global.etcdManagement.metaStoreTimeoutMs }} + - name: INVOKE_LIMITATION_ENABLE + value: {{ quote .Values.global.rateLimit.invokeRateLimit.enable }} + - name: LOW_MEMORY_THRESHOLD + value: {{ quote .Values.global.rateLimit.invokeRateLimit.lowThreshold }} + - name: HIGH_MEMORY_THRESHOLD + value: {{ quote .Values.global.rateLimit.invokeRateLimit.highThreshold }} + - name: MESSAGE_SIZE_THRESHOLD + value: {{ quote .Values.global.rateLimit.invokeRateLimit.msgSize }} + - name: IAM_BASE_PATH + value: "http://iam-adaptor.{{ .Values.global.namespace }}.svc.cluster.local:{{ .Values.global.port.iamAdapterHttpPort }}" + - name: IAM_POLICY_CONFIG_PATH + value: /home/sn/iam-config/iam-policy-config.json + - name: IAM_CREDENTIAL_TYPE + value: {{ quote .Values.global.iam.credentialType }} + - name: REDIS_CONF_PATH + value: /home/sn/conf/conf.json + - name: RUNTIME_DS_AUTH_ENABLE + value: {{ quote .Values.global.runtime.dataSystem.authEnable }} + - name: RUNTIME_DS_ENCRYPT_ENABLE + value: {{ quote .Values.global.runtime.dataSystem.encryptEnable }} + - name: STS_CONFIG + value: "{}" + - name: ENABLE_PRINT_PERF + value: "false" + - name: K8S_BASE_URL + value: {{ quote .Values.global.kubernetes.kubeApiBaseUrl }} + - name: K8S_NAMESPACE + value: {{ quote .Values.global.namespace }} + - name: MAX_PRIORITY + value: {{ quote .Values.global.common.prioritySchedule.maxPriority }} + - name: ENABLE_PREEMPTION + value: {{ quote .Values.global.common.prioritySchedule.enablePreemption }} + - name: SYSTEM_AUTH_MODE + value: {{ quote .Values.global.common.systemAuthMode }} + - name: SCC_BASE_PATH + value: /home/sn/resource/scc + - name: SCC_LOG_PATH + value: /home/sn/log + - name: FUNCTION_META_PATH + value: /home/sn/function-metas + - name: RESOURCE_PATH + value: /home/sn/resource + - name: ETCD_SSL_BASE_PATH + value: /home/sn/resource/etcd + - name: CURVE_KEY_PATH + value: /home/sn/curve + - name: ENABLE_META_STORE + value: {{ quote 
.Values.global.metaStore.enable }} + - name: META_STORE_MODE + value: {{ quote .Values.global.metaStore.mode }} + - name: ETCD_CLUSTER_ADDRESS + value: {{ quote .Values.global.etcdManagement.detcd }} + - name: SCHEDULE_PLUGINS + value: {{ quote .Values.global.common.schedulePlugins.local }} + ports: + - containerPort: {{ .Values.global.port.functionProxyPort }} + hostPort: {{ .Values.global.port.functionProxyPort }} + protocol: TCP + {{- if or (eq .Values.global.common.driverEnable true) (eq .Values.global.runtime.serverModeEnable true) }} + - containerPort: {{ .Values.global.port.functionProxyGrpcPort }} + hostPort: {{ .Values.global.port.functionProxyGrpcPort }} + protocol: TCP + {{- end}} + livenessProbe: + failureThreshold: 3 + tcpSocket: + port: {{ .Values.global.port.functionProxyPort }} + initialDelaySeconds: 1 + periodSeconds: 5 + successThreshold: 1 + timeoutSeconds: 5 + resources: + limits: + cpu: {{ .Values.global.resources.functionProxy.limits.cpu }} + memory: {{ .Values.global.resources.functionProxy.limits.memory }} + requests: + cpu: {{ .Values.global.resources.functionProxy.requests.cpu }} + memory: {{ .Values.global.resources.functionProxy.requests.memory }} + securityContext: + capabilities: {} + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + volumeMounts: + - mountPath: {{ .Values.global.log.functionSystem.path }} + name: varlog + {{- if .Values.global.log.hostPath.enable }} + subPathExpr: $(POD_NAME) + {{- end}} + - mountPath: /etc/localtime + name: local-time + {{- if .Values.global.scc.enable }} + - name: scc-ks + mountPath: /home/sn/resource/scc + readOnly: true + {{- end }} + {{- if .Values.global.etcdManagement.useSecret }} + - name: etcd-client-certs + mountPath: /home/sn/resource/etcd + readOnly: true + {{- end }} + {{- if .Values.global.iam.enable }} + - name: iam-policy-config + mountPath: /home/sn/iam-config + readOnly: true + {{- end }} + - name: function-proxy-config + mountPath: /home/sn/conf + 
readOnly: true + - mountPath: /home/sn/metrics + name: metrics-parent-dir diff --git a/deploy/k8s/charts/templates/function_proxy/proxy-custom-priority.yaml b/deploy/k8s/charts/templates/function_proxy/proxy-custom-priority.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7de6e3624f7e5df8d44656c4ba138cc538327588 --- /dev/null +++ b/deploy/k8s/charts/templates/function_proxy/proxy-custom-priority.yaml @@ -0,0 +1,11 @@ +{{- if .Values.global.enableNonPreemptive }} +apiVersion: scheduling.k8s.io/v1 +kind: PriorityClass +metadata: + name: proxy-custom-priority-nonpreempting-{{ .Values.global.namespace }} + namespace: {{ .Values.global.namespace }} +value: 1000000 +preemptionPolicy: Never +globalDefault: false +description: "This priority class will not cause other pods to be preempted." +{{- end }} \ No newline at end of file diff --git a/deploy/k8s/charts/templates/function_proxy/role.yaml b/deploy/k8s/charts/templates/function_proxy/role.yaml new file mode 100644 index 0000000000000000000000000000000000000000..29003169321ddcfd3261c6fffae9bd18fde41350 --- /dev/null +++ b/deploy/k8s/charts/templates/function_proxy/role.yaml @@ -0,0 +1,15 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: function-proxy-{{ .Values.global.namespace }} + namespace: {{ .Values.global.namespace }} +rules: + - apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - get + - list + - create + - update \ No newline at end of file diff --git a/deploy/k8s/charts/templates/function_proxy/role_binding.yaml b/deploy/k8s/charts/templates/function_proxy/role_binding.yaml new file mode 100644 index 0000000000000000000000000000000000000000..19323aceb71271a8bca7fb9dcac78a8418eb74f8 --- /dev/null +++ b/deploy/k8s/charts/templates/function_proxy/role_binding.yaml @@ -0,0 +1,13 @@ +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: function-proxy-{{ .Values.global.namespace }} + namespace: {{ 
.Values.global.namespace }} +subjects: +- kind: ServiceAccount + name: function-proxy + namespace: {{ quote .Values.global.namespace }} +roleRef: + kind: ClusterRole + name: function-proxy-{{ .Values.global.namespace }} + apiGroup: rbac.authorization.k8s.io diff --git a/deploy/k8s/charts/templates/function_proxy/service-account.yaml b/deploy/k8s/charts/templates/function_proxy/service-account.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fbef0dbf49545a1117646e20918e9353a90932c0 --- /dev/null +++ b/deploy/k8s/charts/templates/function_proxy/service-account.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: function-proxy + namespace: {{ .Values.global.namespace }} \ No newline at end of file diff --git a/deploy/k8s/charts/templates/iam/iam-policy-config.yaml b/deploy/k8s/charts/templates/iam/iam-policy-config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5e0c875fe4af438f7df72fc0356184e4a2e65efa --- /dev/null +++ b/deploy/k8s/charts/templates/iam/iam-policy-config.yaml @@ -0,0 +1,57 @@ +apiVersion: v1 +metadata: + name: iam-policy-config + namespace: {{ .Values.global.namespace }} +kind: ConfigMap +data: + iam-policy-config.json: |- + { + "tenant_group": { + "system": { + "0": ["0/0-system-faasfrontend/$latest","0/0-system-faasscheduler/$latest", + "0/0-system-faascontroller/$latest","0/0-system-faasmanager/$latest"] + }, + "external": { + } + }, + + "white_list": { + }, + + "policy": { + "allow": { + "create": { + "system": { + "external": [ "*" ], + "system": [ "*" ] + }, + "external": { + "external": [ "*" ] + } + }, + "invoke": { + "system": { + "system": [ "*" ], + "external": [ "*" ] + }, + "external": { + "external": [ "=" ], + "system": [ "*" ] + } + }, + "kill": { + "system": { + "system": [ "*" ], + "external": [ "*" ] + }, + "external": { + "external": [ "=" ] + } + } + }, + "deny": { + "tenant_list": [], + "user_list": [] + } + } + } diff --git 
a/deploy/k8s/charts/templates/iam/iam-service.yaml b/deploy/k8s/charts/templates/iam/iam-service.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7c7fdd3093199ff94db415c36a03283d9906287e --- /dev/null +++ b/deploy/k8s/charts/templates/iam/iam-service.yaml @@ -0,0 +1,17 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + app: iam-adaptor + name: iam-adaptor + namespace: {{ .Values.global.namespace }} +spec: + ports: + - port: {{ .Values.global.port.iamAdapterHttpPort }} + protocol: TCP + targetPort: {{ .Values.global.port.iamAdapterHttpPort }} + selector: + app: iam-adaptor + clusterIP: None + sessionAffinity: None + type: ClusterIP diff --git a/deploy/k8s/charts/templates/iam/iam.yaml b/deploy/k8s/charts/templates/iam/iam.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b13c2ca6a29e38f6a81f9e0f4d9817298980a5b9 --- /dev/null +++ b/deploy/k8s/charts/templates/iam/iam.yaml @@ -0,0 +1,271 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: iam-adaptor + namespace: {{ .Values.global.namespace }} +spec: + replicas: {{ .Values.global.replicas.iamAdaptor }} + selector: + matchLabels: + app: iam-adaptor + template: + metadata: + labels: + app: iam-adaptor + spec: + volumes: + - name: log-volume + hostPath: + path: "{{ .Values.global.log.hostPath.componentLog }}" + type: DirectoryOrCreate + - name: data-volume + emptyDir: + sizeLimit: 5Gi + initContainers: + - name: agent-init-container + image: {{ .Values.global.imageRegistry }}{{ .Values.global.images.agentInit }} + command: ["/bin/sh", "-c", "chown 1002:1002 /opt/yuanrong/logs && chmod 777 /opt/yuanrong/logs"] + resources: + limits: + cpu: {{ .Values.global.resources.functionAgentInit.limits.cpu }} + memory: {{ .Values.global.resources.functionAgentInit.limits.memory }} + requests: + cpu: {{ .Values.global.resources.functionAgentInit.requests.cpu }} + memory: {{ .Values.global.resources.functionAgentInit.requests.memory }} + volumeMounts: + - name: log-volume 
+ mountPath: /opt/yuanrong/logs + subPathExpr: $(RUNTIME_POD_NAME) + env: + - name: RUNTIME_POD_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.name + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + imagePullPolicy: IfNotPresent + securityContext: + capabilities: + add: + - CHOWN + - NET_RAW + - NET_ADMIN + - SYS_ADMIN + - CHOWN + - SETGID + - SETUID + - DAC_OVERRIDE + - FOWNER + - FSETID + drop: + - ALL + runAsUser: 0 + containers: + - name: service-container + image: {{ .Values.global.imageRegistry }}{{ .Values.global.images.common }} + command: ["/bin/sh", "-l", "/home/sn/iam-adaptor/bin/bootstrap"] + ports: + - containerPort: {{ .Values.global.port.iamAdapterHttpPort }} + protocol: TCP + env: + - name: RUNTIME_HOST_IP + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: status.hostIP + - name: RUNTIME_NODE_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: spec.nodeName + - name: RUNTIME_POD_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.name + - name: RUNTIME_POD_NAMESPACE + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.namespace + - name: RUNTIME_POD_IP + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: status.podIP + - name: RUNTIME_POD_UID + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.uid + - name: X_WISECLOUD_SITE + value: aaa.bbb + - name: X_WISECLOUD_TENANT_ID + value: aaa.bbb + - name: X_WISECLOUD_APPLICATION_ID + value: aaa.bbb + - name: X_WISECLOUD_SERVICE_ID + value: aaa.bbb + - name: X_WISECLOUD_ENVIRONMENT_ID + value: a506e551a0834d1daccd7d93cb202ae4 + - name: RUNTIME_MICROSERVICE_NAME + value: aaa.bbb + - name: RUNTIME_MICROSERVICE_CLUSTER + value: cn-dev-iam-adaptor + - name: LANG + value: en_US.UTF-8 + - name: RUNTIME_MICROSERVICE_ENVIRONMENT + value: aaa.bbb + - name: RUNTIME_MICROSERVICE_SERVICE_NAME + value: aaa.bbb + - name: RUNTIME_MICROSERVICE_BUSINESS + value: aaa.bbb + - name: RUNTIME_MICROSERVICE_REGION + value: 
cn-north-4 + - name: RUNTIME_MICROSERVICE_AZ + value: cn-north-4g + - name: X_WISECLOUD_CLOUDMAP_ID + value: aaa.bbb + - name: NUWA_CLOUDMAP_NAMESPACENAME + value: aaa.bbb + - name: NUWA_CLOUDMAP_SERVERADDR + value: + - name: NUWA_CLOUDMAP_DUAL_SERVERADDR + value: + - name: TZ + value: Asia/Shanghai + - name: WISECLOUD_ACMS_ENDPOINT + value: + - name: META_STORE_ADDRESS + {{- if .Values.global.metaStore.enable }} + value: "{{ .Values.global.metaStore.address }}" + {{- else }} + value: "{{ .Values.global.etcdManagement.metcd }}" + {{- end }} + - name: LOG_PATH + value: "/opt/yuanrong/logs" + - name: LOG_LEVEL + value: "DEBUG" + - name: CLUSTER_ID + value: {{ quote .Values.global.clusterId }} + - name: LOG_COMPRESS_ENABLE + value: "true" + - name: LOG_ROLLING_MAXSIZE + value: "1000" + - name: LOG_ROLLING_MAXFILES + value: "3" + - name: LOG_ASYNC_LOGBUFSECS + value: "30" + - name: LOG_ASYNC_MAXQUEUESIZE + value: "51200" + - name: LOG_ASYNC_THREADCOUNT + value: "1" + - name: LOG_ALSOLOGTOSTDERR + value: "false" + - name: DECRYPT_ALGORITHM + value: "{{ .Values.global.common.decryptAlgorithm }}" + - name: TOKEN_EXPIRED_TIME_SPAN + value: "{{ .Values.global.iam.tokenExpiredTimeSpan }}" + - name: IAM_LISTEN_PORT + value: "{{ .Values.global.port.iamAdapterHttpPort }}" + - name: ENABLE_IAM + value: "{{ .Values.global.iam.enable }}" + - name: IAM_CREDENTIAL_TYPE + value: "{{ .Values.global.iam.credentialType }}" + - name: CREDENTIAL_HOST_ADDRESS + value: "" + - name: MAX_STORAGE_OPERATE_RETRY_TIMES + value: "60" + - name: SYSTEM_AUTH_MODE + value: "{{ .Values.global.common.systemAuthMode }}" + - name: SSL_ENABLE + value: "{{ .Values.global.mutualSSLConfig.sslEnable }}" + - name: RESOURCE_PATH + value: "/home/sn/resource" + - name: SCC_ENABLE + value: "{{ .Values.global.scc.enable }}" + - name: SCC_ALGORITHM + value: "AES256_GCM" + - name: ETCD_AUTH_TYPE + value: "{{ .Values.global.etcdManagement.authType }}" + - name: K8S_BASE_URL + value: "{{ 
.Values.global.kubernetes.kubeApiBaseUrl }}" + - name: K8S_NAMESPACE + value: {{ quote .Values.global.namespace }} # follow the release namespace instead of a hard-coded one + - name: MAX_TOLERATE_META_STORE_FAILED_TIMES + value: "{{ .Values.global.etcdManagement.maxTolerateMetaStoreFailedTimes }}" + - name: META_HEALTH_CHECK_INTERVAL_MS + value: "{{ .Values.global.etcdManagement.metaStoreCheckHealthIntervalMs }}" + - name: META_HEALTH_CHECK_TIMEOUTS + value: "{{ .Values.global.etcdManagement.metaStoreTimeoutMs }}" + - name: ELECTION_MODE + value: "{{ .Values.global.common.electionMode }}" + - name: ETCD_TARGET_NAME_OVERRIDE + value: "aaa.bbb" + - name: PERMANENT_CREDENTIAL_CONFIG_PATH + value: "/home/sn/permanent-credential-config.json" + - name: LOG_PATTERN + value: '{ + "separator": " | ", + "placeholders": [ + {"flags": "%Y-%m-%dT%H:%M:%S.%f"}, + {"flags": "%l"}, + {"flags": "%s:%#"}, + {"env": "POD_NAME"}, + {"env": "CLUSTER_ID"}, + {"flags": ""} + ] + }' + - name: STS_CONFIG + value: "{}" + resources: + limits: + cpu: {{ .Values.global.resources.iamAdapter.limits.cpu }} + memory: {{ .Values.global.resources.iamAdapter.limits.memory }} + requests: + cpu: {{ .Values.global.resources.iamAdapter.requests.cpu }} + memory: {{ .Values.global.resources.iamAdapter.requests.memory }} + volumeMounts: + - name: log-volume + mountPath: /opt/yuanrong/logs + subPathExpr: $(RUNTIME_POD_NAME) + - name: data-volume + mountPath: /opt/yuanrong/data + livenessProbe: + tcpSocket: + port: {{ .Values.global.port.iamAdapterHttpPort }} + initialDelaySeconds: 3 + timeoutSeconds: 5 + periodSeconds: 5 + successThreshold: 1 + failureThreshold: 3 + readinessProbe: + tcpSocket: + port: {{ .Values.global.port.iamAdapterHttpPort }} + initialDelaySeconds: 3 + timeoutSeconds: 5 + periodSeconds: 5 + successThreshold: 1 + failureThreshold: 10 + lifecycle: + preStop: + exec: + command: + - /bin/sh + - -c + - sleep 1 + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + imagePullPolicy: IfNotPresent + securityContext: + runAsUser: 1002 + runAsNonRoot:
true + restartPolicy: Always + terminationGracePeriodSeconds: 300 + dnsPolicy: ClusterFirst + securityContext: + fsGroup: {{ .Values.global.runtime.fsGroup }} + imagePullSecrets: + - name: default-secret + schedulerName: default-scheduler \ No newline at end of file diff --git a/deploy/k8s/charts/templates/manager/manager-configmap.yaml b/deploy/k8s/charts/templates/manager/manager-configmap.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ac54d8caca0b5303647a433f510d42467b1e4cc4 --- /dev/null +++ b/deploy/k8s/charts/templates/manager/manager-configmap.yaml @@ -0,0 +1,66 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: manager-config + namespace: {{ .Values.global.namespace }} # must be the namespace of the faas-manager Deployment that mounts this ConfigMap +data: + config.json: |- + { + "authenticationEnable": false, + "enableHealthCheck": true, + "functionCapability": 1, + "leaseRenewMinute": 5, + "httpsEnable": false, + "routerEtcd": { + "servers": ["{{ .Values.global.etcdManagement.detcd }}"], + {{- if eq .Values.global.etcdManagement.authType "TLS" }} + "sslEnable": true, + {{- else }} + "sslEnable": false, + {{- end}} + "user":"", + "password":"", + "authType": {{ quote .Values.global.etcdManagement.authType }}, + "useSecret": {{ .Values.global.etcdManagement.useSecret }}, + "secretName": {{ quote .Values.global.etcdManagement.secretName }} + }, + "metaEtcd": { + "servers": ["{{ .Values.global.etcdManagement.detcd }}"], + {{- if eq .Values.global.etcdManagement.authType "TLS" }} + "sslEnable": true, + {{- else }} + "sslEnable": false, + {{- end}} + "user":"", + "password":"", + "authType": {{ quote .Values.global.etcdManagement.authType }}, + "useSecret": {{ .Values.global.etcdManagement.useSecret }}, + "secretName": {{ quote .Values.global.etcdManagement.secretName }} + }, + "sccConfig": { + "enable": {{ .Values.global.scc.enable }}, + "secretName": {{ quote .Values.global.scc.secretName }}, + "algorithm": {{ quote .Values.global.scc.algorithm }} + }, + "alarmConfig": { + "enableAlarm": false, + "minInsStartInterval": 15, +
"minInsCheckInterval": 15, + "alarmLogConfig": { + "filepath": "/opt/yuanrong/logs/alarms", + "level": "Info", + "tick": 0, + "first": 0, + "thereafter": 0, + "singlesize": 500, + "threshold": 3, + "disable": false + }, + "xiangYunFourConfig": { + "site": "aaa.bbb", + "tenantID": "T014", + "applicationID": "aaa.bbb", + "serviceID": "aaa.bbb" + } + } + } \ No newline at end of file diff --git a/deploy/k8s/charts/templates/manager/manager.yaml b/deploy/k8s/charts/templates/manager/manager.yaml new file mode 100644 index 0000000000000000000000000000000000000000..036679ff6f416045e26fbc2dc502a73fb34cb017 --- /dev/null +++ b/deploy/k8s/charts/templates/manager/manager.yaml @@ -0,0 +1,450 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: faas-manager + namespace: {{ .Values.global.namespace }} +spec: + replicas: {{ .Values.global.replicas.faasManager }} + selector: + matchLabels: + app: faas-manager + template: + metadata: + labels: + app: faas-manager + spec: + volumes: + - name: volume-config + configMap: + name: manager-config + items: + - key: config.json + path: config.json + defaultMode: 420 + - name: iam-policy-config-volume + configMap: + name: iam-policy-config + items: + - key: iam-policy-config.json + path: iam-policy-config.json + defaultMode: 416 + - name: log-volume + hostPath: + path: "{{ .Values.global.log.hostPath.componentLog }}" + type: DirectoryOrCreate + - name: data-volume + emptyDir: + sizeLimit: 5Gi + initContainers: + - name: agent-init-container + image: {{ .Values.global.imageRegistry }}{{ .Values.global.images.agentInit }} + command: ["/bin/sh", "-c", "chown 1002:1002 /opt/yuanrong/logs && chmod 777 /opt/yuanrong/logs"] + resources: + limits: + cpu: {{ .Values.global.resources.functionAgentInit.limits.cpu }} + memory: {{ .Values.global.resources.functionAgentInit.limits.memory }} + requests: + cpu: {{ .Values.global.resources.functionAgentInit.requests.cpu }} + memory: {{ .Values.global.resources.functionAgentInit.requests.memory }} 
+ volumeMounts: + - name: log-volume + mountPath: /opt/yuanrong/logs + subPathExpr: $(RUNTIME_POD_NAME) + env: + - name: RUNTIME_POD_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.name + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + imagePullPolicy: IfNotPresent + securityContext: + capabilities: + add: + - CHOWN + - NET_RAW + - NET_ADMIN + - SYS_ADMIN + - CHOWN + - SETGID + - SETUID + - DAC_OVERRIDE + - FOWNER + - FSETID + drop: + - ALL + runAsUser: 0 + containers: + - name: service-container + image: {{ .Values.global.imageRegistry }}{{ .Values.global.images.common }} + command: ["/bin/sh", "-l", "/home/sn/manager/bin/bootstrap"] + ports: + - containerPort: {{ .Values.global.port.faasSchedulerPort }} + protocol: TCP + env: + - name: RUNTIME_HOST_IP + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: status.hostIP + - name: RUNTIME_NODE_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: spec.nodeName + - name: RUNTIME_POD_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.name + - name: RUNTIME_POD_NAMESPACE + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.namespace + - name: RUNTIME_POD_IP + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: status.podIP + - name: RUNTIME_POD_UID + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.uid + - name: X_WISECLOUD_SITE + value: aaa.bbb + - name: X_WISECLOUD_TENANT_ID + value: aaa.bbb + - name: X_WISECLOUD_APPLICATION_ID + value: aaa.bbb + - name: X_WISECLOUD_SERVICE_ID + value: aaa.bbb + - name: X_WISECLOUD_ENVIRONMENT_ID + value: aaa.bbb + - name: RUNTIME_MICROSERVICE_NAME + value: aaa.bbb + - name: RUNTIME_MICROSERVICE_CLUSTER + value: cn-dev-scheduler-green + - name: LANG + value: en_US.UTF-8 + - name: RUNTIME_MICROSERVICE_ENVIRONMENT + value: aaa.bbb + - name: RUNTIME_MICROSERVICE_SERVICE_NAME + value: aaa.bbb + - name: RUNTIME_MICROSERVICE_BUSINESS + value: aaa.bbb + - name: RUNTIME_MICROSERVICE_REGION 
+ value: cn-north-4 + - name: RUNTIME_MICROSERVICE_AZ + value: cn-north-4g + - name: X_WISECLOUD_CLOUDMAP_ID + value: aaa.bbb + - name: NUWA_CLOUDMAP_NAMESPACENAME + value: aaa.bbb + - name: NUWA_CLOUDMAP_SERVERADDR + value: + - name: NUWA_CLOUDMAP_DUAL_SERVERADDR + value: + - name: TZ + value: Asia/Shanghai + - name: WISECLOUD_ACMS_ENDPOINT + value: + - name: YR_LOG_LEVEL + value: DEBUG + - name: FUNCTION_AGENT_PORT + value: "{{ .Values.global.port.functionAgentPort }}" + - name: RUNTIME_MGR_PORT + value: "{{ .Values.global.port.runtimeMgrPort }}" + - name: RUNTIME_INIT_PORT + value: "{{ .Values.global.port.runtimeInitPort }}" + - name: RUNTIME_PORT_NUM + value: "{{ .Values.global.port.runtimePortNum }}" + - name: METRICS_COLLECTOR_TYPE + value: {{ .Values.global.runtime.metricsCollectorType }} + - name: DISK_USAGE_MONITOR_PATH + value: {{ quote .Values.global.runtime.diskUsageMonitor.path }} + - name: DISK_USAGE_LIMIT + value: "{{ .Values.global.runtime.diskUsageMonitor.limit }}" + - name: DISK_USAGE_MONITOR_DURATION + value: "{{ .Values.global.runtime.diskUsageMonitor.duration }}" + - name: CPU4COMP + value: "5000" + - name: MEM4COMP + value: "10240" + - name: INIT_LABELS + value: '{"resource.owner":"30450000-0000-4000-8069-949f37caf04c"}' + - name: RUNTIME_LOG_DIR + value: /opt/yuanrong/logs + - name: RUNTIME_LOG_LEVEL + value: INFO + - name: IS_NEW_RUNTIME_PATH + value: "true" + - name: JAVA_PRESTART_COUNT + value: "{{ .Values.global.runtime.prestartCount.java8 }}" + - name: JAVA11_PRESTART_COUNT + value: "{{ .Values.global.runtime.prestartCount.java11 }}" + - name: PYTHON36_PRESTART_COUNT + value: "{{ .Values.global.runtime.prestartCount.python36 }}" + - name: PYTHON38_PRESTART_COUNT + value: "{{ .Values.global.runtime.prestartCount.python38 }}" + - name: PYTHON39_PRESTART_COUNT + value: "{{ .Values.global.runtime.prestartCount.python39 }}" + - name: CPP_PRESTART_COUNT + value: "{{ .Values.global.runtime.prestartCount.cpp }}" + - name: JVM_CUSTOM_ARGS + 
value: "{{ .Values.global.runtime.jvmCustomArgs }}" + - name: RUNTIME_GID + value: "1002" + - name: RUNTIME_UID + value: "1002" + - name: INIT_HANDLER + value: faasmanager.InitHandler + - name: CALL_HANDLER + value: faasmanager.CallHandler + - name: CHECKPOINT_HANDLER + value: faasmanager.CheckpointHandler + - name: SHUTDOWN_HANDLER + value: faasmanager.ShutdownHandler + - name: SIGNAL_HANDLER + value: faasmanager.SignalHandler + - name: YR_FUNCTION_LIB_PATH + value: /home/sn/manager/bin + - name: INIT_ARGS_FILE_PATH + value: /home/sn/config/config.json + - name: GLOG_log_dir + value: /opt/yuanrong/logs + - name: LOG_PATTERN + value: |- + { + "separator": " | ", + "placeholders": [ + {"flags": "%Y-%m-%d %H:%M:%S.%e"}, + {"flags": "%l"}, + {"flags": "%s:%#"}, + {"env": "POD_NAME"}, + {"env": "CLUSTER_ID"}, + {"flags": ""} + ] + } + - name: IS_PROTOMSG_TO_RUNTIME + value: "{{ .Values.global.runtime.isProtoMsgToRuntime }}" + - name: MAX_PRIORITY + value: "{{ .Values.global.common.prioritySchedule.maxPriority }}" + - name: CLUSTER_NAME + value: "cn-dev" + - name: FUNCTION_PROXY_PORT + value: "{{ .Values.global.port.functionProxyPort }}" + - name: FUNCTION_PROXY_GRPC_PORT + value: "{{ .Values.global.port.functionProxyGrpcPort }}" + - name: META_STORE_ADDRESS + {{- if .Values.global.metaStore.enable }} + value: "{{ .Values.global.metaStore.address }}" + {{- else }} + value: "{{ .Values.global.etcdManagement.detcd }}" + {{- end }} + - name: IAM_META_STORE_ADDRESS + {{- if .Values.global.metaStore.enable }} + value: "{{ .Values.global.metaStore.address }}" + {{- else }} + value: "{{ .Values.global.etcdManagement.metcd }}" + {{- end }} + - name: SslTargetName + value: "aaa.bbb" + - name: DS_WORKER_PORT + value: "{{ .Values.global.port.worker }}" + - name: ENABLE_TRACE + value: "{{ .Values.global.observer.enableTrace }}" + - name: LOG_PATH + value: "/opt/yuanrong/logs" + - name: LOG_LEVEL + value: "DEBUG" + - name: LOG_ROLLING_MAXSIZE + value: "1000" + - name: 
LOG_ROLLING_MAXFILES + value: "3" + - name: LOG_ASYNC_LOGBUFSECS + value: "30" + - name: LOG_ASYNC_MAXQUEUESIZE + value: "51200" + - name: LOG_ASYNC_THREADCOUNT + value: "1" + - name: LOG_ALSOLOGTOSTDERR + value: "false" + - name: ENABLE_METRICS + value: "{{ .Values.global.observer.metrics.enable }}" + - name: STS_CONFIG + value: "{}" + - name: METRICS_CONFIG_FILE + value: {{ quote .Values.global.observer.metrics.metricsConfigFile }} {{- /* quote already renders a double-quoted YAML string; outer quotes would leak literal quote characters into the value */}} + - name: MEM_THRESHOLD_PERCENTAGE + value: "90" + - name: ResourcePath + value: "/home/wisfunction/resource" + - name: RUNTIME_HEARTBEAT_ENABLE + value: "true" + - name: RUNTIME_MAX_HEARTBEAT_TIMEOUT_TIMES + value: "{{ .Values.global.runtime.runtimeMaxHeartbeatTimeoutTimes }}" + - name: MAX_STORAGE_OPERATE_RETRY_TIMES + value: "60" + - name: RUNTIME_HEARTBEAT_TIMEOUT_MS + value: "{{ .Values.global.runtime.runtimeHeartbeatTimeoutMS }}" + - name: RUNTIME_RECOVER_ENABLE + value: "false" + - name: DRIVER_ENABLE + value: "{{ .Values.global.common.driverEnable }}" + - name: STATE_STORAGE_TYPE + value: "{{ .Values.global.common.stateStorageType }}" + - name: ELECTION_MODE + value: "{{ .Values.global.common.electionMode }}" + - name: MAX_GRPC_SIZE + value: "{{ .Values.global.common.maxGrpcSize }}" + - name: DS_HEALTH_CHECK_INTERVAL + value: "1000" + - name: MAX_DS_HEALTH_CHECK_TIMES + value: "12" + - name: DS_HEALTH_CHECK_PATH + value: "/home/sn/datasystem/health" + - name: SERVICES_PATH + value: "/home/sn/service-config/services.yaml" + - name: SYSTEM_TIMEOUT + value: "{{ .Values.global.common.systemTimeout }}" + - name: SERVICE_TTL + value: "60000" + - name: RUNTIME_SHUTDOWN_TIMEOUT_SECONDS + value: "{{ .Values.global.runtime.runtimeShutdownTimeoutSeconds }}" + - name: CACHE_STORAGE_AUTH_ENABLE + value: "{{ .Values.global.dataSystem.authEnabled }}" + - name: SSL_ENABLE + value: "{{ .Values.global.mutualSSLConfig.sslEnable }}" + - name: DECRYPT_ALGORITHM + value: "{{ .Values.global.common.decryptAlgorithm }}" + - name:
MIN_INSTANCE_CPU_SIZE + value: "{{ .Values.global.runtime.minInstanceCpuSize }}" + - name: MIN_INSTANCE_MEMORY_SIZE + value: "{{ .Values.global.runtime.minInstanceMemorySize }}" + - name: MAX_INSTANCE_CPU_SIZE + value: "{{ .Values.global.runtime.maxInstanceCpuSize }}" + - name: MAX_INSTANCE_MEMORY_SIZE + value: "{{ .Values.global.runtime.maxInstanceMemorySize }}" + - name: ENABLE_SERVER_MODE + value: "{{ .Values.global.runtime.serverModeEnable }}" + - name: LOG_COMPRESS_ENABLE + value: "true" + - name: ENABLE_PRINT_RESOURCE_VIEW + value: "{{ .Values.global.common.enablePrintResourceView }}" + - name: PROMETHEUS_PUSH_GATEWAY_IP + value: "{{ .Values.global.observer.proGatewayIP }}" + - name: PROMETHEUS_PUSH_GATEWAY_PORT + value: "{{ .Values.global.observer.gatewayPort }}" + - name: CLUSTER_ID + value: {{ quote .Values.global.clusterId }} + - name: INVOKE_LIMITATION_ENABLE + value: "{{ .Values.global.rateLimit.invokeRateLimit.enable }}" + - name: LOW_MEMORY_THRESHOLD + value: "{{ .Values.global.rateLimit.invokeRateLimit.lowThreshold }}" + - name: HIGH_MEMORY_THRESHOLD + value: "{{ .Values.global.rateLimit.invokeRateLimit.highThreshold }}" + - name: MESSAGE_SIZE_THRESHOLD + value: "{{ .Values.global.rateLimit.invokeRateLimit.msgSize }}" + - name: RUNTIME_DS_AUTH_ENABLE + value: "{{ .Values.global.runtime.dataSystem.authEnable }}" + - name: RUNTIME_DS_ENCRYPT_ENABLE + value: {{ quote .Values.global.runtime.dataSystem.encryptEnable }} + - name: RUNTIME_DS_CLIENT_PUBLICKEY + value: "" + - name: RUNTIME_DS_CLIENT_PRIVATEKEY + value: "" + - name: RUNTIME_DS_SERVER_PUBLICKEY + value: "" + - name: ETCD_AUTH_TYPE + value: "{{ .Values.global.etcdManagement.authType }}" + - name: SCC_ENABLE + value: "{{ .Values.global.scc.enable }}" + - name: IAM_POLICY_CONFIG_PATH + value: "/home/sn/iam-config/iam-policy-config.json" + - name: ENABLE_IAM + value: "{{ .Values.global.iam.enable }}" + - name: IAM_BASE_PATH + value: "http://iam-adaptor.{{ .Values.global.namespace 
}}.svc.cluster.local:{{ .Values.global.port.iamAdapterHttpPort }}" + - name: IAM_CREDENTIAL_TYPE + value: "{{ .Values.global.iam.credentialType }}" + - name: K8S_BASE_URL + value: "{{ .Values.global.kubernetes.kubeApiBaseUrl }}" + - name: K8S_NAMESPACE + value: "default" + - name: MAX_TOLERATE_META_STORE_FAILED_TIMES + value: "{{ .Values.global.etcdManagement.maxTolerateMetaStoreFailedTimes }}" + - name: META_HEALTH_CHECK_INTERVAL_MS + value: "{{ .Values.global.etcdManagement.metaStoreCheckHealthIntervalMs }}" + - name: META_HEALTH_CHECK_TIMEOUTS + value: "{{ .Values.global.etcdManagement.metaStoreTimeoutMs }}" + - name: SYSTEM_AUTH_MODE + value: "{{ .Values.global.common.systemAuthMode }}" + - name: ETCD_TARGET_NAME_OVERRIDE + value: "aaa.bbb" + - name: CACHE_STORAGE_AUTH_TYPE + value: "" + - name: FUNCTION_META_PATH + value: "/home/sn/function-metas" + - name: RESOURCE_PATH + value: "/home/sn/resource" + - name: LIB_PATH + value: "{{ .Values.global.runtime.libPath }}" + resources: + limits: + cpu: {{ .Values.global.resources.faasScheduler.limits.cpu }} + memory: {{ .Values.global.resources.faasScheduler.limits.memory }} + requests: + cpu: {{ .Values.global.resources.faasScheduler.requests.cpu }} + memory: {{ .Values.global.resources.faasScheduler.requests.memory }} + volumeMounts: + - name: volume-config + mountPath: /home/sn/config + - name: log-volume + mountPath: /opt/yuanrong/logs + subPathExpr: $(RUNTIME_POD_NAME) + - name: data-volume + mountPath: /opt/yuanrong/data + - name: iam-policy-config-volume + mountPath: /home/sn/iam-config + livenessProbe: + tcpSocket: + port: {{ .Values.global.port.faasSchedulerPort }} + initialDelaySeconds: 6 + timeoutSeconds: 5 + periodSeconds: 5 + successThreshold: 1 + failureThreshold: 3 + readinessProbe: + tcpSocket: + port: {{ .Values.global.port.faasSchedulerPort }} + initialDelaySeconds: 6 + timeoutSeconds: 5 + periodSeconds: 5 + successThreshold: 1 + failureThreshold: 10 + lifecycle: + preStop: + exec: + command: + - 
/bin/sh + - -c + - kill -15 $(ps aux | grep -i '[f]unction_proxy' | awk '{print $2}') 2>/dev/null {{- /* the [f] bracket keeps grep and this hook shell out of their own match, so the hook does not signal itself */}} + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + imagePullPolicy: IfNotPresent + securityContext: + runAsUser: 1002 + runAsNonRoot: true + restartPolicy: Always + serviceAccount: faas-manager + serviceAccountName: faas-manager + terminationGracePeriodSeconds: 300 + dnsPolicy: ClusterFirst + securityContext: + fsGroup: {{ .Values.global.runtime.fsGroup }} + imagePullSecrets: + - name: default-secret + revisionHistoryLimit: 5 diff --git a/deploy/k8s/charts/templates/manager/role.yaml b/deploy/k8s/charts/templates/manager/role.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3f089499b6ee48ab607ae8902be58fa3af8ed348 --- /dev/null +++ b/deploy/k8s/charts/templates/manager/role.yaml @@ -0,0 +1,13 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + creationTimestamp: null + name: faas-manager +rules: + - apiGroups: + - patservice.cap.io + resources: + - '*' + - pats + verbs: + - '*' \ No newline at end of file diff --git a/deploy/k8s/charts/templates/manager/role_binding.yaml b/deploy/k8s/charts/templates/manager/role_binding.yaml new file mode 100644 index 0000000000000000000000000000000000000000..32abf4ba83145214ccb98e017552516f9e18026d --- /dev/null +++ b/deploy/k8s/charts/templates/manager/role_binding.yaml @@ -0,0 +1,12 @@ +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: faas-manager +subjects: + - kind: ServiceAccount + name: faas-manager + namespace: "default" +roleRef: + kind: ClusterRole + name: faas-manager + apiGroup: rbac.authorization.k8s.io \ No newline at end of file diff --git a/deploy/k8s/charts/templates/manager/service_account.yaml b/deploy/k8s/charts/templates/manager/service_account.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4a25c7ac903e6f7ff15b68a796820d29bac12c77 --- /dev/null +++
b/deploy/k8s/charts/templates/manager/service_account.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: faas-manager + namespace: "default" \ No newline at end of file diff --git a/deploy/k8s/charts/templates/master/_common_adapter.tpl b/deploy/k8s/charts/templates/master/_common_adapter.tpl new file mode 100644 index 0000000000000000000000000000000000000000..804b66d5e4555150226c08fd6dcfb0a1ec56ad34 --- /dev/null +++ b/deploy/k8s/charts/templates/master/_common_adapter.tpl @@ -0,0 +1,5 @@ +{{- define "functionSystem.imagePullSecrets" -}} +{{- range .Values.global.imagePullSecrets }} +- name: {{ . }} +{{- end }} +{{- end -}} \ No newline at end of file diff --git a/deploy/k8s/charts/templates/master/function-agent-configmap.yaml b/deploy/k8s/charts/templates/master/function-agent-configmap.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8a65d1fd38a2734f1dda7076090535b75705ade3 --- /dev/null +++ b/deploy/k8s/charts/templates/master/function-agent-configmap.yaml @@ -0,0 +1,104 @@ +apiVersion: v1 +metadata: + name: function-agent-config + namespace: {{ .Values.global.namespace }} + creationTimestamp: null +kind: ConfigMap +data: + {{- if .Values.global.runtime.useAscendCustomConfig }} + PATH: /home/snuser/.local/bin:/home/snuser/bin:/bin:/bin:/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/opt/function/runtime/java8/rtsp/jre/bin:/opt/function/runtime/java11/rtsp/jre/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/Ascend/toolbox/latest/Ascend-DMI/bin:/usr/local/Ascend/ascend-toolkit/latest/bin:/usr/local/Ascend/ascend-toolkit/latest/compiler/ccec_compiler/bin + LD_LIBRARY_PATH: 
/usr/lib64:/home/snuser/snlib:/tmp:/home/snuser/.local/lib/python3.9/site-packages/datasystem:/usr/local/Ascend/toolbox/latest/Ascend-DMI/lib64:/usr/local/Ascend/toolbox/latest/Ascend-DMI/bin:/usr/local/dcmi:/usr/local/Ascend/driver/lib64/common:/usr/local/Ascend/driver/lib64/driver:/usr/local/Ascend/ascend-toolkit/latest/lib64:/usr/local/Ascend/ascend-toolkit/latest/lib64/plugin/opskernel:/usr/local/Ascend/ascend-toolkit/latest/lib64/plugin/nnengine + ASCEND_TOOLKIT_HOME: /usr/local/Ascend/ascend-toolkit/latest + ASCEND_AICPU_PATH: /usr/local/Ascend/ascend-toolkit/latest + ASCEND_OPP_PATH: /usr/local/Ascend/ascend-toolkit/latest/opp + TOOLCHAIN_HOME: /usr/local/Ascend/ascend-toolkit/latest/toolkit + ASCEND_HOME_PATH: /usr/local/Ascend/ascend-toolkit/latest + _env: ASCEND_TOOLKIT_HOME, ASCEND_AICPU_PATH, ASCEND_OPP_PATH, TOOLCHAIN_HOME, ASCEND_HOME_PATH + {{- end }} + runtime.json: |- + { + "maxRequestBodySize": "6", + "maxFdNum": 1024, + "dataSystemConnectionTimeout": "1", + "maxLogSize": 40, + "maxLogFileNum": 20 + } + python-runtime-log.json: |- + { + "version": 1, + "disable_existing_loggers": false, + "formatters": { + "extra": { + "format": "[%(asctime)s %(levelname)s %(filename)s:%(lineno)d] [%(podname)s %(thread)d] [%(runtime_id)s] %(message)s" + } + }, + "handlers": { + "file": { + "class": "logging.handlers.RotatingFileHandler", + "filename": "{{ .Values.global.log.runtime.path }}", + "formatter": "extra", + "maxBytes": 419430400, + "backupCount": 1 + } + }, + "loggers": { + "FileLogger": { + "handlers": [ + "file" + ], + "level": "{{ .Values.global.log.runtime.level }}", + "propagate": false + } + } + } + log4j2.xml: |- + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + iptabelsRule: |- + *filter + :INPUT ACCEPT [193520:13189012] + :FORWARD ACCEPT [0:0] + :OUTPUT ACCEPT [208122:17531363] + -A OUTPUT -d aaa.bbb.ccc.ddd/32 -m comment --comment "yangtse OUTPUT rules" -j DROP + COMMIT + *nat + :PREROUTING ACCEPT [23377:1215612] + :INPUT 
ACCEPT [23377:1215612] + :OUTPUT ACCEPT [3662:358152] + :POSTROUTING ACCEPT [3662:358152] + -A PREROUTING -s aaa.bbb.ccc.ddd/32 -i veth1 -m comment --comment "yangtse PREROUTING rules" -j RETURN + -A POSTROUTING ! -s aaa.bbb.ccc.ddd/32 -o eth0 -m comment --comment "yangtse SNAT rules" -j MASQUERADE + COMMIT diff --git a/deploy/k8s/charts/templates/master/function-agent-deployment.tpl b/deploy/k8s/charts/templates/master/function-agent-deployment.tpl new file mode 100644 index 0000000000000000000000000000000000000000..126b11f2b7b3016ce469ccb4ded235996c059dd9 --- /dev/null +++ b/deploy/k8s/charts/templates/master/function-agent-deployment.tpl @@ -0,0 +1,805 @@ +{{- define "agent.deployment.template" }} +metadata: + labels: + app: function-agent + name: function-agent + namespace: {{ .Values.global.namespace }} +spec: + progressDeadlineSeconds: 600 + replicas: {{ .Values.global.pool.poolSize }} + revisionHistoryLimit: 10 + selector: + matchLabels: + app: function-agent + strategy: + rollingUpdate: + maxSurge: 25% + maxUnavailable: 25% + type: RollingUpdate + template: + {{ include "agent.pod.template" . }} +{{- end }} + +{{- define "agent.pod.template" }} + metadata: + creationTimestamp: null + labels: + app: function-agent + annotations: + yr-default: yr-default + spec: + {{- if .Values.global.pool.nodeAffinity }} + affinity: + {{- with .Values.global.pool.nodeAffinity }} + nodeAffinity: + {{- toYaml . | nindent 10 }} + {{- end }} + {{- end }} + {{- if .Values.global.pool.accelerator }} + nodeSelector: + accelerator: {{ .Values.global.pool.accelerator }} + {{- else if .Values.global.pool.nodeSelector }} + {{- with .Values.global.pool.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + {{- if .Values.global.controlPlane.tolerations }} + {{- with .Values.global.controlPlane.tolerations }} + tolerations: + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} + initContainers: + - name: function-agent-init + command: + - /home/sn/bin/entrypoint-function-agent-init + image: "{{ .Values.global.imageRegistry }}{{ .Values.global.images.agentInit }}" + securityContext: + runAsUser: 0 + capabilities: + drop: + - ALL + add: # Add as needed based on the script entrypoint-function-agent-init. + - NET_RAW + - NET_ADMIN + - SYS_ADMIN + - CHOWN + - SETGID + - SETUID + - DAC_OVERRIDE + - FOWNER + - FSETID + env: + - name: POD_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.name + - name: ENABLE_IPV4_TENANT_ISOLATION + value: "{{ .Values.global.tenantIsolation.ipv4.enable }}" + - name: HOST_IP + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: status.hostIP + - name: THIRD_PARTY_WHITELIST + value: "{{ .Values.global.tenantIsolation.thirdPartyWhitelist }}" + - name: SVC_CIDR + value: "{{ .Values.global.kubernetes.svcCIDR }}" + - name: POD_CIDR + value: "{{ .Values.global.kubernetes.podCIDR }}" + - name: HOST_CIDR + value: "{{ .Values.global.kubernetes.hostCIDR }}" + - name: TCP_PORT_WHITELIST + value: "{{ .Values.global.tenantIsolation.ipv4.tcpPortWhitelist }}" + - name: UDP_PORT_WHITELIST + value: "{{ .Values.global.tenantIsolation.ipv4.udpPortWhitelist }}" + volumeMounts: + {{- if .Values.global.log.hostPath.enable }} + - mountPath: "{{ .Values.global.log.functionSystem.path }}" + name: varlog-runtime-manager + subPathExpr: $(POD_NAME) + - mountPath: "{{ .Values.global.log.runtime.path }}" + name: servicelog + subPathExpr: $(POD_NAME) + - mountPath: "{{ .Values.global.log.userOutput.path }}" + name: stdlog + subPathExpr: $(POD_NAME) + {{- else }} + - mountPath: "{{ .Values.global.log.functionSystem.path }}" + name: varlog-runtime-manager + - mountPath: "{{ .Values.global.log.runtime.path }}" + name: servicelog + - mountPath: "{{ .Values.global.log.userOutput.path }}" + name: stdlog + {{- end }} + - mountPath: /home/snuser/secret + name: secret-dir + - mountPath: 
/dcache + name: pkg-dir + - mountPath: /opt/function/code + name: pkg-dir1 + {{- if .Values.global.sts.enable }} + - mountPath: /home/snuser/alarms + name: alarms-dir + {{- end }} + - mountPath: {{ .Values.global.observer.metrics.path.file }} + name: metrics-dir + - mountPath: /home/snuser/metrics + name: runtime-metrics-dir + - mountPath: {{ .Values.global.observer.metrics.path.failure }} + name: varfailuremetrics + containers: + - name: runtime-manager + command: + - /home/sn/bin/entrypoint-runtime-manager + {{- if and .Values.global.pool.accelerator (ne .Values.global.pool.accelerator "nvidia-gpu") (ne .Values.global.pool.accelerator "amd-gpu") }} + envFrom: + - configMapRef: + name: function-agent-config + {{- end }} + env: + - name: POD_IP + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: status.podIP + - name: HOST_IP + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: status.hostIP + - name: NODE_ID + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: spec.nodeName + - name: POD_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.name + - name: POD_ID + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.uid + - name: RUNTIME_MGR_PORT + value: {{ quote .Values.global.port.runtimeMgrPort }} + - name: ENABLE_INHERIT_ENV + value: "false" + - name: FUNCTION_AGENT_PORT + value: {{ quote .Values.global.port.functionAgentPort }} + - name: RUNTIME_INIT_PORT + value: {{ quote .Values.global.port.runtimeInitPort }} + - name: DS_WORKER_PORT + value: {{ quote .Values.global.port.worker }} + - name: FUNCTION_PROXY_GRPC_PORT + value: {{ quote .Values.global.port.functionProxyGrpcPort }} + - name: RUNTIME_PORT_NUM + value: {{ quote .Values.global.port.runtimePortNum }} + - name: METRICS_COLLECTOR_TYPE + value: {{ .Values.global.runtime.metricsCollectorType }} + - name: DISK_USAGE_MONITOR_NOTIFY_FAILURE_ENABLE + value: {{ quote .Values.global.runtime.diskUsageMonitor.notifyFailureEnable }} + - name: DISK_USAGE_MONITOR_PATH + value: {{ 
quote .Values.global.runtime.diskUsageMonitor.path }} + - name: DISK_USAGE_LIMIT + value: {{ quote .Values.global.runtime.diskUsageMonitor.limit }} + - name: SNUSER_LIB_PATH + value: {{ quote .Values.global.runtime.snuserLibPath }} + - name: VIRTUAL_ENV_IDLE_TIME_LIMIT + value: {{ quote .Values.global.runtime.virtualEnvIdleTimeLimit }} + - name: REQUEST_ACK_ACC_MAX_SEC + value: {{ quote .Values.global.runtime.requestAckAccMaxSec}} + - name: SNUSER_DIR_DISK_USAGE_LIMIT + value: {{ quote .Values.global.runtime.diskUsageMonitor.snuserDirSizeLimit }} + - name: TMP_DIR_DISK_USAGE_LIMIT + value: {{ quote .Values.global.runtime.diskUsageMonitor.tmpDirSizeLimit }} + - name: DISK_USAGE_MONITOR_DURATION + value: {{ quote .Values.global.runtime.diskUsageMonitor.duration }} + - name: CPU4COMP + value: {{ quote .Values.global.pool.requestCpu }} + - name: MEM4COMP + value: {{ quote .Values.global.pool.requestMemory }} + - name: INIT_LABELS + value: "" + - name: LOG_PATH + value: {{ .Values.global.log.functionSystem.path }} + - name: LOG_LEVEL + value: {{ .Values.global.log.functionSystem.level }} + - name: LOG_PATTERN + value: {{ quote .Values.global.log.functionSystem.pattern }} + - name: LOG_COMPRESS_ENABLE + value: {{ quote .Values.global.log.functionSystem.compress }} + - name: LOG_ROLLING_MAXSIZE + value: {{ quote .Values.global.log.functionSystem.rolling.maxSize }} + - name: LOG_ROLLING_MAXFILES + value: {{ quote .Values.global.log.functionSystem.rolling.maxfiles }} + - name: LOG_ASYNC_LOGBUFSECS + value: "10" + - name: LOG_ASYNC_MAXQUEUESIZE + value: "1024" + - name: LOG_ASYNC_THREADCOUNT + value: "1" + - name: LOG_ALSOLOGTOSTDERR + value: "false" + - name: ENABLE_METRICS + value: {{ quote .Values.global.observer.metrics.enable }} + - name: METRICS_CONFIG + value: {{ quote .Values.global.observer.metrics.metricsConfig }} + - name: METRICS_CONFIG_FILE + value: {{ quote .Values.global.observer.metrics.metricsConfigFile }} + - name: RUNTIME_METRICS_CONFIG + value: {{ quote 
.Values.global.observer.metrics.runtimeMetricsConfig }} + - name: RUNTIME_METRICS_CONFIG_FILE + value: {{ quote .Values.global.observer.metrics.runtimeMetricsConfigFile }} + - name: ENABLE_TRACE + value: {{ quote .Values.global.observer.trace.enable }} + - name: TRACE_CONFIG + value: {{ quote .Values.global.observer.trace.traceConfig }} + - name: RUNTIME_TRACE_CONFIG + value: {{ quote .Values.global.observer.trace.runtimeTraceConfig }} + - name: RUNTIME_LOG_DIR + value: {{ .Values.global.log.runtime.path }} + - name: RUNTIME_LOG_LEVEL + value: {{ .Values.global.log.runtime.level }} + - name: PROMETHEUS_PUSH_GATEWAY_IP + value: {{ quote .Values.global.observer.proGatewayIP }} + - name: PROMETHEUS_PUSH_GATEWAY_PORT + value: {{ quote .Values.global.observer.gatewayPort }} + - name: JAVA_PRESTART_COUNT + value: {{ quote .Values.global.runtime.prestartCount.java8 }} + - name: JAVA11_PRESTART_COUNT + value: {{ quote .Values.global.runtime.prestartCount.java11 }} + - name: PYTHON36_PRESTART_COUNT + value: {{ quote .Values.global.runtime.prestartCount.python36 }} + - name: PYTHON37_PRESTART_COUNT + value: {{ quote .Values.global.runtime.prestartCount.python37 }} + - name: PYTHON38_PRESTART_COUNT + value: {{ quote .Values.global.runtime.prestartCount.python38 }} + - name: PYTHON39_PRESTART_COUNT + value: {{ quote .Values.global.runtime.prestartCount.python39 }} + - name: PYTHON310_PRESTART_COUNT + value: {{ quote .Values.global.runtime.prestartCount.python310 }} + - name: PYTHON311_PRESTART_COUNT + value: {{ quote .Values.global.runtime.prestartCount.python311 }} + - name: CPP_PRESTART_COUNT + value: {{ quote .Values.global.runtime.prestartCount.cpp }} + - name: JVM_CUSTOM_ARGS + value: {{ quote .Values.global.runtime.jvmCustomArgs }} + - name: JAVA8_DEFAULT_ARGS + value: {{ quote .Values.global.runtime.defaultArgs.java8 }} + - name: JAVA11_DEFAULT_ARGS + value: {{ quote .Values.global.runtime.defaultArgs.java11 }} + - name: JAVA17_DEFAULT_ARGS + value: {{ quote 
.Values.global.runtime.defaultArgs.java17 }} + - name: JAVA21_DEFAULT_ARGS + value: {{ quote .Values.global.runtime.defaultArgs.java21 }} + - name: SYSTEM_TIMEOUT + value: {{ quote .Values.global.common.systemTimeout }} + - name: CLUSTER_ID + value: {{ quote .Values.global.clusterId }} + - name: RUNTIME_GID + value: "1003" + - name: RUNTIME_UID + value: "1003" + - name: ENABLE_DS_CLIENT + value: "0" + - name: NPU_COLLECTION_MODE + value: {{ quote .Values.global.runtime.npuCollectionMode }} + - name: GPU_COLLECTION_ENABLE + value: {{ quote .Values.global.runtime.gpuCollectionEnable }} + - name: IS_PROTOMSG_TO_RUNTIME + value: {{ quote .Values.global.runtime.isProtoMsgToRuntime }} + - name: MASSIF_ENABLE + value: {{ quote .Values.global.runtime.massifEnable }} + - name: RESOURCE_PATH + value: /home/sn/resource + - name: RUNTIME_HOME_DIR + value: /home/snuser + - name: RESOURCE_LABEL_PATH + value: /home/sn/podInfo/labels + - name: NPU_DEVICE_INFO_PATH + value: /home/sn/config/topology-info.json + - name: RUNTIME_DS_CONNECT_TIMEOUT + value: {{ quote .Values.global.runtime.runtimeDsConnectTimeout }} + {{- if .Values.global.log.runtime.expiration.enable }} + - name: LOG_EXPIRATION_ENABLE + value: {{ quote .Values.global.log.runtime.expiration.enable }} + - name: LOG_EXPIRATION_CLEANUP_INTERVAL + value: {{ quote .Values.global.log.runtime.expiration.cleanupInterval }} + - name: LOG_EXPIRATION_TIME_THRESHOLD + value: {{ quote .Values.global.log.runtime.expiration.timeThreshold }} + - name: LOG_EXPIRATION_MAX_FILE_COUNT + value: {{ quote .Values.global.log.runtime.expiration.maxFileCount }} + - name: LOG_REUSE_ENABLE + value: {{ quote .Values.global.log.runtime.expiration.logReuseEnable }} + {{- end }} + - name: USER_LOG_EXPORT_MODE + value: {{ quote .Values.global.log.runtime.userLogExportMode }} + - name: RUNTIME_DIRECT_CONNECTION_ENABLE + value: "false" + - name: ENABLE_CLEAN_STREAM_PRODUCER + value: {{ quote .Values.global.runtime.cleanStreamProducerEnable }} + {{- if 
.Values.global.runtime.oomKill.enable }} + - name: OOM_KILL_ENABLE + value: {{ quote .Values.global.runtime.oomKill.enable }} + - name: MEMORY_DETECTION_INTERVAL + value: {{ quote .Values.global.runtime.oomKill.memoryDetectionInterval }} + - name: OOM_CONSECUTIVE_DETECTION_COUNT + value: {{ quote .Values.global.runtime.oomKill.consecutiveDetectionCount }} + - name: OOM_KILL_CONTROL_LIMIT + value: {{ quote .Values.global.runtime.oomKill.controlLimit }} + {{- end }} + - name: KILL_PROCESS_TIMEOUT_SECONDS + value: {{ quote .Values.global.runtime.killProcessTimeoutSeconds }} + - name: RUNTIME_INSTANCE_DEBUG_ENABLE + value: "false" + image: "{{ .Values.global.imageRegistry | trimSuffix "/" }}/{{ .Values.global.images.runtimeManager }}" + imagePullPolicy: IfNotPresent + livenessProbe: + failureThreshold: {{ .Values.global.pool.livenessProbeFailureThreshold }} {{- /* liveness probe reads the liveness threshold, as the function-agent container does */}} + exec: + command: + - /bin/bash + - -c + - /home/sn/bin/health-check $(RUNTIME_MGR_PORT) runtime-manager + initialDelaySeconds: 1 + periodSeconds: 5 + successThreshold: 1 + timeoutSeconds: 5 + readinessProbe: + failureThreshold: {{ .Values.global.pool.readinessProbeFailureThreshold }} {{- /* readiness probe reads the readiness threshold, as the function-agent container does */}} + exec: + command: + - /bin/bash + - -c + - /home/sn/bin/health-check $(RUNTIME_MGR_PORT) runtime-manager + initialDelaySeconds: 1 + periodSeconds: 1 + successThreshold: 1 + timeoutSeconds: 5 + ports: + - containerPort: 21005 + name: 21005tcp00 + protocol: TCP + - name: prometheus-http + containerPort: 9392 + protocol: TCP + resources: + limits: + {{- if eq .Values.global.pool.accelerator "huawei-Ascend310" }} + huawei.com/Ascend310: {{ .Values.global.pool.cardNum }} + {{ else if eq .Values.global.pool.accelerator "huawei-Ascend310P" }} + huawei.com/Ascend310P: {{ .Values.global.pool.cardNum }} + {{ else if eq .Values.global.pool.accelerator "huawei-Ascend910" }} + huawei.com/Ascend910: {{ .Values.global.pool.cardNum }} + {{ else if eq .Values.global.pool.accelerator "huawei-Ascend910B" }} + huawei.com/ascend-1980: {{
.Values.global.pool.cardNum }} + {{ else if eq .Values.global.pool.accelerator "nvidia-gpu" }} + nvidia.com/gpu: {{ .Values.global.pool.cardNum }} + {{ else if eq .Values.global.pool.accelerator "amd-gpu" }} + amd.com/gpu: {{ .Values.global.pool.cardNum }} + {{- end }} + cpu: {{ .Values.global.pool.limitCpu }}m + memory: {{ .Values.global.pool.limitMemory }}Mi + ephemeral-storage: {{ .Values.global.pool.limitEphemeralStorage }}Mi + requests: + {{- if eq .Values.global.pool.accelerator "huawei-Ascend310" }} + huawei.com/Ascend310: {{ .Values.global.pool.cardNum }} + {{ else if eq .Values.global.pool.accelerator "huawei-Ascend310P" }} + huawei.com/Ascend310P: {{ .Values.global.pool.cardNum }} + {{ else if eq .Values.global.pool.accelerator "huawei-Ascend910" }} + huawei.com/Ascend910: {{ .Values.global.pool.cardNum }} + {{ else if eq .Values.global.pool.accelerator "huawei-Ascend910B" }} + huawei.com/ascend-1980: {{ .Values.global.pool.cardNum }} + {{ else if eq .Values.global.pool.accelerator "nvidia-gpu" }} + nvidia.com/gpu: {{ .Values.global.pool.cardNum }} + {{ else if eq .Values.global.pool.accelerator "amd-gpu" }} + amd.com/gpu: {{ .Values.global.pool.cardNum }} + {{- end }} + cpu: {{ .Values.global.pool.requestCpu }}m + memory: {{ .Values.global.pool.requestMemory }}Mi + ephemeral-storage: {{ .Values.global.pool.requestEphemeralStorage }}Mi + securityContext: + capabilities: + add: + - SYS_ADMIN + - KILL + - DAC_OVERRIDE + - SETGID + - SETUID + drop: + - ALL + terminationMessagePath: /var/tmp/termination-log + terminationMessagePolicy: File + volumeMounts: + - mountPath: /etc/localtime + name: local-time + {{- if .Values.global.log.hostPath.enable }} + - mountPath: "{{ .Values.global.log.functionSystem.path }}" + name: varlog-runtime-manager + subPathExpr: $(POD_NAME) + - mountPath: "{{ .Values.global.log.runtime.path }}" + name: servicelog + subPathExpr: $(POD_NAME) + - mountPath: "{{ .Values.global.log.userOutput.path }}" + name: stdlog + subPathExpr: 
$(POD_NAME) + {{- else }} + - mountPath: "{{ .Values.global.log.functionSystem.path }}" + name: varlog-runtime-manager + - mountPath: "{{ .Values.global.log.runtime.path }}" + name: servicelog + - mountPath: "{{ .Values.global.log.userOutput.path }}" + name: stdlog + {{- end }} + - mountPath: /home/snuser/secret + name: secret-dir + - mountPath: /dcache + name: pkg-dir + - mountPath: /opt/function/code + name: pkg-dir1 + {{- if .Values.global.sts.enable }} + - mountPath: /home/snuser/alarms + name: alarms-dir + {{- end }} + - mountPath: /home/sn/metrics + name: metrics-dir + - mountPath: /home/snuser/metrics + name: runtime-metrics-dir + - mountPath: /home/snuser/config/python-runtime-log.json + name: python-runtime-log-config + readOnly: true + subPath: python-runtime-log.json + - mountPath: /home/snuser/config/runtime.json + name: runtime-config + subPath: runtime.json + readOnly: true + - mountPath: /home/snuser/runtime/java/log4j2.xml + name: java-runtime-log4j2-config + subPath: log4j2.xml + - mountPath: /home/uds + name: datasystem-socket + - mountPath: /dev/shm + name: datasystem-shm + - mountPath: /home/sn/podInfo + name: podinfo + - name: function-agent + command: + - /home/sn/bin/entrypoint-function-agent + env: + - name: POD_IP + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: status.podIP + - name: NODE_ID + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: spec.nodeName + - name: HOST_IP + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: status.hostIP + - name: POD_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.name + - name: FSPROXY_PORT + value: {{ quote .Values.global.port.functionProxyPort }} + - name: FUNCTION_AGENT_PORT + value: {{ quote .Values.global.port.functionAgentPort }} + - name: PROMETHEUS_PUSH_GATEWAY_IP + value: {{ quote .Values.global.observer.proGatewayIP }} + - name: PROMETHEUS_PUSH_GATEWAY_PORT + value: {{ quote .Values.global.observer.gatewayPort }} + - name: DECRYPT_ALGORITHM + value: {{ quote 
.Values.global.common.decryptAlgorithm }} + - name: S3_ADDR + value: {{ .Values.global.obsManagement.s3Endpoint | default "$(HOST_IP):30110" }} + - name: S3_PROTOCOL + value: {{ .Values.global.obsManagement.protocol }} + - name: S3_CREDENTIAL_TYPE + value: {{ .Values.global.obsManagement.credentialType }} + - name: LOG_PATH + value: {{ .Values.global.log.functionSystem.path }} + - name: LOG_LEVEL + value: {{ .Values.global.log.functionSystem.level }} + - name: LOG_PATTERN + value: {{ quote .Values.global.log.functionSystem.pattern }} + - name: LOG_COMPRESS_ENABLE + value: {{ quote .Values.global.log.functionSystem.compress }} + - name: LOG_ROLLING_MAXSIZE + value: {{ quote .Values.global.log.functionSystem.rolling.maxSize }} + - name: LOG_ROLLING_MAXFILES + value: {{ quote .Values.global.log.functionSystem.rolling.maxfiles }} + - name: LOG_ASYNC_LOGBUFSECS + value: {{ quote .Values.global.log.functionSystem.async.logBufSecs }} + - name: LOG_ASYNC_MAXQUEUESIZE + value: "51200" + - name: LOG_ASYNC_THREADCOUNT + value: "1" + - name: LOG_ALSOLOGTOSTDERR + value: "false" + - name: ENABLE_METRICS + value: {{ quote .Values.global.observer.metrics.enable }} + - name: METRICS_CONFIG + value: {{ quote .Values.global.observer.metrics.metricsConfig }} + - name: METRICS_CONFIG_FILE + value: {{ quote .Values.global.observer.metrics.metricsConfigFile }} + - name: SYSTEM_TIMEOUT + value: {{ quote .Values.global.common.systemTimeout }} + - name: STS_CONFIG + value: "{}" + - name: SSL_ENABLE + value: {{ quote .Values.global.mutualSSLConfig.sslEnable }} + - name: SSL_BASE_PATH + value: {{ quote .Values.global.mutualSSLConfig.sslBasePath }} + - name: SSL_ROOT_FILE + value: "ca.crt" + - name: SSL_CERT_FILE + value: "module.crt" + - name: SSL_KEY_FILE + value: "module.key" + - name: SSL_PWD_FILE + value: "cert_pwd" + - name: SSL_DECRYPT_TOOL + value: {{ quote .Values.global.mutualSSLConfig.sslDecryptTool }} + - name: S3_DOWNLOAD_MAXZIPSIZE + value: {{ quote 
.Values.global.common.zipFile.zipFileSizeMax }} + - name: S3_DOWNLOAD_MAXUNZIPSIZE + value: {{ quote .Values.global.common.zipFile.unzipFileSizeMax }} + - name: S3_DOWNLOAD_MAXFILECOUNT + value: {{ quote .Values.global.common.zipFile.fileCountsMax }} + - name: S3_DOWNLOAD_MAXDIRDEPTH + value: {{ quote .Values.global.common.zipFile.dirDepthMax }} + - name: ENABLE_IPV4_TENANT_ISOLATION + value: {{ quote .Values.global.tenantIsolation.ipv4.enable }} + - name: DEPLOY_DIR + value: {{ quote .Values.global.common.deployDir }} + - name: SCC_ENABLE + value: {{ quote .Values.global.scc.enable }} + - name: SCC_ALGORITHM + value: {{ quote .Values.global.scc.algorithm }} + - name: SCC_PRIMARY_FILE + value: "primary.ks" + - name: SCC_STANDBY_FILE + value: "standby.ks" + - name: ENABLE_SIGNATURE_VALIDATION + value: {{ quote .Values.global.common.zipFile.signatureValidationEnable }} + - name: CODE_AGING_TIME + value: {{ quote .Values.global.common.codeAgingTime }} + - name: SYSTEM_AUTH_MODE + value: {{ quote .Values.global.common.systemAuthMode }} + - name: CUSTOM_RESOURCES + value: "" + - name: RESOURCE_PATH + value: /home/sn/resource + - name: SCC_BASE_PATH + value: /home/sn/resource/scc + - name: SCC_LOG_PATH + value: /home/sn/log + - name: DELEGATE_ENV_VAR + value: '{"DISABLE_APIG_FORMAT":"true"}' + image: "{{ .Values.global.imageRegistry | trimSuffix "/" }}/{{ .Values.global.images.functionAgent }}" + imagePullPolicy: IfNotPresent + livenessProbe: + failureThreshold: {{ .Values.global.pool.livenessProbeFailureThreshold }} + exec: + command: + - /bin/bash + - -c + - /home/sn/bin/health-check $(FUNCTION_AGENT_PORT) function-agent + initialDelaySeconds: 10 + periodSeconds: 5 + successThreshold: 1 + timeoutSeconds: 5 + readinessProbe: + failureThreshold: {{ .Values.global.pool.readinessProbeFailureThreshold }} + exec: + command: + - /bin/bash + - -c + - /home/sn/bin/health-check $(FUNCTION_AGENT_PORT) function-agent + initialDelaySeconds: 3 + periodSeconds: 1 + successThreshold: 
1 + timeoutSeconds: 5 + ports: + - containerPort: 58866 + name: 58866tcp00 + protocol: TCP + resources: + limits: + cpu: "{{ .Values.global.resources.functionAgent.limits.cpu }}" + memory: {{ .Values.global.resources.functionAgent.limits.memory }} + requests: + cpu: "{{ .Values.global.resources.functionAgent.requests.cpu }}" + memory: {{ .Values.global.resources.functionAgent.requests.memory }} + securityContext: + capabilities: + add: + - NET_ADMIN + - NET_RAW + drop: + - ALL + terminationMessagePath: /var/tmp/termination-log + terminationMessagePolicy: File + volumeMounts: + {{- if .Values.global.common.secretKeyEnable }} + - name: localauth + mountPath: /home/sn/resource/cipher + readOnly: true + {{- end }} + - mountPath: /etc/localtime + name: local-time + - mountPath: "{{ .Values.global.log.functionSystem.path }}" + name: varlog-function-agent + {{- if .Values.global.log.hostPath.enable }} + subPathExpr: $(POD_NAME) + {{- end}} + - mountPath: /dcache + name: pkg-dir + - mountPath: /opt/function/code + name: pkg-dir1 + - mountPath: {{ .Values.global.observer.metrics.path.file }} + name: metrics-dir + - mountPath: {{ .Values.global.observer.metrics.path.failure }} + name: varfailuremetrics + {{- if or (eq .Values.global.mutualSSLConfig.sslEnable true) (eq .Values.global.observer.metrics.sslEnable true) }} + - mountPath: {{ .Values.global.mutualSSLConfig.sslBasePath }} + name: module + readOnly: true + {{- end }} + {{- if .Values.global.scc.enable }} + - name: scc-ks + mountPath: /home/sn/resource/scc + readOnly: true + {{- end }} + dnsPolicy: ClusterFirst + imagePullSecrets: +{{- include "functionSystem.imagePullSecrets" . 
| nindent 6 }} + restartPolicy: Always + automountServiceAccountToken: false + schedulerName: default-scheduler + securityContext: + fsGroup: {{ .Values.global.runtime.fsGroup }} + supplementalGroups: + - 1000 + - 1002 + terminationGracePeriodSeconds: {{ .Values.global.pool.gracePeriodSeconds }} + volumes: + - name: local-time + hostPath: + path: /etc/localtime + {{- if .Values.global.log.hostPath.enable }} + - name: varlog-function-agent + hostPath: + path: "{{ .Values.global.log.hostPath.componentLog }}" + type: DirectoryOrCreate + - name: varlog-runtime-manager + hostPath: + path: "{{ .Values.global.log.hostPath.componentLog }}" + type: DirectoryOrCreate + - name: servicelog + hostPath: + path: "{{ .Values.global.log.hostPath.serviceLog }}" + type: DirectoryOrCreate + - name: stdlog + hostPath: + path: "{{ .Values.global.log.hostPath.userLog }}" + type: DirectoryOrCreate + {{- else }} + - name: varlog-function-agent + emptyDir: {} + - name: varlog-runtime-manager + emptyDir: {} + - name: servicelog + emptyDir: {} + - name: stdlog + emptyDir: {} + {{- end }} + - name: secret-dir + emptyDir: {} + - name: pkg-dir + emptyDir: {} + - name: pkg-dir1 + emptyDir: {} + {{- if .Values.global.sts.enable }} + - emptyDir: {} + name: alarms-dir + {{- end }} + - emptyDir: {} + name: metrics-dir + - emptyDir: {} + name: runtime-metrics-dir + {{- if .Values.global.common.secretKeyEnable }} + - name: localauth + secret: + secretName: local-secret + {{- end }} + {{- if .Values.global.observer.metrics.hostPath.failureFileEnable }} + - name: varfailuremetrics + hostPath: + path: "{{ .Values.global.observer.metrics.hostPath.failureMetrics }}" + type: DirectoryOrCreate + {{- else }} + - emptyDir: {} + name: varfailuremetrics + {{- end }} + - name: resource-volume + emptyDir: {} + - configMap: + defaultMode: 0440 + items: + - key: python-runtime-log.json + path: python-runtime-log.json + name: function-agent-config + name: python-runtime-log-config + - configMap: + defaultMode: 0440 + 
items: + - key: runtime.json + path: runtime.json + name: function-agent-config + name: runtime-config + - configMap: + defaultMode: 0440 + items: + - key: log4j2.xml + path: log4j2.xml + name: function-agent-config + name: java-runtime-log4j2-config + - configMap: + defaultMode: 0440 + items: + - key: iptabelsRule + path: iptabelsRule + name: function-agent-config + name: iptables-rules + - hostPath: + path: /home/uds + type: "" + name: datasystem-socket + - hostPath: + path: /dev/shm + type: "" + name: datasystem-shm + - downwardAPI: + defaultMode: 420 + items: + - fieldRef: + apiVersion: v1 + fieldPath: metadata.labels + path: labels + - fieldRef: + apiVersion: v1 + fieldPath: metadata.annotations + path: annotations + name: podinfo + {{- if or (eq .Values.global.mutualSSLConfig.sslEnable true) (eq .Values.global.observer.metrics.sslEnable true) }} + - name: module + secret: + defaultMode: 0440 + secretName: {{ .Values.global.mutualSSLConfig.secretName }} + {{- end }} + {{- if .Values.global.scc.enable }} + - name: scc-ks + secret: + defaultMode: 0440 + secretName: {{ .Values.global.scc.secretName }} + - configMap: + defaultMode: 0440 + items: + - key: CONFIG + path: scc.conf + name: fs-scc-configmap + name: scc-config + {{- end }} +{{- end }} \ No newline at end of file diff --git a/deploy/k8s/charts/templates/master/function-agent-deployment.yaml b/deploy/k8s/charts/templates/master/function-agent-deployment.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8ccf2f08e1c23912e6635aff6b3353718d52ada7 --- /dev/null +++ b/deploy/k8s/charts/templates/master/function-agent-deployment.yaml @@ -0,0 +1,8 @@ +apiVersion: v1 +metadata: + name: function-agent + namespace: {{ .Values.global.namespace }} +kind: ConfigMap +data: + function-agent.json: |- + {{ include "agent.deployment.template" . 
| fromYaml | toJson }} \ No newline at end of file diff --git a/deploy/k8s/charts/templates/master/function-master.yaml b/deploy/k8s/charts/templates/master/function-master.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4d5ad0df7c6abfdfdaa4ac3db2f5f4921e6a0464 --- /dev/null +++ b/deploy/k8s/charts/templates/master/function-master.yaml @@ -0,0 +1,328 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: function-master + namespace: {{ .Values.global.namespace }} +spec: + replicas: {{ .Values.global.replicas.functionMaster }} + selector: + matchLabels: + app: function-master + template: + metadata: + labels: + app: function-master + spec: + volumes: + - name: log-volume + hostPath: + path: "{{ .Values.global.log.hostPath.componentLog }}" + type: DirectoryOrCreate + - name: data-volume + emptyDir: + sizeLimit: 5Gi + - configMap: + defaultMode: 0440 + name: function-agent + name: function-agent + initContainers: + - name: agent-init-container + image: {{ .Values.global.imageRegistry }}{{ .Values.global.images.agentInit }} + command: ["/bin/sh", "-c", "chown 1002:1002 /opt/yuanrong/logs && chmod 777 /opt/yuanrong/logs"] + resources: + limits: + cpu: {{ .Values.global.resources.functionAgentInit.limits.cpu }} + memory: {{ .Values.global.resources.functionAgentInit.limits.memory }} + requests: + cpu: {{ .Values.global.resources.functionAgentInit.requests.cpu }} + memory: {{ .Values.global.resources.functionAgentInit.requests.memory }} + volumeMounts: + - name: log-volume + mountPath: /opt/yuanrong/logs + subPathExpr: $(RUNTIME_POD_NAME) + env: + - name: RUNTIME_POD_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.name + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + imagePullPolicy: IfNotPresent + securityContext: + capabilities: + add: + - CHOWN + - NET_RAW + - NET_ADMIN + - SYS_ADMIN + - CHOWN + - SETGID + - SETUID + - DAC_OVERRIDE + - FOWNER + - FSETID + drop: + - ALL + runAsUser: 0 
+ containers: + - name: service-container + image: {{ .Values.global.imageRegistry }}{{ .Values.global.images.common }} + command: ["/bin/sh", "-l", "/home/sn/function-master/bin/bootstrap"] + ports: + - containerPort: {{ .Values.global.port.functionMasterPort }} + protocol: TCP + env: + - name: RUNTIME_HOST_IP + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: status.hostIP + - name: RUNTIME_NODE_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: spec.nodeName + - name: RUNTIME_POD_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.name + - name: RUNTIME_POD_NAMESPACE + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.namespace + - name: RUNTIME_POD_IP + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: status.podIP + - name: RUNTIME_POD_UID + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.uid + - name: X_WISECLOUD_SITE + value: aaa.bbb + - name: X_WISECLOUD_TENANT_ID + value: aaa.bbb + - name: X_WISECLOUD_APPLICATION_ID + value: aaa.bbb + - name: X_WISECLOUD_SERVICE_ID + value: aaa.bbb + - name: X_WISECLOUD_ENVIRONMENT_ID + value: aaa.bbb + - name: RUNTIME_MICROSERVICE_NAME + value: aaa.bbb + - name: RUNTIME_MICROSERVICE_CLUSTER + value: cn-dev-master + - name: LANG + value: en_US.UTF-8 + - name: RUNTIME_MICROSERVICE_ENVIRONMENT + value: aaa.bbb + - name: RUNTIME_MICROSERVICE_SERVICE_NAME + value: aaa.bbb + - name: RUNTIME_MICROSERVICE_BUSINESS + value: aaa.bbb + - name: RUNTIME_MICROSERVICE_REGION + value: cn-north-4 + - name: RUNTIME_MICROSERVICE_AZ + value: cn-north-4g + - name: X_WISECLOUD_CLOUDMAP_ID + value: aaa.bbb + - name: NUWA_CLOUDMAP_NAMESPACENAME + value: aaa.bbb + - name: NUWA_CLOUDMAP_SERVERADDR + value: + - name: NUWA_CLOUDMAP_DUAL_SERVERADDR + value: + - name: TZ + value: Asia/Shanghai + - name: WISECLOUD_ACMS_ENDPOINT + value: + - name: ENABLE_NUWA + value: "false" + - name: NUWA_RUNTIME_API_PATH + value: "" + - name: STS_ENABLE + value: "false" + - name: GLOBAL_SCHEDULER_PORT + 
value: "{{ .Values.global.port.functionMasterPort }}" + - name: SYSTEM_FUNCTION_RETRY_PERIOD + value: "{{ .Values.global.common.functionBootstrapRetryPeriod }}" + - name: META_STORE_ADDRESS + {{- if and (.Values.global.metaStore.enable) (ne .Values.global.metaStore.mode "passthrough") }} + value: "{{ .Values.global.metaStore.address }}" + {{- else }} + value: "{{ .Values.global.etcdManagement.detcd }}" + {{- end }} + - name: LOG_PATH + value: "/opt/yuanrong/logs" + - name: LOG_LEVEL + value: "DEBUG" + - name: LOG_ROLLING_MAXSIZE + value: "1000" + - name: LOG_ROLLING_MAXFILES + value: "3" + - name: LOG_ASYNC_LOGBUFSECS + value: "30" + - name: LOG_ASYNC_MAXQUEUESIZE + value: "51200" + - name: LOG_ASYNC_THREADCOUNT + value: "1" + - name: LOG_ALSOLOGTOSTDERR + value: "false" + - name: ENABLE_METRICS + value: "{{ .Values.global.observer.metrics.enable }}" + - name: METRICS_CONFIG_FILE + value: "{{ .Values.global.observer.metrics.metricsConfigFile }}" + - name: K8S_BASE_URL + value: "{{ .Values.global.kubernetes.kubeApiBaseUrl }}" + - name: RUNTIME_RECOVER_ENABLE + value: "false" + - name: SYSTEM_TIMEOUT + value: "{{ .Values.global.common.systemTimeout }}" + - name: ELECTION_MODE + value: "{{ .Values.global.common.electionMode }}" + - name: MAX_STORAGE_OPERATE_RETRY_TIMES + value: "60" + - name: SSL_ENABLE + value: "{{ .Values.global.mutualSSLConfig.sslEnable }}" + - name: DECRYPT_ALGORITHM + value: "{{ .Values.global.common.decryptAlgorithm }}" + - name: DS_HEALTH_CHECK_PATH + value: "/home/sn/datasystem/health" + - name: DS_HEALTH_CHECK_INTERVAL + value: "1000" + - name: MAX_DS_HEALTH_CHECK_TIMES + value: "12" + - name: K8S_NAMESPACE + value: "default" + - name: LOG_COMPRESS_ENABLE + value: "true" + - name: ENABLE_PRINT_RESOURCE_VIEW + value: "{{ .Values.global.common.enablePrintResourceView }}" + - name: WORKER_TAINT_EXCLUDES + value: "{{ .Values.global.common.workerTaintExcludes }}" + - name: SYSTEM_AUTH_MODE + value: "{{ .Values.global.common.systemAuthMode }}" + - 
name: CLUSTER_ID + value: {{ quote .Values.global.clusterId }} + - name: CLUSTER_NAME + value: "cn-dev" + - name: MIGRATE_PREFIX + value: "{{ .Values.global.runtime.migratePrefix }}" + - name: TAINT_TOLERANCE_LIST + value: "{{ .Values.global.runtime.taintToleranceList }}" + - name: MIGRATE_ENABLE + value: "{{ .Values.global.runtime.migrateEnable }}" + - name: SYSTEM_UPGRADE_WATCH_ENABLE + value: "{{ .Values.global.systemUpgradeConfig.enable }}" + - name: AZ_ID + value: "{{ .Values.global.systemUpgradeConfig.azID }}" + - name: SYSTEM_UPGRADE_KEY + value: "{{ .Values.global.systemUpgradeConfig.systemUpgradeKey }}" + - name: SYSTEM_UPGRADE_ADDRESS + value: "{{ .Values.global.systemUpgradeConfig.systemUpgradeWatchAddress }}" + - name: ETCD_CLUSTER_ADDRESS + value: "{{ .Values.global.etcdManagement.detcd }}" + - name: GRACE_PERIOD_SECONDS + value: "{{ .Values.global.common.deletePodGracePeriodSeconds }}" + - name: ETCD_AUTH_TYPE + value: "{{ .Values.global.etcdManagement.authType }}" + - name: SCC_ENABLE + value: "{{ .Values.global.scc.enable }}" + - name: MAX_TOLERATE_META_STORE_FAILED_TIMES + value: "{{ .Values.global.etcdManagement.maxTolerateMetaStoreFailedTimes }}" + - name: META_HEALTH_CHECK_INTERVAL_MS + value: "{{ .Values.global.etcdManagement.metaStoreCheckHealthIntervalMs }}" + - name: META_HEALTH_CHECK_TIMEOUTS + value: "{{ .Values.global.etcdManagement.metaStoreTimeoutMs }}" + - name: ETCD_TARGET_NAME_OVERRIDE + value: "aaa.bbb" + - name: FUNCTION_META_PATH + value: "/home/sn/function-metas" + - name: RESOURCE_PATH + value: "/home/sn/resource" + - name: POOL_CONFIG_PATH + value: "/home/sn/pools.json" + - name: AGENT_TEMPLATE_PATH + value: /home/sn/scaler/template/function-agent.json + - name: METRICS_CONFIG + value: {{ quote .Values.global.observer.metrics.metricsConfig }}{{/* quote already renders a complete double-quoted YAML scalar; wrapping it in single quotes would make the quotes and escapes part of the env value */}} + - name: LOG_PATTERN + value: '{ + "separator": " | ", + "placeholders": [ + {"flags": "%Y-%m-%dT%H:%M:%S.%f"}, + {"flags": "%l"}, + {"flags": "%s:%#"}, + {"env": "POD_NAME"}, + {"env":
"CLUSTER_ID"}, + {"flags": ""} + ] + }' + - name: STS_CONFIG + value: "{}" + - name: KUBE_CLIENT_RETRY_TIMES + value: {{ quote .Values.global.kubernetes.kubeClientRetryTime }} + - name: KUBE_API_RETRY_CYCLE + value: {{ quote .Values.global.kubernetes.kubeClientRetryCycMs }} + - name: HEALTH_MONITOR_MAX_FAILURE + value: {{ quote .Values.global.kubernetes.healthMonitorMaxFailure }} + - name: HEALTH_MONITOR_RETRY_INTERVAL + value: {{ quote .Values.global.kubernetes.healthMonitorRetryInterval }} + resources: + limits: + cpu: {{ .Values.global.resources.functionMaster.limits.cpu }} + memory: {{ .Values.global.resources.functionMaster.limits.memory }} + requests: + cpu: {{ .Values.global.resources.functionMaster.requests.cpu }} + memory: {{ .Values.global.resources.functionMaster.requests.memory }} + volumeMounts: + - name: log-volume + mountPath: /opt/yuanrong/logs + subPathExpr: $(RUNTIME_POD_NAME) + - name: data-volume + mountPath: /opt/yuanrong/data + - mountPath: /home/sn/scaler/template + name: function-agent + livenessProbe: + tcpSocket: + port: {{ .Values.global.port.functionMasterPort }} + initialDelaySeconds: 3 + timeoutSeconds: 5 + periodSeconds: 5 + successThreshold: 1 + failureThreshold: 3 + readinessProbe: + tcpSocket: + port: {{ .Values.global.port.functionMasterPort }} + initialDelaySeconds: 3 + timeoutSeconds: 5 + periodSeconds: 5 + successThreshold: 1 + failureThreshold: 10 + lifecycle: + preStop: + exec: + command: + - /bin/sh + - -c + - sleep 1 + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + imagePullPolicy: IfNotPresent + securityContext: + runAsUser: 1002 + runAsNonRoot: true + restartPolicy: Always + terminationGracePeriodSeconds: 300 + dnsPolicy: ClusterFirst + securityContext: + fsGroup: {{ .Values.global.runtime.fsGroup }} + serviceAccountName: function-master + imagePullSecrets: + - name: default-secret + schedulerName: default-scheduler diff --git a/deploy/k8s/charts/templates/master/role-binding.yaml 
b/deploy/k8s/charts/templates/master/role-binding.yaml new file mode 100644 index 0000000000000000000000000000000000000000..46dc0ec9530d2dd2de3b8519493824f270c5cb6d --- /dev/null +++ b/deploy/k8s/charts/templates/master/role-binding.yaml @@ -0,0 +1,13 @@ +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: function-master + namespace: {{ .Values.global.namespace }} +subjects: +- kind: ServiceAccount + name: function-master + namespace: {{ .Values.global.namespace }} +roleRef: + kind: ClusterRole + name: function-master + apiGroup: rbac.authorization.k8s.io \ No newline at end of file diff --git a/deploy/k8s/charts/templates/master/role.yaml b/deploy/k8s/charts/templates/master/role.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e98fac838ff62494054af8b52049eec9ba734d53 --- /dev/null +++ b/deploy/k8s/charts/templates/master/role.yaml @@ -0,0 +1,59 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: function-master + namespace: {{ .Values.global.namespace }} +rules: + - apiGroups: + - "" + resources: + - pods + verbs: + - get + - list + - create + - watch + - delete + - update + - patch + - apiGroups: + - apps + resources: + - deployments + verbs: + - get + - list + - create + - delete + - update + - patch + - watch + - apiGroups: + - "" + resources: + - nodes + verbs: + - list + - watch + - patch + - apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - get + - list + - create + - update + - apiGroups: + - autoscaling + resources: + - horizontalpodautoscalers + verbs: + - get + - list + - create + - update + - watch + - patch + - delete \ No newline at end of file diff --git a/deploy/k8s/charts/templates/master/service-account.yaml b/deploy/k8s/charts/templates/master/service-account.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c8140c94ee4fdcfe12b76a7547e18ba78fd75c40 --- /dev/null +++ 
b/deploy/k8s/charts/templates/master/service-account.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: function-master + namespace: {{ .Values.global.namespace }} \ No newline at end of file diff --git a/deploy/k8s/charts/templates/scheduler/role.yaml b/deploy/k8s/charts/templates/scheduler/role.yaml new file mode 100644 index 0000000000000000000000000000000000000000..325d33b411eef1059908c6ca41ab25c58663fb70 --- /dev/null +++ b/deploy/k8s/charts/templates/scheduler/role.yaml @@ -0,0 +1,13 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + creationTimestamp: null + name: faas-scheduler +rules: + - apiGroups: + - yr.cap.io + resources: + - '*' + - yrtasks + verbs: + - '*' \ No newline at end of file diff --git a/deploy/k8s/charts/templates/scheduler/role_binding.yaml b/deploy/k8s/charts/templates/scheduler/role_binding.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3c368d439d797b47b04e4717d78cbcee98a0aee4 --- /dev/null +++ b/deploy/k8s/charts/templates/scheduler/role_binding.yaml @@ -0,0 +1,12 @@ +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: faas-scheduler +subjects: + - kind: ServiceAccount + name: faas-scheduler + namespace: {{ .Values.global.namespace }}{{/* subject namespace follows the namespace the faas-scheduler Deployment is rendered into, instead of a hard-coded "default" */}} +roleRef: + kind: ClusterRole + name: faas-scheduler + apiGroup: rbac.authorization.k8s.io \ No newline at end of file diff --git a/deploy/k8s/charts/templates/scheduler/scheduler-configmap.yaml b/deploy/k8s/charts/templates/scheduler/scheduler-configmap.yaml new file mode 100644 index 0000000000000000000000000000000000000000..71461ed6d01b5766f59a2fa518fc73a1adf52380 --- /dev/null +++ b/deploy/k8s/charts/templates/scheduler/scheduler-configmap.yaml @@ -0,0 +1,117 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: scheduler-config + namespace: {{ .Values.global.namespace }}{{/* a ConfigMap volume is resolved in the pod's own namespace, so this must match the faas-scheduler Deployment that mounts scheduler-config */}} +data: + config.json: |- + { + "schedulerNum": 1, + "clusterID":"cluster001", + "scenario": "faas", + "clusterName": "cn-dev-gy", + "regionName": "guiyang", +
"alarmConfig": { + "enableAlarm": false, + "minInsStartInterval": 15, + "minInsCheckInterval": 15, + "alarmLogConfig": { + "enableAlarm": true, + "filepath": "", + "level": "Info", + "tick": 0, + "first": 0, + "thereafter": 0, + "singlesize": 500, + "threshold": 3, + "disable": false + }, + "xiangYunFourConfig": { + "site": "aaa.bbb", + "tenantID": "T014", + "applicationID": "aaa.bbb", + "serviceID": "aaa.bbb" + } + }, + "nodeSelector": {}, + "enableNPUDriverMount": false, + "cpu": 2000, + "memory": 2048, + "image": ":", + "version":"", + "metricsAddr": ":9099", + "metricsHttpsEnable": false, + "predictGroupWindow": 900000, + "autoScaleConfig": { + "slaQuota": 1000, + "scaleDownTime": 60000, + "burstScaleNum": 1000 + }, + "enableRollout": false, + "enableHealthCheck": true, + "stateDisable": true, + "reliabilityType": "low", + "leaseSpan": 5000, + "functionLimitRate": 20000, + "npuEphemeralStorage": 512000, + "ephemeralStorage": 6144, + "hostaliaseshostname": [{"hostnames":[""],"ip":""},{"hostnames":[""],"ip":""}], + "systemAuth": "", + "dockerRootPath":"/var/lib/docker", + "functionConfig": [], + "localAuth": { + "aKey": "", + "sKey": "" + }, + "xpuNodeLabels": [ + {"xpuType": "huawei.com/ascend-1980","instanceType": "280T","nodeLabelKey": "node.kubernetes.io/npu.chip.name","NodeLabelValues": ["910B4"]}, + {"xpuType": "huawei.com/ascend-1980","instanceType": "376T","nodeLabelKey": "node.kubernetes.io/npu.chip.name","NodeLabelValues": ["910B1","910B2"]}, + {"xpuType": "huawei.com/ascend-1980","instanceType": "313T","nodeLabelKey": "node.kubernetes.io/npu.chip.name","NodeLabelValues": ["910B3"]}, + {"xpuType": "huawei.com/ascend-1980","nodeLabelKey": "node.kubernetes.io/npu.chip.name","NodeLabelValues": ["910B1","910B2"]} + ], + "serviceAccountJwt": { + "nuwaRuntimeAddr": "", + "nuwaGatewayAddr": "", + "oauthTokenUrl": "", + "serviceAccountKey": "", + "tlsConfig": { + "tlsCipherSuites": 
["TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256","TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256","TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384","TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384","TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256","TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256"], + "httpsInsecureSkipVerify": true + }, + "TLSConfig": { + "tlsCipherSuites": ["TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256","TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256","TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384","TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384","TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256","TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256"], + "httpsInsecureSkipVerify": true + } + }, + "routerEtcd": { + "servers": ["{{ .Values.global.etcdManagement.detcd }}"], + {{- if eq .Values.global.etcdManagement.authType "TLS" }} + "sslEnable": true, + {{- else }} + "sslEnable": false, + {{- end}} + "user":"", + "password":"", + "authType": {{ quote .Values.global.etcdManagement.authType }}, + "useSecret": {{ .Values.global.etcdManagement.useSecret }}, + "secretName": {{ quote .Values.global.etcdManagement.secretName }} + }, + "metaEtcd": { + "servers": ["{{ .Values.global.etcdManagement.detcd }}"], + {{- if eq .Values.global.etcdManagement.authType "TLS" }} + "sslEnable": true, + {{- else }} + "sslEnable": false, + {{- end}} + "user":"", + "password":"", + "authType": {{ quote .Values.global.etcdManagement.authType }}, + "useSecret": {{ .Values.global.etcdManagement.useSecret }}, + "secretName": {{ quote .Values.global.etcdManagement.secretName }} + }, + "tlsConfig": { + "caContent": "${CA_CONTENT}", + "keyContent": "${KEY_CONTENT}", + "certContent": "${CERT_CONTENT}" + } + } \ No newline at end of file diff --git a/deploy/k8s/charts/templates/scheduler/scheduler.yaml b/deploy/k8s/charts/templates/scheduler/scheduler.yaml new file mode 100644 index 0000000000000000000000000000000000000000..be21136e297de68096db2cbe2904d070787f748e --- /dev/null +++ b/deploy/k8s/charts/templates/scheduler/scheduler.yaml @@ -0,0 +1,452 @@ 
+apiVersion: apps/v1 +kind: Deployment +metadata: + name: faas-scheduler + namespace: {{ .Values.global.namespace }} +spec: + replicas: {{ .Values.global.replicas.faasScheduler }} + selector: + matchLabels: + app: faas-scheduler + template: + metadata: + labels: + app: faas-scheduler + spec: + volumes: + - name: volume-config + configMap: + name: scheduler-config + items: + - key: config.json + path: config.json + defaultMode: 420 + - name: iam-policy-config-volume + configMap: + name: iam-policy-config + items: + - key: iam-policy-config.json + path: iam-policy-config.json + defaultMode: 416 + - name: log-volume + hostPath: + path: "{{ .Values.global.log.hostPath.componentLog }}" + type: DirectoryOrCreate + - name: data-volume + emptyDir: + sizeLimit: 5Gi + initContainers: + - name: agent-init-container + image: {{ .Values.global.imageRegistry }}{{ .Values.global.images.agentInit }} + command: ["/bin/sh", "-c", "chown 1002:1002 /opt/yuanrong/logs && chmod 777 /opt/yuanrong/logs"] + resources: + limits: + cpu: {{ .Values.global.resources.functionAgentInit.limits.cpu }} + memory: {{ .Values.global.resources.functionAgentInit.limits.memory }} + requests: + cpu: {{ .Values.global.resources.functionAgentInit.requests.cpu }} + memory: {{ .Values.global.resources.functionAgentInit.requests.memory }} + volumeMounts: + - name: log-volume + mountPath: /opt/yuanrong/logs + subPathExpr: $(RUNTIME_POD_NAME) + env: + - name: RUNTIME_POD_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.name + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + imagePullPolicy: IfNotPresent + securityContext: + capabilities: + add: + - CHOWN + - NET_RAW + - NET_ADMIN + - SYS_ADMIN + - CHOWN + - SETGID + - SETUID + - DAC_OVERRIDE + - FOWNER + - FSETID + drop: + - ALL + runAsUser: 0 + containers: + - name: service-container + image: {{ .Values.global.imageRegistry }}{{ .Values.global.images.common }} + command: ["/bin/sh", "-l", 
"/home/sn/scheduler/bin/bootstrap"] + ports: + - containerPort: {{ .Values.global.port.faasSchedulerPort }} + protocol: TCP + env: + - name: RUNTIME_HOST_IP + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: status.hostIP + - name: RUNTIME_NODE_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: spec.nodeName + - name: RUNTIME_POD_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.name + - name: RUNTIME_POD_NAMESPACE + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.namespace + - name: RUNTIME_POD_IP + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: status.podIP + - name: RUNTIME_POD_UID + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.uid + - name: X_WISECLOUD_SITE + value: aaa.bbb + - name: X_WISECLOUD_TENANT_ID + value: aaa.bbb + - name: X_WISECLOUD_APPLICATION_ID + value: aaa.bbb + - name: X_WISECLOUD_SERVICE_ID + value: aaa.bbb + - name: X_WISECLOUD_ENVIRONMENT_ID + value: aaa.bbb + - name: RUNTIME_MICROSERVICE_NAME + value: aaa.bbb + - name: RUNTIME_MICROSERVICE_CLUSTER + value: cn-dev-scheduler-green + - name: LANG + value: en_US.UTF-8 + - name: RUNTIME_MICROSERVICE_ENVIRONMENT + value: aaa.bbb + - name: RUNTIME_MICROSERVICE_SERVICE_NAME + value: aaa.bbb + - name: RUNTIME_MICROSERVICE_BUSINESS + value: aaa.bbb + - name: RUNTIME_MICROSERVICE_REGION + value: cn-north-4 + - name: RUNTIME_MICROSERVICE_AZ + value: cn-north-4g + - name: X_WISECLOUD_CLOUDMAP_ID + value: aaa.bbb + - name: NUWA_CLOUDMAP_NAMESPACENAME + value: aaa.bbb + - name: NUWA_CLOUDMAP_SERVERADDR + value: + - name: NUWA_CLOUDMAP_DUAL_SERVERADDR + value: + - name: TZ + value: Asia/Shanghai + - name: WISECLOUD_ACMS_ENDPOINT + value: + - name: YR_LOG_LEVEL + value: DEBUG + - name: FUNCTION_AGENT_PORT + value: "{{ .Values.global.port.functionAgentPort }}" + - name: RUNTIME_MGR_PORT + value: "{{ .Values.global.port.runtimeMgrPort }}" + - name: RUNTIME_INIT_PORT + value: "{{ .Values.global.port.runtimeInitPort }}" + - name: 
RUNTIME_PORT_NUM + value: "{{ .Values.global.port.runtimePortNum }}" + - name: METRICS_COLLECTOR_TYPE + value: {{ .Values.global.runtime.metricsCollectorType }} + - name: DISK_USAGE_MONITOR_PATH + value: {{ quote .Values.global.runtime.diskUsageMonitor.path }} + - name: DISK_USAGE_LIMIT + value: "{{ .Values.global.runtime.diskUsageMonitor.limit }}" + - name: DISK_USAGE_MONITOR_DURATION + value: "{{ .Values.global.runtime.diskUsageMonitor.duration }}" + - name: CPU4COMP + value: "5000" + - name: MEM4COMP + value: "10240" + - name: INIT_LABELS + value: '{"resource.owner":"30450000-0000-4000-8069-949f37caf04c"}' + - name: RUNTIME_LOG_DIR + value: /opt/yuanrong/logs + - name: RUNTIME_LOG_LEVEL + value: INFO + - name: IS_NEW_RUNTIME_PATH + value: "true" + - name: JAVA_PRESTART_COUNT + value: "{{ .Values.global.runtime.prestartCount.java8 }}" + - name: JAVA11_PRESTART_COUNT + value: "{{ .Values.global.runtime.prestartCount.java11 }}" + - name: PYTHON36_PRESTART_COUNT + value: "{{ .Values.global.runtime.prestartCount.python36 }}" + - name: PYTHON38_PRESTART_COUNT + value: "{{ .Values.global.runtime.prestartCount.python38 }}" + - name: PYTHON39_PRESTART_COUNT + value: "{{ .Values.global.runtime.prestartCount.python39 }}" + - name: CPP_PRESTART_COUNT + value: "{{ .Values.global.runtime.prestartCount.cpp }}" + - name: JVM_CUSTOM_ARGS + value: "{{ .Values.global.runtime.jvmCustomArgs }}" + - name: RUNTIME_GID + value: "1002" + - name: RUNTIME_UID + value: "1002" + - name: INIT_HANDLER + value: faasscheduler.InitHandler + - name: CALL_HANDLER + value: faasscheduler.CallHandler + - name: CHECKPOINT_HANDLER + value: faasscheduler.CheckpointHandler + - name: SHUTDOWN_HANDLER + value: faasscheduler.ShutdownHandler + - name: SIGNAL_HANDLER + value: faasscheduler.SignalHandler + - name: YR_FUNCTION_LIB_PATH + value: /home/sn/scheduler/bin + - name: INIT_ARGS_FILE_PATH + value: /home/sn/config/config.json + - name: GLOG_log_dir + value: /opt/yuanrong/logs + - name: LOG_PATTERN + 
value: |- + { + "separator": " | ", + "placeholders": [ + {"flags": "%Y-%m-%d %H:%M:%S.%e"}, + {"flags": "%l"}, + {"flags": "%s:%#"}, + {"env": "POD_NAME"}, + {"env": "CLUSTER_ID"}, + {"flags": ""} + ] + } + - name: IS_PROTOMSG_TO_RUNTIME + value: "{{ .Values.global.runtime.isProtoMsgToRuntime }}" + - name: MAX_PRIORITY + value: "{{ .Values.global.common.prioritySchedule.maxPriority }}" + - name: CLUSTER_NAME + value: "cn-dev" + - name: FUNCTION_PROXY_PORT + value: "{{ .Values.global.port.functionProxyPort }}" + - name: FUNCTION_PROXY_GRPC_PORT + value: "{{ .Values.global.port.functionProxyGrpcPort }}" + - name: META_STORE_ADDRESS + {{- if .Values.global.metaStore.enable }} + value: "{{ .Values.global.metaStore.address }}" + {{- else }} + value: "{{ .Values.global.etcdManagement.detcd }}" + {{- end }} + - name: IAM_META_STORE_ADDRESS + {{- if .Values.global.metaStore.enable }} + value: "{{ .Values.global.metaStore.address }}" + {{- else }} + value: "{{ .Values.global.etcdManagement.metcd }}" + {{- end }} + - name: SslTargetName + value: "aaa.bbb" + - name: DS_WORKER_PORT + value: "{{ .Values.global.port.worker }}" + - name: ENABLE_TRACE + value: "{{ .Values.global.observer.enableTrace }}" + - name: LOG_PATH + value: "/opt/yuanrong/logs" + - name: LOG_LEVEL + value: "DEBUG" + - name: LOG_ROLLING_MAXSIZE + value: "1000" + - name: LOG_ROLLING_MAXFILES + value: "3" + - name: LOG_ASYNC_LOGBUFSECS + value: "30" + - name: LOG_ASYNC_MAXQUEUESIZE + value: "51200" + - name: LOG_ASYNC_THREADCOUNT + value: "1" + - name: LOG_ALSOLOGTOSTDERR + value: "false" + - name: ENABLE_METRICS + value: "{{ .Values.global.observer.metrics.enable }}" + - name: STS_CONFIG + value: "{}" + - name: METRICS_CONFIG_FILE + value: '{{ quote .Values.global.observer.metrics.metricsConfigFile }}' + - name: MEM_THRESHOLD_PERCENTAGE + value: "90" + - name: ResourcePath + value: "/home/wisfunction/resource" + - name: RUNTIME_HEARTBEAT_ENABLE + value: "true" + - name: RUNTIME_MAX_HEARTBEAT_TIMEOUT_TIMES + 
value: "{{ .Values.global.runtime.runtimeMaxHeartbeatTimeoutTimes }}" + - name: MAX_STORAGE_OPERATE_RETRY_TIMES + value: "60" + - name: RUNTIME_HEARTBEAT_TIMEOUT_MS + value: "{{ .Values.global.runtime.runtimeHeartbeatTimeoutMS }}" + - name: RUNTIME_RECOVER_ENABLE + value: "false" + - name: DRIVER_ENABLE + value: "{{ .Values.global.common.driverEnable }}" + - name: STATE_STORAGE_TYPE + value: "{{ .Values.global.common.stateStorageType }}" + - name: ELECTION_MODE + value: "{{ .Values.global.common.electionMode }}" + - name: MAX_GRPC_SIZE + value: "{{ .Values.global.common.maxGrpcSize }}" + - name: DS_HEALTH_CHECK_INTERVAL + value: "1000" + - name: MAX_DS_HEALTH_CHECK_TIMES + value: "12" + - name: DS_HEALTH_CHECK_PATH + value: "/home/sn/datasystem/health" + - name: SERVICES_PATH + value: "/home/sn/service-config/services.yaml" + - name: SYSTEM_TIMEOUT + value: "{{ .Values.global.common.systemTimeout }}" + - name: SERVICE_TTL + value: "60000" + - name: RUNTIME_SHUTDOWN_TIMEOUT_SECONDS + value: "{{ .Values.global.runtime.runtimeShutdownTimeoutSeconds }}" + - name: CACHE_STORAGE_AUTH_ENABLE + value: "{{ .Values.global.dataSystem.authEnabled }}" + - name: SSL_ENABLE + value: "{{ .Values.global.mutualSSLConfig.sslEnable }}" + - name: DECRYPT_ALGORITHM + value: "{{ .Values.global.common.decryptAlgorithm }}" + - name: MIN_INSTANCE_CPU_SIZE + value: "{{ .Values.global.runtime.minInstanceCpuSize }}" + - name: MIN_INSTANCE_MEMORY_SIZE + value: "{{ .Values.global.runtime.minInstanceMemorySize }}" + - name: MAX_INSTANCE_CPU_SIZE + value: "{{ .Values.global.runtime.maxInstanceCpuSize }}" + - name: MAX_INSTANCE_MEMORY_SIZE + value: "{{ .Values.global.runtime.maxInstanceMemorySize }}" + - name: ENABLE_SERVER_MODE + value: "{{ .Values.global.runtime.serverModeEnable }}" + - name: LOG_COMPRESS_ENABLE + value: "true" + - name: ENABLE_PRINT_RESOURCE_VIEW + value: "{{ .Values.global.common.enablePrintResourceView }}" + - name: PROMETHEUS_PUSH_GATEWAY_IP + value: "{{ 
.Values.global.observer.proGatewayIP }}" + - name: PROMETHEUS_PUSH_GATEWAY_PORT + value: "{{ .Values.global.observer.gatewayPort }}" + - name: CLUSTER_ID + value: {{ quote .Values.global.clusterId }} + - name: INVOKE_LIMITATION_ENABLE + value: "{{ .Values.global.rateLimit.invokeRateLimit.enable }}" + - name: LOW_MEMORY_THRESHOLD + value: "{{ .Values.global.rateLimit.invokeRateLimit.lowThreshold }}" + - name: HIGH_MEMORY_THRESHOLD + value: "{{ .Values.global.rateLimit.invokeRateLimit.highThreshold }}" + - name: MESSAGE_SIZE_THRESHOLD + value: "{{ .Values.global.rateLimit.invokeRateLimit.msgSize }}" + - name: RUNTIME_DS_AUTH_ENABLE + value: "{{ .Values.global.runtime.dataSystem.authEnable }}" + - name: RUNTIME_DS_ENCRYPT_ENABLE + value: {{ quote .Values.global.runtime.dataSystem.encryptEnable }} + - name: RUNTIME_DS_CLIENT_PUBLICKEY + value: "" + - name: RUNTIME_DS_CLIENT_PRIVATEKEY + value: "" + - name: RUNTIME_DS_SERVER_PUBLICKEY + value: "" + - name: ETCD_AUTH_TYPE + value: "{{ .Values.global.etcdManagement.authType }}" + - name: SCC_ENABLE + value: "{{ .Values.global.scc.enable }}" + - name: IAM_POLICY_CONFIG_PATH + value: "/home/sn/iam-config/iam-policy-config.json" + - name: ENABLE_IAM + value: "{{ .Values.global.iam.enable }}" + - name: IAM_BASE_PATH + value: "http://iam-adaptor.{{ .Values.global.namespace }}.svc.cluster.local:{{ .Values.global.port.iamAdapterHttpPort }}" + - name: IAM_CREDENTIAL_TYPE + value: "{{ .Values.global.iam.credentialType }}" + - name: K8S_BASE_URL + value: "{{ .Values.global.kubernetes.kubeApiBaseUrl }}" + - name: K8S_NAMESPACE + value: "default" + - name: MAX_TOLERATE_META_STORE_FAILED_TIMES + value: "{{ .Values.global.etcdManagement.maxTolerateMetaStoreFailedTimes }}" + - name: META_HEALTH_CHECK_INTERVAL_MS + value: "{{ .Values.global.etcdManagement.metaStoreCheckHealthIntervalMs }}" + - name: META_HEALTH_CHECK_TIMEOUTS + value: "{{ .Values.global.etcdManagement.metaStoreTimeoutMs }}" + - name: SYSTEM_AUTH_MODE + value: "{{ 
.Values.global.common.systemAuthMode }}" + - name: ETCD_TARGET_NAME_OVERRIDE + value: "aaa.bbb" + - name: CACHE_STORAGE_AUTH_TYPE + value: "" + - name: FUNCTION_META_PATH + value: "/home/sn/function-metas" + - name: RESOURCE_PATH + value: "/home/sn/resource" + - name: LIB_PATH + value: "{{ .Values.global.runtime.libPath }}" + - name: ENABLE_AGENT_CRD_REGISTRY + value: "true" + resources: + limits: + cpu: {{ .Values.global.resources.faasScheduler.limits.cpu }} + memory: {{ .Values.global.resources.faasScheduler.limits.memory }} + requests: + cpu: {{ .Values.global.resources.faasScheduler.requests.cpu }} + memory: {{ .Values.global.resources.faasScheduler.requests.memory }} + volumeMounts: + - name: volume-config + mountPath: /home/sn/config + - name: log-volume + mountPath: /opt/yuanrong/logs + subPathExpr: $(RUNTIME_POD_NAME) + - name: data-volume + mountPath: /opt/yuanrong/data + - name: iam-policy-config-volume + mountPath: /home/sn/iam-config + livenessProbe: + tcpSocket: + port: {{ .Values.global.port.faasSchedulerPort }} + initialDelaySeconds: 6 + timeoutSeconds: 5 + periodSeconds: 5 + successThreshold: 1 + failureThreshold: 3 + readinessProbe: + tcpSocket: + port: {{ .Values.global.port.faasSchedulerPort }} + initialDelaySeconds: 6 + timeoutSeconds: 5 + periodSeconds: 5 + successThreshold: 1 + failureThreshold: 10 + lifecycle: + preStop: + exec: + command: + - /bin/sh + - -c + - kill -15 $(ps aux | grep -i function_proxy | awk '{print $2}') 2>/dev/null + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + imagePullPolicy: IfNotPresent + securityContext: + runAsUser: 1002 + runAsNonRoot: true + restartPolicy: Always + serviceAccount: faas-scheduler + serviceAccountName: faas-scheduler + terminationGracePeriodSeconds: 300 + dnsPolicy: ClusterFirst + securityContext: + fsGroup: {{ .Values.global.runtime.fsGroup }} + imagePullSecrets: + - name: default-secret + revisionHistoryLimit: 5 diff --git 
a/deploy/k8s/charts/templates/scheduler/service_account.yaml b/deploy/k8s/charts/templates/scheduler/service_account.yaml new file mode 100644 index 0000000000000000000000000000000000000000..06384f7a1a614a265dce615ddf9914872b96fb48 --- /dev/null +++ b/deploy/k8s/charts/templates/scheduler/service_account.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: faas-scheduler + namespace: "default" \ No newline at end of file diff --git a/deploy/k8s/charts/values.yaml b/deploy/k8s/charts/values.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5982e8ad622aeb749cb6503b1978bd8396362ce5 --- /dev/null +++ b/deploy/k8s/charts/values.yaml @@ -0,0 +1,559 @@ +# Default values for YuanRong. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +global: + litebusDataKey: "AB05D39677C54E231CC4224812F1C51E95D72792F1F1A4C338F4404D80017860" + namespace: "default" + # cluster ID + clusterId: "" + # Configure priorityClass. + # If the value is false, the default priorityClass is system-cluster-critical. + # If the value is true, a priorityClass with preemptionPolicy Never is created. 
+ enableNonPreemptive: false + imageRegistry: + images: + common: functionsystem:version_replace + agentInit: function-agent-init:version_replace + functionAgent: function-agent:version_replace + datasystem: datasystem:version_replace + runtimeManager: runtime-manager:version_replace + resources: + functionProxy: + limits: + cpu: "500m" + memory: "1Gi" + requests: + cpu: "500m" + memory: "1Gi" + faasFrontend: + limits: + cpu: "500m" + memory: "1Gi" + requests: + cpu: "500m" + memory: "1Gi" + faasScheduler: + limits: + cpu: "500m" + memory: "1Gi" + requests: + cpu: "500m" + memory: "1Gi" + functionMaster: + limits: + cpu: "500m" + memory: "1Gi" + requests: + cpu: "500m" + memory: "1Gi" + iamAdapter: + limits: + cpu: "500m" + memory: "1Gi" + requests: + cpu: "500m" + memory: "1Gi" + functionAgentInit: + limits: + cpu: "300m" + memory: "800Mi" + requests: + cpu: "300m" + memory: "800Mi" + functionAgentRuntime: + limits: + cpu: "500m" + memory: "1Gi" + requests: + cpu: "500m" + memory: "1Gi" + functionAgentProxy: + limits: + cpu: "500m" + memory: "1Gi" + requests: + cpu: "500m" + memory: "1Gi" + functionAgent: + limits: + cpu: "1" + memory: "500Mi" + requests: + cpu: "500m" + memory: "500Mi" + # component port + port: + iamAdapterHttpPort: 31218 + fassFrontendPort: 8888 + faasFrontendNodePort: 31222 + faasSchedulerPort: 9994 + functionProxyPort: 22423 + functionProxyGrpcPort: 32568 + functionAgentPort: 58866 + runtimeMgrPort: 21005 + runtimeInitPort: 21006 + runtimePortNum: 65535 + functionMasterPort: 22668 + worker: 31501 + # Number of pods + replicas: + functionAgent: 0 + functionMaster: 1 + iamAdaptor: 1 + faasScheduler: 2 + faasFrontend: 1 + faasManager: 1 + + # common log config + log: + # log output host path + hostPath: + # logs of components + componentLog: /var/paas/sys/log/cff/default/componentlogs + # logs of runtime + serviceLog: /var/paas/sys/log/cff/default/servicelogs + # logs of user functions std + userLog: 
/var/paas/sys/log/cff/default/processrouters/stdlogs + enable: true + functionSystem: + path: /home/sn/log + level: INFO + pattern: "" + compress: true + rolling: + # single file size, unit:M + maxSize: 1000 + # Number of rollback files + maxfiles: 3 + async: + # Cache time, unit: second + logBufSecs: 30 + # runtime log config + runtime: + path: /home/snuser/log + level: INFO + expiration: + # Enable runtime log expiration + enable: true + # Check the time interval for expired logs, unit in seconds, default is 10 minutes: 60 * 10. It needs to be explicitly less than the timeThreshold, otherwise it is illegal. + cleanupInterval: 600 + # The maximum retention time for expired log files, in seconds, is 5 days by default: 60 * 60 * 24 * 5. + timeThreshold: 432000 + # The maximum number of expired log files to be retained, in units of pieces, is 512 by default. + maxFileCount: 512 + # enable log prefix reuse, default is false + logReuseEnable: false + userLogExportMode: "file" + # for user function standard output logs + userOutput: + path: /home/snuser/log/instances + level: INFO + # affinity config for control plane component (function-master、iam-server、meta-service、admin、repo) + controlPlane: + nodeSelector: {} + tolerations: [] + nodeAffinity: {} + # affinity config for function-proxy + functionProxy: + nodeSelector: {} + tolerations: [] + nodeAffinity: {} + + # iam + iam: + enable: false + # credential type 'token' or 'AK/SK' + credentialType: "AK/SK" + # unit: second + tokenExpiredTimeSpan: 86400 + iamEndpoint: "" + + # network config + network: + elbId: "" + + # rate limit + rateLimit: + # If true, busproxy will start to limit invoke requests + invokeRateLimit: + enable: true + lowThreshold: 0.6 + highThreshold: 0.8 + msgSize: 20480 + + # opentelemetry + observer: + trace: + enable: false + traceConfig: "{\"otlpGrpcExporter\":{\"enable\":false,\"endpoint\":\"\"}}" + runtimeTraceConfig: "{\"otlpGrpcExporter\":{\"enable\":false,\"endpoint\":\"\"}}" + enableTrace: 
false + # common metrics config + metrics: + enable: false + sslEnable: false + # metric config for component, metrics dir please configure it to /home/sn/metrics + # example: "{\"backends\":[{\"immediatelyExport\":{\"name\":\"LingYun\",\"enable\":false,\"exporters\":[{\"prometheusPushExporter\":{\"enable\":true,\"initConfig\":{\"ip\":\"prometheus-pushgateway.default.svc.cluster.local\",\"port\":9091}}}]}},{\"batchExport\":{\"name\":\"LakeHouse\",\"enable\":true,\"exporters\":[{\"prometheusPushExporter\":{\"enable\":true,\"batchSize\":2,\"batchIntervalSec\":10,\"failureQueueMaxSize\":3,\"failureDataDir\":\"/home/sn/metrics/failure\",\"failureDataFileMaxCapacity\":1,\"initConfig\":{\"ip\":\"yr-test-collector.default.svc.cluster.local\",\"port\":5000}}},{\"fileExporter\":{\"enable\":true,\"batchSize\":2,\"batchIntervalSec\":10,\"failureQueueMaxSize\":3,\"failureDataDir\":\"/home/sn/metrics/failure\",\"failureDataFileMaxCapacity\":1,\"initConfig\":{\"fileDir\":\"\",\"rolling\":{\"enable\":true,\"maxFiles\":3,\"maxSize\":10000},\"contentType\":\"STANDARD\"}}}]}}]}" + metricsConfig: "" + metricsConfigFile: "" + # metric config for runtime, metrics dir please configure it to /home/snuser/metrics + # example: 
"{\"backends\":[{\"immediatelyExport\":{\"name\":\"LingYun\",\"enable\":false,\"exporters\":[{\"prometheusPushExporter\":{\"enable\":true,\"initConfig\":{\"ip\":\"prometheus-pushgateway.default.svc.cluster.local\",\"port\":9091}}}]}},{\"batchExport\":{\"name\":\"LakeHouse\",\"enable\":true,\"exporters\":[{\"prometheusPushExporter\":{\"enable\":true,\"batchSize\":2,\"batchIntervalSec\":10,\"failureQueueMaxSize\":3,\"failureDataDir\":\"/home/snuser/metrics/failure\",\"failureDataFileMaxCapacity\":1,\"initConfig\":{\"ip\":\"yr-test-collector.default.svc.cluster.local\",\"port\":5000}}},{\"fileExporter\":{\"enable\":true,\"batchSize\":2,\"batchIntervalSec\":10,\"failureQueueMaxSize\":3,\"failureDataDir\":\"/home/sn/metrics/failure\",\"failureDataFileMaxCapacity\":1,\"initConfig\":{\"fileDir\":\"\",\"rolling\":{\"enable\":true,\"maxFiles\":3,\"maxSize\":10000},\"contentType\":\"STANDARD\"}}}]}}]}" + runtimeMetricsConfig: "" + runtimeMetricsConfigFile: "" + path: + file: /home/sn/metrics/file + failure: /home/sn/metrics/failure + # metrics output host path + hostPath: + # metrics failed to report + failureMetrics: /var/paas/sys/metrics/cff/default/failureMetrics + failureFileEnable: true + # prometheus pushgateway ip + proGatewayIP: "prometheus-pushgateway.default.svc.cluster.local" + # prometheus pushgateway port + gatewayPort: 9091 + redisManagement: + serverAddr: "" + password: "" + port: 6379 + connectTimeout: 1 # second + commandTimeout: 1 # second + needAuth: false + pool: + poolSize: 1 + gracePeriodSeconds: 30 + accelerator: "" + # failure threshold for function-agent readiness probe + readinessProbeFailureThreshold: 3 + # failure threshold for function-agent liveness probe + livenessProbeFailureThreshold: 3 + # affinity config for default pool + nodeSelector: { } + tolerations: [ ] + nodeAffinity: { } + requestCpu: 500 + requestMemory: 1024 + requestEphemeralStorage: 2048 + limitCpu: 500 + limitMemory: 1024 + limitEphemeralStorage: 2048 + # tenant isolation + 
tenantIsolation: + # tenant affinity + affinity: + enable: false + tenantPodReuseTimeWindow: -1 + # network isolation + ipv4: + enable: false + # function-agent POD port whitelist based on iptables + tcpPortWhitelist: "31501" + udpPortWhitelist: "" + # third party whitelist + # Explanation: + # Rules: Record like 'domain,IP,port1/protocol,port2/protocol;' Separate multiple records with semicolons. + # Please note that the IP address may be 'None'. If no IP address is specified, please enter 'None' and the cluster will resolve it based on the service domain. The current default configuration does not specify an IP address. + thirdPartyWhitelist: kubernetes.default.svc.cluster.local,None,443/TCP,;minio.default.svc.cluster.local,None,9000/TCP;ds-core-etcd.default.svc.cluster.local,None,2379/TCP,2380/TCP;ds-core-etcd-headless.default.svc.cluster.local,None,2379/TCP,2380/TCP; + # runtime config + runtime: + snuserLibPath: /home/snuser/snlib + virtualEnvIdleTimeLimit: -1 + requestAckAccMaxSec: 60 + serverModeEnable: true + cleanStreamProducerEnable: true + metricsCollectorType: proc + runtimeShutdownTimeoutSeconds: 600 + # kill -SIGKILL process after kill -SIGINT + killProcessTimeoutSeconds: 0 + runtimeHeartbeatTimeoutMS: 5000 + runtimeMaxHeartbeatTimeoutTimes: 5 + minInstanceCpuSize: "300" + maxInstanceCpuSize: "16000" + minInstanceMemorySize: "128" + # 1024*1024*1024 MB + maxInstanceMemorySize: "1073741824" + migratePrefix: "" + taintToleranceList: "" + migrateEnable: false + npuCollectionMode: "all" + gpuCollectionEnable: false + useAscendCustomConfig: false + massifEnable: false + # runtime ds-client connection timeout(s) + runtimeDsConnectTimeout: 60 + fsGroup: 1002 + # disk usage check config + diskUsageMonitor: + # notify instances failed in the POD + notifyFailureEnable: false + # force delete pod when disk usage exceed limit + forceDeletePODEnable: true + # custom disk monitor path, you can config multi dirs, such as: /dir1;/dir2;/dir3 + path: /tmp + # disk 
usage limit(MB), -1 means disable + limit: -1 + # disk usage limit(MB) for /home/snuser, -1 means disable + snuserDirSizeLimit: -1 + # disk usage limit(MB) for temp dir( /tmp & /var/tmp ), -1 means disable + tmpDirSizeLimit: -1 + # disk usage monitor duration(ms) + duration: 1000 + prestartCount: + java8: 0 + java11: 0 + python36: 0 + python37: 0 + python38: 0 + python39: 0 + python310: 0 + python311: 0 + cpp: 0 + # jvm prestart custom args + # example '["-XX:+UseG1GC","-XX:+TieredCompilation"]' + jvmCustomArgs: '[]' + # default args for runtime + defaultArgs: + java8: '["-XX:InitialRAMPercentage=35.0", "-XX:+UseConcMarkSweepGC", "-XX:+CMSClassUnloadingEnabled", "-XX:+CMSIncrementalMode", "-XX:+CMSScavengeBeforeRemark", "-XX:+UseCMSInitiatingOccupancyOnly", "-XX:CMSInitiatingOccupancyFraction=70", "-XX:CMSFullGCsBeforeCompaction=5", "-XX:MaxGCPauseMillis=200", "-XX:+ExplicitGCInvokesConcurrent", "-XX:+ExplicitGCInvokesConcurrentAndUnloadsClasses"]' + java11: '["-XX:+UseG1GC", "-XX:MaxRAMPercentage=80.0", "-XX:+TieredCompilation"]' + java17: '["-XX:+UseZGC", "-XX:+AlwaysPreTouch", "-XX:+UseCountedLoopSafepoints", "-XX:+TieredCompilation", "--add-opens=java.base/java.util=ALL-UNNAMED", "--add-opens=java.base/java.lang=ALL-UNNAMED", "--add-opens=java.base/java.net=ALL-UNNAMED", "--add-opens=java.base/java.io=ALL-UNNAMED", "--add-opens=java.base/java.math=ALL-UNNAMED", "--add-opens=java.base/java.time=ALL-UNNAMED", "--add-opens=java.base/java.text=ALL-UNNAMED", "--enable-preview"]' + java21: '["-XX:+UseZGC", "-XX:+ZGenerational", "-XX:+AlwaysPreTouch", "-XX:+UseCountedLoopSafepoints", "-XX:+TieredCompilation", "--add-opens=java.base/java.util=ALL-UNNAMED", "--add-opens=java.base/java.lang=ALL-UNNAMED", "--add-opens=java.base/java.net=ALL-UNNAMED", "--add-opens=java.base/java.io=ALL-UNNAMED", "--add-opens=java.base/java.math=ALL-UNNAMED", "--add-opens=java.base/java.time=ALL-UNNAMED", "--add-opens=java.base/java.text=ALL-UNNAMED", "--enable-preview"]' + # runtime and ds 
worker authentication config + dataSystem: + authEnable: false + encryptEnable: false + oomKill: + # Memory detection interval for runtime process, unit in milliseconds, default is 1000 ms + memoryDetectionInterval: 1000 + # Enable runtime oom detection kill + enable: false + # the control limit for the runtime OOM kill based on process memory usage, unit is MB. + # For example: + # - a value of -5 sets the memory limit to -5MB (e.g., for a 300-128 function, the memory ceiling is 123MB), enabling proactive OOM prevention. + # - a value of 5 sets the memory limit to +5MB (e.g., for a 300-128 function, the memory ceiling is 133MB), allowing for some degree of overcommit. + controlLimit: 0 + # Number of consecutive times the memory usage must exceed the control limit before triggering OOM kill + consecutiveDetectionCount: 3 + # The type of data transferred to runtime + isProtoMsgToRuntime: true + # runtime and ds worker authentication config + curve: + # Worker's public key in the curve encryption environment. + workerPublicKey: "" + # Worker's private key in the curve encryption environment. + workerKeySecret: "" + # Agent's public key in the curve encryption environment. + agentPublicKey: "" + # Agent's private key in the curve encryption environment. + agentKeySecret: "" + # Client's private key in the curve encryption environment. + clientPrivateKey: "" + # Client's public key in the curve encryption environment. + clientPublicKey: "" + + ## kubernetes config + kubernetes: + # kube-apiserver's address + kubeApiBaseUrl: https://kubernetes.default.svc.cluster.local:443 + svcCIDR: "10.43.0.0/16" + podCIDR: "10.42.0.0/16" + hostCIDR: "" + kubeClientRetryTime: 5 + kubeClientRetryCycMs: 3000 + healthMonitorMaxFailure: 5 + healthMonitorRetryInterval: 3000 + + # meta-store + metaStore: + enable: false + address: "meta-store.default.svc.cluster.local:22770" + mode: "local" + + sts: + # STS server address used by the STS SDK, format is host:port. 
+ serverDomain: "" + # Sts config use by sts sdk + configPath: "" + # in encrypt_kit=sts scenario, the sts sdk load encrypt info locally, normally is false + # when sts server unavailable for a very long time, can set true to start datasystem independent from sts server. + loaderLocal: true + # Whether to enable STS, which refers to the deployment of STS + enable: false + + + # common config + common: + # system auth mode: { "", "AK/SK" } + systemAuthMode: "" + # code deploy dir, default /dcache + deployDir: /dcache + # CBC STS GCM NO_CRYPTO + decryptAlgorithm: NO_CRYPTO + # function master election mode ("k8s" or "etcd") + electionMode: "txn" + # package storage type : {"s3", "local"} + storageType: s3 + stateStorageType: "datasystem" + # If true, bootstrap will start system function, need system-function-config.json file + functionBootstrapEnable: true + # function bootstrap retry period. (default 5000ms) + functionBootstrapRetryPeriod: 5000 + # If true, bootstrap will start system function, need create secret(localauth) before deploy + secretKeyEnable: false + # Posix max grpc size (MB) + maxGrpcSize: 4 + # If true, function proxy will listen on functionProxyGrpcPort, and accept driver's request + driverEnable: true + # Timeout between function-system components, default 60000 (ms) + systemTimeout: 60000 + # domain heartbeat timeout, default 6000 (ms) + domainHeartbeatTimeout: 6000 + # The TTL time of busproxy, default 60000 (ms) + busproxyServiceTTL: 60000 + # whether print resource view when schedule failed, which will affect performance in big scale + enablePrintResourceView: false + # The max retry times for function agent mgr send request to function-agent [0-100] + functionAgentMgrRetryTimes: 9 + # The retry interval for function agent mgr send request to function-agent(Unit:ms) [5000-60000] + functionAgentMgrRetryInterval: 20000 + # if some node has one of these labels, and the worker is not deployed or fault, won't add taint to this node + # example: 
label1=key1;label2=key2 + workerTaintExcludes: "" + # When node is tainted with the key, instances on the node will be evicted + evictedTaintKey: "" + # self taint key prefix (when scaler add/remove node taint with prefix) + selfTaintPrefix: "" + # grace period when delete pod (s) + deletePodGracePeriodSeconds: 25 + # enable function-master host network + masterHostNetworkEnable: false + # upper limit of concurrent num + concurrentNumUpperLimit: 1000 + # enable the relaxed scheduling policy. When the relaxed number of available nodes or pods is selected, + # the scheduling progress exits without traversing all nodes or pods.(default -1) + scheduleRelaxed: -1 + prioritySchedule: + # instance schedule maximum priority + # if 0 indicates default schedule strategy, other value greater than 0 indicates priority schedule strategy + maxPriority: 0 + # only valid while maxPriority > 0 + enablePreemption: false + # Explain Req Aggregate Strategy + # three options : no_aggregate strictly relaxed + # no_aggregate: Requests are not aggregated based on resource usage(CPU and Memory) + # strictly: Aggregate requests in the first-in, first-out mode. + # relaxed:Requests are not aggregated in a first-in, first-out order to some extent. + aggregatedStrategy: "no_aggregate" + # schedule plugins need to be registered + schedulePlugins: + local: "[\"Label\", \"ResourceSelector\", \"Default\", \"Heterogeneous\"]" + domain: "[\"Label\", \"ResourceSelector\", \"Default\", \"Heterogeneous\"]" + # config code aging time, unit:seconds [0-3600], code package will be clear if code is not referred by any instance and exceed this time + codeAgingTime: 0 + # Limit of the function package deployed by users + zipFile: + # Maximum zip file size + # if the file size is more than 500 MB, the cluster may break down. 
+ zipFileSizeMax: 300 + # Maximum unzip file size + unzipFileSizeMax: 600 + # Maximum number of files + fileCountsMax: 30000 + # Maximum directory level + dirDepthMax: 20 + # enable signature validation + signatureValidationEnable: false + # Limit of the layer deployed by users + layer: + # Maximum number of layers + layerMaxNumber: 5 + # proxy only partial watch instances + isPartialWatchInstances: false + # cluster list for function instance + instanceClusterList: "" + # The maximum number of versions that can be published for a function. + maxFunctionVersion: 8 + # The maximum number of labels that can be created for the same function version + maxInstanceLabel: 100 + + etcdManagement: + useSecret: false + secretName: "etcd-client-secret" + detcd: base-etcd.default.svc.cluster.local:2379 + # auth type contains: Noauth PWD TLS STS + authType: "Noauth" + # The configuration value should be consistent with the DNS content of the Subject Alternate Names of the TLS certificate. + targetNameOverride: "" + # meta-etcd addr + metcd: base-etcd.default.svc.cluster.local:2379 + # meta etcd parameters + # maximum number of etcd healthy check failures that can be tolerated + maxTolerateMetaStoreFailedTimes: 12 + # meta store health check interval + metaStoreCheckHealthIntervalMs: 5000 + # the timeout of etcd healthcheck rpc + metaStoreTimeoutMs: 10000 + + # obs configurations management + obsManagement: + # obs bucket name + obsBucketName: "bucket-test-log1" + # s3 addr + s3Endpoint: minio.default.svc.cluster.local:9000 + # s3 accessKey + s3AccessKey: "root" + # s3 encrypted secretKey + s3SecretKey: "" + # obs protocol: https, http + protocol: "http" + # credential type: credential_type_rotating_credentials / credential_type_permanent_credentials + credentialType: "credential_type_permanent_credentials" + + # mutual ssl configuration + mutualSSLConfig: + sslEnable: false + sslBasePath: "/home/sn/resource/https" + rootCA: "" + moduleCert: "" + moduleKey: "" + certPwd: "" + 
serverName: "test" + secretName: "https-secret" + sslDecryptTool: "SCC" # SCC, LOCAL, None + + # system upgrade configs + systemUpgradeConfig: + enable: true + azID: 1 + systemUpgradeKey: "/hms-caas/edgems/upgrade-zones" + systemUpgradeWatchAddress: "base-etcd.default.svc.cluster.local:2379" + + ##dataSystem config + dataSystem: + # Specifies whether auth is enable + authEnabled: false + # auth ak + ak: "" + # encrypted auth sk + sk: "" + # ds auth type (ZMQ AK/SK) + authType: "" + scc: + # If true, SCC encrypt and decrypt will be enabled and using scc secret key + enable: false + secretName: "scc-ks-secret" + primaryContent: "" + standbyContext: "" + # AES256_CBC, AES128_GCM, AES256_GCM, + # SM4_CBC, SM4_CTR, + algorithm: "AES256_GCM" + + tenant: + credential: "967221D8F1630ACBB7EFF58B:6D62A66AF4BE5368FF8AECEF5E572CC3759781822FB02224E8151C6ED151A6D4A0435550678F4A914318E3389116BFB963FC0FEABA3B18A04EC85B804CB4A371AB082962DDD7D2B6C48A0295AB66B4AE4E74F124F4A0D3475A92E05B175F84F0D7282418048AC909104C509CF13EF841B36619888BD339CB10CCE90374170D0A425BF39B8C887E31185CFB61C161D58024EB6E604721AF34473EA3F65C509959758C761EFDBF5DE9E60F95AADA63739BCA7DE7EBFE9D75900365AE93947DAA6DF6F885039A13564B9F9E787DCCDDCEA3FF35A1A91E4064F2D3FB5878D905537A2285A8E677AFE7C815CF87D006AC0DF6601021369987D37CC5E2889F294A151E81D413A4E641EA6CC68A" + + # ComponentAuth configuration for datasystem + enableComponentAuth: false + # ETCD configuration for datasystem + # Config ETCD server address. 
+ etcdAddress: base-etcd.default.svc.cluster.local:2379 + diff --git a/go/build.sh b/go/build.sh index 7675632041ce967bce9d70aeca159f6492dbfe61..4bac0023f1d31e26e4d729a7de0b2486a4f97d0c 100644 --- a/go/build.sh +++ b/go/build.sh @@ -99,8 +99,10 @@ CC='gcc -fstack-protector-strong -D_FORTIFY_SOURCE=2 -O2' go build -tags="${BUIL cd "${OUTPUT_DIR}" DASHBOARD_TAR_NAME="yr-dashboard-${VERSION}.tar.gz" -tar -czvf "${DASHBOARD_TAR_NAME}" ./* +tar -czvf "${DASHBOARD_TAR_NAME}" ./bin ./config mkdir -p "${RUNTIME_OUTPUT_DIR}" rm -rf "${RUNTIME_OUTPUT_DIR}/${DASHBOARD_TAR_NAME}" cp "${DASHBOARD_TAR_NAME}" "${RUNTIME_OUTPUT_DIR}" -cd "${PROJECT_DIR}" \ No newline at end of file +cd "${PROJECT_DIR}" + +bash -x build/faas/build.sh diff --git a/go/build/faas/build.sh b/go/build/faas/build.sh new file mode 100644 index 0000000000000000000000000000000000000000..e6e71e03c91ddddf05f9000ef27cf0d86b292509 --- /dev/null +++ b/go/build/faas/build.sh @@ -0,0 +1,134 @@ +#!/bin/bash +# Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -e + +BASE_DIR=$(cd "$(dirname "$0")"; pwd) +PROJECT_DIR=$(cd "$(dirname "$0")"/../..; pwd) +RUNTIME_OUTPUT_DIR="${PROJECT_DIR}/../output" +OUTPUT_DIR="${BASE_DIR}/../../output/pattern/pattern_faas" +TAR_OUT_DIR="${PROJECT_DIR}/build/_output" +TAR_OUT_FILE="faasfunctions.tar.gz" +EXECUTOR_DIR="${PROJECT_DIR}/build/faas/executor-meta" +TEST_CERT_PATH="${GOROOT}/src/net/http/internal/testcert.go" +BUILD_TAG_FUNCTION="function" +echo LD_LIBRARY_PATH=$LD_LIBRARY_PATH +MODULE_NAME_LIST=("faasscheduler" "faasmanager") +BUILD_VERSION=v0.5.0 +# go module prepare +export GO111MODULE=on +export GONOSUMDB=* +export CGO_ENABLED=1 +mkdir -p ${OUTPUT_DIR} +# resolve missing go.sum entry +go env -w "GOFLAGS"="-mod=mod" +go install google.golang.org/protobuf/cmd/protoc-gen-go@latest +go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest +# remove hard coded cert file in net/http +[ -f "${TEST_CERT_PATH}" ] && rm -f "${TEST_CERT_PATH}" + +function parse_args () { + getopt_cmd=$(getopt -o v:h -l help -- "$@") + [ $? -ne 0 ] && exit 1 + eval set -- "$getopt_cmd" + while true; do + case "$1" in + -v|--version) BUILD_VERSION=$2 && shift 2 ;; + -h|--help) SHOW_HELP="true" && shift ;; + --) shift && break ;; + *) die "Invalid option: $1" && exit 1 ;; + esac + done + if [ "$SHOW_HELP" != "" ]; then + cat < preferred = 1; + // key: RequiredAffinity RequiredAntiAffinity + map required = 2; +} + +message InstanceAffinity { + // key: PreferredAffinity PreferredAntiAffinity + map preferred = 1; + // key: RequiredAffinity RequiredAntiAffinity + map required = 2; + string topologyKey = 3; // By default, only the node level or function_proxy deamonSet level is supported. 
+} + +message Affinity { + ResourceAffinity resource = 1; + InstanceAffinity instance = 2; +} + +enum ErrorCode { + ERR_NONE = 0; + ERR_PARAM_INVALID = 1001; + ERR_RESOURCE_NOT_ENOUGH = 1002; + ERR_INSTANCE_NOT_FOUND = 1003; + ERR_INSTANCE_DUPLICATED = 1004; + ERR_INVOKE_RATE_LIMITED = 1005; + ERR_RESOURCE_CONFIG_ERROR = 1006; + ERR_INSTANCE_EXITED = 1007; + ERR_EXTENSION_META_ERROR = 1008; + ERR_USER_CODE_LOAD = 2001; + ERR_USER_FUNCTION_EXCEPTION = 2002; + ERR_REQUEST_BETWEEN_RUNTIME_BUS = 3001; + ERR_INNER_COMMUNICATION = 3002; + ERR_INNER_SYSTEM_ERROR = 3003; + ERR_DISCONNECT_FRONTEND_BUS = 3004; + ERR_ETCD_OPERATION_ERROR = 3005; + ERR_BUS_DISCONNECTION = 3006; + ERR_REDIS_OPERATION_ERROR = 3007; + ERR_NPU_FAULT_ERROR = 3016; +} + +message SmallObject { + string id = 1; + bytes value = 2; // sbuffer +} \ No newline at end of file diff --git a/go/pkg/common/faas_common/protobuf/data_service.proto b/go/pkg/common/faas_common/protobuf/data_service.proto new file mode 100644 index 0000000000000000000000000000000000000000..a61f063c46b6ff26c15ab8c847fddd0c0c5029fb --- /dev/null +++ b/go/pkg/common/faas_common/protobuf/data_service.proto @@ -0,0 +1,117 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+syntax = "proto3";
+
+package data_service;
+
+option go_package = "faas/pkg/common/faas_common/grpc/pb/data;data";
+
+// PutRequest carries one object (its ID and raw bytes) together with the
+// write options and the IDs of any nested objects.
+message PutRequest {
+    string objectId = 1;
+    bytes objectData = 2;
+    int32 writeMode = 3; // put param, default = 0
+    int32 consistencyType = 4; // put param, default = 0
+    repeated string nestedObjectIds = 5;
+    int32 cacheType = 6;
+}
+
+// PutResponse reports the outcome of a put as a numeric code plus a message.
+message PutResponse {
+    int32 code = 1;
+    string message = 2;
+}
+
+// GetRequest asks for a list of objects by ID, with a timeout in milliseconds.
+message GetRequest {
+    repeated string objectIds = 1;
+    int64 timeoutMs = 2;
+}
+
+// GetResponse returns the status and the object payloads.
+// NOTE(review): presumably buffers[i] corresponds to objectIds[i] of the
+// request - confirm against the server implementation.
+message GetResponse {
+    int32 code = 1;
+    string message = 2;
+    repeated bytes buffers = 3;
+}
+
+// IncreaseRefRequest names the objects whose reference should be increased
+// and the remote client on whose behalf this is done.
+message IncreaseRefRequest {
+    repeated string objectIds = 1;
+    string remoteClientId = 2;
+}
+
+// IncreaseRefResponse returns the status and the IDs that could not be processed.
+message IncreaseRefResponse {
+    int32 code = 1;
+    string message = 2;
+    repeated string failedObjectIds = 3;
+}
+
+// DecreaseRefRequest mirrors IncreaseRefRequest for decreasing references.
+message DecreaseRefRequest {
+    repeated string objectIds = 1;
+    string remoteClientId = 2;
+}
+
+// DecreaseRefResponse returns the status and the IDs that could not be processed.
+message DecreaseRefResponse {
+    int32 code = 1;
+    string message = 2;
+    repeated string failedObjectIds = 3;
+}
+
+// KvSetRequest sets a single key to a value with the given write options.
+message KvSetRequest {
+    string key = 1;
+    bytes value = 2;
+    int32 existence = 3;
+    int32 writeMode = 4; // set param, default = 0
+    uint32 ttlSecond = 5; // set param, default = 0
+    int32 cacheType = 6;
+}
+
+// KvSetResponse reports the outcome of a single-key set.
+message KvSetResponse {
+    int32 code = 1;
+    string message = 2;
+}
+
+// KvMSetTxRequest sets several keys in one request; keys and values are
+// parallel lists and the options apply to every pair.
+// NOTE(review): the "Tx" suffix suggests all-or-nothing semantics - confirm.
+message KvMSetTxRequest {
+    repeated string keys = 1;
+    repeated bytes values = 2;
+    int32 existence = 3;
+    int32 writeMode = 4;
+    uint32 ttlSecond = 5;
+    int32 cacheType = 6;
+}
+
+// KvMSetTxResponse reports the outcome of a multi-key set.
+message KvMSetTxResponse {
+    int32 code = 1;
+    string message = 2;
+}
+
+// KvGetRequest reads a list of keys, with a timeout in milliseconds.
+message KvGetRequest {
+    repeated string keys = 1;
+    uint32 timeoutMs = 2; // default = 0
+}
+
+// KvGetResponse returns the status and the values that were read.
+message KvGetResponse {
+    int32 code = 1;
+    string message = 2;
+    repeated bytes values = 3;
+}
+
+// KvDelRequest deletes a list of keys.
+message KvDelRequest {
+    repeated string keys = 1;
+}
+
+// KvDelResponse returns the status and the keys that could not be deleted.
+message KvDelResponse {
+    int32 code = 1;
+    string message = 2;
+    repeated string failedKeys = 3;
+}
\ No newline at end of file
diff --git
a/go/pkg/common/faas_common/protobuf/function_service.proto b/go/pkg/common/faas_common/protobuf/function_service.proto
new file mode 100644
index 0000000000000000000000000000000000000000..9bbcdb92839d3919c10baf1a813609c9968cf103
--- /dev/null
+++ b/go/pkg/common/faas_common/protobuf/function_service.proto
@@ -0,0 +1,174 @@
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+syntax = "proto3";
+
+package function_service;
+
+import "common_args.proto";
+
+option go_package = "faas/pkg/common/faas_common/grpc/pb/function;function";
+
+// CoreService is the API that the core exposes to the runtime.
+service CoreService {
+    // Create an instance of the specified function
+    rpc Create (CreateRequest) returns (CreateResponse) {}
+    // Invoke a previously created instance
+    rpc Invoke (InvokeRequest) returns (InvokeResponse) {}
+    // Terminate a previously created instance
+    rpc Terminate (TerminateRequest) returns (TerminateResponse) {}
+    // Exit the created instance
+    rpc Exit (ExitRequest) returns (ExitResponse) {}
+    // Save the state of the created instance
+    rpc SaveState (StateSaveRequest) returns (StateSaveResponse) {}
+    // Load the state of the created instance
+    rpc LoadState (StateLoadRequest) returns (StateLoadResponse) {}
+    // Send a signal to an instance
+    rpc Kill (KillRequest) returns (KillResponse) {}
+}
+
+// AffinityType enumerates the preferred/required affinity and anti-affinity kinds.
+enum AffinityType {
+    PreferredAffinity = 0;
+    PreferredAntiAffinity = 1;
+    RequiredAffinity = 2;
+    RequiredAntiAffinity = 3;
+}
+
+// SchedulingOptions carries the scheduling hints attached to a create request.
+// NOTE(review): the map fields below have no <key, value> type parameters in
+// this copy of the file; they look like they were lost in extraction. Restore
+// them from the upstream .proto before compiling.
+message SchedulingOptions {
+    int32 priority = 1;
+    map resources = 2;
+    map extension = 3;
+    // will be deprecated in the future
+    map affinity = 4;
+    commonargs.Affinity scheduleAffinity = 5;
+}
+
+// CreateRequest asks for a new instance of a function.
+message CreateRequest {
+    string function = 1;
+    repeated commonargs.Arg args = 2;
+    SchedulingOptions schedulingOps = 3;
+    string requestID = 4;
+    string traceID = 5;
+    repeated string labels = 6; // "key:value" or "key2"
+    // optional. If designatedInstanceID is not empty, the created instance is assigned that ID
+    string designatedInstanceID = 7;
+    // NOTE(review): map type parameters are missing here as well - see SchedulingOptions.
+    map createOptions = 8;
+}
+
+// CreateResponse returns the status and the ID of the created instance.
+message CreateResponse {
+    commonargs.ErrorCode code = 1;
+    string message = 2;
+    string instanceID = 3;
+}
+
+// gang scheduling: a batch of create requests
+message CreateRequests {
+    repeated CreateRequest requests = 1;
+}
+
+// gang scheduling: one status for the batch plus the created instance IDs
+message CreateResponses {
+    commonargs.ErrorCode code = 1;
+    string message = 2;
+    repeated string instanceIDs = 3;
+}
+
+// InvokeRequest invokes a function on a given instance.
+message InvokeRequest {
+    string function = 1;
+    repeated commonargs.Arg args = 2;
+    string instanceID = 3;
+    string requestID = 4;
+    string traceID = 5;
+    repeated string returnObjectIDs = 6;
+    string spanID = 7;
+}
+
+// InvokeResponse returns the status and the ID of the return object.
+message InvokeResponse {
+    commonargs.ErrorCode code = 1;
+    string message = 2;
+    string returnObjectID = 3;
+}
+
+// NotifyRequest reports the status of a request, optionally with small inline objects.
+message NotifyRequest {
+    string requestID = 1;
+    commonargs.ErrorCode code = 2;
+    string message = 3;
+    repeated commonargs.SmallObject smallObjects = 4;
+}
+
+// CallResult carries the result of a call, identified by instance and request ID.
+message CallResult {
+    commonargs.ErrorCode code = 1;
+    string message = 2;
+    string instanceID = 3;
+    string requestID = 4;
+    repeated commonargs.SmallObject smallObjects = 5;
+}
+
+// CallResultAck acknowledges a CallResult.
+message CallResultAck {
+    commonargs.ErrorCode code = 1;
+    string message = 2;
+}
+
+message TerminateRequest {
+    string instanceID = 1;
+}
+
+message TerminateResponse {
+    commonargs.ErrorCode code = 1;
+    string message = 2;
+}
+
+message ExitRequest {
+    commonargs.ErrorCode code = 1;
+    string message = 2;
+}
+
+message ExitResponse {
+    commonargs.ErrorCode code = 1;
+    string message = 2;
+}
+
+// StateSaveRequest carries the opaque state bytes to persist.
+message StateSaveRequest {
+    bytes state = 1;
+}
+
+// StateSaveResponse returns the status and the ID of the checkpoint that was written.
+message StateSaveResponse {
+    commonargs.ErrorCode code = 1;
+    string message = 2;
+    string checkpointID = 3;
+}
+
+// StateLoadRequest names the checkpoint to load.
+message StateLoadRequest {
+    string checkpointID = 1;
+}
+
+// StateLoadResponse returns the status and the state bytes of the checkpoint.
+message StateLoadResponse {
+    commonargs.ErrorCode code = 1;
+    string message = 2;
+    bytes state = 3;
+}
+
+// KillRequest sends a signal number, with an optional payload, to an instance.
+message KillRequest {
+    string instanceID = 1;
+    int32 signal = 2;
+    bytes payload = 3;
+}
+
+message KillResponse {
+    commonargs.ErrorCode code = 1;
+    string message = 2;
+}
+
diff --git a/go/pkg/common/faas_common/protobuf/lease_service.proto b/go/pkg/common/faas_common/protobuf/lease_service.proto
new file mode 100644
index 0000000000000000000000000000000000000000..5b612b1a886ece486d3f03ba9f1ee8e846e759e1
--- /dev/null
+++ b/go/pkg/common/faas_common/protobuf/lease_service.proto
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+syntax = "proto3";
+
+package lease_service;
+
+import "common_args.proto";
+
+option go_package = "faas/pkg/common/faas_common/grpc/pb/lease;lease";
+
+// LeaseRequest identifies the remote client the lease operation is for.
+message LeaseRequest {
+    string remoteClientId = 1;
+}
+// LeaseResponse reports the outcome as a shared error code plus a message.
+message LeaseResponse {
+    commonargs.ErrorCode code = 1;
+    string message = 2;
+}
\ No newline at end of file
diff --git a/go/pkg/frontend/types/type.go b/go/pkg/frontend/types/type.go
new file mode 100644
index 0000000000000000000000000000000000000000..f0873dde3fac314b414e84cd5794b7c5ca40e14f
--- /dev/null
+++ b/go/pkg/frontend/types/type.go
@@ -0,0 +1,349 @@
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Package types declares the configuration and request/response data
+// structures used by the faas frontend.
+package types
+
+import (
+	"encoding/json"
+	"time"
+
+	"frontend/pkg/common/faas_common/alarm"
+	"frontend/pkg/common/faas_common/crypto"
+	"frontend/pkg/common/faas_common/etcd3"
+	"frontend/pkg/common/faas_common/localauth"
+	"frontend/pkg/common/faas_common/logger/config"
+	"frontend/pkg/common/faas_common/redisclient"
+	"frontend/pkg/common/faas_common/sts/raw"
+	"frontend/pkg/common/faas_common/tls"
+	"frontend/pkg/common/faas_common/types"
+	wisecloudTypes "frontend/pkg/common/faas_common/wisecloudtool/types"
+)
+
+// FunctionRequestInfo identifies the function targeted by a request.
+// FutureID is excluded from JSON (tag "-").
+type FunctionRequestInfo struct {
+	URN string `json:"Frn"`
+	BusinessID string `json:"BusinessId"`
+	TenantID string `json:"TenantId"`
+	Name string `json:"FuncName"`
+	Version string `json:"FuncVersion"`
+	TraceID string `json:"TraceId"`
+	Alias string `json:"Alias"`
+	AppID string `json:"AppID"`
+	StateKey string `json:"StateKey"`
+	NodeLabel string `json:"NodeLabel"`
+	FutureID string `json:"-"`
+}
+
+// InvokeErrorResponse is the JSON error body returned for a failed invoke
+type InvokeErrorResponse struct {
+	Code int `json:"code"`
+	Message string `json:"message"`
+}
+
+// ResourceSpecification contains resource specification of a requested instance
+type ResourceSpecification struct {
+	CPU int64 `json:"cpu"`
+	Memory int64 `json:"memory"`
+	CustomResource map[string]int64 `json:"customResource"`
+}
+
+// CallReq is the msg structure sent from the frontend to the executor
+type CallReq struct {
+	Header map[string]string `json:"header"`
+	Path string `json:"path"`
+	Method string `json:"method"`
+	Query string `json:"query"`
+	Body json.RawMessage `json:"body"`
+}
+
+// CallResp is the msg structure returned by the executor to the frontend
+type CallResp struct {
+	Headers map[string]string `json:"headers"`
+	BillingDuration string `json:"billingDuration"`
+	InnerCode string `json:"innerCode"`
+	InvokeSummary string `json:"invokeSummary"`
+	LogResult string `json:"logResult"`
+	UserFuncTime float64 `json:"userFuncTime"`
+	ExecutorTime float64 `json:"executorTime"`
+	Body json.RawMessage `json:"body"`
+}
+
+// InitResp carries an error code and a raw JSON message.
+// NOTE(review): presumably the executor's reply to an init call - confirm.
+type InitResp struct {
+	ErrorCode string `json:"errorCode"`
+	Message json.RawMessage `json:"message"`
+}
+
+// Config is the config used by faas frontend function.
+// The `valid` struct tags mark fields as required/optional for validation.
+type Config struct {
+	InstanceNum int `json:"instanceNum"`
+	CPU float64 `json:"cpu" valid:"optional"`
+	Memory float64 `json:"memory" valid:"optional"`
+	SLAQuota int `json:"slaQuota" valid:"optional"`
+	Runtime RuntimeConfig `json:"runtime" valid:"optional"`
+	LocalAuth *localauth.AuthConfig `json:"localAuth"`
+	MetaEtcd etcd3.EtcdConfig `json:"metaEtcd" valid:"required"`
+	DataSystemEtcd etcd3.EtcdConfig `json:"dataSystemEtcd" valid:"optional"`
+	CAEMetaEtcd etcd3.EtcdConfig `json:"caeMetaEtcd" valid:"optional"`
+	RouterEtcd etcd3.EtcdConfig `json:"routerEtcd" valid:"required"`
+	RedisConfig RedisConfig `json:"redisConfig" valid:"optional"`
+	HTTPConfig *FrontendHTTP `json:"http" valid:"optional"`
+	HTTPSConfig *tls.InternalHTTPSConfig `json:"httpsConfig" valid:"optional"`
+	DataSystemConfig *types.DataSystemConfig `json:"dataSystemConfig" valid:"optional"`
+	StreamEnable bool `json:"streamEnable" valid:"optional"`
+	StateDisable bool `json:"stateDisable" valid:"optional"`
+	BusinessType int `json:"businessType"`
+	FunctionInvokeBackend int `json:"functionInvokeBackend" valid:"optional"`
+	SccConfig crypto.SccConfig `json:"sccConfig" valid:"optional"`
+	Image string `json:"image" valid:"optional"`
+	SchedulerKeyPrefixType string `json:"schedulerKeyPrefixType" valid:"optional"`
+	MemoryControlConfig *types.MemoryControlConfig `json:"memoryControlConfig" valid:"optional"`
+	MemoryEvaluatorConfig *MemoryEvaluatorConfig `json:"memoryEvaluatorConfig" valid:"optional"`
+	DefaultTenantLimitQuota int `json:"defaultTenantLimitQuota" valid:"optional"`
+	// frontend pool
+	DynamicPoolEnable bool `json:"dynamicPoolEnable" valid:"optional"`
+	// CaaS config
+	AuthenticationEnable bool `json:"authenticationEnable" valid:"optional"`
+	RawStsConfig raw.StsConfig `json:"rawStsConfig,omitempty"`
+	TrafficLimitParams *TrafficLimitParams `json:"trafficLimitParams" valid:"optional"`
+	NodeSelector map[string]string `json:"nodeSelector,omitempty"`
+	AzID string `json:"azID" valid:"optional"`
+	ClusterID string `json:"clusterID" valid:"optional"`
+	ClusterName string `json:"clusterName" valid:"optional"`
+	AlarmConfig alarm.Config `json:"alarmConfig" valid:"optional"`
+	Version string `json:"version" valid:"optional"`
+	// FunctionGraph config
+	FunctionNameSeparator string `json:"functionNameSeparator" valid:"optional"`
+	AlarmServerAddress string `json:"alarmServerAddress" valid:"optional"`
+	InvokeMaxRetryTimes int `json:"invokeMaxRetryTimes" valid:"optional"`
+	EtcdLeaseConfig *EtcdLeaseConfig `json:"etcdLeaseConfig" valid:"optional"`
+	HeartbeatConfig *HeartbeatConfig `json:"heartbeatConfig" valid:"optional"`
+	E2EMaxDelayTime int64 `json:"e2eMaxDelayTime" valid:"optional"`
+	RetryConfig *RetryConfig `json:"retry" valid:"optional"`
+	ShareKeys ShareKeys `json:"shareKeys" valid:"optional"`
+	Affinity string `json:"affinity"`
+	RPCClientConcurrentNum int `json:"rpcClientConcurrentNum" valid:"optional"`
+	NodeAffinity string `json:"nodeAffinity" valid:"optional"`
+	NodeAffinityPolicy string `json:"nodeAffinityPolicy" valid:"optional"`
+	AuthConfig AuthConfig `json:"authConfig" valid:"optional"`
+	WiseCloudConfig WiseCloudConfig `json:"wiseCloudConfig" valid:"optional"`
+}
+
+// WiseCloudConfig wraps the service-account JWT settings
+type WiseCloudConfig struct {
+	ServiceAccountJwt wisecloudTypes.ServiceAccountJwt `json:"serviceAccountJwt" valid:"optional"`
+}
+
+// RetryConfig define retry config
+type RetryConfig struct {
+	InstanceExceptionRetry bool `json:"instanceExceptionRetry" valid:"optional"`
+}
+
+// RedisConfig redis config
+// NOTE(review): this file mixes `valid:",optional"` and `valid:"optional"`;
+// consider settling on one spelling.
+type RedisConfig struct {
+	ClusterID string `json:"clusterID,omitempty" valid:",optional"`
+	ServerAddr string `json:"serverAddr,omitempty" valid:",optional"`
+	ServerMode string `json:"serverMode,omitempty" valid:",optional"`
+	Password string `json:"password,omitempty" valid:",optional"`
+	EnableTLS bool `json:"enableTLS,omitempty" valid:",optional"`
+	TimeoutConf redisclient.TimeoutConf `json:"timeoutConf,omitempty" valid:",optional"`
+}
+
+// MemoryEvaluatorConfig memory evaluator config
+type MemoryEvaluatorConfig struct {
+	RequestMemoryEvaluator float64 `json:"requestMemoryEvaluator" valid:",optional"`
+}
+
+// ShareKeys holds the shared access key
+type ShareKeys struct {
+	AccessKey string `json:"accessKey" valid:"optional"`
+}
+
+// RuntimeConfig config info
+type RuntimeConfig struct {
+	Port string `json:"port" valid:",optional"`
+	AvailableZoneKey string `json:"azkey,omitempty" valid:",optional"`
+
+	// SDK
+	LogConfig config.CoreInfo `json:"logConfig" valid:"optional"`
+	SystemAuthConfig SystemAuthConfig `json:"systemAuthConfig" valid:"optional"`
+	EnableSigaction bool `json:"enableSigaction" valid:"optional"`
+}
+
+// FrontendHTTP configures the frontend HTTP server: timeouts, body-size
+// limits and the listen address
+type FrontendHTTP struct {
+	RespTimeOut int64 `json:"resptimeout" valid:",optional"`
+	WorkerInstanceReadTimeOut int64 `json:"workerInstanceReadTimeOut" valid:",optional"`
+	// MaxRequestBodySize unit is M
+	MaxRequestBodySize int `json:"maxRequestBodySize" valid:"required"`
+	// MaxStreamRequestBodySize unit is M
+	MaxStreamRequestBodySize int `json:"maxStreamRequestBodySize" valid:"optional"`
+	// ServerReadTimeout unit is S
+	ServerReadTimeout int `json:"serverReadTimeout" valid:"optional"`
+	// ServerWriteTimeout unit is S
+	ServerWriteTimeout int `json:"serverWriteTimeout" valid:"optional"`
+	// ClientIdleTimeout unit is S
+	ClientIdleTimeout int `json:"clientIdleTimeout" valid:"optional"`
+	// MaxDataSystemMultiDataBodySize unit is M
+	MaxDataSystemMultiDataBodySize int `json:"maxDataSystemMultiDataBodySize" valid:"optional"`
+	ServerListenPort int `json:"serverListenPort" valid:"optional"`
+	ServerListenIP string `json:"serverListenIP" valid:"optional"`
+}
+
+// TrafficLimitParams parameters of traffic limitation
+type TrafficLimitParams struct {
+	InstanceLimitRate float64 `json:"instanceLimitRate" valid:",optional"`
+	InstanceBucketSize int `json:"instanceBucketSize" valid:",optional"`
+	FuncLimitRate float64 `json:"funcLimitRate" valid:",optional"`
+	FuncBucketSize int `json:"funcBucketSize" valid:",optional"`
+}
+
+// StreamContext holds the stream name, a timeout in milliseconds and the
+// expected element count
+type StreamContext struct {
+	StreamName string
+	TimeoutMs uint32
+	ExpectNum int32
+}
+
+// InvokeProcessContext carries the per-request state of one invoke: function
+// identity and flags, the incoming request and the outgoing response
+type InvokeProcessContext struct {
+	// func basic info
+	TraceID string
+	RequestID string
+	FuncKey string
+	ShouldRetry bool
+	TrafficLimited bool
+	StartTime time.Time
+	RequestTraceInfo *RequestTraceInfo
+	IsHTTPUploadStream bool
+	StreamInfo *StreamInvokeInfo
+	AcquireTimeout int64
+	InvokeTimeout int64
+	InvokeWithoutScheduler bool
+
+	// request info
+	ReqHeader map[string]string
+	ReqPath string
+	ReqMethod string
+	ReqQuery string
+	ReqBody []byte
+	// response info
+	StatusCode int
+	RespHeader map[string]string
+	RespBody []byte
+
+	// response pass-through
+	NeedReadRespHeader bool
+
+	// stream
+	StreamCtx *StreamContext
+}
+
+// CreateInvokeProcessContext returns a context with empty (non-nil) request
+// and response header maps and StartTime set to the current time
+func CreateInvokeProcessContext() *InvokeProcessContext {
+	return &InvokeProcessContext{
+		ReqHeader: make(map[string]string),
+		RespHeader: make(map[string]string),
+		StartTime: time.Now(),
+	}
+}
+
+// AuthConfig wraps the local auth configuration
+type AuthConfig struct {
+	LocalAuthConfig LocalAuthConfig `json:"localAuthConfig"`
+}
+
+// PolicyConfig holds an allow entry and a deny entry
+type PolicyConfig struct {
+	Allow string `json:"allow"`
+	Deny string `json:"deny"`
+}
+
+// LocalAuthConfig holds the path of the local auth crypto material
+type LocalAuthConfig struct {
+	LocalAuthCryptoPath string `json:"localAuthCryptoPath"`
+}
+
+// SystemAuthConfig holds the system auth switch and its key pair.
+// NOTE(review): these fields use the `validate` tag key while the rest of the
+// file uses `valid`; if the same validator is meant, these tags are probably
+// ignored - confirm.
+type SystemAuthConfig struct {
+	Enable bool `json:"enable" validate:"optional"`
+	AccessKey string `json:"accessKey" validate:"optional"`
+	SecretKey string `json:"secretKey" validate:"optional"`
+}
+
+// APIGTriggerResponse extern interface of web response
+type APIGTriggerResponse struct {
+	Body string `json:"body"`
+	Headers map[string][]string `json:"headers"`
+	StatusCode int `json:"statusCode"`
+	IsBase64Encoded bool `json:"isBase64Encoded"`
+}
+
+// APIGTriggerEvent extern interface of web request
+type APIGTriggerEvent struct {
+	IsBase64Encoded bool `json:"isBase64Encoded"`
+	HTTPMethod string `json:"httpMethod"`
+	Path string `json:"path"`
+	Body string `json:"body"`
+	PathParameters map[string]string `json:"pathParameters"`
+	RequestContext APIGRequestContext `json:"requestContext"`
+	Headers map[string]interface{} `json:"headers"`
+	QueryStringParameters map[string]interface{} `json:"queryStringParameters"`
+	UserData string `json:"user_data"`
+}
+
+// APIGRequestContext is the request-context section of an APIGTriggerEvent
+type APIGRequestContext struct {
+	APIID string `json:"apiId"`
+	RequestID string `json:"requestId"`
+	Stage string `json:"stage"`
+	SourceIP string `json:"sourceIp"`
+}
+
+// EtcdLeaseConfig etcd lease config
+// NOTE(review): these fields carry `yaml` tags while Config references this
+// struct under a `json` tag; when decoded as JSON the default field-name
+// matching applies - confirm this is intended.
+type EtcdLeaseConfig struct {
+	LeaseTTL int64 `yaml:"leaseTTL" valid:"optional"`
+	RenewTTL int64 `yaml:"renewTTL" valid:"optional"`
+}
+
+// HeartbeatConfig heartbeat config
+type HeartbeatConfig struct {
+	HeartbeatTimeout int `json:"heartbeatTimeout" valid:",optional"`
+	HeartbeatInterval int `json:"heartbeatInterval" valid:"optional"`
+	HeartbeatTimeoutThreshold int `json:"heartbeatTimeoutThreshold" valid:"optional"`
+}
+
+// RequestTraceInfo records the identity, retry count and timing breakdown of
+// one request
+type RequestTraceInfo struct {
+	URN string
+	BusinessID string
+	TenantID string
+	FuncName string
+	Version string
+	AnonymizeURN string
+	TryCount int
+	InnerCode int
+	AllBusCost time.Duration
+	LastBusCost time.Duration
+	Deadline time.Time
+	CallInstance string
+	CallNode string
+	TotalCost time.Duration
+	FrontendCost time.Duration
+	BusCost time.Duration
+	WorkerCost time.Duration
+}
diff --git a/go/pkg/functionscaler/config/config.go b/go/pkg/functionscaler/config/config.go
index d967d49fe530e58afe463f557e40dd226e4bcf6d..54429c6287b9721e67fc79c694e60070b74e2743
100644 --- a/go/pkg/functionscaler/config/config.go +++ b/go/pkg/functionscaler/config/config.go @@ -22,6 +22,8 @@ import ( "fmt" "os" + "github.com/asaskevich/govalidator/v11" + "yuanrong.org/kernel/pkg/common/faas_common/alarm" "yuanrong.org/kernel/pkg/common/faas_common/crypto" "yuanrong.org/kernel/pkg/common/faas_common/etcd3" diff --git a/go/pkg/functionscaler/metrics/bucket_collector.go b/go/pkg/functionscaler/metrics/bucket_collector.go new file mode 100644 index 0000000000000000000000000000000000000000..6e0bd8353d9ad6448f3e2b51d222a14583b53071 --- /dev/null +++ b/go/pkg/functionscaler/metrics/bucket_collector.go @@ -0,0 +1,236 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +// Package metrics - +package metrics + +import ( + "math" + "sync" + "time" + + "yuanrong.org/kernel/pkg/common/faas_common/logger/log" + "yuanrong.org/kernel/pkg/common/faas_common/utils" + "yuanrong.org/kernel/pkg/functionscaler/types" +) + +const ( + second2Millisecond = 1000 + defaultBucketBufferSize = 100 + defaultBucketRotatePace = 1 + factor = 0.7 +) + +// Bucket collects metrics over a fixed time window +type Bucket struct { + // number of in-used instance threads + inUseInsThdNum float64 + // max number of in-used instance threads since started + maxInUseInsThdNum float64 + // number of instance thread request + insThdReqNum float64 + prevInsThdReqNumPS float64 + // average process time of all instance threads + avgProcTime float64 + prevAvgProcTime float64 + // number of requests processed by one instance thread per second, this is a average value + insThdProcNumPS float64 + prevInsThdProcNumPS float64 + initialized bool + isFirst bool + recordTime time.Time +} + +// BucketCollector collects metrics into buckets periodically +type BucketCollector struct { + lastValidBucket *Bucket + bucketBuffer []*Bucket + funcKey string + resKey string + rotatePace int + curIndex int + started bool + collected bool + stopCh chan struct{} + sync.RWMutex +} + +// NewBucketMetricsCollector creates a BucketCollector +func NewBucketMetricsCollector(funcKey, resKey string) Collector { + return &BucketCollector{ + funcKey: funcKey, + resKey: resKey, + bucketBuffer: make([]*Bucket, defaultBucketBufferSize, defaultBucketBufferSize), + curIndex: 0, + rotatePace: defaultBucketRotatePace, + started: false, + stopCh: make(chan struct{}), + } +} + +// InvokeMetricsCollected checks if invoke metrics is collected +func (bm *BucketCollector) InvokeMetricsCollected() bool { + bm.RLock() + collected := bm.collected + bm.RUnlock() + return collected +} + +// UpdateInvokeRequests updates invoke request number +func (bm *BucketCollector) UpdateInvokeRequests(insThdReqNum int) { + 
bm.Lock() + if !bm.started { + bm.started = true + bm.bucketBuffer[bm.curIndex] = &Bucket{isFirst: true, recordTime: time.Now()} + go bm.StartRotateBucket() + } + curBucket := bm.bucketBuffer[bm.curIndex] + curBucket.insThdReqNum += float64(insThdReqNum) + bm.Unlock() +} + +// UpdateInvokeMetrics updates invoke metrics +func (bm *BucketCollector) UpdateInvokeMetrics(insThdMetrics *types.InstanceThreadMetrics) { + bm.Lock() + defer bm.Unlock() + if !bm.started { + bm.started = true + bm.bucketBuffer[bm.curIndex] = &Bucket{isFirst: true, recordTime: time.Now()} + go bm.StartRotateBucket() + } + curBucket := bm.bucketBuffer[bm.curIndex] + // if ProcReqNum == 0 then consider this insThdMetrics is meaningless + if insThdMetrics != nil && insThdMetrics.ProcReqNum != 0 { + if insThdMetrics.AvgProcTime == 0 { + log.GetLogger().Errorf("invalid value in metrics of instance thread %s metrics value %+v", + insThdMetrics.InsThdID, + insThdMetrics) + return + } + curThdProcNumPS := second2Millisecond / float64(insThdMetrics.AvgProcTime) + if !bm.collected { + bm.collected = true + } + if !curBucket.initialized { + curBucket.initialized = true + curBucket.avgProcTime = float64(insThdMetrics.AvgProcTime) + curBucket.insThdProcNumPS = curThdProcNumPS + } else { + // average with AvgProcTime in metrics for one instance thread then average with other instance threads + // to get general avgProcTime + curBucket.avgProcTime = ((curBucket.avgProcTime+float64(insThdMetrics.AvgProcTime))/2 + + curBucket.avgProcTime*(curBucket.maxInUseInsThdNum-1)) / curBucket.maxInUseInsThdNum + // average with curThdPocNumPS calculated from metrics for one instance thread then average with other + // instance threads to get general insThdProcNumPS + curBucket.insThdProcNumPS = ((curBucket.insThdProcNumPS+curThdProcNumPS)/2 + curBucket.insThdProcNumPS* + (curBucket.maxInUseInsThdNum-1)) / curBucket.maxInUseInsThdNum + } + } +} + +// UpdateInsThdMetrics updates instance thread metrics +func (bm 
*BucketCollector) UpdateInsThdMetrics(inUseInsThdDiff int) { + if inUseInsThdDiff == 0 { + return + } + bm.Lock() + if !bm.started { + bm.started = true + bm.bucketBuffer[bm.curIndex] = &Bucket{recordTime: time.Now()} + go bm.StartRotateBucket() + } + curBucket := bm.bucketBuffer[bm.curIndex] + curBucket.inUseInsThdNum += float64(inUseInsThdDiff) + curBucket.maxInUseInsThdNum = math.Max(curBucket.inUseInsThdNum, curBucket.maxInUseInsThdNum) + bm.Unlock() +} + +// GetCalculatedInvokeMetrics will get three parameters used by autoScaler: +// 1. average process time of instance thread +// 2. number of requests processed by one instance thread per second +// 3. number of instance thread requests per second +func (bm *BucketCollector) GetCalculatedInvokeMetrics() (float64, float64, float64) { + bm.RLock() + curBucket := bm.bucketBuffer[bm.curIndex] + avgProcTime := averageWithPrev(curBucket.avgProcTime, curBucket.prevAvgProcTime) + insThdProcNumPS := averageWithPrev(curBucket.insThdProcNumPS, curBucket.prevInsThdProcNumPS) + // unlike avgProcTime and insThdProcNum, 0 is meaningful for insThdReqNum. 
calculate the value of insThdReqNumPS + // by the granularity of second + curInsThdReqNumPS := curBucket.insThdReqNum / math.Max(math.Ceil(time.Now().Sub(curBucket.recordTime).Seconds()), 1) + insThdReqNumPS := factor*curInsThdReqNumPS + (1-factor)*curBucket.prevInsThdReqNumPS + bm.RUnlock() + return avgProcTime, insThdProcNumPS, insThdReqNumPS +} + +// Stop will stop metrics collector, may be called multiple times +func (bm *BucketCollector) Stop() { + utils.SafeCloseChannel(bm.stopCh) +} + +// StartRotateBucket rotates buckets by rotatePace +func (bm *BucketCollector) StartRotateBucket() { + ticker := time.NewTicker(time.Duration(bm.rotatePace) * time.Second) + for { + select { + case <-ticker.C: + bm.Lock() + prevBucket := bm.bucketBuffer[bm.curIndex] + bm.curIndex = (bm.curIndex + 1) % len(bm.bucketBuffer) + bm.bucketBuffer[bm.curIndex] = bm.rotateMetricsBucket(prevBucket) + bm.Unlock() + case <-bm.stopCh: + log.GetLogger().Warnf("stop collect metrics of function %s resource %s", bm.funcKey, bm.resKey) + return + } + } +} + +func (bm *BucketCollector) rotateMetricsBucket(prevBucket *Bucket) *Bucket { + prevAvgProcTime := averageWithPrev(prevBucket.avgProcTime, prevBucket.prevAvgProcTime) + prevInsThdProcNumPS := averageWithPrev(prevBucket.insThdProcNumPS, prevBucket.prevInsThdProcNumPS) + // unlike avgProcTime and insThdProcNum, 0 is meaningful for insThdReqNum + var prevInsThdReqNumPS float64 + insThdReqNumPS := prevBucket.insThdReqNum / float64(bm.rotatePace) + // if preBucket is the init bucket then prevInsThdReqNumPS should not be averaged with its prev + if prevBucket.isFirst { + prevInsThdReqNumPS = insThdReqNumPS + } else { + prevInsThdReqNumPS = factor*insThdReqNumPS + (1-factor)*prevBucket.prevInsThdReqNumPS + } + return &Bucket{ + inUseInsThdNum: prevBucket.inUseInsThdNum, + maxInUseInsThdNum: prevBucket.maxInUseInsThdNum, + prevAvgProcTime: prevAvgProcTime, + prevInsThdProcNumPS: prevInsThdProcNumPS, + prevInsThdReqNumPS: prevInsThdReqNumPS, + 
recordTime: time.Now(), + } +} + +// avoid to average with 0 value for parameters such as avgProcTime and insThdProcNumPS, an assumption is made that +// value and preValue won't both be 0 +func averageWithPrev(value, prevValue float64) float64 { + avgValue := float64(0) + if value != 0 && prevValue != 0 { + avgValue = factor*value + (1-factor)*prevValue + } else if value == 0 { + avgValue = prevValue + } else if prevValue == 0 { + avgValue = value + } + return avgValue +} diff --git a/go/pkg/functionscaler/metrics/collector.go b/go/pkg/functionscaler/metrics/collector.go new file mode 100644 index 0000000000000000000000000000000000000000..10890e2a4b17cd4ed1278179f3ff0aae6a0bb3a3 --- /dev/null +++ b/go/pkg/functionscaler/metrics/collector.go @@ -0,0 +1,33 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+
+// Package metrics -
+package metrics
+
+import (
+	"yuanrong.org/kernel/pkg/functionscaler/types"
+)
+
+// Collector collects metrics data
+type Collector interface {
+	// InvokeMetricsCollected reports whether any invoke metrics have been collected
+	InvokeMetricsCollected() bool
+	// UpdateInvokeRequests records insThdReqNum additional instance thread requests
+	UpdateInvokeRequests(insThdReqNum int)
+	// UpdateInvokeMetrics records the metrics reported by one instance thread
+	UpdateInvokeMetrics(insThdMetrics *types.InstanceThreadMetrics)
+	// UpdateInsThdMetrics applies a change in the number of in-use instance threads
+	UpdateInsThdMetrics(inUseInsThdDiff int)
+	// GetCalculatedInvokeMetrics returns, in order: the average process time of an
+	// instance thread, the number of requests one instance thread processes per
+	// second, and the number of instance thread requests per second
+	GetCalculatedInvokeMetrics() (float64, float64, float64)
+	// Stop stops the collector
+	Stop()
+}
diff --git a/go/pkg/functionscaler/metrics/prometheus_metric.go b/go/pkg/functionscaler/metrics/prometheus_metric.go
new file mode 100644
index 0000000000000000000000000000000000000000..49031c8a21ad09027dbdfed2666412fe46995c6f
--- /dev/null
+++ b/go/pkg/functionscaler/metrics/prometheus_metric.go
@@ -0,0 +1,243 @@
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Package metrics serves the Prometheus /metrics endpoint and forwards concurrency
+// and lease events to the wisecloudtool metric provider. Every exported function in
+// this file returns immediately unless the configured scenario is
+// types.ScenarioWiseCloud.
+package metrics
+
+import (
+	"context"
+	"net/http"
+	"sync"
+	"time"
+
+	"github.com/prometheus/client_golang/prometheus/promhttp"
+	k8stype "k8s.io/apimachinery/pkg/types"
+
+	"yuanrong.org/kernel/pkg/common/faas_common/logger/log"
+	"yuanrong.org/kernel/pkg/common/faas_common/tls"
+	"yuanrong.org/kernel/pkg/common/faas_common/wisecloudtool"
+	"yuanrong.org/kernel/pkg/functionscaler/config"
+	"yuanrong.org/kernel/pkg/functionscaler/types"
+)
+
+const (
+	// metricsShutdownTimeout bounds how long the metrics server may take to drain
+	// in-flight requests once the stop signal arrives.
+	metricsShutdownTimeout = 5 * time.Second
+
+	// metricLabelCount is the number of label values EnsureConcurrencyGaugeWithLabel requires.
+	metricLabelCount = 8
+	// labelIdxNamespace and labelIdxName are the positions of the workload namespace
+	// and name inside the label values.
+	labelIdxNamespace = 5
+	labelIdxName      = 6
+)
+
+var (
+	// pendingRequest remembers, per pending acquire request, the deployment its
+	// "pendingRequest" gauge increment was attributed to, so that the release path
+	// decrements the same series. Guarded by pendingRequestMu.
+	pendingRequest   = map[*types.InstanceAcquireRequest]*k8stype.NamespacedName{}
+	pendingRequestMu sync.RWMutex
+
+	metricsReporter = wisecloudtool.NewMetricProvider()
+)
+
+// InitServerMetric serves the Prometheus metrics endpoint on
+// config.GlobalConfig.MetricsAddr. It blocks until stopCh is closed or receives a
+// value and the server has been shut down, so callers that must not block should
+// run it in its own goroutine.
+func InitServerMetric(stopCh <-chan struct{}) {
+	if config.GlobalConfig.Scenario != types.ScenarioWiseCloud {
+		return
+	}
+	// start metric server
+	serveMetric(stopCh, config.GlobalConfig.MetricsAddr)
+}
+
+// serveMetric exposes promhttp.Handler() under /metrics on metricAddr, waits for
+// stopCh and then shuts the server down gracefully.
+func serveMetric(stopCh <-chan struct{}, metricAddr string) {
+	mux := http.NewServeMux()
+	mux.Handle("/metrics", promhttp.Handler())
+	// Start the HTTP server in the background; this goroutine ends when the
+	// listener fails or Shutdown is called below.
+	server := &http.Server{Addr: metricAddr, Handler: mux}
+	go func() {
+		if err := startServer(server); err != nil && err != http.ErrServerClosed {
+			log.GetLogger().Errorf("Metrics HTTP server failed: %v", err)
+		}
+	}()
+
+	// Wait for the stop signal.
+	<-stopCh
+	log.GetLogger().Infof("Shutting down Metrics server...")
+	// Bound the drain time: Shutdown with a background context waits indefinitely
+	// for connections that never become idle.
+	ctx, cancel := context.WithTimeout(context.Background(), metricsShutdownTimeout)
+	defer cancel()
+	if err := server.Shutdown(ctx); err != nil {
+		log.GetLogger().Errorf("Server shutdown failed: %v", err)
+	} else {
+		log.GetLogger().Infof("Server exited cleanly")
+	}
+}
+
+// startServer runs httpServer until it stops and returns the error reported by the
+// listener. Plain HTTP is used unless MetricsHTTPSEnable is set and an HTTPSConfig
+// is present. http.ErrServerClosed (the result of a graceful Shutdown) is returned
+// to the caller but is not logged as a failure.
+func startServer(httpServer *http.Server) error {
+	if !config.GlobalConfig.MetricsHTTPSEnable || config.GlobalConfig.HTTPSConfig == nil {
+		err := httpServer.ListenAndServe()
+		if err != nil && err != http.ErrServerClosed {
+			log.GetLogger().Errorf("failed to http ListenAndServe: %s", err.Error())
+		}
+		return err
+	}
+	if err := tls.InitTLSConfig(*config.GlobalConfig.HTTPSConfig); err != nil {
+		log.GetLogger().Errorf("failed to init the HTTPS config: %s", err.Error())
+		return err
+	}
+	// NOTE(review): a *client* TLS config is installed on a server here - confirm
+	// that it carries the server certificate, or switch to a server-side getter.
+	httpServer.TLSConfig = tls.GetClientTLSConfig()
+	// Empty cert/key paths: the certificate has to come from httpServer.TLSConfig.
+	err := httpServer.ListenAndServeTLS("", "")
+	if err != nil && err != http.ErrServerClosed {
+		log.GetLogger().Errorf("failed to HTTPListenAndServeTLS: %s", err.Error())
+	}
+	return err
+}
+
+// EnsureConcurrencyGaugeWithLabel registers the workload identified by
+// labels[labelIdxNamespace]/labels[labelIdxName] for funcKey and invokeLabel, then
+// asks the metric provider to create the yuanrong_concurrency_num gauge for labels.
+// labels must contain exactly metricLabelCount values; otherwise a warning is
+// logged and nothing is registered.
+func EnsureConcurrencyGaugeWithLabel(funcKey string, invokeLabel string, labels []string) {
+	if config.GlobalConfig.Scenario != types.ScenarioWiseCloud {
+		return
+	}
+	if len(labels) != metricLabelCount {
+		log.GetLogger().Warnf("create yuanrong_concurrency_num gauge with label failed, labels len is %d", len(labels))
+		return
+	}
+
+	metricsReporter.AddWorkLoad(funcKey, invokeLabel, &k8stype.NamespacedName{
+		Namespace: labels[labelIdxNamespace],
+		Name:      labels[labelIdxName],
+	})
+	if err := metricsReporter.EnsureConcurrencyGaugeWithLabel(labels); err != nil {
+		log.GetLogger().Warnf("create yuanrong_concurrency_num gauge with label failed, err is %s", err.Error())
+	}
+}
+
+// EnsureLeaseRequestTotal asks the metric provider to create the
+// yuanrong_lease_total counter for labels; a failure is logged as a warning.
+func EnsureLeaseRequestTotal(labels []string) {
+	if config.GlobalConfig.Scenario != types.ScenarioWiseCloud {
+		return
+	}
+	if err := metricsReporter.EnsureLeaseRequestTotalWithLabel(labels); err != nil {
+		log.GetLogger().Warnf("create yuanrong_lease_total counter with label failed, err is %s", err.Error())
+	}
+}
+
+// OnAcquireLease increments the concurrency gauge and the lease request total for
+// the instance's metric label values when a lease is acquired. A nil allocation or
+// nil instance is logged and ignored.
+func OnAcquireLease(insAcqReq *types.InstanceAllocation) {
+	if config.GlobalConfig.Scenario != types.ScenarioWiseCloud {
+		return
+	}
+	if insAcqReq == nil || insAcqReq.Instance == nil {
+		log.GetLogger().Warnf("inc metric for lease with label failed, instance info is empty")
+		return
+	}
+	if err := metricsReporter.IncConcurrencyGaugeWithLabel(insAcqReq.Instance.MetricLabelValues); err != nil {
+		log.GetLogger().Warnf("inc concurrency gauge with label failed, err is %s", err.Error())
+	}
+	if err := metricsReporter.IncLeaseRequestTotalWithLabel(insAcqReq.Instance.MetricLabelValues); err != nil {
+		log.GetLogger().Warnf("inc lease total counter with label failed, err is %s", err.Error())
+	}
+}
+
+// OnReleaseLease decrements the concurrency gauge for the instance's metric label
+// values when a lease is released. A nil allocation or nil instance is logged and
+// ignored.
+func OnReleaseLease(insAcqReq *types.InstanceAllocation) {
+	if config.GlobalConfig.Scenario != types.ScenarioWiseCloud {
+		return
+	}
+	if insAcqReq == nil || insAcqReq.Instance == nil {
+		log.GetLogger().Warnf("dec metric for lease with label failed, instance info is empty")
+		return
+	}
+	if err := metricsReporter.DecConcurrencyGaugeWithLabel(insAcqReq.Instance.MetricLabelValues); err != nil {
+		log.GetLogger().Warnf("dec concurrency gauge with label failed, err is %s", err.Error())
+	}
+}
+
+// OnPendingRequestAdd attributes a pending acquire request to a deployment returned
+// by GetRandomDeployment and increments the concurrency gauge under the
+// "pendingRequest" label value. When no deployment is returned nothing is recorded.
+// A request that is already tracked is ignored so the gauge is incremented once
+// per request.
+func OnPendingRequestAdd(insAcqReq *types.InstanceAcquireRequest) {
+	if config.GlobalConfig.Scenario != types.ScenarioWiseCloud {
+		return
+	}
+	if insAcqReq == nil || insAcqReq.FuncSpec == nil || insAcqReq.ResSpec == nil {
+		log.GetLogger().Errorf("insAcqReq invalid, skip")
+		return
+	}
+
+	invokeLabel := insAcqReq.ResSpec.InvokeLabel
+	deployment := metricsReporter.GetRandomDeployment(insAcqReq.FuncSpec.FuncKey, invokeLabel)
+	if deployment == nil {
+		log.GetLogger().Infof("cold starting, no need inc metrics for %s", insAcqReq.FuncSpec.FuncKey)
+		return
+	}
+
+	pendingRequestMu.Lock()
+	if _, tracked := pendingRequest[insAcqReq]; tracked {
+		// OnPendingRequestRelease decrements once per request, so a second
+		// increment would never be undone.
+		pendingRequestMu.Unlock()
+		log.GetLogger().Warnf("pending request already stored, skip, %s, %s",
+			insAcqReq.FuncSpec.FuncKey, invokeLabel)
+		return
+	}
+	pendingRequest[insAcqReq] = deployment
+	pendingRequestMu.Unlock()
+
+	err := metricsReporter.IncConcurrencyGaugeWithLabel(wisecloudtool.GetMetricLabels(&insAcqReq.FuncSpec.FuncMetaData,
+		invokeLabel, deployment.Namespace, deployment.Name, "pendingRequest"))
+	if err != nil {
+		log.GetLogger().Warnf("inc concurrency gauge with label failed, err is %s", err.Error())
+	}
+}
+
+// OnPendingRequestRelease removes a request recorded by OnPendingRequestAdd and
+// decrements the concurrency gauge for the deployment it was attributed to. The
+// decrement is skipped when the request was never recorded or when the metric
+// provider no longer knows the function/invoke-label pair.
+func OnPendingRequestRelease(insAcqReq *types.InstanceAcquireRequest) {
+	if config.GlobalConfig.Scenario != types.ScenarioWiseCloud {
+		return
+	}
+	if insAcqReq == nil || insAcqReq.FuncSpec == nil || insAcqReq.ResSpec == nil {
+		log.GetLogger().Errorf("insAcqReq invalid, skip")
+		return
+	}
+	pendingRequestMu.Lock()
+	deployment, ok := pendingRequest[insAcqReq]
+	delete(pendingRequest, insAcqReq)
+	pendingRequestMu.Unlock()
+	if !ok {
+		log.GetLogger().Warnf("pending request not store, %s, %s",
+			insAcqReq.FuncSpec.FuncKey, insAcqReq.ResSpec.InvokeLabel)
+		return
+	}
+
+	if !metricsReporter.Exist(insAcqReq.FuncSpec.FuncKey, insAcqReq.ResSpec.InvokeLabel) {
+		log.GetLogger().Infof("function instance config has been clean, skip, %s, %s",
+			insAcqReq.FuncSpec.FuncKey, insAcqReq.ResSpec.InvokeLabel)
+		return
+	}
+	invokeLabel := insAcqReq.ResSpec.InvokeLabel
+	labels := wisecloudtool.GetMetricLabels(&insAcqReq.FuncSpec.FuncMetaData, invokeLabel, deployment.Namespace,
+		deployment.Name, "pendingRequest")
+	if err := metricsReporter.DecConcurrencyGaugeWithLabel(labels); err != nil {
+		log.GetLogger().Warnf("dec concurrency gauge with label failed, err is %s", err.Error())
+	}
+}
+
+// ClearConcurrencyGaugeWithLabel clear yuanrong_concurrency_num
+func ClearConcurrencyGaugeWithLabel(labels []string) {
+	if config.GlobalConfig.Scenario != types.ScenarioWiseCloud {
+		return
+	}
+	metricsReporter.ClearConcurrencyGaugeWithLabel(labels)
+}
+
+// ClearLeaseRequestTotal clear yuanrong_lease_total
+func ClearLeaseRequestTotal(labels []string) {
+	if config.GlobalConfig.Scenario != types.ScenarioWiseCloud {
+		return
+	}
+	metricsReporter.ClearLeaseRequestTotalWithLabel(labels)
+}
+
+// ClearMetricsForFunction Clear deployments for function version.
+func ClearMetricsForFunction(funcSpec *types.FunctionSpecification) {
+	if config.GlobalConfig.Scenario != types.ScenarioWiseCloud {
+		return
+	}
+	// Evaluating &funcSpec.FuncMetaData on a nil funcSpec panics, so reject nil
+	// input the same way the other entry points of this package do.
+	if funcSpec == nil {
+		log.GetLogger().Warnf("clear metrics for function failed, funcSpec is empty")
+		return
+	}
+	metricsReporter.ClearMetricsForFunction(&funcSpec.FuncMetaData)
+}
+
+// ClearMetricsForFunctionInsConfig clears what the metric provider holds for the
+// given function and invoke label. A nil funcSpec is logged and ignored.
+func ClearMetricsForFunctionInsConfig(funcSpec *types.FunctionSpecification, invokeLabel string) {
+	if config.GlobalConfig.Scenario != types.ScenarioWiseCloud {
+		return
+	}
+	if funcSpec == nil {
+		log.GetLogger().Warnf("clear metrics for function instance config failed, funcSpec is empty")
+		return
+	}
+	metricsReporter.ClearMetricsForInsConfig(&funcSpec.FuncMetaData, invokeLabel)
+}
diff --git a/scripts/package_yuanrong.sh b/scripts/package_yuanrong.sh
index 44c2b6faabda7599f55bef76c91bef0dc59a5e05..fe76ccc7a364a7dfa3d3326de06168b6f740c151 100644
--- a/scripts/package_yuanrong.sh
+++ b/scripts/package_yuanrong.sh
@@ -104,6 +104,12 @@ if [ -n "${frontend_filename}" ]; then
     cp -fr ${OUTPUT_DIR}/openyuanrong/pattern/pattern_faas/init_frontend_args.json ${OUTPUT_DIR}/openyuanrong/function_system/config/
 fi
 
+faas_filename=$(ls *faas*.tar.gz)
+if [ -n "${faas_filename}" ]; then
+    tar -zxvf ${faas_filename} -C ${OUTPUT_DIR}/openyuanrong
+    cp -fr ${OUTPUT_DIR}/openyuanrong/pattern/pattern_faas/init_scheduler_args.json ${OUTPUT_DIR}/openyuanrong/function_system/config/
+fi
+
 dashboard_filename=$(ls *dashboard*.tar.gz)
 if [ -n "${dashboard_filename}" ]; then
     tar -zxvf ${dashboard_filename} -C ${OUTPUT_DIR}/openyuanrong/function_system/
diff --git a/tools/download_dependency.sh b/tools/download_dependency.sh
index f44c1e4d3260e8640690c7dc08cf4b77551b1a5e..582c3c1bc7cb8f3b0d3884b65b4f817f44cacb33 100644
--- a/tools/download_dependency.sh
+++ b/tools/download_dependency.sh
@@ -112,7 +112,8 @@ function compile_functionsystem() {
         return
     fi
     cd ${YR_FUNCTIONSYSTEM_BIN_DIR}
-    bash build.sh
+    bash run.sh build
+    bash run.sh pack
     cd output
     tar -xf ${YR_FUNCTIONSYSTEM_BIN_DIR}/output/yr-functionsystem*.tar.gz
     cp -r ${YR_FUNCTIONSYSTEM_BIN_DIR}/output/function_system/metrics ${RUNTIME_SRC_DIR}/