代码拉取完成,页面将自动刷新
#!/bin/bash
# Modify the ssh configuration in all_config.yaml.
#
# Setup section: sources ./src/read_config.sh, prepares the log directory and
# copies the deployment settings into the lowercase names used further down.

# NOTE(review): CONFIG_FILE is presumably read by read_config.sh - confirm.
CONFIG_FILE="./src/all_config.yaml"
. ./src/read_config.sh

# Logs go to LOGSDIR when it is set and non-empty, else to ./deepseek_logs.
log_dir="${LOGSDIR:-./deepseek_logs}"
[ -d "$log_dir" ] || mkdir -p "$log_dir"
log_file="${log_dir}/ssh_mindie.log"

# NOTE(review): the uppercase values below are presumably provided by
# read_config.sh; USER may simply be the login name from the environment.
user="$USER"
image="$IMAGE"
mount_dir="$MOUNT_DIR"
container_dir="$CONTAINER_DIR"
container_name="$CONTAINER_NAME"
mindie_ascend_deploy_dir="$MINDIE_ASCEND_DEPLOY_DIR"
delete_container="$DELETE_CONTAINER"
other_commands="$OTHER_COMMANDS"
#######################################
# Verify that non-interactive (key-based) SSH login to a node works.
# Globals:   user (read) - remote login name; when empty, ssh picks its
#            default user
# Arguments: $1 - IP address or host name of the node
# Outputs:   an error message on stderr when the login fails
# Exits:     terminates the whole script with status 1 when the login fails
#######################################
check_ssh_key() {
  local ip=$1
  # Probe the same account the other ssh calls in this script log in with.
  local target="${user:+$user@}$ip"
  # BatchMode=yes forbids password prompts, so a missing key fails quickly.
  # stdin comes from /dev/null so ssh cannot swallow the input of a calling
  # `while read` loop.
  if ! ssh -o BatchMode=yes -o ConnectTimeout=5 "$target" echo "SSH_OK" < /dev/null &> /dev/null; then
    echo "Error: SSH key not set up for $ip. please use ssh_rank_table.py or manually configure the ip." >&2
    exit 1
  fi
}
#######################################
# Make sure the MindIE container exists on one node, prepare it when it is
# newly created, and launch start_mindie.sh inside it in the background.
# The node whose IP equals the first entry of IPS is handled with local
# docker commands; every other node is driven over ssh.
# Globals:   IPS, user, image, mount_dir, container_dir, container_name,
#            mindie_ascend_deploy_dir, delete_container, other_commands,
#            log_file (read); CONTAINER_EXISTS, CONTAINER_ID (written)
# Arguments: $1 - IP of the node
#            $2 - value passed to the container as MIES_CONTAINER_IP
# Outputs:   progress messages on stdout, errors on stderr
# Returns:   1 when a new container could not be created, otherwise the
#            status of the final exec_in_container call
#######################################
execute_container_commands() {
  local IP=$1
  local MIES_CONTAINER_IP=$2
  local USE_SSH=false
  local master_ip docker_command no_internet_msg

  # First field of the first line of IPS identifies the master node.
  read -r master_ip _ <<< "$IPS"

  # Every ssh call below reads from /dev/null; otherwise ssh would consume the
  # stdin of a calling `while read` loop and later nodes would be skipped.
  if [ "$IP" == "$master_ip" ]; then
    echo "Processing master node at IP: $IP with container IP: $MIES_CONTAINER_IP"
    CONTAINER_EXISTS=$(docker ps -aqf "name=$container_name")
  else
    echo "Processing node at IP: $IP with container IP: $MIES_CONTAINER_IP"
    check_ssh_key "$IP"
    USE_SSH=true
    CONTAINER_EXISTS=$(ssh "$user@$IP" "docker ps -aqf name=$container_name" < /dev/null)
  fi

  # Optionally throw away an existing container so a fresh one gets created.
  if [ -n "$CONTAINER_EXISTS" ] && [ "$delete_container" = true ]; then
    echo "Container $CONTAINER_EXISTS already exists. Deleting it..."
    if [ "$USE_SSH" = true ]; then
      ssh "$user@$IP" "docker rm -f $container_name" < /dev/null
    else
      docker rm -f "$container_name"
    fi
    CONTAINER_EXISTS=""
  fi

  if [ -z "$CONTAINER_EXISTS" ]; then
    echo "Container does not exist. Creating a new one..."
    # Kept as a single string because the same text must also be handed to
    # the remote shell through ssh.
    docker_command="docker run -itd -u 0 -e MIES_CONTAINER_IP=$MIES_CONTAINER_IP --ipc=host --network host \
      --name $container_name \
      --privileged \
      --device=/dev/davinci_manager \
      --device=/dev/devmm_svm \
      --device=/dev/hisi_hdc \
      -v /usr/local/Ascend/driver:/usr/local/Ascend/driver:ro \
      -v /usr/local/sbin:/usr/local/sbin:ro \
      -v $mount_dir:$container_dir \
      $image \
      /bin/bash"

    # Create the container and probe internet reachability from the node.
    if [ "$USE_SSH" = true ]; then
      CONTAINER_ID=$(ssh "$user@$IP" "$docker_command" < /dev/null)
      if ssh "$user@$IP" "ping -c 1 www.baidu.com" < /dev/null &> /dev/null; then
        no_internet_msg=''
      else
        no_internet_msg='Internet connection failed remotely. Please check your network.'
      fi
    else
      CONTAINER_ID=$(eval "$docker_command")
      if ping -c 1 www.baidu.com &> /dev/null; then
        no_internet_msg=''
      else
        no_internet_msg='Internet connection failed locally. Please check your network.'
      fi
    fi

    # Without a container ID every following docker exec would fail anyway.
    if [ -z "$CONTAINER_ID" ]; then
      echo "Error: failed to create container $container_name on $IP." >&2
      return 1
    fi

    # Offline: install the jq binary shipped in the deploy directory.
    # Online: install jq through the container's package manager.
    if [ -n "$no_internet_msg" ]; then
      echo "$no_internet_msg"
      exec_in_container "cd $container_dir/$mindie_ascend_deploy_dir && if ! command -v jq &> /dev/null; then cp ./jq-linux-arm64 /usr/bin/jq && chmod +x /usr/bin/jq; fi"
    else
      exec_in_container "if command -v yum &> /dev/null; then yum install -y jq; elif command -v apt-get &> /dev/null; then apt-get update && apt-get install -y jq; fi"
    fi
    exec_in_container "cd $container_dir/$mindie_ascend_deploy_dir && pip install -r ./src/requirements.txt"
    if [ -n "$other_commands" ]; then
      exec_in_container "$other_commands"
    fi
  else
    # Reuse the container that is already there.
    CONTAINER_ID=$CONTAINER_EXISTS
  fi

  # Start the service in the background; its output is appended to log_file.
  exec_in_container "cd $container_dir/$mindie_ascend_deploy_dir && bash start_mindie.sh >> $log_file 2>&1 &"
}
#######################################
# Run a shell command line inside the current container via `bash -c`.
# Globals:   USE_SSH (read) - "true" runs docker on the remote node over ssh
#            user, IP (read) - ssh login and host for the remote case
#            CONTAINER_ID (read) - container to execute in
# Arguments: $1 - command line to hand to `bash -c` inside the container
# Returns:   exit status of the ssh or docker invocation
#######################################
exec_in_container() {
  local command=$1
  # Text that closes a single-quoted string, adds a literal quote and reopens.
  local sq_escaped="'\\''"
  if [ "$USE_SSH" = true ]; then
    # Escape embedded single quotes so the remote shell passes the command to
    # `bash -c` unchanged instead of ending the quoted string early.
    local remote_command=${command//\'/$sq_escaped}
    # stdin from /dev/null keeps ssh from eating a calling loop's input.
    ssh "$user@$IP" "docker exec $CONTAINER_ID bash -c '$remote_command'" < /dev/null
  else
    docker exec "$CONTAINER_ID" bash -c "$command"
  fi
}
# Deploy to every node listed in IPS; each line holds "<node IP> <container IP>".
while read -r IP MIES_CONTAINER_IP; do
  # Skip blank lines instead of processing a node with an empty address.
  [ -n "$IP" ] || continue
  # The loop reads IPS on stdin. Give the body /dev/null instead, otherwise
  # the ssh commands it runs would consume the remaining lines and the nodes
  # after the first remote one would never be processed.
  execute_container_commands "$IP" "$MIES_CONTAINER_IP" < /dev/null
done <<< "$IPS"
# Tell the user where the log is (the message reads "View log") and follow it.
echo "查看日志: $log_file"
tail -f "$log_file"
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。