diff --git a/.gitee-ci.yml b/.gitee-ci.yml new file mode 100644 index 0000000000000000000000000000000000000000..4f5f3e635723e2e63560a37c2c9dbf134b19f7ef --- /dev/null +++ b/.gitee-ci.yml @@ -0,0 +1,36 @@ +stages: + - build + - package + +variables: + CARGO_TERM_COLOR: always + +build-job: + stage: build + image: registry.cn-hangzhou.aliyuncs.com/gitee-go/fedora:39 + script: + - dnf install -y cargo rust clang llvm gcc make pkg-config openssl-devel rpm-build + - rustup default stable || true # 如果镜像内已带 Rust,可忽略 + - cargo fmt --all -- --check + - cargo check --workspace + - cargo test --workspace -- --skip ebpf # 可按需调整/筛选 + artifacts: + expire_in: 7 days + paths: + - target/debug + - target/release + +package-job: + stage: package + image: registry.cn-hangzhou.aliyuncs.com/gitee-go/fedora:39 + needs: [build-job] + script: + - dnf install -y cargo rust clang llvm gcc make rpm-build + - bash scripts/build-rpm.sh + - mkdir -p ci_artifacts + - cp -r $HOME/rpmbuild/RPMS ci_artifacts/RPMS + - cp -r $HOME/rpmbuild/SRPMS ci_artifacts/SRPMS + artifacts: + expire_in: 30 days + paths: + - ci_artifacts \ No newline at end of file diff --git a/.gitignore b/.gitignore index c7b955639c9978e32b3078072a2f1ddf4333aa06..7273815fa4f828ccae7affce2789a64608a2ddfe 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,7 @@ /target .vscode/ /healer/target +/simple_test/target +/healer-demo +/healer-demo/* +quickstart-config.yaml \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index 869f2181afc323860c1c2d10833bb2730e3e3d3a..1426ec65722a634cba65c84fa7ca5af67745ed55 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1893,6 +1893,10 @@ dependencies = [ "libc", ] +[[package]] +name = "simple_test_process" +version = "0.1.0" + [[package]] name = "slab" version = "0.4.10" diff --git a/Cargo.toml b/Cargo.toml index 2adbd8096c112edc6d8bbfbb16a60a2357079fa5..c530612f69f9d5f1af70eee2e5be3c7e58014ded 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,6 +3,7 @@ members = [ "healer", "healer-ebpf", 
"healer-common", + "simple_test_process", ] resolver = "2" default-members = ["healer", "healer-common"] diff --git a/README.md b/README.md index 5a74f5b1b14460958efbaf3a21aff83f077d748e..d2ea4e2b2cfdc3d51d36197643087706b1090732 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,71 @@ ## 介绍 A high-performance daemon leveraging eBPF for reliable, low-overhead monitoring and automatic recovery of critical processes to ensure service continuity. +## 快速上手指南 + +下面的流程使用仓库内的示例进程 `simple_test_process`,演示 Healer 如何监控并自动拉起目标进程。 + +### 前置条件 +- Linux x86_64(Fedora/CentOS 等 systemd 发行版均可) +- Rust stable toolchain(用于编译 Healer 与示例进程) + +### 步骤 + +1. 获取代码并编译所需二进制: + ```bash + git clone https://gitee.com/你/仓库地址.git + cd healer-process + cargo build -p healer -p simple_test_process + ``` + +2. 使用仓库自带脚本生成示例所需目录(默认创建在项目根目录的 `healer-demo/` 下): + ```bash + chmod +x simple_test_process/init.sh + ./simple_test_process/init.sh + ``` + +3. 编写快速体验配置 `quickstart-config.yaml`: + ```bash + cat < quickstart-config.yaml + log_level: "info" + log_directory: "$(pwd)/healer-demo/logs" + pid_file_directory: "$(pwd)/healer-demo/run" + working_directory: "$(pwd)" + processes: + - name: "demo_counter" + enabled: true + command: "$(pwd)/target/debug/simple_test_process" + args: [] + run_as_root: false + monitor: + type: "pid" + pid_file_path: "$(pwd)/healer-demo/run/simple_counter.pid" + interval_secs: 3 + recovery: + type: "regular" + retries: 3 + retry_window_secs: 60 + cooldown_secs: 180 + EOF + ``` + +4. 在终端 A 启动示例进程(它会写入 PID 文件并持续输出心跳): + ```bash + target/debug/simple_test_process + ``` + +5. 在终端 B 前台启动 Healer 并加载刚才的配置: + ```bash + HEALER_NO_DAEMON=1 HEALER_CONFIG=$(pwd)/quickstart-config.yaml RUST_LOG=info target/debug/healer + ``` + +6. 
体验自动恢复:在终端 A 或新的终端里执行并观察 Healer 日志出现重启信息(PID 会变化): + ```bash + pkill -x simple_test_process + ``` + + Healer 会捕获退出事件并重新拉起示例进程。按 `Ctrl+C` 可结束 Healer,结束后记得清理临时文件/目录。 + ## 安装与编译 ### 从源码编译 diff --git a/simple_test_process/.gitignore b/simple_test_process/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..ea8c4bf7f35f6f77f75d92ad8ce8349f6e81ddba --- /dev/null +++ b/simple_test_process/.gitignore @@ -0,0 +1 @@ +/target diff --git a/simple_test_process/Cargo.lock b/simple_test_process/Cargo.lock new file mode 100644 index 0000000000000000000000000000000000000000..9b619f1905eb017ad903cb5f1d437c275dbe777c --- /dev/null +++ b/simple_test_process/Cargo.lock @@ -0,0 +1,7 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "simple_test_process" +version = "0.1.0" diff --git a/simple_test_process/Cargo.toml b/simple_test_process/Cargo.toml new file mode 100644 index 0000000000000000000000000000000000000000..c1b3b5c3c39e506866553abcd1dc171a2ca660a3 --- /dev/null +++ b/simple_test_process/Cargo.toml @@ -0,0 +1,6 @@ +[package] +name = "simple_test_process" +version = "0.1.0" +edition = "2024" + +[dependencies] diff --git a/simple_test_process/init.sh b/simple_test_process/init.sh new file mode 100755 index 0000000000000000000000000000000000000000..4cb8108a6397f6e514764da6a4cd57bf5ab5e3e7 --- /dev/null +++ b/simple_test_process/init.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +set -e + +ROOT_DIR=$(cd "$(dirname "$0")/.." 
// src/main.rs
//
// Demo target process for the Healer quick-start guide. It records its own PID
// in a PID file and then prints a heartbeat every two seconds until it is
// killed, so that a supervisor watching the PID file has something to restart.

use std::env;
use std::fs;
use std::io::{self, Write};
use std::path::PathBuf;
use std::process;
use std::thread;
use std::time::Duration;

/// Returns the location of the PID file: `<base>/run/simple_counter.pid`.
///
/// `<base>` is the value of the `HEALER_DEMO_BASE` environment variable when it
/// is set to a non-empty value, otherwise `<current dir>/healer-demo`.
///
/// The variable is read with `var_os`, so a directory name that is not valid
/// UTF-8 is honoured instead of being silently replaced by the default. An
/// empty value is treated as "unset"; otherwise it would produce the relative
/// path `run/simple_counter.pid`.
///
/// # Panics
///
/// Panics if the override is absent and the current working directory cannot
/// be determined.
fn resolve_pid_file() -> PathBuf {
    let base = env::var_os("HEALER_DEMO_BASE")
        .filter(|value| !value.is_empty())
        .map(PathBuf::from)
        .unwrap_or_else(|| {
            env::current_dir()
                .expect("无法获取当前目录")
                .join("healer-demo")
        });
    base.join("run").join("simple_counter.pid")
}

/// Writes one line to stdout on a best-effort basis.
///
/// `println!` panics when stdout cannot be written (closed descriptor, broken
/// pipe). This process exists to stay alive, so a lost terminal must not take
/// it down; the write error is deliberately discarded.
fn say(line: &str) {
    let _ = writeln!(io::stdout(), "{}", line);
}

/// Writes one line to stderr on a best-effort basis (see [`say`] for why the
/// result is discarded instead of using `eprintln!`).
fn complain(line: &str) {
    let _ = writeln!(io::stderr(), "{}", line);
}

/// Writes a single heartbeat record to `out` and flushes it.
///
/// The record starts with `\r` so that, on a terminal, each heartbeat
/// overwrites the previous one instead of scrolling. No newline is emitted,
/// hence the explicit flush.
///
/// # Errors
///
/// Returns the underlying I/O error if writing or flushing fails.
fn write_heartbeat<W: Write>(out: &mut W, pid: u32, counter: u64) -> io::Result<()> {
    write!(out, "\r[PID: {}] 我还活着... 计数器: {}", pid, counter)?;
    out.flush()
}

/// Entry point: publish the PID file, then emit heartbeats forever.
///
/// Exits with status 1 if the PID file (or its parent directory) cannot be
/// created, because without it the process cannot be monitored by PID file.
fn main() {
    // 1. Work out where the PID file lives and announce our PID.
    let pid_file_path = resolve_pid_file();
    let my_pid = process::id();
    say(&format!("测试进程已启动!PID: {}", my_pid));

    // Make sure the directory that will hold the PID file exists.
    if let Some(parent_dir) = pid_file_path.parent() {
        if let Err(e) = fs::create_dir_all(parent_dir) {
            complain(&format!(
                "错误:无法创建 PID 文件所在的目录 {}: {}",
                parent_dir.display(),
                e
            ));
            process::exit(1);
        }
    }

    say(&format!(
        "你可以随时在另一个终端中使用 'kill {}' 或 'kill -9 {}' 来终止我。",
        my_pid, my_pid
    ));

    match fs::write(&pid_file_path, my_pid.to_string()) {
        Ok(()) => say(&format!("成功写入 PID 文件到: {}", pid_file_path.display())),
        Err(e) => {
            complain(&format!(
                "错误:无法写入 PID 文件 {}: {}",
                pid_file_path.display(),
                e
            ));
            process::exit(1);
        }
    }

    // 2. Simulate a long-running worker: one heartbeat every two seconds.
    let mut counter: u64 = 0;
    // Once stdout fails we stop writing to it, but keep the process alive.
    let mut stdout_usable = true;
    loop {
        if stdout_usable {
            if let Err(e) = write_heartbeat(&mut io::stdout(), my_pid, counter) {
                stdout_usable = false;
                complain(&format!("警告:无法写入标准输出,已停止输出心跳: {}", e));
            }
        }

        // Sleeping keeps the loop from spinning a CPU core.
        thread::sleep(Duration::from_secs(2));

        // Wrapping keeps the increment panic-free in debug builds.
        counter = counter.wrapping_add(1);
    }
}