From 1ce4f1f1fcb2ae69f441c6a473cf1243eb05d35e Mon Sep 17 00:00:00 2001 From: "chengshuyi.csy" Date: Mon, 14 Feb 2022 10:39:00 +0800 Subject: [PATCH 01/11] rtrace-rs: global struct FunctionContainer and add new probe way. Since rtrace-drop need to construct Vec from str, let struct FunctionContainer can be accessed publicly. Rtrace add new probe way which probe by Vec as parameters. Signed-off-by: chengshuyi.csy --- .../net_diag/rtrace/rtrace-rs/src/rtrace.rs | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/source/tools/detect/net_diag/rtrace/rtrace-rs/src/rtrace.rs b/source/tools/detect/net_diag/rtrace/rtrace-rs/src/rtrace.rs index 02def72c..efa0d952 100644 --- a/source/tools/detect/net_diag/rtrace/rtrace-rs/src/rtrace.rs +++ b/source/tools/detect/net_diag/rtrace/rtrace-rs/src/rtrace.rs @@ -47,8 +47,17 @@ pub struct Function { } // see: https://github.com/alexcrichton/toml-rs/issues/395 #[derive(Clone, Debug, Deserialize, Serialize)] -struct FunctionContainer { - function: Vec, +pub struct FunctionContainer { + pub function: Vec, +} + +impl FunctionContainer { + pub fn from_str(s: &str) -> Result { + match toml::from_str(s) { + Ok(x) => Ok(x), + Err(y) => Err(anyhow!("str to FunctionContainer failed: {}", y)), + } + } } impl Function { @@ -301,6 +310,11 @@ impl Rtrace { Ok(()) } + pub fn probe_functions_from_functions(&mut self, functions: &Vec) -> Result<()> { + self.__probe_functions(functions)?; + Ok(()) + } + pub fn get_probe(&self, func: &String) -> Option<&Probe> { if self.probes.contains_key(func) { return Some(&self.probes[func]); -- Gitee From 7d2b91b38c0e1ac356aaf03a5a63e8cb20536e88 Mon Sep 17 00:00:00 2001 From: "chengshuyi.csy" Date: Wed, 16 Feb 2022 14:48:30 +0800 Subject: [PATCH 02/11] rtrace-drop: first to commit rtrace packet drop diagnosing code. Add rtrace packet drop traceability diagnostic code. Signed-off-by: chengshuyi.csy --- .../net_diag/rtrace/rtrace-drop/Cargo.toml | 19 + .../net_diag/rtrace/rtrace-drop/README.md | 186 +++++++++- .../net_diag/rtrace/rtrace-drop/src/base.rs | 77 ++++ .../net_diag/rtrace/rtrace-drop/src/l1/irq.rs | 0 .../net_diag/rtrace/rtrace-drop/src/l1/mod.rs | 28 ++ .../net_diag/rtrace/rtrace-drop/src/l2/mod.rs | 28 ++ .../rtrace/rtrace-drop/src/l3/conntrack.rs | 64 ++++ .../net_diag/rtrace/rtrace-drop/src/l3/fib.rs | 36 ++ .../rtrace/rtrace-drop/src/l3/iptables.rs | 101 ++++++ .../net_diag/rtrace/rtrace-drop/src/l3/mod.rs | 34 ++ .../net_diag/rtrace/rtrace-drop/src/l4/mod.rs | 31 ++ .../net_diag/rtrace/rtrace-drop/src/l4/tcp.rs | 177 +++++++++ .../net_diag/rtrace/rtrace-drop/src/l4/udp.rs | 24 ++ .../net_diag/rtrace/rtrace-drop/src/main.rs | 340 ++++++++++++++++++ 14 files changed, 1144 insertions(+), 1 deletion(-) create mode 100644 source/tools/detect/net_diag/rtrace/rtrace-drop/Cargo.toml create mode 100644 source/tools/detect/net_diag/rtrace/rtrace-drop/src/base.rs create mode 100644 source/tools/detect/net_diag/rtrace/rtrace-drop/src/l1/irq.rs create mode 100644 source/tools/detect/net_diag/rtrace/rtrace-drop/src/l1/mod.rs create mode 100644 source/tools/detect/net_diag/rtrace/rtrace-drop/src/l2/mod.rs create mode 100644 source/tools/detect/net_diag/rtrace/rtrace-drop/src/l3/conntrack.rs create mode 100644 source/tools/detect/net_diag/rtrace/rtrace-drop/src/l3/fib.rs create mode 100644 source/tools/detect/net_diag/rtrace/rtrace-drop/src/l3/iptables.rs create mode 100644 source/tools/detect/net_diag/rtrace/rtrace-drop/src/l3/mod.rs create mode 100644 source/tools/detect/net_diag/rtrace/rtrace-drop/src/l4/mod.rs create mode 100644 source/tools/detect/net_diag/rtrace/rtrace-drop/src/l4/tcp.rs create mode 100644 source/tools/detect/net_diag/rtrace/rtrace-drop/src/l4/udp.rs create mode 100644 source/tools/detect/net_diag/rtrace/rtrace-drop/src/main.rs diff --git a/source/tools/detect/net_diag/rtrace/rtrace-drop/Cargo.toml b/source/tools/detect/net_diag/rtrace/rtrace-drop/Cargo.toml new file mode 100644 index 00000000..51e3acc7 --- /dev/null +++ b/source/tools/detect/net_diag/rtrace/rtrace-drop/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "rtrace-drop" +version = "0.1.0" +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +anyhow = "1.0" +structopt = "0.3" +rtrace-rs = { version = "0.1.0", path = "../rtrace-rs"} +rtrace-parser = { version = "0.1.0", path = "../rtrace-parser"} +crossbeam-channel = "0.5" +once_cell = "1.8.0" +sysctl = "0.4.3" +log = "0.4.14" +uname = "0.1.1" +cached = "0.23.0" +dyn-clone = "1.0.4" \ No newline at end of file diff --git a/source/tools/detect/net_diag/rtrace/rtrace-drop/README.md b/source/tools/detect/net_diag/rtrace/rtrace-drop/README.md index b9268f7f..af8c18bc 100644 --- a/source/tools/detect/net_diag/rtrace/rtrace-drop/README.md +++ b/source/tools/detect/net_diag/rtrace/rtrace-drop/README.md @@ -1,3 +1,187 @@ # rtrace-drop -rtrace-parser is a network packet loss tracing and diagnosis tool. \ No newline at end of file +rtrace-drop是基于rtrace的网络丢包溯源诊断,能够有效地且精准地定位到网络丢包,并提供足量的数据信息。 + +rtrace-drop提供了模块化检测。 + + +## 使用说明 + +```shell +rtrace_drop 0.1.0 +Network packet drop traceability diagnosis + +USAGE: + rtrace-drop [OPTIONS] + +FLAGS: + -h, --help Prints help information + -V, --version Prints version information + +OPTIONS: + --config configuration file path + -e, --exclude ... Exclude packet loss points + --gen generate default configuration file + -i, --include ... Included drop points + -l, --list ... show all packet loss points +``` + +### 参数说明 + +* `-e, --exclude`: 表示不诊断的丢包点 +* `-i, --include`: 表示诊断的丢包点 +* `-l, --list`: 查看当前支持诊断的丢包点 +* `--config`: 配置文件 +* `--gen`: 生成默认的配置文件 + +注: `-e`的优先级高于`-i` + +### 使用样例 + + + +## 覆盖场景及检测原理 + +rtrace-drop将丢包点按照网络协议栈分成四个层次,分别是: +* l4层, 即传输层, 实现udp、tcp等网络丢包点的监控 +* l3层, 即网络层, 实现ip网络丢包点的监控 +* l2层, 即数据链路层, 实现中断等丢包点的监控 +* l1层, 即物理层, 实现硬件丢包点的监控 + +除了分成四个层次外,每个层次还可嵌套子模块。如l4层,可新增udp或tcp丢包点监控模块。 + +### l4层 + +l4层包含tcp和udp两大模块。 + +#### tcp + +tcp模块监控的丢包点有: + +* tcp_conn_request: syn或accept队列满丢包 +* tcp_v4_syn_recv_sock: accept队列满丢包 +* tcp_add_backlog: backlog队列满丢包 + +1. tcp_conn_request + +支持检测半连接队列满或全连接队列满导致的丢包, 并给出队列长度信息。 + +* 判断syn队列是否满的条件: `((struct inet_connection_sock *)sk).icsk_accept_queue.qlen.counter > sk.sk_max_ack_backlog` + +* 判断accept队列是否满的条件: `sk.sk_ack_backlog > sk.sk_max_ack_backlog` + +```toml +[[function]] +name = "tcp_conn_request" +params = ["basic"] +exprs = ["sk.sk_ack_backlog", "sk.sk_max_ack_backlog", "((struct inet_connection_sock *)sk).icsk_accept_queue.qlen.counter"] +``` + +2. tcp_v4_syn_recv_sock + +支持检测accept队列满导致的丢包,并给出队列长度信息。 + +* 判断accept队列是否满:`sk.sk_ack_backlog > sk.sk_max_ack_backlog` + +```toml +[[function]] +name = "tcp_v4_syn_recv_sock" +params = ["basic"] +exprs = ["sk.sk_ack_backlog", "sk.sk_max_ack_backlog"] +``` + +3. tcp_add_backlog + +支持检测backlog队列满导致的丢包,并给出当前rcvbuf和sndbuf信息。 + +* 判断backlog队列满:sk->sk_backlog.len + sk.sk_backlog.rmem_alloc > sk.sk_rcvbuf + sk.sk_sndbuf + HEADROOM + +```toml +[[function]] +name = "tcp_add_backlog" +params = ["basic"] +expr = ["sk.sk_rcvbuf", "sk.sk_sndbuf", "sk.sk_backlog.len", "sk.sk_backlog.rmem_alloc"] +``` + +4. tcp_rcv_established + +支持检测csum错误导致的丢包。 + +* 通过获取__skb_checksum_complete的返回值, 丢包条件是:`ret != 0` + +```toml +[[function]] +name = "tcp_rcv_established" +params = ["basic"] + +[[function]] +name = "__skb_checksum_complete" +params = ["basic", "kretprobe"] +``` + +#### UDP + + + +### l3层 + + +目前支持iptables、conntrack和FIB模块的丢包检查。 + +#### iptables + +iptables模块丢包检查,目前支持: + +* ipt_do_table: 跟踪iptable规则导致的丢包 + + +1. ipt_do_table + +* `kretprobe = NF_DROP`时,表示iptables某条drop规则丢弃了改包 +* 额外数据信息: 表信息、链信息; + +```toml +[[function]] +name = "ipt_do_table" +skb = 1 +params = ["basic", "kretprobe"] +exprs = ["state.net.ipv4.iptable_filter", "state.net.ipv4.iptable_mangle", "state.net.ipv4.iptable_raw", "state.net.ipv4.arptable_filter", "state.net.ipv4.iptable_security", "state.net.ipv4.nat_table", "table", "state.hook"] +``` + +#### conntrack + +conntrack模块丢包检查, 目前支持: + +* ipv4_conntrack_in +* ipv4_conntrack_local +* ipv4_helper +* ipv4_confirm + +#### FIB(Forwarding Infomation Base) + +FIB模块丢包检查,目前支持: + +* fib_validate_source + +1. fib_validate_source: 当`kretprobe < 0`时,表示丢包: + +* `kretprobe = -18`时,表示rp_filter过滤丢包 + + +```toml +[[fib_validate_source]] +name = "fib_validate_source" +params = ["basic", "kretprobe"] +``` + +### 其它模块 + +主要是系统环境参数配置方面,具体检测参数如下: + +* tcp_tw_recycle: 回收TIME-WAIT状态的socket。在nat场景下, 一般建议关闭。注:4.12版本后该参数已经被移除 + + + + + + diff --git a/source/tools/detect/net_diag/rtrace/rtrace-drop/src/base.rs b/source/tools/detect/net_diag/rtrace/rtrace-drop/src/base.rs new file mode 100644 index 00000000..11e64b2d --- /dev/null +++ b/source/tools/detect/net_diag/rtrace/rtrace-drop/src/base.rs @@ -0,0 +1,77 @@ +use anyhow::Result; +use cached::proc_macro::cached; +use dyn_clone::{clone_trait_object, DynClone}; +use rtrace_parser::func::Func; +use std::boxed::Box; +use uname::uname; + +/// +pub enum RtraceDropAction { + Continue, + Consume(String), +} + +/// This trait can be used to define a packet drop point. +/// For example, TcpConnRequest implements RtraceDrop with +/// the packet drop point in tcp_conn_request. It is also +/// possible to define a module, such as the tcp module contains +/// four RtraceDrop instances. +pub trait RtraceDrop: DynClone { + /// do some initialization. + fn init(&mut self) -> Result<()> { + Ok(()) + } + /// Data analysis to determine whether the packet drop condition + /// is established. + fn check_func(&mut self, _: &Func, _: &Vec) -> RtraceDropAction { + RtraceDropAction::Continue + } + /// Returns the tracing toml configuration string. + fn get_probe_string(&self) -> &str { + "" + } + /// Returns the name of the packet drop point. + fn get_name(&self) -> &str; + /// Returns "Support" or "Not Support". + fn get_status(&self) -> &str { + "[Not Support]" + } + /// Get the child RtraceDrop instance contained in the module. + fn get_subpoints(&self) -> Option<&Vec>> { + None + } +} + +clone_trait_object!(RtraceDrop); + +#[cached(size = 1)] +pub fn get_current_kernel_version() -> u64 { + let info = uname().expect("uname failed").release; + let tmps: Vec<&str> = info.split(".").collect(); + if tmps.len() < 3 { + panic!("failed to parser kenel release version"); + } + let major: u64 = tmps[0] + .parse() + .expect("failed to parser kenel release version"); + let minor: u64 = tmps[1] + .parse() + .expect("failed to parser kenel release version"); + let patch: u64 = tmps[2] + .parse() + .expect("failed to parser kenel release version"); + ((major) << 16) + ((minor) << 8) + (patch) +} + +// 0 is equal, <0 is less than current, >0 is larger than. +pub fn kernel_version_compare(major: u64, minor: u64, patch: u64) -> i32 { + let current_version = get_current_kernel_version(); + let target_version = ((major) << 16) + ((minor) << 8) + (patch); + if target_version > current_version { + 1 + } else if target_version < current_version { + -1 + } else { + 0 + } +} diff --git a/source/tools/detect/net_diag/rtrace/rtrace-drop/src/l1/irq.rs b/source/tools/detect/net_diag/rtrace/rtrace-drop/src/l1/irq.rs new file mode 100644 index 00000000..e69de29b diff --git a/source/tools/detect/net_diag/rtrace/rtrace-drop/src/l1/mod.rs b/source/tools/detect/net_diag/rtrace/rtrace-drop/src/l1/mod.rs new file mode 100644 index 00000000..73223630 --- /dev/null +++ b/source/tools/detect/net_diag/rtrace/rtrace-drop/src/l1/mod.rs @@ -0,0 +1,28 @@ + + +use crate::base::{RtraceDrop, RtraceDropAction}; +use anyhow::Result; + +#[derive(Default, Clone)] +pub struct L1 { + points: Vec>, +} + +impl RtraceDrop for L1 { + + fn init(&mut self) -> Result<()> { + for point in &mut self.points { + point.init()?; + } + Ok(()) + } + + fn get_name(&self) -> &str { + "l1" + } + + fn get_subpoints(&self) -> Option<&Vec>> { + Some(&self.points) + } + +} \ No newline at end of file diff --git a/source/tools/detect/net_diag/rtrace/rtrace-drop/src/l2/mod.rs b/source/tools/detect/net_diag/rtrace/rtrace-drop/src/l2/mod.rs new file mode 100644 index 00000000..9e76334c --- /dev/null +++ b/source/tools/detect/net_diag/rtrace/rtrace-drop/src/l2/mod.rs @@ -0,0 +1,28 @@ + + +use crate::base::{RtraceDrop, RtraceDropAction}; +use anyhow::Result; + +#[derive(Default, Clone)] +pub struct L2 { + points: Vec>, +} + +impl RtraceDrop for L2 { + + fn init(&mut self) -> Result<()> { + for point in &mut self.points { + point.init()?; + } + Ok(()) + } + + fn get_name(&self) -> &str { + "l2" + } + + fn get_subpoints(&self) -> Option<&Vec>> { + Some(&self.points) + } + +} \ No newline at end of file diff --git a/source/tools/detect/net_diag/rtrace/rtrace-drop/src/l3/conntrack.rs b/source/tools/detect/net_diag/rtrace/rtrace-drop/src/l3/conntrack.rs new file mode 100644 index 00000000..905b2b26 --- /dev/null +++ b/source/tools/detect/net_diag/rtrace/rtrace-drop/src/l3/conntrack.rs @@ -0,0 +1,64 @@ +use crate::base::{RtraceDrop, RtraceDropAction}; +use anyhow::Result; +use rtrace_parser::func::Func; +use rtrace_rs::bindings::*; +use std::boxed::Box; + +#[derive(Default, Clone)] +pub struct Conntrack { + points: Vec>, +} + +impl RtraceDrop for Conntrack { + fn init(&mut self) -> Result<()> { + self.points.push(Box::new(Ipv4ConntrackIn::default())); + self.points.push(Box::new(Ipv4ConntrackLocal::default())); + self.points.push(Box::new(Ipv4Helper::default())); + self.points.push(Box::new(Ipv4Confirm::default())); + Ok(()) + } + + fn get_name(&self) -> &str { + "conntrack" + } + + fn get_subpoints(&self) -> Option<&Vec>> { + Some(&self.points) + } + + fn get_status(&self) -> &str { + "[Support]" + } +} + +#[derive(Default, Clone)] +pub struct Ipv4ConntrackIn {} +impl RtraceDrop for Ipv4ConntrackIn { + fn get_name(&self) -> &str { + "ipv4_conntrack_in" + } +} + +#[derive(Default, Clone)] +pub struct Ipv4ConntrackLocal {} +impl RtraceDrop for Ipv4ConntrackLocal { + fn get_name(&self) -> &str { + "ipv4_conntrack_local" + } +} + +#[derive(Default, Clone)] +pub struct Ipv4Helper {} +impl RtraceDrop for Ipv4Helper { + fn get_name(&self) -> &str { + "ipv4_helper" + } +} + +#[derive(Default, Clone)] +pub struct Ipv4Confirm {} +impl RtraceDrop for Ipv4Confirm { + fn get_name(&self) -> &str { + "ipv4_confirm" + } +} diff --git a/source/tools/detect/net_diag/rtrace/rtrace-drop/src/l3/fib.rs b/source/tools/detect/net_diag/rtrace/rtrace-drop/src/l3/fib.rs new file mode 100644 index 00000000..b55afcae --- /dev/null +++ b/source/tools/detect/net_diag/rtrace/rtrace-drop/src/l3/fib.rs @@ -0,0 +1,36 @@ +use crate::base::{RtraceDrop, RtraceDropAction}; +use anyhow::Result; +use rtrace_parser::func::Func; +use rtrace_rs::bindings::*; +use std::boxed::Box; + +#[derive(Default, Clone)] +pub struct Fib { + points: Vec>, +} + +impl RtraceDrop for Fib { + fn init(&mut self) -> Result<()> { + Ok(()) + } + + fn get_name(&self) -> &str { + "fib" + } + + fn get_subpoints(&self) -> Option<&Vec>> { + Some(&self.points) + } + + fn get_status(&self) -> &str { + "[Support]" + } +} + +#[derive(Default, Clone)] +struct FibValidateSource {} +impl RtraceDrop for FibValidateSource { + fn get_name(&self) -> &str { + "fib_validate_source" + } +} diff --git a/source/tools/detect/net_diag/rtrace/rtrace-drop/src/l3/iptables.rs b/source/tools/detect/net_diag/rtrace/rtrace-drop/src/l3/iptables.rs new file mode 100644 index 00000000..7b36569a --- /dev/null +++ b/source/tools/detect/net_diag/rtrace/rtrace-drop/src/l3/iptables.rs @@ -0,0 +1,101 @@ +use crate::base::{RtraceDrop, RtraceDropAction}; +use anyhow::Result; +use rtrace_parser::func::Func; +use rtrace_rs::bindings::*; +use std::boxed::Box; + +#[derive(Default, Clone)] +pub struct Iptables { + points: Vec>, +} + +impl RtraceDrop for Iptables { + fn init(&mut self) -> Result<()> { + self.points.push(Box::new(IptDoTable::default())); + Ok(()) + } + + fn get_name(&self) -> &str { + "iptables" + } + + fn get_subpoints(&self) -> Option<&Vec>> { + Some(&self.points) + } + + fn get_status(&self) -> &str { + "[Support]" + } +} + +#[derive(Default, Clone)] +struct IptDoTable { + table: String, + chain: String, +} + +impl RtraceDrop for IptDoTable { + fn get_probe_string(&self) -> &str { + r#" +[[function]] +name = "ipt_do_table" +skb = 1 +params = ["basic", "kretprobe"] +exprs = ["state.net.ipv4.iptable_filter", "state.net.ipv4.iptable_mangle", "state.net.ipv4.iptable_raw", "state.net.ipv4.arptable_filter", "state.net.ipv4.iptable_security", "state.net.ipv4.nat_table", "table", "state.hook"] + "# + } + + fn get_name(&self) -> &str { + "ipt_do_table" + } + + fn get_status(&self) -> &str { + "[Support]" + } + + fn check_func(&mut self, func: &Func, vals: &Vec) -> RtraceDropAction { + if func.is_kretprobe() { + let bi = func + .get_struct(INFO_TYPE::BASIC_INFO) + .expect("failed to find basic info") + as *const BASIC_INFO_struct; + let ret = unsafe { (*bi).ret }; + if ret == 0 { + return RtraceDropAction::Consume(format!( + "{} of {} drop packet", + self.chain, self.table + )); + } + } else { + self.table = "none".to_owned(); + if vals[6] == vals[0] { + self.table = "filter".to_owned(); + } + if vals[6] == vals[0] { + self.table = "mangle".to_owned(); + } + if vals[6] == vals[0] { + self.table = "raw".to_owned(); + } + if vals[6] == vals[0] { + self.table = "arp".to_owned(); + } + if vals[6] == vals[0] { + self.table = "security".to_owned(); + } + if vals[6] == vals[0] { + self.table = "nat".to_owned(); + } + + match vals[7] { + 0 => self.chain = "PREROUTING".to_owned(), + 1 => self.chain = "LOCAL IN".to_owned(), + 2 => self.chain = "FORWARD".to_owned(), + 3 => self.chain = "LOCAL OUT".to_owned(), + 4 => self.chain = "POSTROUTING".to_owned(), + _ => self.chain = "none".to_owned(), + } + } + RtraceDropAction::Continue + } +} diff --git a/source/tools/detect/net_diag/rtrace/rtrace-drop/src/l3/mod.rs b/source/tools/detect/net_diag/rtrace/rtrace-drop/src/l3/mod.rs new file mode 100644 index 00000000..051ca7bf --- /dev/null +++ b/source/tools/detect/net_diag/rtrace/rtrace-drop/src/l3/mod.rs @@ -0,0 +1,34 @@ +mod conntrack; +mod fib; +mod iptables; + +use crate::base::RtraceDrop; +use crate::l3::conntrack::Conntrack; +use crate::l3::fib::Fib; +use crate::l3::iptables::Iptables; +use anyhow::Result; + +#[derive(Default, Clone)] +pub struct L3 { + points: Vec>, +} + +impl RtraceDrop for L3 { + fn init(&mut self) -> Result<()> { + self.points.push(Box::new(Iptables::default())); + self.points.push(Box::new(Conntrack::default())); + self.points.push(Box::new(Fib::default())); + for point in &mut self.points { + point.init()?; + } + Ok(()) + } + + fn get_name(&self) -> &str { + "l3" + } + + fn get_subpoints(&self) -> Option<&Vec>> { + Some(&self.points) + } +} diff --git a/source/tools/detect/net_diag/rtrace/rtrace-drop/src/l4/mod.rs b/source/tools/detect/net_diag/rtrace/rtrace-drop/src/l4/mod.rs new file mode 100644 index 00000000..6e423cf4 --- /dev/null +++ b/source/tools/detect/net_diag/rtrace/rtrace-drop/src/l4/mod.rs @@ -0,0 +1,31 @@ +mod tcp; +mod udp; + +use crate::base::RtraceDrop; +use crate::l4::tcp::Tcp; +use crate::l4::udp::Udp; +use anyhow::Result; + +#[derive(Default, Clone)] +pub struct L4 { + points: Vec>, +} + +impl RtraceDrop for L4 { + fn init(&mut self) -> Result<()> { + self.points.push(Box::new(Tcp::default())); + self.points.push(Box::new(Udp::default())); + for point in &mut self.points { + point.init()?; + } + Ok(()) + } + + fn get_subpoints(&self) -> Option<&Vec>> { + Some(&self.points) + } + + fn get_name(&self) -> &str { + "l4" + } +} diff --git a/source/tools/detect/net_diag/rtrace/rtrace-drop/src/l4/tcp.rs b/source/tools/detect/net_diag/rtrace/rtrace-drop/src/l4/tcp.rs new file mode 100644 index 00000000..0e9656f4 --- /dev/null +++ b/source/tools/detect/net_diag/rtrace/rtrace-drop/src/l4/tcp.rs @@ -0,0 +1,177 @@ +use crate::base::{kernel_version_compare, RtraceDrop, RtraceDropAction}; +use anyhow::Result; +use rtrace_parser::func::Func; +use rtrace_rs::bindings::*; +use std::boxed::Box; + +#[derive(Default, Clone)] +pub struct Tcp { + points: Vec>, +} + +impl RtraceDrop for Tcp { + fn init(&mut self) -> Result<()> { + self.points.push(Box::new(TcpConnRequest::default())); + self.points.push(Box::new(Tcpv4SynRecvSock::default())); + self.points.push(Box::new(TcpAddBacklog::default())); + self.points.push(Box::new(SkbChecksumComplete::default())); + Ok(()) + } + + fn get_name(&self) -> &str { + "tcp" + } + + fn get_subpoints(&self) -> Option<&Vec>> { + Some(&self.points) + } +} + +impl RtraceDrop for TcpConnRequest { + fn get_probe_string(&self) -> &str { + r#" +[[function]] +name = "tcp_conn_request" +params = ["basic"] +exprs = ["sk.sk_ack_backlog", "sk.sk_max_ack_backlog", "((struct inet_connection_sock *)sk).icsk_accept_queue.qlen.counter"] + "# + } + + fn get_name(&self) -> &str { + "tcp_conn_request" + } + + fn get_status(&self) -> &str { + "[Support]" + } + + fn check_func(&mut self, func: &Func, vals: &Vec) -> RtraceDropAction { + // ((struct inet_connection_sock *)sk).icsk_accept_queue.qlen.counter > sk.sk_max_ack_backlog : syn queue overflow + if vals[2] > vals[1] { + return RtraceDropAction::Consume(format!( + "Syn queue overflow: {} > {}", + vals[2], vals[1] + )); + } + // sk.sk_ack_backlog > sk.sk_max_ack_backlog : accept queue overflow + if vals[0] > vals[1] { + return RtraceDropAction::Consume(format!( + "Accept queue overflow: {} > {}", + vals[0], vals[1] + )); + } + RtraceDropAction::Continue + } +} +#[derive(Default, Clone)] +struct Tcpv4SynRecvSock {} +impl RtraceDrop for Tcpv4SynRecvSock { + fn get_probe_string(&self) -> &str { + r#" +[[function]] +name = "tcp_v4_syn_recv_sock" +params = ["basic"] +expr = ["sk.sk_ack_backlog", "sk.sk_max_ack_backlog"] + "# + } + + fn get_name(&self) -> &str { + "tcp_v4_syn_recv_sock" + } + + fn get_status(&self) -> &str { + "[Support]" + } + + fn check_func(&mut self, func: &Func, vals: &Vec) -> RtraceDropAction { + // sk.sk_ack_backlog > sk.sk_max_ack_backlog : accept queue overflow + if vals[0] > vals[1] { + return RtraceDropAction::Consume(format!( + "Accept queue overflow: {} > {}", + vals[0], vals[1] + )); + } + RtraceDropAction::Continue + } +} + +#[derive(Default, Clone)] +struct TcpConnRequest {} +#[derive(Default, Clone)] +struct TcpAddBacklog { + headroom: u64, +} +impl RtraceDrop for TcpAddBacklog { + fn init(&mut self) -> Result<()> { + let res = kernel_version_compare(4, 19, 0); + if res < 0 { + self.headroom = 0; + } else { + self.headroom = 64 * 1024; + } + Ok(()) + } + fn get_probe_string(&self) -> &str { + r#" +[[function]] +name = "tcp_add_backlog" +params = ["basic"] +expr = ["sk.sk_backlog.len", "sk.sk_backlog.rmem_alloc", "sk.sk_rcvbuf", "sk.sk_sndbuf" ] + "# + } + + fn get_status(&self) -> &str { + "[Support]" + } + + fn check_func(&mut self, _func: &Func, vals: &Vec) -> RtraceDropAction { + if vals[0] + vals[1] > vals[2] + vals[3] + self.headroom { + return RtraceDropAction::Consume(format!( + "Backlog queue overflow: + Judge Expression: sk_backlog.len + sk_backlog.rmem_alloc > sk_rcvbuf + sk_sndbuf + HEADROOM + Actual Expression: {} + {} > {} + {} + {} + ", + vals[0], vals[1], vals[2], vals[3], self.headroom + )); + } + RtraceDropAction::Continue + } + + fn get_name(&self) -> &str { + "tcp_add_backlog" + } +} + +#[derive(Default, Clone)] +struct SkbChecksumComplete {} +impl RtraceDrop for SkbChecksumComplete { + fn get_probe_string(&self) -> &str { + r#" +[[function]] +name = "__skb_checksum_complete" +params = ["basic", "kretprobe"] + "# + } + + fn get_status(&self) -> &str { + "[Support]" + } + + fn check_func(&mut self, func: &Func, _vals: &Vec) -> RtraceDropAction { + if func.is_kretprobe() { + let bi = func + .get_struct(INFO_TYPE::BASIC_INFO) + .expect("failed to find basic info") + as *const BASIC_INFO_struct; + let ret = unsafe { (*bi).ret }; + if ret == 0 { + return RtraceDropAction::Consume(format!("csum error drop packet")); + } + } + RtraceDropAction::Continue + } + + fn get_name(&self) -> &str { + "__skb_checksum_complete" + } +} diff --git a/source/tools/detect/net_diag/rtrace/rtrace-drop/src/l4/udp.rs b/source/tools/detect/net_diag/rtrace/rtrace-drop/src/l4/udp.rs new file mode 100644 index 00000000..c52a5f58 --- /dev/null +++ b/source/tools/detect/net_diag/rtrace/rtrace-drop/src/l4/udp.rs @@ -0,0 +1,24 @@ +use crate::base::{RtraceDrop, RtraceDropAction}; +use anyhow::Result; + +#[derive(Default, Clone)] +pub struct Udp { + points: Vec>, +} + +impl RtraceDrop for Udp { + fn init(&mut self) -> Result<()> { + for point in &mut self.points { + point.init()?; + } + Ok(()) + } + + fn get_name(&self) -> &str { + "udp" + } + + fn get_subpoints(&self) -> Option<&Vec>> { + Some(&self.points) + } +} diff --git a/source/tools/detect/net_diag/rtrace/rtrace-drop/src/main.rs b/source/tools/detect/net_diag/rtrace/rtrace-drop/src/main.rs new file mode 100644 index 00000000..56d44910 --- /dev/null +++ b/source/tools/detect/net_diag/rtrace/rtrace-drop/src/main.rs @@ -0,0 +1,340 @@ +mod base; +mod l1; +mod l2; +mod l3; +mod l4; + +use crate::base::{RtraceDrop, RtraceDropAction}; +use crate::l1::L1; +use crate::l2::L2; +use crate::l3::L3; +use crate::l4::L4; +use anyhow::anyhow; +use anyhow::Result; +use log::*; +use rtrace_parser::func::Func; +use rtrace_parser::perf::{perf_inital_thread2, perf_recv_timeout}; +use rtrace_rs::rtrace::{Config, FunctionContainer, Rtrace}; +use std::boxed::Box; +use std::collections::{HashMap, HashSet}; +use std::io::Write; +use std::path::PathBuf; +use std::time::Duration; +use structopt::StructOpt; +use uname::uname; + +#[derive(Debug, StructOpt)] +#[structopt( + name = "rtrace_drop", + about = "Network packet drop traceability diagnosis" +)] +pub struct Cli { + #[structopt(long, help = "configuration file path")] + config: Option, + #[structopt(long, help = "generate default configuration file")] + gen: Option, + #[structopt(long, short, help = "Included drop points")] + include: Option>, + #[structopt(long, short, help = "Exclude packet loss points")] + exclude: Option>, + #[structopt(long, short, help = "show all packet loss points")] + list: Option>, +} + +/// main entry +#[derive(Default, Clone)] +struct AllDrop { + points: Vec>, +} + +impl RtraceDrop for AllDrop { + fn init(&mut self) -> Result<()> { + // l1 to l4 + self.points.push(Box::new(L1::default())); + self.points.push(Box::new(L2::default())); + self.points.push(Box::new(L3::default())); + self.points.push(Box::new(L4::default())); + for point in &mut self.points { + point.init()?; + } + Ok(()) + } + + fn get_subpoints(&self) -> Option<&Vec>> { + Some(&self.points) + } + + fn get_name(&self) -> &str { + "all" + } +} + +fn main() { + let cli = Cli::from_args(); + let include_hs = build_hs(&cli.include, "all"); + let exclude_hs = build_hs(&cli.exclude, "none"); + let list_hs = build_hs(&cli.list, "all"); + let mut ad: Box = Box::new(AllDrop::default()); + ad.init().expect("failed to init drop instance"); + + if let Some(path) = cli.gen { + gen_config(&path).expect("unable to generate config file"); + return; + } + + if list_hs.len() != 0 { + list_points(&ad, &list_hs, 0, false); + return; + } + + let mut rtrace; + + match &cli.config { + Some(config) => rtrace = Rtrace::from_file(config).expect("failed to create Rtrace object"), + None => {println!("please specify config file");return;}, + } + + let mut enabled_points = + get_enabled_points(&ad, &include_hs, &exclude_hs).expect("failed to solve points"); + let function_mapping = + probe_funcitons(&mut rtrace, &enabled_points).expect("Failed to insert probe function"); + + let (rx, tx) = crossbeam_channel::unbounded(); + perf_inital_thread2(rtrace.perf_fd(), (rx, tx)); + + loop { + let res = perf_recv_timeout(Duration::from_millis(100)); + match res { + Ok(data) => { + let f = Func::new(data.1); + match function_mapping.get(&f.get_name_no_offset()) { + None => {} + Some(names) => { + for name in names { + if let Some(point) = enabled_points.get_mut(name) { + match point.0.check_func(&f, &Vec::new()) { + RtraceDropAction::Continue => {} + RtraceDropAction::Consume(x) => { + println!("{}", x); + return; + } + } + } + } + } + } + } + _ => {} + } + } +} + +fn get_exprs_vals(rtrace: &Rtrace, f: &Func) -> Result> { + // todo: fix confict + let mut vals = Vec::new(); + let name = f.get_name_no_offset(); + if let Some(probe) = rtrace.get_probe(&name) { + let mut off = 0; + let mut val; + for sz in probe.get_expr_sz() { + let ptr = f.get_extra(off as usize); + match sz { + 1 => val = unsafe { *ptr } as u64, + 2 => val = unsafe { *(ptr as *const u16) } as u64, + 4 => val = unsafe { *(ptr as *const u32) } as u64, + 8 => val = unsafe { *(ptr as *const u64) }, + _ => return Err(anyhow!("size: {} not support", sz)), + } + vals.push(val); + off += sz; + } + } else { + return Err(anyhow!("entry {} not found in probes of rtrace", name)); + } + Ok(vals) +} + +fn gen_config(path: &str) -> Result<()> { + let mut p = PathBuf::from(path); + std::fs::create_dir_all(&p)?; + p.push("drop.toml"); + let text = r#" +[basic] +debug = false +btf_path = "/boot/vmlinux-4.19.91-007.ali4000.alios7.x86_64" +duration = 0 +protocol = "tcp" +recv = true +[[filter]] +pid = 0 +dst = "0.0.0.0:0" +src = "0.0.0.0:0" + "#; + let mut config = Config::from_str(text)?; + config.basic.btf_path = Some(get_btf_path()); + let string = Config::to_string(&config)?; + let mut output = std::fs::File::create(p)?; + write!(output, "{}", string)?; + Ok(()) +} + +fn probe_funcitons( + rtrace: &mut Rtrace, + enabled_points: &HashMap, FunctionContainer)>, +) -> Result>> { + let mut function_mapping = HashMap::new(); + let mut function_hm = HashMap::new(); + for (_, point) in enabled_points { + for function in &point.1.function { + function_hm + .entry(function.name.clone()) + .or_insert(function.clone()); + + let tmp = function_mapping + .entry(function.name.clone()) + .or_insert(Vec::new()); + tmp.push(point.0.get_name().to_string()); + } + } + + let mut functions = Vec::new(); + for (_, v) in function_hm { + debug!("probe drop point: {}", v.name); + functions.push(v); + } + + rtrace.probe_functions_from_functions(&functions)?; + Ok(function_mapping) +} + +fn get_btf_path() -> String { + let mut default = String::from("/boot/vmlinux-"); + let info = uname().expect("uname failed"); + default.push_str(&info.release[..]); + default +} + +// get enabled points according to include and exclude. +fn get_enabled_points( + rd: &Box, + include_hs: &HashSet, + exclude_hs: &HashSet, +) -> Result, FunctionContainer)>> { + let mut enabled_points = HashMap::new(); + let mut disabled_points = HashMap::new(); + let mut returned_points = HashMap::new(); + if include_hs.len() == 0 { + build_enabled_points(&mut enabled_points, rd, &include_hs, true); + } else { + build_enabled_points(&mut enabled_points, rd, &include_hs, false); + } + if exclude_hs.len() != 0 { + build_disabled_points(&mut disabled_points, rd, &exclude_hs, false); + } + + // exclude has the higher priority than include. + for (name, _) in &disabled_points { + match enabled_points.remove(name) { + None => warn!("Set exclude flag {} is meaningless", name), + _ => {} + } + } + // clone all enabled points. + for (name, point) in enabled_points { + returned_points.insert( + name, + ( + point.clone(), + FunctionContainer::from_str(&point.get_probe_string())?, + ), + ); + } + Ok(returned_points) +} + +// generate disabled points according to include. +fn build_enabled_points<'a>( + enabled_points: &mut HashMap>, + rd: &'a Box, + hs: &HashSet, + mut parent_enable: bool, +) { + let name = rd.get_name(); + if parent_enable == false && hs.contains(name) { + parent_enable = true; + } + if parent_enable { + enabled_points.entry(name.clone().to_owned()).or_insert(rd); + } + + if let Some(points) = rd.get_subpoints() { + for point in points { + build_enabled_points(enabled_points, point, hs, parent_enable); + } + } +} + +// generate disabled points according to exclude. +fn build_disabled_points<'a>( + disabled_points: &mut HashMap>, + rd: &'a Box, + hs: &HashSet, + mut parent_disable: bool, +) { + let name = rd.get_name(); + if parent_disable == false && hs.contains(name) { + parent_disable = true; + } + if parent_disable { + disabled_points.entry(name.clone().to_owned()).or_insert(rd); + } + + if let Some(points) = rd.get_subpoints() { + for point in points { + build_disabled_points(disabled_points, point, hs, parent_disable); + } + } +} + +// Display currently supported and unsupported packet drop points. +fn list_points( + rd: &Box, + hs: &HashSet, + indent: usize, + mut parent_enable: bool, +) { + let name = rd.get_name(); + if parent_enable == false && hs.contains(name) { + parent_enable = true; + } + if parent_enable { + print!("{:indent$}{:<30}", "", name, indent = indent * 4); + match rd.get_subpoints() { + // Implement RtraceDrop as a module. + Some(_) => println!(), + // Implement RtraceDrop as a specific packet drop point. + None => println!("\t\t{}", rd.get_status()), + } + } + if let Some(points) = rd.get_subpoints() { + for point in points { + list_points(point, hs, indent + 1, parent_enable); + } + } +} + +// Translate `Vec` to `HashSet`. If vec is None, +// we will insert a default data. +fn build_hs(vec: &Option>, default: &str) -> HashSet { + let mut hs = HashSet::new(); + if let Some(items) = vec { + for item in items { + hs.insert(item.clone()); + } + + if hs.len() == 0 { + hs.insert(default.to_owned()); + } + } + hs +} -- Gitee From 4a1a4a73cae176bfbe058fcc66eb74bfc4ae086c Mon Sep 17 00:00:00 2001 From: "chengshuyi.csy" Date: Wed, 16 Feb 2022 14:50:57 +0800 Subject: [PATCH 03/11] rtrace-ebpf: Adjust the debug log print macro. Removed debug log related macros from common.bpf.h and moved it to common.usr.h. Because currently only user mode code uses this macro Signed-off-by: chengshuyi.csy --- .../detect/net_diag/rtrace/ebpf/common.def.h | 22 ------------------- .../detect/net_diag/rtrace/ebpf/common.usr.h | 13 +++++++++++ 2 files changed, 13 insertions(+), 22 deletions(-) diff --git a/source/tools/detect/net_diag/rtrace/ebpf/common.def.h b/source/tools/detect/net_diag/rtrace/ebpf/common.def.h index a339df6c..60957445 100644 --- a/source/tools/detect/net_diag/rtrace/ebpf/common.def.h +++ b/source/tools/detect/net_diag/rtrace/ebpf/common.def.h @@ -6,28 +6,6 @@ #define MAC_HEADER_SIZE 14 #define FILTER_RULES_MAX_NUM 10 -#define USR_DEUBG - -#if defined(__VMLINUX_H__) && defined(BPF_DEBUG) -#define output(...) __bpf_printk(__VA_ARGS__); -#elif !defined(__VMLINUX_H__) && defined(USR_DEUBG) -#define output(...) printf(__VA_ARGS__); -#else -#define output(...) -#endif - -#define pr_err(fmt, ...) \ - do \ - { \ - output("ERROR: " fmt, ##__VA_ARGS__); \ - } while (0) - -#define pr_dbg(fmt, ...) \ - do \ - { \ - output("DEBUG: " fmt, ##__VA_ARGS__); \ - } while (0) - #define TO_STR(a) #a #define TO_STRING(a) TO_STR(a) diff --git a/source/tools/detect/net_diag/rtrace/ebpf/common.usr.h b/source/tools/detect/net_diag/rtrace/ebpf/common.usr.h index 58e1503b..463290b3 100644 --- a/source/tools/detect/net_diag/rtrace/ebpf/common.usr.h +++ b/source/tools/detect/net_diag/rtrace/ebpf/common.usr.h @@ -7,6 +7,19 @@ extern bool gdebug; +#define pr_err(fmt, ...) \ + do \ + { \ + printf("ERROR: " fmt, ##__VA_ARGS__); \ + } while (0) + +#define pr_dbg(fmt, ...) \ + do \ + { \ + if (gdebug) \ + printf("DEBUG: " fmt, ##__VA_ARGS__); \ + } while (0) + #ifndef zfree #define zfree(ptr) ( \ { \ -- Gitee From e1e58940d53e5e7d85d0be1e698b0ab62a858871 Mon Sep 17 00:00:00 2001 From: "chengshuyi.csy" Date: Wed, 16 Feb 2022 14:52:59 +0800 Subject: [PATCH 04/11] rtrace-ebpf: define more kprobe_sk(x)_skb(y) ebpf function. Because the sk parameter position of the tcp_conn_request function is 3, and the skb parameter position is 4. Therefore, the kprobe_sk3_skb4 function needs to be defined in advance Signed-off-by: chengshuyi.csy --- source/tools/detect/net_diag/rtrace/ebpf/rtrace.bpf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/source/tools/detect/net_diag/rtrace/ebpf/rtrace.bpf.c b/source/tools/detect/net_diag/rtrace/ebpf/rtrace.bpf.c index 764cf667..90402a12 100644 --- a/source/tools/detect/net_diag/rtrace/ebpf/rtrace.bpf.c +++ b/source/tools/detect/net_diag/rtrace/ebpf/rtrace.bpf.c @@ -717,5 +717,7 @@ SK0_SKB_ARG_FN(5) SK_SKB_ARG_FN(1, 2) SK_SKB_ARG_FN(2, 3) +SK_SKB_ARG_FN(3, 4) +SK_SKB_ARG_FN(4, 5) char LICENSE[] SEC("license") = "GPL"; \ No newline at end of file -- Gitee From 3d9501a99712338ea86185d1a1d2e193afdb7b45 Mon Sep 17 00:00:00 2001 From: "chengshuyi.csy" Date: Wed, 16 Feb 2022 14:55:29 +0800 Subject: [PATCH 05/11] rtrace-ebpf: fix bug in rtrace_trace_program. fix bug in rtrace_trace_program, and refactor code. Signed-off-by: chengshuyi.csy --- .../detect/net_diag/rtrace/ebpf/rtrace.c | 66 +++++++++++-------- 1 file changed, 40 insertions(+), 26 deletions(-) diff --git a/source/tools/detect/net_diag/rtrace/ebpf/rtrace.c b/source/tools/detect/net_diag/rtrace/ebpf/rtrace.c index 8b0afa6b..8470e91b 100644 --- a/source/tools/detect/net_diag/rtrace/ebpf/rtrace.c +++ b/source/tools/detect/net_diag/rtrace/ebpf/rtrace.c @@ -45,15 +45,14 @@ static int libbpf_print_fn(enum libbpf_print_level level, int bump_memlock_rlimit(void) { - struct rlimit rlim_new = { - .rlim_cur = RLIM_INFINITY, - .rlim_max = RLIM_INFINITY, - }; + struct rlimit rlim_new = { + .rlim_cur = RLIM_INFINITY, + .rlim_max = RLIM_INFINITY, + }; - return setrlimit(RLIMIT_MEMLOCK, &rlim_new); + return setrlimit(RLIMIT_MEMLOCK, &rlim_new); } - /** * @brief enable debug or not * @@ -181,30 +180,45 @@ err_out: struct bpf_program *rtrace_trace_program(struct rtrace *r, char *func, int sk, int skb) { struct bpf_program *prog; - int func_proto_id; + int err, func_proto_id; + + err = 0; if (is_special_func(func)) { prog = bpf_object__find_program_by_name(r->obj->obj, func); + goto find_prog; } - else + + // When skb is 0, it means that the skb parameter position + // needs to be automatically located. + if (skb == 0) { - if (sk == 0 && skb == 0) + func_proto_id = btf_find_func_proto_id(r->btf, func); + sk = btf_func_proto_find_param_pos(r->btf, func_proto_id, "sock", NULL); + sk = sk < 0 ? 0 : sk; + skb = btf_func_proto_find_param_pos(r->btf, func_proto_id, "sk_buff", NULL); + if (skb <= 0) { - func_proto_id = btf_find_func_proto_id(r->btf, func); - sk = btf_func_proto_find_param_pos(r->btf, func_proto_id, "sock", NULL); - sk = sk < 0 ? 0 : sk; - skb = btf_func_proto_find_param_pos(r->btf, func_proto_id, "sk_buff", NULL); - if (skb < 0) - { - pr_err("func-%s prog is null, sk = %d, skb = %d.\n", func, sk, skb); - return NULL; - } + err = skb; + goto err_out; } - prog = object_find_program(r->obj->obj, sk, skb); } - // if (gdebug) - // insns_dump(bpf_program__insns(prog), bpf_program__insn_cnt(prog)); + prog = object_find_program(r->obj->obj, sk, skb); + +find_prog: + if (!prog) + { + err = -ENOENT; + goto err_out; + } + + pr_dbg("find prog: %s for func: %s, sk = %d, skb = %d\n", bpf_program__name(prog), func, sk, skb); return prog; + +err_out: + pr_err("failed to find prog for func: %s, sk = %d, skb = %d, err = %d.\n", func, sk, skb, err); + errno = -err; + return NULL; } /** @@ -240,7 +254,7 @@ int rtrace_trace_load_prog(struct rtrace *r, struct bpf_program *prog, if (gdebug) fd = bpf_load_program_xattr(&attr, log_buf, log_buf_size); - else + else fd = bpf_load_program_xattr(&attr, NULL, 0); if (fd < 0) { @@ -299,7 +313,7 @@ static int dynamic_ptregs_param_offset(int param_pos) /** * @brief Calculate the corresponding offset according to the accessed structure member - * + * * @param r rtrace context * @param df array of members accessed by the structure * @param df_cnt array length @@ -326,7 +340,7 @@ int rtrace_dynamic_gen_offset(struct rtrace *r, struct dynamic_fields *df, } err = btf_func_proto_find_param_pos(r->btf, func_proto_id, NULL, df[0].ident); - if (err <= 0) + if (err <= 0) goto err_out; cnt = 0; @@ -411,8 +425,8 @@ err_out: } /** - * @brief - * + * @brief + * * @param r rtrace context * @param dos offsets for struct members * @param insns pointer to save instructions -- Gitee From 0b658744cde023e3493d18f87663831a296b9115 Mon Sep 17 00:00:00 2001 From: "chengshuyi.csy" Date: Wed, 16 Feb 2022 17:27:14 +0800 Subject: [PATCH 06/11] rtrace-ebpf: remove dump insns and add more debug log. Signed-off-by: chengshuyi.csy --- .../detect/net_diag/rtrace/ebpf/rtrace.c | 25 +++++++++++-------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/source/tools/detect/net_diag/rtrace/ebpf/rtrace.c b/source/tools/detect/net_diag/rtrace/ebpf/rtrace.c index 8470e91b..48079fd2 100644 --- a/source/tools/detect/net_diag/rtrace/ebpf/rtrace.c +++ b/source/tools/detect/net_diag/rtrace/ebpf/rtrace.c @@ -239,8 +239,8 @@ int rtrace_trace_load_prog(struct rtrace *r, struct bpf_program *prog, char log_buf[log_buf_size]; int fd; - if (gdebug) - insns_dump(insns, insns_cnt); + // if (gdebug) + // insns_dump(insns, insns_cnt); memset(&attr, 0, sizeof(attr)); attr.prog_type = bpf_program__get_type(prog); @@ -252,10 +252,8 @@ int rtrace_trace_load_prog(struct rtrace *r, struct bpf_program *prog, attr.kern_version = bpf_object__kversion(r->obj->obj); attr.prog_ifindex = 0; - if (gdebug) - fd = bpf_load_program_xattr(&attr, log_buf, log_buf_size); - else - fd = bpf_load_program_xattr(&attr, NULL, 0); + // fd = bpf_load_program_xattr(&attr, log_buf, log_buf_size); + fd = bpf_load_program_xattr(&attr, NULL, 0); if (fd < 0) { printf("%s\n", log_buf); @@ -332,25 +330,28 @@ int rtrace_dynamic_gen_offset(struct rtrace *r, struct dynamic_fields *df, return -EINVAL; root_typeid = btf_func_proto_find_param(r->btf, func_proto_id, NULL, df[0].ident); - if (root_typeid < 0) + if (root_typeid <= 0) { - pr_dbg("failed to find param: %s in function", df[0].ident); + pr_err("failed to find param: %s in function", df[0].ident); err = root_typeid; goto err_out; } err = btf_func_proto_find_param_pos(r->btf, func_proto_id, NULL, df[0].ident); if (err <= 0) + { + pr_err("failed to find param pos: %s in function", df[0].ident); goto err_out; + } cnt = 0; - dp.attr[cnt].offset = err; if (df[0].cast_type > 0) { root_typeid = btf__find_by_name_kind(r->btf, df[0].cast_name, df[0].cast_type); if (root_typeid < 0) { + pr_err("failed to do casting\n"); err = root_typeid; goto err_out; } @@ -371,6 +372,7 @@ int rtrace_dynamic_gen_offset(struct rtrace *r, struct dynamic_fields *df, if (mem == NULL) { err = -errno; + pr_err("failed to find member: %s in struct: %s, err = %d\n", df[i].ident, btf__name_by_offset(r->btf, btf__type_by_id(r->btf, pre_typeid)->name_off), err); goto err_out; } dp.attr[cnt].offset = offset; @@ -379,6 +381,7 @@ int rtrace_dynamic_gen_offset(struct rtrace *r, struct dynamic_fields *df, pre_typeid = btf__find_by_name_kind(r->btf, df[i].cast_name, df[i].cast_type); if (pre_typeid < 0) { + pr_err("failed to do casting\n"); err = pre_typeid; goto err_out; } @@ -467,8 +470,8 @@ int rtrace_dynamic_gen_insns(struct rtrace *r, struct dynamic_offsets *dos, stru insns[insns_cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, 0); pr_dbg("generate new insns, insns cnt: %d\n", insns_cnt); - if (gdebug) - insns_dump(insns, insns_cnt); + // if (gdebug) + // insns_dump(insns, insns_cnt); return insns_cnt; } -- Gitee From 0f09c5b077dafbedf7aead3360b0753dbff98776 Mon Sep 17 00:00:00 2001 From: "chengshuyi.csy" Date: Wed, 16 Feb 2022 17:29:02 +0800 Subject: [PATCH 07/11] rtrace-ebpf: fix bugs in find member in struct. Need to skip ptr and modifiers. Signed-off-by: chengshuyi.csy --- source/tools/detect/net_diag/rtrace/ebpf/utils/btf.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/source/tools/detect/net_diag/rtrace/ebpf/utils/btf.c b/source/tools/detect/net_diag/rtrace/ebpf/utils/btf.c index ab8cbc5a..a5526af4 100644 --- a/source/tools/detect/net_diag/rtrace/ebpf/utils/btf.c +++ b/source/tools/detect/net_diag/rtrace/ebpf/utils/btf.c @@ -89,9 +89,12 @@ const struct btf_member *btf_find_member(struct btf *btf, int typeid, const struct btf_member *m, *tmpm; const char *name; int i; + t = btf__type_by_id(btf, typeid); - t = btf_type_skip_modifiers(btf, typeid, (uint32_t *)&typeid); - t = btf_type_skip_ptr(btf, typeid); + while(btf_type_is_modifier(t) || btf_is_ptr(t)) { + t = btf_type_skip_modifiers(btf, typeid, (uint32_t *)&typeid); + t = btf_type_skip_ptr(btf, typeid); + } m = btf_members(t); for (i = 0; i < btf_vlen(t); i++, m++) { -- Gitee From e84d17d0ece1aeff832328f4d6ee76d3391813ba Mon Sep 17 00:00:00 2001 From: "chengshuyi.csy" Date: Wed, 16 Feb 2022 17:35:38 +0800 Subject: [PATCH 08/11] rtrace-rs: add default method into FunctionContainer. Signed-off-by: chengshuyi.csy --- .../tools/detect/net_diag/rtrace/rtrace-rs/src/rtrace.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/source/tools/detect/net_diag/rtrace/rtrace-rs/src/rtrace.rs b/source/tools/detect/net_diag/rtrace/rtrace-rs/src/rtrace.rs index efa0d952..304d6651 100644 --- a/source/tools/detect/net_diag/rtrace/rtrace-rs/src/rtrace.rs +++ b/source/tools/detect/net_diag/rtrace/rtrace-rs/src/rtrace.rs @@ -33,7 +33,7 @@ pub struct Filterx { pub src: String, } -#[derive(Clone, Debug, Deserialize, Serialize)] +#[derive(Default, Clone, Debug, Deserialize, Serialize)] pub struct Function { pub name: String, pub enable: Option, @@ -46,7 +46,7 @@ pub struct Function { offsets: Option>, } // see: https://github.com/alexcrichton/toml-rs/issues/395 -#[derive(Clone, Debug, Deserialize, Serialize)] +#[derive(Default, Clone, Debug, Deserialize, Serialize)] pub struct FunctionContainer { pub function: Vec, } @@ -169,9 +169,10 @@ impl Rtrace { if let Some(x) = self.progs.remove(name) { return Ok(x); } - + + let cname = CString::new(name.clone())?; let prog = unsafe { - rtrace_trace_program(self.ptr, CString::new(name.clone())?.as_ptr(), skv, skbv) + rtrace_trace_program(self.ptr, cname.as_ptr(), skv, skbv) }; if prog == std::ptr::null_mut() { -- Gitee From 291a76f6cf671b9fd380bc51f28e9a1c54f2eef2 Mon Sep 17 00:00:00 2001 From: "chengshuyi.csy" Date: Wed, 16 Feb 2022 17:36:56 +0800 Subject: [PATCH 09/11] rtrace-drop: support l4 packet drop check. Now support to check l4 packet drop. Signed-off-by: chengshuyi.csy --- .../net_diag/rtrace/rtrace-drop/Cargo.toml | 3 +- .../net_diag/rtrace/rtrace-drop/src/l4/tcp.rs | 11 +++--- .../net_diag/rtrace/rtrace-drop/src/main.rs | 35 ++++++++++++------- 3 files changed, 30 insertions(+), 19 deletions(-) diff --git a/source/tools/detect/net_diag/rtrace/rtrace-drop/Cargo.toml b/source/tools/detect/net_diag/rtrace/rtrace-drop/Cargo.toml index 51e3acc7..6b69639e 100644 --- a/source/tools/detect/net_diag/rtrace/rtrace-drop/Cargo.toml +++ b/source/tools/detect/net_diag/rtrace/rtrace-drop/Cargo.toml @@ -16,4 +16,5 @@ sysctl = "0.4.3" log = "0.4.14" uname = "0.1.1" cached = "0.23.0" -dyn-clone = "1.0.4" \ No newline at end of file +dyn-clone = "1.0.4" +env_logger = "0.9.0" \ No newline at end of file diff --git a/source/tools/detect/net_diag/rtrace/rtrace-drop/src/l4/tcp.rs b/source/tools/detect/net_diag/rtrace/rtrace-drop/src/l4/tcp.rs index 0e9656f4..26b6f10e 100644 --- a/source/tools/detect/net_diag/rtrace/rtrace-drop/src/l4/tcp.rs +++ b/source/tools/detect/net_diag/rtrace/rtrace-drop/src/l4/tcp.rs @@ -27,6 +27,9 @@ impl RtraceDrop for Tcp { } } +#[derive(Default, Clone)] +struct TcpConnRequest {} + impl RtraceDrop for TcpConnRequest { fn get_probe_string(&self) -> &str { r#" @@ -45,7 +48,7 @@ exprs = ["sk.sk_ack_backlog", "sk.sk_max_ack_backlog", "((struct inet_connection "[Support]" } - fn check_func(&mut self, func: &Func, vals: &Vec) -> RtraceDropAction { + fn check_func(&mut self, _func: &Func, vals: &Vec) -> RtraceDropAction { // ((struct inet_connection_sock *)sk).icsk_accept_queue.qlen.counter > sk.sk_max_ack_backlog : syn queue overflow if vals[2] > vals[1] { return RtraceDropAction::Consume(format!( @@ -71,7 +74,7 @@ impl RtraceDrop for Tcpv4SynRecvSock { [[function]] name = "tcp_v4_syn_recv_sock" params = ["basic"] -expr = ["sk.sk_ack_backlog", "sk.sk_max_ack_backlog"] +exprs = ["sk.sk_ack_backlog", "sk.sk_max_ack_backlog"] "# } @@ -95,8 +98,6 @@ expr = ["sk.sk_ack_backlog", "sk.sk_max_ack_backlog"] } } -#[derive(Default, Clone)] -struct TcpConnRequest {} #[derive(Default, Clone)] struct TcpAddBacklog { headroom: u64, @@ -116,7 +117,7 @@ impl RtraceDrop for TcpAddBacklog { [[function]] name = "tcp_add_backlog" params = ["basic"] -expr = ["sk.sk_backlog.len", "sk.sk_backlog.rmem_alloc", "sk.sk_rcvbuf", "sk.sk_sndbuf" ] +exprs = ["sk.sk_backlog.len", "sk.sk_backlog.rmem_alloc", "sk.sk_rcvbuf", "sk.sk_sndbuf"] "# } diff --git a/source/tools/detect/net_diag/rtrace/rtrace-drop/src/main.rs b/source/tools/detect/net_diag/rtrace/rtrace-drop/src/main.rs index 56d44910..97c62f30 100644 --- a/source/tools/detect/net_diag/rtrace/rtrace-drop/src/main.rs +++ b/source/tools/detect/net_diag/rtrace/rtrace-drop/src/main.rs @@ -22,6 +22,7 @@ use std::path::PathBuf; use std::time::Duration; use structopt::StructOpt; use uname::uname; +use rtrace_parser::ksyms::ksyms_load; #[derive(Debug, StructOpt)] #[structopt( @@ -70,6 +71,7 @@ impl RtraceDrop for AllDrop { } fn main() { + env_logger::init(); let cli = Cli::from_args(); let include_hs = build_hs(&cli.include, "all"); let exclude_hs = build_hs(&cli.exclude, "none"); @@ -91,13 +93,18 @@ fn main() { match &cli.config { Some(config) => rtrace = Rtrace::from_file(config).expect("failed to create Rtrace object"), - None => {println!("please specify config file");return;}, + None => { + println!("please specify config file"); + return; + } } + ksyms_load(&"/proc/kallsyms".to_owned()); + rtrace.probe_filter().expect("init filter failed"); let mut enabled_points = get_enabled_points(&ad, &include_hs, &exclude_hs).expect("failed to solve points"); let function_mapping = - probe_funcitons(&mut rtrace, &enabled_points).expect("Failed to insert probe function"); + probe_funcitons(&mut rtrace, &enabled_points).expect("Failed to insert probe functions"); let (rx, tx) = crossbeam_channel::unbounded(); perf_inital_thread2(rtrace.perf_fd(), (rx, tx)); @@ -110,9 +117,10 @@ fn main() { match function_mapping.get(&f.get_name_no_offset()) { None => {} Some(names) => { + let vals = get_exprs_vals(&rtrace, &f).expect("failed to parse expression values."); for name in names { if let Some(point) = enabled_points.get_mut(name) { - match point.0.check_func(&f, &Vec::new()) { + match point.0.check_func(&f, &vals) { RtraceDropAction::Continue => {} RtraceDropAction::Consume(x) => { println!("{}", x); @@ -161,7 +169,6 @@ fn gen_config(path: &str) -> Result<()> { let text = r#" [basic] debug = false -btf_path = "/boot/vmlinux-4.19.91-007.ali4000.alios7.x86_64" duration = 0 protocol = "tcp" recv = true @@ -198,8 +205,9 @@ fn probe_funcitons( } let mut functions = Vec::new(); - for (_, v) in function_hm { - debug!("probe drop point: {}", v.name); + for (_, mut v) in function_hm { + debug!("probe packet drop point: {}", v.name); + v.enable = Some(true); functions.push(v); } @@ -241,13 +249,14 @@ fn get_enabled_points( } // clone all enabled points. for (name, point) in enabled_points { - returned_points.insert( - name, - ( - point.clone(), - FunctionContainer::from_str(&point.get_probe_string())?, - ), - ); + let probe_str = point.get_probe_string(); + let fc; + if probe_str.len() == 0 { + fc = FunctionContainer::default(); + } else { + fc = FunctionContainer::from_str(&point.get_probe_string())?; + } + returned_points.insert(name, (point.clone(), fc)); } Ok(returned_points) } -- Gitee From d1349889c83e7d1621f0cf8364dcfc54f35a10c2 Mon Sep 17 00:00:00 2001 From: "chengshuyi.csy" Date: Thu, 17 Feb 2022 18:29:22 +0800 Subject: [PATCH 10/11] rtrace-drop: support to monitor system parameters. Use netlink to monitor the underlying hardware packet loss, and use proc to check whether the system configuration is normal Signed-off-by: chengshuyi.csy --- .../net_diag/rtrace/rtrace-drop/Cargo.toml | 4 +- .../net_diag/rtrace/rtrace-drop/README.md | 14 ++- .../net_diag/rtrace/rtrace-drop/src/base.rs | 9 ++ .../net_diag/rtrace/rtrace-drop/src/l1/irq.rs | 0 .../net_diag/rtrace/rtrace-drop/src/l1/mod.rs | 6 +- .../net_diag/rtrace/rtrace-drop/src/l3/fib.rs | 30 +++++ .../rtrace/rtrace-drop/src/l3/iptables.rs | 2 +- .../net_diag/rtrace/rtrace-drop/src/main.rs | 36 +++++- .../rtrace/rtrace-drop/src/monitor/mod.rs | 31 +++++ .../rtrace/rtrace-drop/src/monitor/netlink.rs | 114 ++++++++++++++++++ .../rtrace/rtrace-drop/src/monitor/proc.rs | 69 +++++++++++ 11 files changed, 303 insertions(+), 12 deletions(-) delete mode 100644 source/tools/detect/net_diag/rtrace/rtrace-drop/src/l1/irq.rs create mode 100644 source/tools/detect/net_diag/rtrace/rtrace-drop/src/monitor/mod.rs create mode 100644 source/tools/detect/net_diag/rtrace/rtrace-drop/src/monitor/netlink.rs create mode 100644 source/tools/detect/net_diag/rtrace/rtrace-drop/src/monitor/proc.rs diff --git a/source/tools/detect/net_diag/rtrace/rtrace-drop/Cargo.toml b/source/tools/detect/net_diag/rtrace/rtrace-drop/Cargo.toml index 6b69639e..cda9d349 100644 --- a/source/tools/detect/net_diag/rtrace/rtrace-drop/Cargo.toml +++ b/source/tools/detect/net_diag/rtrace/rtrace-drop/Cargo.toml @@ -17,4 +17,6 @@ log = "0.4.14" uname = "0.1.1" cached = "0.23.0" dyn-clone = "1.0.4" -env_logger = "0.9.0" \ No newline at end of file +env_logger = "0.9.0" +netlink-packet-route = "0.10.0" +netlink-sys = "0.8.1" \ No newline at end of file diff --git a/source/tools/detect/net_diag/rtrace/rtrace-drop/README.md b/source/tools/detect/net_diag/rtrace/rtrace-drop/README.md index af8c18bc..0eb1cde4 100644 --- a/source/tools/detect/net_diag/rtrace/rtrace-drop/README.md +++ b/source/tools/detect/net_diag/rtrace/rtrace-drop/README.md @@ -174,9 +174,19 @@ name = "fib_validate_source" params = ["basic", "kretprobe"] ``` -### 其它模块 +### monitor模块 -主要是系统环境参数配置方面,具体检测参数如下: +主要是监控系统自带的统计参数及系统环境参数。比如, 利用netlink监测由硬件的ring buffer溢出导致的丢包数。 + +#### netlink + +利用netlink检测丢包, 目前支持: + +* overrun: 表示由于网卡硬件缓冲区不足导致的丢包数; + +#### proc + +查看proc目录下是否存在可能导致丢包的配置, 目前支持: * tcp_tw_recycle: 回收TIME-WAIT状态的socket。在nat场景下, 一般建议关闭。注:4.12版本后该参数已经被移除 diff --git a/source/tools/detect/net_diag/rtrace/rtrace-drop/src/base.rs b/source/tools/detect/net_diag/rtrace/rtrace-drop/src/base.rs index 11e64b2d..4340db8d 100644 --- a/source/tools/detect/net_diag/rtrace/rtrace-drop/src/base.rs +++ b/source/tools/detect/net_diag/rtrace/rtrace-drop/src/base.rs @@ -40,6 +40,15 @@ pub trait RtraceDrop: DynClone { fn get_subpoints(&self) -> Option<&Vec>> { None } + + fn is_periodic(&self) -> bool { + false + } + + fn run_periodically(&mut self) -> RtraceDropAction { + RtraceDropAction::Continue + } + } clone_trait_object!(RtraceDrop); diff --git a/source/tools/detect/net_diag/rtrace/rtrace-drop/src/l1/irq.rs b/source/tools/detect/net_diag/rtrace/rtrace-drop/src/l1/irq.rs deleted file mode 100644 index e69de29b..00000000 diff --git a/source/tools/detect/net_diag/rtrace/rtrace-drop/src/l1/mod.rs b/source/tools/detect/net_diag/rtrace/rtrace-drop/src/l1/mod.rs index 73223630..2fa904c7 100644 --- a/source/tools/detect/net_diag/rtrace/rtrace-drop/src/l1/mod.rs +++ b/source/tools/detect/net_diag/rtrace/rtrace-drop/src/l1/mod.rs @@ -1,5 +1,3 @@ - - use crate::base::{RtraceDrop, RtraceDropAction}; use anyhow::Result; @@ -9,7 +7,6 @@ pub struct L1 { } impl RtraceDrop for L1 { - fn init(&mut self) -> Result<()> { for point in &mut self.points { point.init()?; @@ -24,5 +21,4 @@ impl RtraceDrop for L1 { fn get_subpoints(&self) -> Option<&Vec>> { Some(&self.points) } - -} \ No newline at end of file +} diff --git a/source/tools/detect/net_diag/rtrace/rtrace-drop/src/l3/fib.rs b/source/tools/detect/net_diag/rtrace/rtrace-drop/src/l3/fib.rs index b55afcae..ef2dc6e5 100644 --- a/source/tools/detect/net_diag/rtrace/rtrace-drop/src/l3/fib.rs +++ b/source/tools/detect/net_diag/rtrace/rtrace-drop/src/l3/fib.rs @@ -11,6 +11,10 @@ pub struct Fib { impl RtraceDrop for Fib { fn init(&mut self) -> Result<()> { + self.points.push(Box::new(FibValidateSource::default())); + for point in &mut self.points { + point.init()?; + } Ok(()) } @@ -33,4 +37,30 @@ impl RtraceDrop for FibValidateSource { fn get_name(&self) -> &str { "fib_validate_source" } + + fn get_status(&self) -> &str { + "[Support: rp_filter]" + } + + fn check_func(&mut self, func: &Func, vals: &Vec) -> RtraceDropAction { + if func.is_kretprobe() { + let bi = func + .get_struct(INFO_TYPE::BASIC_INFO) + .expect("failed to find basic info") + as *const BASIC_INFO_struct; + let ret = unsafe { (*bi).ret } as i64; + if ret < 0 { + match ret { + -18 => return RtraceDropAction::Consume(format!("rp_filter drop packet")), + _ => { + return RtraceDropAction::Consume(format!( + "Unable to parse {}, but the packet is lost here", + ret + )) + } + } + } + } + RtraceDropAction::Continue + } } diff --git a/source/tools/detect/net_diag/rtrace/rtrace-drop/src/l3/iptables.rs b/source/tools/detect/net_diag/rtrace/rtrace-drop/src/l3/iptables.rs index 7b36569a..fe90c98a 100644 --- a/source/tools/detect/net_diag/rtrace/rtrace-drop/src/l3/iptables.rs +++ b/source/tools/detect/net_diag/rtrace/rtrace-drop/src/l3/iptables.rs @@ -50,7 +50,7 @@ exprs = ["state.net.ipv4.iptable_filter", "state.net.ipv4.iptable_mangle", "stat } fn get_status(&self) -> &str { - "[Support]" + "[Not Support]" } fn check_func(&mut self, func: &Func, vals: &Vec) -> RtraceDropAction { diff --git a/source/tools/detect/net_diag/rtrace/rtrace-drop/src/main.rs b/source/tools/detect/net_diag/rtrace/rtrace-drop/src/main.rs index 97c62f30..5c7e939d 100644 --- a/source/tools/detect/net_diag/rtrace/rtrace-drop/src/main.rs +++ b/source/tools/detect/net_diag/rtrace/rtrace-drop/src/main.rs @@ -3,17 +3,21 @@ mod l1; mod l2; mod l3; mod l4; +mod monitor; use crate::base::{RtraceDrop, RtraceDropAction}; use crate::l1::L1; use crate::l2::L2; use crate::l3::L3; use crate::l4::L4; +use crate::monitor::Mointor; use anyhow::anyhow; use anyhow::Result; use log::*; use rtrace_parser::func::Func; +use rtrace_parser::ksyms::ksyms_load; use rtrace_parser::perf::{perf_inital_thread2, perf_recv_timeout}; +use rtrace_parser::utils; use rtrace_rs::rtrace::{Config, FunctionContainer, Rtrace}; use std::boxed::Box; use std::collections::{HashMap, HashSet}; @@ -22,7 +26,6 @@ use std::path::PathBuf; use std::time::Duration; use structopt::StructOpt; use uname::uname; -use rtrace_parser::ksyms::ksyms_load; #[derive(Debug, StructOpt)] #[structopt( @@ -40,6 +43,13 @@ pub struct Cli { exclude: Option>, #[structopt(long, short, help = "show all packet loss points")] list: Option>, + #[structopt( + long, + short, + default_value = "1", + help = "monitor program running cycle, defaule 1 second" + )] + period: u64, } /// main entry @@ -55,6 +65,7 @@ impl RtraceDrop for AllDrop { self.points.push(Box::new(L2::default())); self.points.push(Box::new(L3::default())); self.points.push(Box::new(L4::default())); + self.points.push(Box::new(Mointor::default())); for point in &mut self.points { point.init()?; } @@ -72,7 +83,7 @@ impl RtraceDrop for AllDrop { fn main() { env_logger::init(); - let cli = Cli::from_args(); + let mut cli = Cli::from_args(); let include_hs = build_hs(&cli.include, "all"); let exclude_hs = build_hs(&cli.exclude, "none"); let list_hs = build_hs(&cli.list, "all"); @@ -109,6 +120,8 @@ fn main() { let (rx, tx) = crossbeam_channel::unbounded(); perf_inital_thread2(rtrace.perf_fd(), (rx, tx)); + let mut pre_checktimeout_ts = 0; + cli.period = cli.period * 1_000_000_000; loop { let res = perf_recv_timeout(Duration::from_millis(100)); match res { @@ -117,7 +130,8 @@ fn main() { match function_mapping.get(&f.get_name_no_offset()) { None => {} Some(names) => { - let vals = get_exprs_vals(&rtrace, &f).expect("failed to parse expression values."); + let vals = get_exprs_vals(&rtrace, &f) + .expect("failed to parse expression values."); for name in names { if let Some(point) = enabled_points.get_mut(name) { match point.0.check_func(&f, &vals) { @@ -134,6 +148,22 @@ fn main() { } _ => {} } + + let cur_ts = utils::get_timestamp(); + if cur_ts - pre_checktimeout_ts > cli.period { + for (_, v) in enabled_points.iter_mut() { + if v.0.is_periodic() { + match v.0.run_periodically() { + RtraceDropAction::Continue => {} + RtraceDropAction::Consume(x) => { + println!("{}", x); + return; + } + } + } + } + pre_checktimeout_ts = cur_ts; + } } } diff --git a/source/tools/detect/net_diag/rtrace/rtrace-drop/src/monitor/mod.rs b/source/tools/detect/net_diag/rtrace/rtrace-drop/src/monitor/mod.rs new file mode 100644 index 00000000..c58cc11d --- /dev/null +++ b/source/tools/detect/net_diag/rtrace/rtrace-drop/src/monitor/mod.rs @@ -0,0 +1,31 @@ +mod netlink; +mod proc; + +use crate::base::{RtraceDrop, RtraceDropAction}; +use crate::monitor::netlink::Netlink; +use crate::monitor::proc::Proc; +use anyhow::Result; + +#[derive(Default, Clone)] +pub struct Mointor { + points: Vec>, +} + +impl RtraceDrop for Mointor { + fn init(&mut self) -> Result<()> { + self.points.push(Box::new(Netlink::default())); + self.points.push(Box::new(Proc::default())); + for point in &mut self.points { + point.init()?; + } + Ok(()) + } + + fn get_name(&self) -> &str { + "mointor" + } + + fn get_subpoints(&self) -> Option<&Vec>> { + Some(&self.points) + } +} diff --git a/source/tools/detect/net_diag/rtrace/rtrace-drop/src/monitor/netlink.rs b/source/tools/detect/net_diag/rtrace/rtrace-drop/src/monitor/netlink.rs new file mode 100644 index 00000000..486607c9 --- /dev/null +++ b/source/tools/detect/net_diag/rtrace/rtrace-drop/src/monitor/netlink.rs @@ -0,0 +1,114 @@ +use crate::base::{RtraceDrop, RtraceDropAction}; +use anyhow::Result; +use netlink_packet_route::traits::Parseable; +use netlink_packet_route::{ + nlas::link::Nla, nlas::link::Stats64, nlas::link::Stats64Buffer, nlas::NlaBuffer, LinkMessage, + NetlinkHeader, NetlinkMessage, NetlinkPayload, RtnlMessage, NLM_F_DUMP, NLM_F_REQUEST, +}; +use netlink_sys::{protocols::NETLINK_ROUTE, Socket, SocketAddr}; +use std::collections::HashMap; + +#[derive(Default, Clone)] +struct Index { + overrun: u64, +} + +#[derive(Default, Clone)] +pub struct Netlink { + index_hm: HashMap, +} + +impl RtraceDrop for Netlink { + fn get_name(&self) -> &str { + "netlink" + } + + fn get_status(&self) -> &str { + "[Support: overrun]" + } + + fn is_periodic(&self) -> bool { + true + } + + fn run_periodically(&mut self) -> RtraceDropAction { + let mut socket = Socket::new(NETLINK_ROUTE).unwrap(); + let _port_number = socket.bind_auto().unwrap().port_number(); + socket.connect(&SocketAddr::new(0, 0)).unwrap(); + let mut packet = NetlinkMessage { + header: NetlinkHeader::default(), + payload: NetlinkPayload::from(RtnlMessage::GetLink(LinkMessage::default())), + }; + packet.header.flags = NLM_F_DUMP | NLM_F_REQUEST; + packet.header.sequence_number = 1; + packet.finalize(); + let mut buf = vec![0; packet.header.length as usize]; + packet.serialize(&mut buf[..]); + socket.send(&buf[..], 0).unwrap(); + let mut receive_buffer = vec![0; 4096]; + let mut offset = 0; + let mut stat = None; + let mut name = None; + + loop { + let size = socket.recv(&mut &mut receive_buffer[..], 0).unwrap(); + loop { + let bytes = &receive_buffer[offset..]; + let rx_packet: NetlinkMessage = + NetlinkMessage::deserialize(bytes).unwrap(); + match rx_packet.payload { + NetlinkPayload::Done => return RtraceDropAction::Continue, + NetlinkPayload::InnerMessage(RtnlMessage::NewLink(link)) => { + for item in link.nlas.iter() { + match item { + Nla::Stats64(buff) => { + stat = Some( + Stats64::parse(&Stats64Buffer::new(buff)) + .expect("failed to parse") + .clone(), + ); + } + Nla::IfName(n) => { + name = Some(n.clone()); + } + _ => {} + } + } + } + _ => {} + } + + if let Some(x) = &name { + if let Some(y) = stat { + let mut idx = self.index_hm.entry(x.clone()).or_insert(Index::default()); + let pre_overrun = idx.overrun; + idx.overrun = y.rx_over_errors; + if idx.overrun > pre_overrun { + return RtraceDropAction::Consume(format!( + "NIC: {} ring buffer overflow, {} > {}", + x, idx.overrun, pre_overrun + )); + } + } + } + + offset += rx_packet.header.length as usize; + if offset == size || rx_packet.header.length == 0 { + offset = 0; + break; + } + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_iproute() { + let mut ip = Iproute::default(); + ip.run_periodically(); + } +} diff --git a/source/tools/detect/net_diag/rtrace/rtrace-drop/src/monitor/proc.rs b/source/tools/detect/net_diag/rtrace/rtrace-drop/src/monitor/proc.rs new file mode 100644 index 00000000..882ce933 --- /dev/null +++ b/source/tools/detect/net_diag/rtrace/rtrace-drop/src/monitor/proc.rs @@ -0,0 +1,69 @@ +use crate::base::{RtraceDrop, RtraceDropAction}; +use anyhow::Result; + +#[derive(Default, Clone)] +pub struct Proc { + points: Vec>, +} + +impl RtraceDrop for Proc { + fn init(&mut self) -> Result<()> { + self.points.push(Box::new(TcpTwRecycle::default())); + for point in &mut self.points { + point.init()?; + } + Ok(()) + } + + fn get_name(&self) -> &str { + "proc" + } + fn get_subpoints(&self) -> Option<&Vec>> { + Some(&self.points) + } +} + +#[derive(Default, Clone)] +struct TcpTwRecycle { + run: bool, +} + +impl RtraceDrop for TcpTwRecycle { + fn get_name(&self) -> &str { + "tcp_tw_recycle" + } + + fn is_periodic(&self) -> bool { + true + } + + fn run_periodically(&mut self) -> RtraceDropAction { + if self.run { + return RtraceDropAction::Continue; + } + self.run = true; + let path = std::path::Path::new("/proc/sys/net/ipv4/tcp_tw_recycle"); + if path.exists() { + match std::fs::read_to_string(path) { + Ok(mut x) => { + x.truncate(x.len() - 1); + let recycle = x.parse::().expect("failed to parse string to number"); + if recycle != 0 { + return RtraceDropAction::Consume(format!( + "tcp_tw_recycle not closed: {}", + recycle + )); + } + } + Err(y) => { + println!("failed to check tcp_tw_recycle: {:?}", y); + } + } + } + RtraceDropAction::Continue + } + + fn get_status(&self) -> &str { + "[Support]" + } +} -- Gitee From ce861547e881b9fd750847d17a0eea05e0573011 Mon Sep 17 00:00:00 2001 From: "chengshuyi.csy" Date: Thu, 17 Feb 2022 18:39:03 +0800 Subject: [PATCH 11/11] rtrace-drop: add monitor type doc. add doc to describe monitor type. Signed-off-by: chengshuyi.csy --- .../net_diag/rtrace/rtrace-drop/README.md | 41 ++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/source/tools/detect/net_diag/rtrace/rtrace-drop/README.md b/source/tools/detect/net_diag/rtrace/rtrace-drop/README.md index 0eb1cde4..f88176f2 100644 --- a/source/tools/detect/net_diag/rtrace/rtrace-drop/README.md +++ b/source/tools/detect/net_diag/rtrace/rtrace-drop/README.md @@ -24,6 +24,7 @@ OPTIONS: --gen generate default configuration file -i, --include ... Included drop points -l, --list ... show all packet loss points + -p, --period monitor program running cycle, defaule 1 second [default: 1] ``` ### 参数说明 @@ -33,12 +34,50 @@ OPTIONS: * `-l, --list`: 查看当前支持诊断的丢包点 * `--config`: 配置文件 * `--gen`: 生成默认的配置文件 +* `-p --period`: 监控程序的运行周期,默认1秒运行一次 注: `-e`的优先级高于`-i` ### 使用样例 +#### 查看当前支持的丢包检测点 +`sysak rtrace-drop -l`输出如下: + +``` +all + l1 + l2 + l3 + iptables + ipt_do_table [Not Support] + conntrack + ipv4_conntrack_in [Not Support] + ipv4_conntrack_local [Not Support] + ipv4_helper [Not Support] + ipv4_confirm [Not Support] + fib + fib_validate_source [Support: rp_filter] + l4 + tcp + tcp_conn_request [Support] + tcp_v4_syn_recv_sock [Support] + tcp_add_backlog [Support] + __skb_checksum_complete [Support] + udp + mointor + netlink [Support: overrun] + proc + tcp_tw_recycle [Support] +``` + +#### 监控系统丢包 + +运行命令`sysak rtrace-drop --config `, 即可诊断当前系统是否存在丢包及丢包原因。 + +#### 监控l4层丢包 + +运行命令`sysak rtrace-drop -i l4 --config ` ## 覆盖场景及检测原理 @@ -48,7 +87,7 @@ rtrace-drop将丢包点按照网络协议栈分成四个层次,分别是: * l2层, 即数据链路层, 实现中断等丢包点的监控 * l1层, 即物理层, 实现硬件丢包点的监控 -除了分成四个层次外,每个层次还可嵌套子模块。如l4层,可新增udp或tcp丢包点监控模块。 +需要注意的是每个层次还可嵌套子模块。如l4层,可新增udp或tcp丢包点监控模块。除了分成四个层次外,还包含一个特殊的模块,即monitor。monitor主要用来检查系统环境的配置是否正确及系统丢包统计参数。 ### l4层 -- Gitee