From 3a4d9eba7c99f8f15e66c9a47171ace2077978ef Mon Sep 17 00:00:00 2001 From: sixu_chen Date: Mon, 30 Sep 2024 10:48:14 +0800 Subject: [PATCH 1/3] net/smc: Introduce tracepoint for smc send start This introduces a tracepoint for the start of the sending process of a smc communication in the kernel network protocol stack, which can help us diagnosis issues of date path. Signed-off-by: sixu_chen --- net/smc/af_smc.c | 1 + net/smc/smc_tracepoint.c | 1 + net/smc/smc_tracepoint.h | 7 +++++++ 3 files changed, 9 insertions(+) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 881cd5504868..dfd3857a3901 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -4165,6 +4165,7 @@ int smc_inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) int rc; smc = smc_sk(sk); + trace_smc_send_start(smc, len); /* Send before connected, might be fastopen or user's incorrect usage, but * whatever, in either case, we do not need to replace it with SMC any more. diff --git a/net/smc/smc_tracepoint.c b/net/smc/smc_tracepoint.c index 8d47ced5a492..96d96818611a 100644 --- a/net/smc/smc_tracepoint.c +++ b/net/smc/smc_tracepoint.c @@ -6,4 +6,5 @@ EXPORT_TRACEPOINT_SYMBOL(smc_switch_to_fallback); EXPORT_TRACEPOINT_SYMBOL(smc_tx_sendmsg); EXPORT_TRACEPOINT_SYMBOL(smc_rx_recvmsg); +EXPORT_TRACEPOINT_SYMBOL(smc_send_start); EXPORT_TRACEPOINT_SYMBOL(smcr_link_down); diff --git a/net/smc/smc_tracepoint.h b/net/smc/smc_tracepoint.h index ec17f29646f5..6abb4dcb9ac9 100644 --- a/net/smc/smc_tracepoint.h +++ b/net/smc/smc_tracepoint.h @@ -75,6 +75,13 @@ DEFINE_EVENT(smc_msg_event, smc_rx_recvmsg, TP_ARGS(smc, len) ); +DEFINE_EVENT(smc_msg_event, smc_send_start, + + TP_PROTO(const struct smc_sock *smc, size_t len), + + TP_ARGS(smc, len) +); + TRACE_EVENT(smcr_link_down, TP_PROTO(const struct smc_link *lnk, void *location), -- Gitee From 8a31d4d0493bddb9e79342f95b1f00698a7a9008 Mon Sep 17 00:00:00 2001 From: sixu_chen Date: Mon, 30 Sep 2024 11:05:43 +0800 Subject: [PATCH 2/3] net/smc: Introduce tracepoint for smc send complete This introduces a tracepoint in a CDC message,since it marks the sending process of an RDMA WRITE has been completed. Signed-off-by: sixu_chen --- net/smc/smc_cdc.c | 5 +++++ net/smc/smc_tracepoint.c | 1 + net/smc/smc_tracepoint.h | 7 +++++++ 3 files changed, 13 insertions(+) diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index bcdf3c75b76f..732f099e2f92 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -19,6 +19,7 @@ #include "smc_rx.h" #include "smc_close.h" #include "smc_ism.h" +#include "smc_tracepoint.h" /********************************** send *************************************/ @@ -291,6 +292,10 @@ int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn) } else { rc = smcr_cdc_get_slot_and_msg_send(conn); } + struct smc_sock *smc = container_of(conn, struct smc_sock, conn); + size_t len = conn->sndbuf_desc->len - atomic_read(&conn->sndbuf_space); + + trace_smc_send_complete(smc, len); return rc; } diff --git a/net/smc/smc_tracepoint.c b/net/smc/smc_tracepoint.c index 96d96818611a..da65b1e4dd9d 100644 --- a/net/smc/smc_tracepoint.c +++ b/net/smc/smc_tracepoint.c @@ -7,4 +7,5 @@ EXPORT_TRACEPOINT_SYMBOL(smc_switch_to_fallback); EXPORT_TRACEPOINT_SYMBOL(smc_tx_sendmsg); EXPORT_TRACEPOINT_SYMBOL(smc_rx_recvmsg); EXPORT_TRACEPOINT_SYMBOL(smc_send_start); +EXPORT_TRACEPOINT_SYMBOL(smc_send_complete); EXPORT_TRACEPOINT_SYMBOL(smcr_link_down); diff --git a/net/smc/smc_tracepoint.h b/net/smc/smc_tracepoint.h index 6abb4dcb9ac9..abddf95466d3 100644 --- a/net/smc/smc_tracepoint.h +++ b/net/smc/smc_tracepoint.h @@ -82,6 +82,13 @@ DEFINE_EVENT(smc_msg_event, smc_send_start, TP_ARGS(smc, len) ); +DEFINE_EVENT(smc_msg_event, smc_send_complete, + + TP_PROTO(const struct smc_sock *smc, size_t len), + + TP_ARGS(smc, len) +); + TRACE_EVENT(smcr_link_down, TP_PROTO(const struct smc_link *lnk, void *location), -- Gitee From e0ee97e8274ce7ffd686f1b9f7e762ba71233efa Mon Sep 17 00:00:00 2001 From: sixu_chen Date: Mon, 30 Sep 2024 12:06:48 +0800 Subject: [PATCH 3/3] net/smc: analyse the time latency of smc protocol Track key events in the SMC network protocol, capture packets' message, and log the timestamp of each event.Provide data in json format,make it easier for follow up analyses. Signed-off-by: sixu_chen --- tools/bpf/smctrace/smc_trace.c | 178 +++++++++++++++++++++++++++++++++ tools/bpf/smctrace/smctrace.py | 62 ++++++++++++ 2 files changed, 240 insertions(+) create mode 100644 tools/bpf/smctrace/smc_trace.c create mode 100644 tools/bpf/smctrace/smctrace.py diff --git a/tools/bpf/smctrace/smc_trace.c b/tools/bpf/smctrace/smc_trace.c new file mode 100644 index 000000000000..96e84680f770 --- /dev/null +++ b/tools/bpf/smctrace/smc_trace.c @@ -0,0 +1,178 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include +#include +#include + +struct hook_time_pair { + char hook[32]; + uint64_t timestamp; +}; + +struct packet_trace { + struct hook_time_pair hooks_time[4]; +}; + +struct packet_id { + uint32_t portpair; + uint64_t addrpair; + uint16_t protocol; +} + +struct packet { + struct packet_id p_id; + struct packet_trace p_trace; +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1024); + __type(key, struct packet_id); + __type(value, struct packet_trace); +} packet_map SEC(".maps"); + +static __always_inline void get_hook_msg(struct packet *p, + const char *hook_name, + uint32_t portpair, uint64_t addrpair, + uint16_t protocol, int hook_index) { + + p->p_trace.hooks_time[hook_index].timestamp = bpf_ktime_get_ns(); + + bpf_probe_read_str(p->p_trace.hooks_time[hook_index].hook, + sizeof(p->p_trace.hooks_time[hook_index].hook), + hook_name); + + p->p_id.portpair = portpair; + p->p_id.addrpair = addrpair; + p->p_id.protocol = protocol; +} + +TRACEPOINT_PROBE(smc_msg_event, smc_send_start) { + struct packet p; + uint32_t portpair; + uint64_t addrpair; + uint16_t protocol; + + uint32_t *portpair_ptr = &args->smc->sk->sk_portpair; + uint64_t *addrpair_ptr = &args->smc->sk->sk_addrpair; + uint16_t *protocol_ptr = &args->smc->sk->protocol; + + bpf_probe_read(&portpair, sizeof(portpair), portpair_ptr); + bpf_probe_read(&addrpair, sizeof(addrpair), addrpair_ptr); + bpf_probe_read(&protocol, sizeof(protocol), protocol_ptr); + + get_hook_msg(&p, "smc_send_start", + portpair, addrpair, protocol, 0); + + struct packet_trace *existing_trace; + + existing_trace = bpf_map_lookup_elem(&packet_map, &p.p_id); + + if (!existing_trace) { + bpf_map_update_elem(&packet_map, &p.p_id, &p.p_trace, BPF_ANY); + } else { + struct packet_trace updated_trace = *existing_trace; + + updated_trace.hooks_time[0] = p.p_trace.hooks_time[0]; + bpf_map_update_elem(&packet_map, &p.p_id, &updated_trace, BPF_ANY); + } + return 0; +} + +TRACEPOINT_PROBE(smc_msg_event, smc_tx_sendmsg) { + struct packet p; + uint32_t portpair; + uint64_t addrpair; + uint16_t protocol; + + uint32_t *portpair_ptr = &args->smc->sk->sk_portpair; + uint64_t *addrpair_ptr = &args->smc->sk->sk_addrpair; + uint16_t *protocol_ptr = &args->smc->sk->protocol; + + bpf_probe_read(&portpair, sizeof(portpair), portpair_ptr); + bpf_probe_read(&addrpair, sizeof(addrpair), addrpair_ptr); + bpf_probe_read(&protocol, sizeof(protocol), protocol_ptr); + + get_hook_msg(&p, "smc_tx_sendmsg", + portpair, addrpair, protocol, 1); + + struct packet_trace *existing_trace; + + existing_trace = bpf_map_lookup_elem(&packet_map, &p.p_id); + + if (!existing_trace) { + bpf_map_update_elem(&packet_map, &p.p_id, &p.p_trace, BPF_ANY); + } else { + struct packet_trace updated_trace = *existing_trace; + + updated_trace.hooks_time[1] = p.p_trace.hooks_time[1]; + bpf_map_update_elem(&packet_map, &p.p_id, &updated_trace, BPF_ANY); + } + return 0; +} + +TRACEPOINT_PROBE(drivers, RDMA_sendmsg) +{ + struct packet p; + uint32_t portpair; + uint64_t addrpair; + uint16_t protocol; + + uint32_t *portpair_ptr = &args->smc->sk->sk_portpair; + uint64_t *addrpair_ptr = &args->smc->sk->sk_addrpair; + uint16_t *protocol_ptr = &args->smc->sk->protocol; + + bpf_probe_read(&portpair, sizeof(portpair), portpair_ptr); + bpf_probe_read(&addrpair, sizeof(addrpair), addrpair_ptr); + bpf_probe_read(&protocol, sizeof(protocol), protocol_ptr); + + get_hook_msg(&p, "RDMA_sendmsg", + portpair, addrpair, protocol, 2); + + struct packet_trace *existing_trace; + + existing_trace = bpf_map_lookup_elem(&packet_map, &p.p_id); + + if (!existing_trace) { + bpf_map_update_elem(&packet_map, &p.p_id, &p.p_trace, BPF_ANY); + } else { + struct packet_trace updated_trace = *existing_trace; + + updated_trace.hooks_time[2] = p.p_trace.hooks_time[2]; + bpf_map_update_elem(&packet_map, &p.p_id, &updated_trace, BPF_ANY); + } + return 0; +} + +TRACEPOINT_PROBE(smc_msg_event, smc_send_complete) +{ + struct packet p; + uint32_t portpair; + uint64_t addrpair; + uint16_t protocol; + + uint32_t *portpair_ptr = &args->smc->sk->sk_portpair; + uint64_t *addrpair_ptr = &args->smc->sk->sk_addrpair; + uint16_t *protocol_ptr = &args->smc->sk->protocol; + + bpf_probe_read(&portpair, sizeof(portpair), portpair_ptr); + bpf_probe_read(&addrpair, sizeof(addrpair), addrpair_ptr); + bpf_probe_read(&protocol, sizeof(protocol), protocol_ptr); + + get_hook_msg(&p, "smc_send_complete", + portpair, addrpair, protocol, 3); + + struct packet_trace *existing_trace; + + existing_trace = bpf_map_lookup_elem(&packet_map, &p.p_id); + + if (!existing_trace) { + bpf_map_update_elem(&packet_map, &p.p_id, &p.p_trace, BPF_ANY); + } else { + struct packet_trace updated_trace = *existing_trace; + + updated_trace.hooks_time[3] = p.p_trace.hooks_time[3]; + bpf_map_update_elem(&packet_map, &p.p_id, &updated_trace, BPF_ANY); + } + return 0; +} diff --git a/tools/bpf/smctrace/smctrace.py b/tools/bpf/smctrace/smctrace.py new file mode 100644 index 000000000000..7f0780d66046 --- /dev/null +++ b/tools/bpf/smctrace/smctrace.py @@ -0,0 +1,62 @@ +# SPDX-License-Identifier: GPL-2.0-only + +import json +import time +from bcc import BPF + +bpf_program_file = "smc_trace.c" +b = BPF(src_file=bpf_program_file) + +def generate_packet_data(b): + packet_data = [] + try: + packet_map = b.get_table("packet_map") + for key, value in packet_map.items(): + packet_info = { + "packet_id": { + "portpair": key.portpair, + "addrpair": key.addrpair, + "protocol": key.protocol + }, + "packet_trace": { + "hooks_time": [ + {"hook": value.hooks_time[0].hook.decode('utf-8', 'replace').rstrip('\x00'), "timestamp": value.hooks_time[0].timestamp}, + {"hook": value.hooks_time[1].hook.decode('utf-8', 'replace').rstrip('\x00'), "timestamp": value.hooks_time[1].timestamp}, + {"hook": value.hooks_time[2].hook.decode('utf-8', 'replace').rstrip('\x00'), "timestamp": value.hooks_time[2].timestamp}, + {"hook": value.hooks_time[3].hook.decode('utf-8', 'replace').rstrip('\x00'), "timestamp": value.hooks_time[3].timestamp} + ] + } + } + packet_data.append(packet_info) + except Exception as e: + print(f"Error generating packet data: {e}") + return packet_data + +def save_to_json(filename, data): + try: + with open(filename, 'a') as f: + json.dump(data, f, indent=4) + f.write('\n') + except Exception as e: + print(f"Error saving data to file: {e}") + +if __name__ == "__main__": + json_file = f'latency_data_{time.strftime("%Y-%m-%d_%H-%M-%S")}.json' + + cache = [] + + try: + print("smctrace starts running...") + while True: + data = generate_packet_data(b) + cache.extend(data) + if len(cache) >= 100: + save_to_json(json_file, cache) + cache.clear() + time.sleep(10) + + except KeyboardInterrupt: + print("terminated by user, loading final data...") + if cache: + save_to_json(json_file, cache) + print(f"all data is saved to: {json_file}") \ No newline at end of file -- Gitee