diff --git a/source/tools/detect/pmu/cmtcat/cmtcat.c b/source/tools/detect/pmu/cmtcat/cmtcat.c index 738fa50d161db11ab975b975ef2e6d14c5b7feac..b367981eea35672ea04efe7c1545e694e786e3c9 100644 --- a/source/tools/detect/pmu/cmtcat/cmtcat.c +++ b/source/tools/detect/pmu/cmtcat/cmtcat.c @@ -3,6 +3,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -21,9 +24,19 @@ int maxRMID; long nr_cpu; __u64 l3_factor; +struct env { + bool verbose; + bool local, total, cache, percpu; +} env = { + .verbose = false, + .local = false, + .cache = false, + .total = true, + .percpu = false, +}; #define FAIL (-1) -void *test(void *cookie) +void *threadFunc(void *cookie) { char array[1024*1024]; __u64 old, new; @@ -31,15 +44,55 @@ void *test(void *cookie) //printf("%d\n", rmid); set_msr_assoc(msr, msr->rmid); sleep(1); - old = read_mb_local(msr)*l3_factor; + old = extract_val(read_mb_local(msr))*l3_factor; memset(array, 'a', sizeof(array)); - new = read_mb_local(msr)*l3_factor; - printf("rmid%u, old=%llu, new=%llu, delta=%llu\n", + new = extract_val(read_mb_local(msr))*l3_factor; + printf("rmid%-4u, old=%-12llu, new=%-12llu, delta=%-12llu\n", msr->rmid, old, new, new-old); } -typedef void *(*start_routine) (void *); +int collect(struct env *env, msr_t *msr) +{ + int i; + char array[1024*1024]; + __u64 oldc, oldl, oldt, newc, newl, newt; + + oldc = oldl = oldt = newc = newl = newt = 0; + printf("msr->rmid=%d\n", msr->rmid); + sleep(1); + for (i = 0; i < nr_cpu; i++) { + set_msr_assoc(&msr[i], msr[i].rmid); + if (env->cache) + oldc += extract_val(read_l3_cache(&msr[i]))*l3_factor; + if (env->local) + oldl += extract_val(read_mb_local(&msr[i]))*l3_factor; + if (env->total) + oldt += extract_val(read_mb_total(&msr[i]))*l3_factor; + } + memset(array, 'a', sizeof(array)); + sleep(1); + + for (i = 0; i < nr_cpu; i++) { + if (env->cache) { + newc += extract_val(read_l3_cache(&msr[i]))*l3_factor; + } + if (env->local) { + newl += extract_val(read_mb_local(&msr[i]))*l3_factor; + } + if (env->total) { + newt += extract_val(read_mb_total(&msr[i]))*l3_factor; + } + } + printf("cacheOc, old=%-12llu, new=%-12llu, delta=%-12llu\n", + oldc, newc, newc-oldc); + printf("localMBM, old=%-12llu, new=%-12llu, delta=%-12llu\n", + oldl, newl, newl-oldl); + printf("totalMBM, old=%-12llu, new=%-12llu, delta=%-12llu\n", + oldt, newt, newt-oldt); +} + +typedef void *(*start_routine) (void *); int percpu_threads(start_routine thread_fun, void *cookie) { int i; @@ -68,12 +121,79 @@ int percpu_threads(start_routine thread_fun, void *cookie) pthread_join(pth[i], NULL); } -int main() +const char *argp_program_version = "cmtcat 0.1"; +const char argp_program_doc[] = +"Catch the MBM and the CAT.\n" +"\n" +"USAGE: cmtcat [--help] [-t THRESH(ms)] [-S SHM] [-f LOGFILE] [duration(s)]\n" +"\n" +"EXAMPLES:\n" +" cmtcat # run forever, and detect cmtcat more than 10ms(default)\n" +" cmtcat -l # cat memory bandwidth of local\n" +" cmtcat -t # detect cmtcat with threshold 15ms (default 10ms)\n" +" cmtcat -c # record result to a.log (default to ~sysak/cmtcat/cmtcat.log)\n"; + +static const struct argp_option opts[] = { + { "local", 'l', "NULL", 0, "Local memory bandwidth"}, + { "total", 't', "NULL", 0, "Total memory bandwidth"}, + { "cache", 'c', "NULL", 0, "L3 cache Ocu" }, + { "cache", 'p', "NULL", 0, "percpu data" }, + { "verbose", 'v', NULL, 0, "Verbose debug output" }, + { NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help" }, + {}, +}; + +static error_t parse_arg(int key, char *arg, struct argp_state *state) +{ + char *p; + int ret = errno; + static int pos_args; + + switch (key) { + case 'h': + argp_state_help(state, stderr, ARGP_HELP_STD_HELP); + break; + case 'l': + env.local = true; + break; + case 't': + env.total = true; + break; + case 'c': + env.cache = true; + break; + case 'v': + env.verbose = true; + break; + case 'p': + env.percpu = true; + break; + case ARGP_KEY_ARG: + if (pos_args++) { + fprintf(stderr, + "unrecognized positional argument: %s\n", arg); + argp_usage(state); + } + errno = 0; + break; + default: + return ARGP_ERR_UNKNOWN; + } + + return 0; +} + +int main(int argc, char *argv[]) { - int i, ret, maxRMID; long nr_fd; msr_t *msrs; int *rmid; + int i, ret, maxRMID; + static const struct argp argp = { + .options = opts, + .parser = parse_arg, + .doc = argp_program_doc, + }; if (!get_cpuid_maxleaf()) return FAIL; @@ -87,16 +207,28 @@ int main() if((ret=get_cpus(&nr_cpu)) || nr_cpu < 0) return -ret; + ret = argp_parse(&argp, argc, argv, 0, NULL, NULL); + if (ret) { + fprintf(stderr, "argp_parse fail\n"); + return ret; + } + if (maxRMID < nr_cpu) nr_cpu = maxRMID; + + l3_factor = cpuid_L3_factor(); nr_fd = init_msr(&msrs); if (!msrs || nr_fd < 0) return FAIL; - - l3_factor = cpuid_L3_factor(); - for (i = 0; i < nr_cpu; i++) - msrs[i].rmid = maxRMID-i; - percpu_threads(test, msrs); + if (env.percpu) { + for (i = 0; i < nr_cpu; i++) + msrs[i].rmid = maxRMID-i; + percpu_threads(threadFunc, msrs); + } else { + for (i = 0; i < nr_cpu; i++) + msrs[i].rmid = 8; + collect(&env, msrs); + } over: deinit_msr(msrs, nr_fd); } diff --git a/source/tools/detect/pmu/cmtcat/msr.h b/source/tools/detect/pmu/cmtcat/msr.h index 7b37b537e9c8f269621dcc216ff99e9450bcdac2..af09f8d2335b77cedddd94a697a84ae0b45b846c 100644 --- a/source/tools/detect/pmu/cmtcat/msr.h +++ b/source/tools/detect/pmu/cmtcat/msr.h @@ -95,6 +95,11 @@ __u64 get_msr_count(msr_t* m, __u64 event) return val; } +__u64 read_l3_cache(msr_t *m) +{ + return get_msr_count(m, 1); +} + __u64 read_mb_total(msr_t *m) { return get_msr_count(m, 2); diff --git a/source/tools/detect/pmu/cmtcat/public.h b/source/tools/detect/pmu/cmtcat/public.h index 34608892cd6c23542102d90888d580991f90a02a..6fe49099967379fa631dd53b4bb25b5ce7066c85 100644 --- a/source/tools/detect/pmu/cmtcat/public.h +++ b/source/tools/detect/pmu/cmtcat/public.h @@ -5,3 +5,42 @@ int get_cpus(long *nr_cpus) *nr_cpus = sysconf(_SC_NPROCESSORS_CONF); return errno; } + +static __u64 build_bit(__u32 beg, __u32 end) +{ + __u64 myll = 0; + if (end == 63) { + myll = -1; + } else { + myll = (1LL << (end + 1)) - 1; + } + myll = myll >> beg; + return myll; +} + +static __u64 extract_bits(__u64 myin, __u32 beg, __u32 end) +{ + __u64 myll = 0; + __u32 beg1, end1; + + // Let the user reverse the order of beg & end. + if (beg <= end) { + beg1 = beg; + end1 = end; + } else { + beg1 = end; + end1 = beg; + } + myll = myin >> beg1; + myll = myll & build_bit(beg1, end1); + //printf("input=0x%x output=0x%x\n", myin, myll); + return myll; +} + +__u64 extract_val(__u64 val) +{ + if (val & (3ULL << 62)) + return LLONG_MAX; + + return extract_bits(val, 0, 61); +} diff --git a/source/tools/monitor/unity/collector/plugin/mbm/Makefile b/source/tools/monitor/unity/collector/plugin/mbm/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..d113c07deefc6cc0b6f5087e8f0e36f410ccd3ec --- /dev/null +++ b/source/tools/monitor/unity/collector/plugin/mbm/Makefile @@ -0,0 +1,19 @@ +CC := gcc +CFLAG := -g -fpic +LDFLAG := -g -fpic -shared +OBJS := mbm.o +SO := libmbm.so + +all: $(SO) install + +%.o: %.c + $(CC) -c $< -o $@ $(CFLAG) + +$(SO): $(OBJS) + $(CC) -o $@ $(OBJS) $(LDFLAG) + +install: $(SO) + cp $(SO) ../../native/ + +clean: + rm -f $(SO) $(OBJS) diff --git a/source/tools/monitor/unity/collector/plugin/mbm/cpuid.h b/source/tools/monitor/unity/collector/plugin/mbm/cpuid.h new file mode 100644 index 0000000000000000000000000000000000000000..f8f3c647325434685c67a982670950f3dceb09ce --- /dev/null +++ b/source/tools/monitor/unity/collector/plugin/mbm/cpuid.h @@ -0,0 +1,102 @@ +typedef union cpuid_info { + int array[4]; + struct { + unsigned int eax, ebx, ecx, edx; + } reg; + struct { + unsigned int reserve, factor, rmid, eventid; + } info; +} cpuid_info; + +int get_cpuid_maxleaf(void) +{ + int maxleaf; + __asm__("mov $0x0, %eax\n\t"); + __asm__("cpuid\n\t"); + __asm__("mov %%eax, %0\n\t":"=r" (maxleaf)); +#ifdef DEBUG + printf ("maxleaf=0x%x\n", maxleaf); +#endif + if (maxleaf > 7) { + printf("maxleaf supported\n"); + return 1; + } + else { + printf("maxleaf NOT supported\n"); + return 0; + } +} + +int cpid_PQM_supported(void) +{ + int EBX; + __asm__("mov $0x7, %eax\n\t"); + __asm__("mov $0x0, %ecx\n\t"); + __asm__("cpuid\n\t"); + __asm__("mov %%ebx, %0\n\t":"=r" (EBX)); +#ifdef DEBUG + printf ("PQM=0x%x\n", EBX); +#endif + if (EBX & (1 << 12)) { /*EBX.PQM[bit 12]*/ + printf("PQM supported\n"); + return 1; + } + else { + printf(":PQM NOT supported\n"); + return 0; + } +} + +int cpuid_L3_type_supported(void) +{ + /* check :EDX.L3[bit1] */ + int EDX, EBX; + + __asm__("mov $0xF, %eax\n\t"); + __asm__("mov $0x0, %ecx\n\t"); + __asm__("cpuid\n\t"); + __asm__("mov %%ebx, %0\n\t":"=r" (EBX)); + __asm__("mov %%edx, %0\n\t":"=r" (EDX)); +#ifdef DEBUG + printf ("type=0x%x, rmid=%lu\n", EDX, EBX); +#endif + if (EDX & (1 << 1)) { + printf("L3_type supported\n"); + return 1; + } else { + printf("L3_type NOT supported\n"); + return 0; + } +} + +void cpuid(const unsigned leaf, const unsigned subleaf, cpuid_info* info) +{ + __asm__ __volatile__("cpuid" + :"=a"(info->reg.eax), + "=b"(info->reg.ebx), + "=c"(info->reg.ecx), + "=d"(info->reg.edx) + :"a"(leaf), "c"(subleaf)); +} + +void cpuid_Factor_RMID_eventID(cpuid_info *cpuid_i) +{ + /* + * Factor: CPUID.(EAX=0FH, ECX=1H).EBX + * RMID: CPUID.(EAX=0FH, ECX=1H).ECX + * EVENTID:CPUID.(EAX=0FH, ECX=1H).EDX + **/ + cpuid(0xF, 0x1, cpuid_i); +#ifdef DEBUG + printf("eventid=%lld\n", cpuid_i->info.eventid); +#endif +} + +bool check_cpuid_support(void) +{ + if (!get_cpuid_maxleaf()) + return false; + if (!cpid_PQM_supported()) + return false; + return cpuid_L3_type_supported(); +} diff --git a/source/tools/monitor/unity/collector/plugin/mbm/mbm.c b/source/tools/monitor/unity/collector/plugin/mbm/mbm.c new file mode 100644 index 0000000000000000000000000000000000000000..82acc4034a24d6e0eca2a223082b98ff960bb70b --- /dev/null +++ b/source/tools/monitor/unity/collector/plugin/mbm/mbm.c @@ -0,0 +1,156 @@ +#include +/*#define DEBUG 1*/ +#include "mbm.h" +#include "cpuid.h" +#include "msr.h" +#include "public.h" + +msr_t *msrs; +long nr_cpus; +cpuid_info cpuid_i; +__u64 summary[NR_EVENTS]; +static int init_fail; +char *events_str[] = {"L3Occupancy", "MbmTotal", "MbmLocal"}; + +static bool is_resctrl_mounted(void) +{ + struct stat st; + + if (stat("/sys/fs/resctrl/mon_groups", &st) < 0) { + return false; + } + return true; +} + +static void discovery_resctrl_mon(void) +{ +} + +int init(void * arg) +{ + bool resctrl = false; + int i, ret, cgroup_fd; + + ret = get_cpus(&nr_cpus); + if (ret) { + printf("WARN: pmu_events install FAIL sysconf\n"); + init_fail = ret; + return 0; + } + + resctrl = is_resctrl_mounted(); + /*if (resctrl)*/ + if (0) { + discovery_resctrl_mon(); + goto end; + } + if (check_cpuid_support()) { + cpuid_Factor_RMID_eventID(&cpuid_i); + ret = init_msr(&msrs, cpuid_i.info.rmid-1); + if (ret < 0) { + printf("init_msr failed\n"); + return 0; + } + } else { + printf(" not supported\n"); + return 1; + } +end: + printf("pmu_events plugin install.\n"); + init_fail = 0; + return 0; +} + +int collect(msr_t *msr, cpuid_info *cpu, __u64 *sum) +{ + int i; + for (i = 0; i < nr_cpus; i++) { + if (msr[i].fd <= 0) + continue; + else + break; + } + if (cpu->info.eventid & 1<info.factor; + if (cpu->info.eventid & 1<info.factor; + if (cpu->info.eventid & 1<info.factor; +} + +int fill_line(struct unity_line *line, __u64 *summ, char *mode, char *index) +{ + int i; + + unity_set_index(line, 0, mode, index); + for (i = 0; i < NR_EVENTS; i++) + unity_set_value(line, i, events_str[i], summ[i]); +} + +int call(int t, struct unity_lines* lines) +{ + int i; + __u64 sum[MAX_EVENT]; + struct unity_line* line; + + if (init_fail) { + return init_fail; + } + + collect(msrs, &cpuid_i, sum); + + unity_alloc_lines(lines, 1); + line = unity_get_line(lines, 0); + unity_set_table(line, "mbm"); + fill_line(line, sum, "mode", "msr"); + + return 0; +} + +void deinit(void) +{ + deinit_msr(msrs, nr_cpus); + printf("pmu_events plugin uninstall\n"); +} + +#ifdef DEBUG +/* for dev/selftest */ +int call_debug(__u64 *sum) +{ + int i; + + collect(msrs, &cpuid_i, sum); +} + +char datas[1024*1024*1024]; +int main(int argc, char *argv[]) +{ + int ret, i = 8; + __u64 sum[MAX_EVENT], tmp[MAX_EVENT]; + + ret = init(NULL); + if (ret) + return ret; + printf("eventid=%llu, factor=%llu\n", + cpuid_i.info.eventid, + cpuid_i.info.factor); + memset(datas, 'a', sizeof(datas)); + call_debug(tmp); + while(i > 0) { + sleep(1); + memset(sum, 0, sizeof(sum)); + memset(datas, 'b', sizeof(datas)); + call_debug(sum); + printf("l3Oc=%lluKB, totalMB=%lluKB, localMB=%lluKB\n", + sum[0] +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "../plugin_head.h" +#define NR_EVENTS 3 + +int init(void * arg); +int call(int t, struct unity_lines* lines); +void deinit(void); + +#endif //UNITY_PMU_EVENT_H diff --git a/source/tools/monitor/unity/collector/plugin/mbm/msr.h b/source/tools/monitor/unity/collector/plugin/mbm/msr.h new file mode 100644 index 0000000000000000000000000000000000000000..34fe48f184604cfe65bc5fb94ceda0653e468934 --- /dev/null +++ b/source/tools/monitor/unity/collector/plugin/mbm/msr.h @@ -0,0 +1,130 @@ +#define IA32_PQR_ASSOC (0xc8f) +#define IA32_QM_EVTSEL (0xc8d) +#define IA32_QM_CTR (0xc8e) +#define BUF_SIZE 1024 + +extern long nr_cpus; +typedef struct msr { + int fd; + __u32 rmid; + long cpuid; +} msr_t; + +__u64 set_msr_assoc(msr_t* m, int rmid); + +int open_msr(int cpu_id) +{ + char path[BUF_SIZE]; + + snprintf(path, BUF_SIZE, "/dev/cpu/%d/msr", cpu_id); + int fd = open(path, O_RDWR); + if (fd < 0) { + fprintf(stderr, "Failed open %s.\n", path); + } + return fd; +} + +/* + *return value: <0 for fail, or the number of msr + **/ +int init_msr(msr_t **msrs, int init_rmid) +{ + int i, err, suce; + msr_t *p; + + p = calloc(nr_cpus, sizeof(msr_t)); + if (!p) + return -errno; + + suce = -1; + for (i = 0; i < nr_cpus; i++) { + p[i].cpuid = i; + p[i].fd = open_msr(i); + if (p[i].fd > 0) { + p[i].rmid = init_rmid; + set_msr_assoc(&p[i], p[i].rmid); + suce++; + } + } + *msrs = p; + if (suce >= 0) + return nr_cpus; + else + return -1; +} + +int deinit_msr(msr_t *msrs, int nr) +{ + int i; + + for (i = 0; i < nr; i++) { + if (msrs[i].fd > 0) + close(msrs[i].fd); + } + return 0; +} + +__u64 set_msr_assoc(msr_t* m, int rmid) +{ + __u64 msr_pqr_assoc = 0, msr_qm_evtsel = 0; + __u64 val; + + pread(m->fd, &msr_pqr_assoc, sizeof(msr_pqr_assoc), IA32_PQR_ASSOC); + msr_pqr_assoc &= 0xffffffff00000000ULL; + msr_pqr_assoc |= (__u64)(rmid & ((1ULL<<10)-1ULL)); + pwrite(m->fd, &msr_pqr_assoc, sizeof(msr_pqr_assoc), IA32_PQR_ASSOC); + + msr_qm_evtsel = (__u64)(rmid & ((1ULL<<10)-1ULL)); + msr_qm_evtsel <<= 32; + pwrite(m->fd, &msr_qm_evtsel, sizeof(msr_qm_evtsel), IA32_QM_EVTSEL); +} + +/* + * event= + * 1 : L3 Oc + * 2 : L3 Total External Bandwidth + * 3 : L3 Local External Bandwidth + **/ +__u64 get_msr_count(msr_t* m, __u64 event) +{ + __u64 msr_qm_evtsel = 0, value = 0; + __u64 val; + + pread(m->fd, &msr_qm_evtsel, sizeof(msr_qm_evtsel), IA32_QM_EVTSEL); + msr_qm_evtsel &= 0xfffffffffffffff0ULL; + msr_qm_evtsel |= event & ((1ULL << 8) - 1ULL); + + pwrite(m->fd, &msr_qm_evtsel, sizeof(msr_qm_evtsel), IA32_QM_EVTSEL); + pread(m->fd, &val, sizeof(val), IA32_QM_CTR); + return val; +} + +__u64 read_l3_cache(msr_t *m) +{ + __u64 tmp; + tmp = get_msr_count(m, 1); +#ifdef DEBUG + printf("DEBUG:l3oc=%llu\n", tmp); +#endif + return tmp; +} + +__u64 read_mb_total(msr_t *m) +{ + __u64 tmp; + tmp = get_msr_count(m, 2); +#ifdef DEBUG + printf("DEBUG:mbTotal=%llu\n", tmp); +#endif + return tmp; +} + +__u64 read_mb_local(msr_t *m) +{ + __u64 tmp; + tmp = get_msr_count(m, 3); +#ifdef DEBUG + printf("DEBUG:mbLocal=%llu\n", tmp); +#endif + return tmp; +} diff --git a/source/tools/monitor/unity/collector/plugin/mbm/public.h b/source/tools/monitor/unity/collector/plugin/mbm/public.h new file mode 100644 index 0000000000000000000000000000000000000000..66bb10a0b825f09a599a62f73a17e1e16ace328e --- /dev/null +++ b/source/tools/monitor/unity/collector/plugin/mbm/public.h @@ -0,0 +1,49 @@ +enum { + L3CHE, + MBTOL, + MBLOC, + MAX_EVENT, +}; + +int get_cpus(long *nr_cpus) +{ + errno = 0; + *nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + return errno; +} + +static __u64 build_bit(__u32 beg, __u32 end) +{ + __u64 myll = 0; + if (end == 63) { + myll = -1; + } else { + myll = (1LL << (end + 1)) - 1; + } + myll = myll >> beg; + return myll; +} + +static __u64 extract_bits(__u64 myin, __u32 beg, __u32 end) +{ + __u64 myll = 0; + __u32 beg1, end1; + if (beg <= end) { + beg1 = beg; + end1 = end; + } else { + beg1 = end; + end1 = beg; + } + myll = myin >> beg1; + myll = myll & build_bit(beg1, end1); + return myll; +} + +__u64 extract_val(__u64 val) +{ + if (val & (3ULL << 62)) + return LLONG_MAX; + + return extract_bits(val, 0, 61); +}