diff --git a/config-host.mak b/config-host.mak new file mode 100644 index 0000000000000000000000000000000000000000..a0dede6b7d052cfdfb4ade4871d0493f4560d82b --- /dev/null +++ b/config-host.mak @@ -0,0 +1,6 @@ +# Automatically generated by configure - do not modify +KERNEL_VERSION = 4.19.91-27.4.2.kos5.x86_64 +OBJPATH = /opt/sysak/out +BUILD_KERNEL_MODULE = YES +BUILD_LIBBPF = NO +TARGET_LIST = /opt/sysak/source/tools/detect/generic/cpuirq /opt/sysak/source/tools/detect/sched/sysmonitor /opt/sysak/source/tools/detect/sched/cpu_flamegraph /opt/sysak/source/tools/detect/mem/memleak diff --git a/source/lib/internal/kernel_module/LICENSE b/source/lib/internal/kernel_module/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..89e08fb002e48e22363b7b3789a5470ffe71fea1 --- /dev/null +++ b/source/lib/internal/kernel_module/LICENSE @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. 
To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. 
But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. 
These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. 
If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. 
+ +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. diff --git a/source/lib/internal/kernel_module/Makefile b/source/lib/internal/kernel_module/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..1b0ffc1b376e5339d6a63abe37cd0d64390f90b8 --- /dev/null +++ b/source/lib/internal/kernel_module/Makefile @@ -0,0 +1,72 @@ +ifeq ($(KERNEL_VERSION),) +KERNEL_VERSION = $(shell uname -r) +endif + +KERNEL_BUILD_PATH := /usr/src/kernels/$(KERNEL_VERSION) + +ifneq ($(KERNEL_BUILD_PATH), $(wildcard $(KERNEL_BUILD_PATH))) +KERNEL_BUILD_PATH := /lib/modules/$(KERNEL_VERSION)/build +endif + + +ifneq ($(KERNELRELEASE),) +#common +sysak-objs += ./common/chrdev.o ./common/event.o ./common/hook.o ./common/stack.o ./common/proc.o +sysak-objs += ./common/blackbox.o +sysak-objs += ./common/ksymbol.o +sysak-objs += ./entry.o sysak_mods.o + +#modules +#sysak-objs += modules/test_module/test.o +ifneq ($(findstring tracesig,$(TARGET_LIST)),) +sysak-objs += modules/signal/trace_sig.o +endif +ifneq ($(findstring memleak,$(TARGET_LIST)),) +sysak-objs += modules/memleak/memleak.o +sysak-objs += modules/memleak/objects.o +sysak-objs += modules/memleak/hashlist.o +endif +ifneq ($(findstring runlatency,$(TARGET_LIST)),) +sysak-objs += modules/sched/noschedule.o modules/sched/trace_irqoff.o modules/sched/trace_runqlat.o +endif +ifneq ($(findstring taskctl,$(TARGET_LIST)),) +sysak-objs += modules/task_ctl/task_ctrl.o +endif +ifneq ($(findstring schedtrace,$(TARGET_LIST)),) +sysak-objs += modules/schedtrace/schedtrace.o +endif +ifneq ($(findstring mmaptrace,$(TARGET_LIST)),) +sysak-objs += modules/mmaptrace/mmaptrace.o +endif +ifneq ($(findstring ulockcheck,$(TARGET_LIST)),) +sysak-objs += modules/ulockcheck/ulockcheck.o +endif +ifneq ($(findstring iosdiag,$(TARGET_LIST)),) +sysak-objs += modules/iosdiag/iosdiag.o modules/iosdiag/rq_hang.o modules/iosdiag/virtio_blk.o modules/iosdiag/nvme.o modules/iosdiag/scsi.o +endif +ifneq ($(findstring memhunter,$(TARGET_LIST)),) +sysak-objs += modules/memhunter/memhunter.o +sysak-objs += modules/memhunter/common.o +sysak-objs += modules/memhunter/memcg.o +sysak-objs += modules/memhunter/memcg_dia.o +sysak-objs += modules/memhunter/filecache.o +endif + 
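+# Illustrative sketch (comments only, nothing here is built): a hypothetical
+# new module named "foo" would be wired in the same way as the entries above,
+# guarded by a findstring test against TARGET_LIST:
+#   ifneq ($(findstring foo,$(TARGET_LIST)),)
+#   sysak-objs += modules/foo/foo.o
+#   endif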
+obj-m += sysak.o + + +EXTRA_CFLAGS := -I$(MODULE_SRC) +EXTRA_CFLAGS += -I$(MODULE_SRC)/include +ifneq ($(findstring iosdiag,$(TARGET_LIST)),) +EXTRA_CFLAGS += -I$(MODULE_SRC)/modules/iosdiag -I$(MODULE_SRC)/modules/iosdiag/include/$(KERNEL_VERSION) +endif + +else + +export MODULE_SRC=$(shell pwd) +sysak_mod: + make -C $(KERNEL_BUILD_PATH) M=$(MODULE_SRC) + +clean: + make -C $(KERNEL_BUILD_PATH) M=$(MODULE_SRC) clean +endif
diff --git a/source/lib/internal/kernel_module/README.md b/source/lib/internal/kernel_module/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7b935f7b9162dd8653a2c657a40d0c761d163806 --- /dev/null +++ b/source/lib/internal/kernel_module/README.md @@ -0,0 +1,11 @@ +# sysak-module + +#### Introduction +For diagnosing problems on some older kernel versions, or for implementing some enhanced kernel features, sysak needs a kernel module to provide the basic framework. + +#### Adding new features +1) Kernel module source goes under the modules/ directory; a new feature can use its own subdirectory or a single file, e.g. test/test_module.c +2) Register the new module by adding its name, init function and exit function to the sysak_modules array in sysak_mods.c +3) Add the new module's implementation files to the module Makefile, e.g. for the test/test_module.c added above: + vi Makefile + sysak-objs += test/test_module.o
diff --git a/source/lib/internal/kernel_module/common/blackbox.c b/source/lib/internal/kernel_module/common/blackbox.c new file mode 100644 index 0000000000000000000000000000000000000000..52f0fdc6f99406b2a166e53530926f221e6c1f9f --- /dev/null +++ b/source/lib/internal/kernel_module/common/blackbox.c @@ -0,0 +1,749 @@ +/* + * Copyright (C) 2018 Alibaba Group + * All rights reserved. + * Written by Wetp Zhang + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "proc.h" +#include "blackbox.h" +#include "internal.h" + +#define DEFAULT_BBOX_SIZE 0x2000000 +static unsigned int bbox_total_size = DEFAULT_BBOX_SIZE; +static DEFINE_SPINLOCK(bbox_alloc_lock); +static void *bbox_vmalloc_base; +static unsigned long *bbox_map; +static unsigned int bbox_max_id; +static unsigned int bbox_latest_id; + +static unsigned long *bbox_dyn_map; +static unsigned int bbox_dynamic_max; +static unsigned long bbox_dynamic_start; +RADIX_TREE(bbox_dynamic_tree, GFP_NOWAIT); + +static inline unsigned int bbox_id_to_dyn_idx(unsigned int bbox_id) +{ + return bbox_id - bbox_max_id - 1; +} + +static inline unsigned int dyn_idx_to_bbox_id(unsigned int idx) +{ + return idx + bbox_max_id + 1; +} + +static inline struct bbox_info *get_bbox(unsigned int bbox_id) +{ + unsigned idx = bbox_id; + + if (!bbox_vmalloc_base) + return NULL; + + if (idx < bbox_max_id) + return bbox_vmalloc_base + (idx * BBOX_SIZE); + + idx = bbox_id_to_dyn_idx(bbox_id); + if (idx >= bbox_dynamic_max || !bbox_dyn_map) + return NULL; + + return radix_tree_lookup(&bbox_dynamic_tree, idx); +} + +static inline int bbox_type(struct bbox_info *bbox) +{ + return bbox->flags & BBOX_TYPE_MASK; +} + +static inline void bbox_lock(struct bbox_info *bbox, + unsigned long *flags) +{ + spin_lock_irqsave(&bbox->lock, *flags); +} + +static inline void bbox_unlock(struct bbox_info *bbox, + unsigned long flags) +{ + spin_unlock_irqrestore(&bbox->lock, flags); +} + +static inline void *bbox_record_top(struct bbox_info *bbox) +{ + if (bbox->records.cnt) + return bbox->records.arr[bbox->records.cnt - 1].start; + else + return bbox->data_end; +} + +static inline int avail_size(struct bbox_info 
*bbox) +{ + if (bbox_type(bbox) == BBOX_TYPE_RING) + return bbox->data_end - bbox->ringbuf.write_ptr; + else + return bbox_record_top(bbox) - bbox->data_base; +} + +static ssize_t bbox_ring_write(struct bbox_info *bbox, + struct bbox_data_info *data_info) +{ + int size = data_info->size; + int tail_size = avail_size(bbox); + int bbox_size = bbox->data_end - bbox->data_base; + + if (likely(size <= tail_size)) { + memcpy(bbox->ringbuf.write_ptr, data_info->data, size); + bbox->ringbuf.write_ptr += size; + } else { + if (size > bbox_size) + size = bbox_size; + + if (tail_size > 0) + memcpy(bbox->ringbuf.write_ptr, + data_info->data, tail_size); + memcpy(bbox->data_base, + data_info->data + tail_size, size - tail_size); + bbox->ringbuf.write_ptr = bbox->data_base + (size - tail_size); + } + + return size; +} + +static ssize_t bbox_record_write(struct bbox_info *bbox, + struct bbox_data_info *data_info) +{ + struct record_info *r_info; + unsigned int size = data_info->size; + unsigned int slot = data_info->slot; + + if (slot >= bbox->records.cnt) + return -EINVAL; + + r_info = &bbox->records.arr[slot]; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 17, 0) + ktime_get_real_ts64(&r_info->mtime); +#elif LINUX_VERSION_CODE >= KERNEL_VERSION(5, 10, 0) + ktime_get_ts64(&r_info->mtime); +#else + getnstimeofday64(&r_info->mtime); +#endif + size = min(size, r_info->size); + memcpy(r_info->start, data_info->data, size); + if (virt_addr_valid(data_info->task)) { + strncpy(r_info->tsk_comm, data_info->task->comm, TASK_COMM_LEN); + r_info->cpu = task_cpu(data_info->task); + r_info->pid = data_info->task->pid; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 17, 0) + r_info->state = data_info->task->__state; +#elif LINUX_VERSION_CODE == KERNEL_VERSION(4, 18, 0) + r_info->state = data_info->task->__state; +#else + r_info->state = data_info->task->state; +#endif + } + return size; +} + +ssize_t bbox_write(unsigned int bbox_id, struct bbox_data_info *data_info) +{ + struct bbox_info *bbox; + unsigned long flags; + int ret = -EINVAL; + + if (!data_info || !data_info->data) + return ret; + + bbox = get_bbox(bbox_id); + if (!bbox) + return ret; + + bbox_lock(bbox, &flags); + + if (bbox_type(bbox) == BBOX_TYPE_RING) + ret = bbox_ring_write(bbox, data_info); + else + ret = bbox_record_write(bbox, data_info); + + bbox_unlock(bbox, flags); + return ret; +} + +static ssize_t bbox_ring_read(struct bbox_info *bbox, + struct bbox_data_info *data_info) +{ + unsigned int count = 0, avl_sz, size = data_info->size; + void *read_end = READ_ONCE(bbox->ringbuf.write_ptr); + + if (bbox->ringbuf.read_ptr > read_end) { + avl_sz = bbox->data_end - bbox->ringbuf.read_ptr; + count = min(size, avl_sz); + memcpy(data_info->data, bbox->ringbuf.read_ptr, count); + size -= count; + bbox->ringbuf.read_ptr += count; + if (bbox->ringbuf.read_ptr >= bbox->data_end) + bbox->ringbuf.read_ptr = bbox->data_base; + } + + if (!size) + return count; + + avl_sz = read_end - bbox->ringbuf.read_ptr; + size = min(avl_sz, size); + if (size) { + memcpy(data_info->data + count, bbox->ringbuf.read_ptr, size); + bbox->ringbuf.read_ptr += size; + } + + count += size; + return count; +} + +static ssize_t bbox_record_read(struct bbox_info *bbox, + struct bbox_data_info *data_info) +{ + struct record_info *r_info; + unsigned long flags; + unsigned int slot = data_info->slot; + unsigned int size = data_info->size; + + bbox_lock(bbox, &flags); + + if (slot >= bbox->records.cnt) { + bbox_unlock(bbox, flags); + return -EINVAL; + } + + r_info = &bbox->records.arr[slot]; + 
size = min(size, r_info->size); + memcpy(data_info->data, r_info->start, size); + memcpy(&data_info->mtime, &r_info->mtime, + sizeof(struct timespec64)); + bbox_unlock(bbox, flags); + return size; +} + +ssize_t bbox_read(unsigned int bbox_id, struct bbox_data_info *data_info) +{ + struct bbox_info *bbox; + int ret = -EINVAL; + + if (!data_info || !data_info->data || data_info->size <= 0) + return ret; + + bbox = get_bbox(bbox_id); + if (!bbox) + return ret; + + if (bbox_type(bbox) == BBOX_TYPE_RING) + ret = bbox_ring_read(bbox, data_info); + else + ret = bbox_record_read(bbox, data_info); + + return ret; +} + +void +bbox_set_record_desc(unsigned int bbox_id, unsigned int slot, const char *desc) +{ + struct bbox_info *bbox; + struct record_info *r_info; + unsigned long flags; + + bbox = get_bbox(bbox_id); + if (!bbox) + return; + + if (bbox_type(bbox) != BBOX_TYPE_RECORD) + return; + + bbox_lock(bbox, &flags); + if (slot < bbox->records.cnt) { + r_info = &bbox->records.arr[slot]; + r_info->desc[BBOX_RECORD_DESC_LEN - 1] = 0; + if (desc) + strncpy(r_info->desc, desc, BBOX_RECORD_DESC_LEN - 1); + else + strcpy(r_info->desc, " "); + } + bbox_unlock(bbox, flags); +} + +int bbox_alloc_record_slot(unsigned int bbox_id, unsigned int size, + unsigned int type) +{ + struct bbox_info *bbox; + struct record_info *r_info; + unsigned long flags; + int slot = -EINVAL; + + bbox = get_bbox(bbox_id); + if (!bbox) + return slot; + + if (bbox_type(bbox) != BBOX_TYPE_RECORD) + return slot; + + bbox_lock(bbox, &flags); + + slot = -ENOSPC; + if (avail_size(bbox) < (size + sizeof(struct record_info))) + goto out; + + slot = bbox->records.cnt; + r_info = &bbox->records.arr[slot]; + r_info->start = bbox_record_top(bbox) - size; + r_info->size = size; + r_info->type = type; + r_info->mtime.tv_sec = 0; + r_info->mtime.tv_nsec = 0; + r_info->cpu = -1; + r_info->pid = -1; + r_info->state = -1; + r_info->tsk_comm[0] = '\0'; + r_info->desc[0] = 0; + + bbox->data_base += sizeof(struct record_info); + bbox->records.cnt++; +out: + bbox_unlock(bbox, flags); + return slot; +} + +static inline void bbox_record_clear_one(struct bbox_info *bbox, + unsigned int slot) +{ + struct record_info *r_info; + + if (slot >= bbox->records.cnt) + return; + + r_info = &bbox->records.arr[slot]; + r_info->mtime.tv_sec = 0; + r_info->mtime.tv_nsec = 0; +} + +static void bbox_record_clear_all(struct bbox_info *bbox) +{ + int i; + + for (i = 0; i < bbox->records.cnt; i++) + bbox_record_clear_one(bbox, i); +} + +void bbox_record_clear(unsigned int bbox_id, int slot_id) +{ + unsigned long flags; + struct bbox_info *bbox = get_bbox(bbox_id); + + if (!bbox) + return; + + bbox_lock(bbox, &flags); + if (slot_id < 0) + bbox_record_clear_all(bbox); + else + bbox_record_clear_one(bbox, slot_id); + bbox_unlock(bbox, flags); +} + +static void bbox_setup(struct bbox_info *bbox, + const char *name, int flags, int size) +{ + bbox->magic = BBOX_BUFF_MAGIC; + bbox->name[BBOX_NAME_LEN - 1] = '\0'; + if (name) + strncpy(bbox->name, name, BBOX_NAME_LEN - 1); + else + strncpy(bbox->name, "bbox", BBOX_NAME_LEN - 1); + + /* set flags first, then bbox_type() below can work */ + bbox->flags = flags; + + if (bbox_type(bbox) == BBOX_TYPE_RING) { + bbox->data_base = bbox + 1; + bbox->ringbuf.write_ptr = bbox->data_base; + bbox->ringbuf.read_ptr = bbox->data_base; + } else { + bbox->records.cnt = 0; + bbox->data_base = bbox->records.arr; + } + + bbox->data_end = (void *)bbox + size; + spin_lock_init(&bbox->lock); +} + +int bbox_alloc(const char *name, int flags) +{ + struct 
bbox_info *bbox; + unsigned int bbox_id; + + spin_lock(&bbox_alloc_lock); + + bbox_id = find_next_zero_bit(bbox_map, bbox_max_id, bbox_latest_id); + if (bbox_id >= bbox_max_id) + bbox_id = find_first_zero_bit(bbox_map, bbox_max_id); + + if (bbox_id >= bbox_max_id) { + spin_unlock(&bbox_alloc_lock); + return -ENOSPC; + } + + set_bit(bbox_id, bbox_map); + bbox_latest_id = bbox_id; + spin_unlock(&bbox_alloc_lock); + + bbox = get_bbox(bbox_id); + if (!bbox) { + /* should never be here */ + WARN_ONCE(true, "bbox_buffer was NULL, id %d\n", bbox_id); + return -EFAULT; + } + + bbox_setup(bbox, name, flags, BBOX_SIZE); + return bbox_id; +} + +void bbox_update_name(unsigned int bbox_id, const char *name) +{ + struct bbox_info *bbox = get_bbox(bbox_id); + unsigned long flags; + + if (!bbox || !name) + return; + + bbox_lock(bbox, &flags); + memset(bbox->name, 0, BBOX_NAME_LEN); + strncpy(bbox->name, name, BBOX_NAME_LEN - 1); + bbox_unlock(bbox, flags); +} + +void bbox_free(unsigned int bbox_id) +{ + if (bbox_id < bbox_max_id) + clear_bit(bbox_id, bbox_map); + else { + unsigned int idx = bbox_id_to_dyn_idx(bbox_id); + struct bbox_info *bbox; + + if (!bbox_dyn_map || idx >= bbox_dynamic_max) + return; + + spin_lock(&bbox_alloc_lock); + bbox = get_bbox(bbox_id); + if (!bbox) { + spin_unlock(&bbox_alloc_lock); + return; + } + + clear_bit(idx, bbox_dyn_map); + radix_tree_delete(&bbox_dynamic_tree, idx); + spin_unlock(&bbox_alloc_lock); + vfree(bbox); + } +} + +int bbox_alloc_dynamic(const char *name, int flags, unsigned int pages) +{ + int idx, ret; + struct bbox_info *bbox; + unsigned int size = pages << PAGE_SHIFT; + + bbox = vmalloc(size); + if (!bbox) + return -ENOMEM; + + spin_lock(&bbox_alloc_lock); + idx = find_next_zero_bit(bbox_dyn_map, bbox_dynamic_max, + bbox_dynamic_start); + if (idx >= bbox_dynamic_max) + idx = find_first_zero_bit(bbox_dyn_map, bbox_dynamic_max); + if (idx >= bbox_dynamic_max) { + spin_unlock(&bbox_alloc_lock); + vfree(bbox); + return -ENOSPC; + } + + ret = radix_tree_insert(&bbox_dynamic_tree, idx, bbox); + if (ret) { + spin_unlock(&bbox_alloc_lock); + vfree(bbox); + return ret; + } + + set_bit(idx, bbox_dyn_map); + bbox_dynamic_start = idx; + spin_unlock(&bbox_alloc_lock); + + bbox_setup(bbox, name, flags, size); + return dyn_idx_to_bbox_id(idx); +} + +static void free_dynamic_bbox(void) +{ + int idx = 0; + + if (!bbox_dyn_map) + return; + + idx = find_first_bit(bbox_dyn_map, bbox_dynamic_max); + while (idx < bbox_dynamic_max) { + bbox_free(dyn_idx_to_bbox_id(idx)); + idx = find_next_bit(bbox_dyn_map, bbox_dynamic_max, idx); + } +} + +/* just think it stores raw strings. 
*/ +static int bbox_ring_show_content(struct seq_file *seq, struct bbox_info *bbox) +{ + char buf[128]; + int ret, i; + struct bbox_data_info data; + + data.data = buf; + data.size = 128; + + while (1) { + ret = bbox_ring_read(bbox, &data); + if (ret <= 0) + break; + + for (i = 0; i < ret; i++) + seq_printf(seq, "%c", buf[i]); + } + return 0; +} + +int bbox_ring_show(struct seq_file *seq, unsigned int bbox_id) +{ + struct bbox_info *bbox = get_bbox(bbox_id); + + if (!seq || !bbox) + return -EINVAL; + + return bbox_ring_show_content(seq, bbox); +} + +static void bbox_show_time(struct seq_file *seq, struct timespec64 *ts) +{ + struct rtc_time tm; + unsigned long local_time; + + local_time = (unsigned long)(ts->tv_sec - (sys_tz.tz_minuteswest * 60)); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 10, 0) + rtc_time64_to_tm(local_time, &tm); +#else + rtc_time_to_tm(local_time, &tm); + +#endif + seq_printf(seq, "\n[%04d-%02d-%02d %02d:%02d:%02d.%ld]\n", + tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, + tm.tm_hour, tm.tm_min, tm.tm_sec, ts->tv_nsec); +} + +static int bbox_record_show_one(struct seq_file *seq, + struct bbox_info *bbox, unsigned int slot) +{ + struct record_info *r_info; + struct bbox_data_info data; + void *buf; + int ret; + + if (slot >= bbox->records.cnt) + return -EINVAL; + + r_info = &bbox->records.arr[slot]; + /*no data had been written, ignore*/ + if (!r_info->mtime.tv_sec) + return 0; + + buf = kmalloc(r_info->size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + data.data = buf; + data.slot = slot; + data.size = r_info->size; + ret = bbox_record_read(bbox, &data); + if (ret <= 0) { + kfree(buf); + return 0; + } + + bbox_show_time(seq, &data.mtime); + + switch (r_info->type) { + case BBOX_DATA_TYPE_STRING: + seq_printf(seq, "%s\n", (char *)buf); + break; + case BBOX_DATA_TYPE_TRACE: + seq_printf(seq, + "CPU: %d PID: %d state %d comm: %s %s Call Trace:\n", + r_info->cpu, r_info->pid, r_info->state, + r_info->tsk_comm, r_info->desc); + while (ret > 0) { + void *ptr = *(void **)buf; + + if (ptr) + seq_printf(seq, "%pS\n", ptr); + buf += sizeof(void *); + ret -= sizeof(void *); + } + break; + case BBOX_DATA_TYPE_DATA: + seq_printf(seq, "%d bytes data:\n", ret); + while (ret > 0) { + seq_printf(seq, "%lx\n", *(unsigned long *)buf); + buf += sizeof(long); + ret -= sizeof(long); + } + break; + default: + break; + } + + kfree(data.data); + return 0; +} + +static int bbox_record_show_all(struct seq_file *seq, struct bbox_info *bbox) +{ + int i; + + seq_printf(seq, "[%s] capacity: %d\n", bbox->name, bbox->records.cnt); + + for (i = 0; i < bbox->records.cnt; i++) { + bbox_record_show_one(seq, bbox, i); + cond_resched(); + } + + return 0; +} + +int bbox_record_show(struct seq_file *seq, unsigned int bbox_id, int slot_id) +{ + struct bbox_info *bbox = get_bbox(bbox_id); + + if (!seq || !bbox) + return -EINVAL; + + if (slot_id < 0) + return bbox_record_show_all(seq, bbox); + else + return bbox_record_show_one(seq, bbox, slot_id); +} + +static int bbox_seq_show(struct seq_file *seq, void *v) +{ + struct bbox_info *bbox = v; + + seq_printf(seq, "Bbox %s:\n", bbox->name); + + if (bbox_type(bbox) == BBOX_TYPE_RING) + bbox_ring_show_content(seq, bbox); + else + bbox_record_show_all(seq, bbox); + + seq_puts(seq, "\n"); + return 0; +} + +static void *bbox_seq_start(struct seq_file *seq, loff_t *pos) +{ + *pos = find_next_bit(bbox_map, bbox_max_id, *pos); + return get_bbox(*pos); +} + +static void *bbox_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + *pos = find_next_bit(bbox_map, 
bbox_max_id, *pos + 1); + return get_bbox(*pos); +} + +static void bbox_seq_stop(struct seq_file *seq, void *v) +{ +} + +static const struct seq_operations bbox_seq_ops = { + .start = bbox_seq_start, + .next = bbox_seq_next, + .stop = bbox_seq_stop, + .show = bbox_seq_show, +}; + +static int bbox_seq_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &bbox_seq_ops); +} + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 10, 0) +static const struct proc_ops proc_bbox_operations = { + .proc_open = bbox_seq_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_release = seq_release, +}; +#else +const struct file_operations proc_bbox_operations = { + .open = bbox_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; +#endif + +int sysak_bbox_init(void) +{ + void *addr; + unsigned int nlongs; + + bbox_max_id = bbox_total_size / BBOX_SIZE; + bbox_total_size = bbox_max_id * BBOX_SIZE; + if (!bbox_total_size) + return -EINVAL; + + nlongs = BITS_TO_LONGS(bbox_max_id); + bbox_map = kzalloc(sizeof(long) * nlongs, GFP_KERNEL); + if (!bbox_map) + return -ENOMEM; + + addr = vmalloc(bbox_total_size); + if (!addr) { + kfree(bbox_map); + return -ENOMEM; + } + + bbox_vmalloc_base = addr; + + bbox_dynamic_max = bbox_max_id * 100; + nlongs = BITS_TO_LONGS(bbox_dynamic_max); + bbox_dyn_map = kzalloc(sizeof(long) * nlongs, GFP_KERNEL); + if (!bbox_dyn_map) + printk(KERN_INFO "dynamic bbox is disabled\n"); + + sysak_proc_create("bbox", &proc_bbox_operations); + printk(KERN_INFO "pre-alloc %dB for blackbox\n", bbox_total_size); + return 0; +} + +void sysak_bbox_exit(void) +{ + free_dynamic_bbox(); + if (bbox_dyn_map) + kfree(bbox_dyn_map); + vfree(bbox_vmalloc_base); +} diff --git a/source/lib/internal/kernel_module/common/chrdev.c b/source/lib/internal/kernel_module/common/chrdev.c new file mode 100644 index 0000000000000000000000000000000000000000..78c431df129e7969dc42ca0be142b06d72d223f2 --- /dev/null +++ b/source/lib/internal/kernel_module/common/chrdev.c @@ -0,0 +1,154 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "common.h" + +static DEFINE_MUTEX(dev_mutex); +static int sysak_dev_major = -1; +static struct class *sysak_dev_class = NULL; +static struct device *sysak_dev = NULL; + +struct sysak_dev { + struct cdev cdev; +}; + +int __attribute__((weak)) memhunter_handler_cmd(int cmd, unsigned long arg) +{ + return -ENOSYS; +} + +int __attribute__((weak)) memleak_handler_cmd(int cmd, unsigned long arg) +{ + return -ENOSYS; +} + +static long sysak_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + int ret = -EINVAL; + int type, nr; + + if (!mutex_trylock(&dev_mutex)) + return -EBUSY; + + type = _IOC_TYPE(cmd); + nr = _IOC_NR(cmd); + switch (type) { + case MEMLEAK_IOCTL_CMD: + ret = memleak_handler_cmd(nr, arg); + break; + case MEMHUNTER_IOCTL_CMD: + ret = memhunter_handler_cmd(nr, arg); + break; + default: + printk("defualt ioctl cmd =%d, nr = %d\n", type, nr); + break; + } + + mutex_unlock(&dev_mutex); + return ret; +} + +static int sysak_open(struct inode *inode, struct file *file) +{ + if (!mutex_trylock(&dev_mutex)) + return -EBUSY; + __module_get(THIS_MODULE); + printk("sysak open\n"); + mutex_unlock(&dev_mutex); + + return 0; +} + +static int sysak_release(struct inode *inode, struct file *file) +{ + + if (!mutex_trylock(&dev_mutex)) + return -EBUSY; + + 
printk("sysak close\n"); + module_put(THIS_MODULE); + mutex_unlock(&dev_mutex); + return 0; +} + +static const struct file_operations sysak_fops = { + .open = sysak_open, + .release = sysak_release, + .unlocked_ioctl = sysak_ioctl, +}; + +static char *sysak_devnode(struct device *dev, umode_t *mode) +{ + if (mode) + *mode = S_IRUGO | S_IRWXUGO | S_IALLUGO; + + return kstrdup("sysak", GFP_KERNEL);; +} + +int sysak_dev_init(void) +{ + int ret = 0; + + sysak_dev_major = register_chrdev(0, CHR_NAME, &sysak_fops);; + + if (sysak_dev_major < 0) { + printk("sysak: failed to register device\n"); + return sysak_dev_major; + } + + sysak_dev_class = class_create(THIS_MODULE, CHR_NAME); + if (IS_ERR(sysak_dev_class)) { + ret = PTR_ERR(sysak_dev_class); + printk(KERN_ERR "sysak: class_create err=%d", ret); + unregister_chrdev(sysak_dev_major, CHR_NAME); + + return ret; + } + sysak_dev_class->devnode = sysak_devnode; + + sysak_dev = device_create(sysak_dev_class, NULL, MKDEV(sysak_dev_major, 0), NULL, CHR_NAME); + if (IS_ERR(sysak_dev)) { + ret = PTR_ERR(sysak_dev); + printk(KERN_ERR "sysak: device_create err=%d", ret); + unregister_chrdev(sysak_dev_major, CHR_NAME); + class_destroy(sysak_dev_class); + + return ret; + } + + return 0; +} + +void sysak_dev_uninit(void) +{ + if (sysak_dev_major >= 0) + unregister_chrdev(sysak_dev_major, CHR_NAME); + + if (sysak_dev != NULL) + device_destroy(sysak_dev_class, MKDEV(sysak_dev_major, 0)); + + if (sysak_dev_class != NULL) + class_destroy(sysak_dev_class); + + sysak_dev_major = -1; + sysak_dev = NULL; + sysak_dev_class = NULL; +} diff --git a/source/lib/internal/kernel_module/common/event.c b/source/lib/internal/kernel_module/common/event.c new file mode 100644 index 0000000000000000000000000000000000000000..9102812af3dbf60ed99837efaff101a2f392976f --- /dev/null +++ b/source/lib/internal/kernel_module/common/event.c @@ -0,0 +1,3 @@ +/* + * event.c + */ diff --git a/source/lib/internal/kernel_module/common/hook.c b/source/lib/internal/kernel_module/common/hook.c new file mode 100644 index 0000000000000000000000000000000000000000..c6eeee989495e1b89ad6828dd92e979b17195c20 --- /dev/null +++ b/source/lib/internal/kernel_module/common/hook.c @@ -0,0 +1,177 @@ +/* + * hook.c + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "hook.h" + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 33) +int hook_tracepoint(const char *name, void *probe, void *data) +{ + return tracepoint_probe_register(name, probe); +} + +int unhook_tracepoint(const char *name, void *probe, void *data) +{ + int ret = 0; + + do { + ret = tracepoint_probe_unregister(name, probe); + } while (ret == -ENOMEM); + + return ret; +} +#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) +int hook_tracepoint(const char *name, void *probe, void *data) +{ + return tracepoint_probe_register(name, probe, data); +} + +int unhook_tracepoint(const char *name, void *probe, void *data) +{ + int ret = 0; + + do { + ret = tracepoint_probe_unregister(name, probe, data); + } while (ret == -ENOMEM); + + return ret; +} +#else +static struct tracepoint *tp_ret; +static void probe_tracepoint(struct tracepoint *tp, void *priv) +{ + char *n = priv; + + if (strcmp(tp->name, n) == 0) + tp_ret = tp; +} + +static struct tracepoint *find_tracepoint(const char *name) +{ + tp_ret = NULL; + for_each_kernel_tracepoint(probe_tracepoint, (void *)name); + + return tp_ret; +} + +int hook_tracepoint(const char 
*name, void *probe, void *data) +{ + struct tracepoint *tp; + + tp = find_tracepoint(name); + if (!tp) + return 0; + + return tracepoint_probe_register(tp, probe, data); +} + +int unhook_tracepoint(const char *name, void *probe, void *data) +{ + struct tracepoint *tp; + int ret = 0; + + tp = find_tracepoint(name); + if (!tp) + return 0; + + do { + ret = tracepoint_probe_unregister(tp, probe, data); + } while (ret == -ENOMEM); + + return ret; +} +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 6, 0) +int hook_kprobe(struct kprobe *kp, const char *name, + kprobe_pre_handler_t pre, kprobe_post_handler_t post) +{ + kprobe_opcode_t *addr; + + if (!name || strlen(name) >= 255) + return -EINVAL; + addr = (kprobe_opcode_t *)kallsyms_lookup_name(name); + if (!addr) + return -EINVAL; + + memset(kp, 0, sizeof(struct kprobe)); + kp->symbol_name = name; + kp->pre_handler = pre; + kp->post_handler = post; + + register_kprobe(kp); + + return 0; +} + +void unhook_kprobe(struct kprobe *kp) +{ + if (kp->symbol_name != NULL) + unregister_kprobe(kp); + + memset(kp, 0, sizeof(struct kprobe)); +} + +int hook_kretprobe(struct kretprobe *ptr_kretprobe, char *kretprobe_func, + kretprobe_handler_t kretprobe_entry_handler, + kretprobe_handler_t kretprobe_ret_handler, + size_t data_size) +{ + memset(ptr_kretprobe, 0, sizeof(struct kretprobe)); + ptr_kretprobe->kp.symbol_name = kretprobe_func; + ptr_kretprobe->handler = kretprobe_ret_handler; + ptr_kretprobe->entry_handler = kretprobe_entry_handler; + ptr_kretprobe->data_size = data_size; + ptr_kretprobe->maxactive = 200; + + return register_kretprobe(ptr_kretprobe); +} + +void unhook_kretprobe(struct kretprobe *ptr_kretprobe) +{ + if (!ptr_kretprobe->kp.addr) + return; + + unregister_kretprobe(ptr_kretprobe); + memset(ptr_kretprobe, 0, sizeof(struct kretprobe)); +} +#else + +int hook_kprobe(struct kprobe *kp, const char *name, + kprobe_pre_handler_t pre, kprobe_post_handler_t post) +{ + return -ENXIO; +} + +void unhook_kprobe(struct kprobe *kp) +{ +} + +int hook_kretprobe(struct kretprobe *ptr_kretprobe, char *kretprobe_func, + kretprobe_handler_t kretprobe_entry_handler, + kretprobe_handler_t kretprobe_ret_handler, + size_t data_size) +{ + return -ENXIO; +} + +void unhook_kretprobe(struct kretprobe *ptr_kretprobe) +{ +} +#endif diff --git a/source/lib/internal/kernel_module/common/internal.h b/source/lib/internal/kernel_module/common/internal.h new file mode 100644 index 0000000000000000000000000000000000000000..43835a21270929fe2ae66ab860938fbed17bb0eb --- /dev/null +++ b/source/lib/internal/kernel_module/common/internal.h @@ -0,0 +1,49 @@ +/* + * Copyright (C) 2018 Alibaba Group + * All rights reserved. + * Written by Wetp Zhang + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ + +#include + +#define BBOX_MEM_MAX (100 << 20) /* 100M */ + +#define BBOX_SIZE PAGE_SIZE +#define BBOX_NAME_LEN 16 + +struct record_info { + void *start; + unsigned int size; + unsigned int type; + struct timespec64 mtime; + char tsk_comm[TASK_COMM_LEN]; + char desc[BBOX_RECORD_DESC_LEN]; + int cpu; + int pid; + int state; +}; + +/* bbox_info is stored at the head of a bbox */ +struct bbox_info { + u64 magic; + char name[BBOX_NAME_LEN]; + spinlock_t lock; + int flags; + void *data_base; + void *data_end; + union { + struct bbox_ring { + void *write_ptr; + void *read_ptr; + } ringbuf; + struct bbox_record { + unsigned int cnt; + struct record_info arr[0]; + } records; + }; +}; diff --git a/source/lib/internal/kernel_module/common/ksymbol.c b/source/lib/internal/kernel_module/common/ksymbol.c new file mode 100644 index 0000000000000000000000000000000000000000..53e8b3f291dda2b9c5e096675bbbaff4d3941efe --- /dev/null +++ b/source/lib/internal/kernel_module/common/ksymbol.c @@ -0,0 +1,40 @@ + +#include +#include +#include "ksymbol.h" + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 7, 0) +#include +static struct kprobe kp = { + .symbol_name = "kallsyms_lookup_name" +}; + +typedef unsigned long (*kallsyms_lookup_name_t)(const char *name); +static kallsyms_lookup_name_t g_syms_lookup_name; +static kallsyms_lookup_name_t get_symbol_kallsyms_lookup_name(void) +{ + unsigned long syms_lookup_name; + + register_kprobe(&kp); + syms_lookup_name = (unsigned long)kp.addr; + unregister_kprobe(&kp); + return (kallsyms_lookup_name_t)syms_lookup_name; +} +#endif + +unsigned long get_func_syms_by_name(const char *name) +{ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 7, 0) + if (!g_syms_lookup_name) { + g_syms_lookup_name = get_symbol_kallsyms_lookup_name(); + if (!g_syms_lookup_name) { + pr_err("con't get symbol of kallsyms_lookup_name\n"); + return 0; + } + } + return g_syms_lookup_name(name); +#else + return kallsyms_lookup_name(name); +#endif +} + diff --git a/source/lib/internal/kernel_module/common/proc.c b/source/lib/internal/kernel_module/common/proc.c new file mode 100644 index 0000000000000000000000000000000000000000..814e34fea7c8504f8ab041457f01d80f82003115 --- /dev/null +++ b/source/lib/internal/kernel_module/common/proc.c @@ -0,0 +1,60 @@ +#include "proc.h" + +static struct proc_dir_entry *sysak_root_dir; + +static bool check_sysak_root(void) +{ + if (!sysak_root_dir) { + sysak_root_dir = proc_mkdir("sysak", NULL); + if (!sysak_root_dir) + return false; + } + + return true; +} + +struct proc_dir_entry *sysak_proc_mkdir(const char *name) +{ + if (check_sysak_root()) + return proc_mkdir(name, sysak_root_dir); + + return NULL; +} + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 10, 0) +struct proc_dir_entry *sysak_proc_create(const char *name, + const struct proc_ops *proc_fops) +#else +struct proc_dir_entry *sysak_proc_create(const char *name, + const struct file_operations *proc_fops) +#endif +{ + if (check_sysak_root()) + return proc_create(name, 0644, sysak_root_dir, proc_fops); + + return NULL; +} + +void sysak_remove_proc_entry(const char *name) +{ + if (sysak_root_dir) + remove_proc_entry(name, sysak_root_dir); +} + +int sysak_remove_proc_subtree(const char *name) +{ + if (sysak_root_dir) + return remove_proc_subtree(name, sysak_root_dir); + return 0; +} + +int sysak_proc_init(void) +{ + return 0; +} + +void sysak_proc_exit(void) +{ + if (sysak_root_dir) + proc_remove(sysak_root_dir); +} diff --git a/source/lib/internal/kernel_module/common/stack.c 
b/source/lib/internal/kernel_module/common/stack.c new file mode 100644 index 0000000000000000000000000000000000000000..c3b1c09a6549c66f9a1671b547d3a3098a306075 --- /dev/null +++ b/source/lib/internal/kernel_module/common/stack.c @@ -0,0 +1,3 @@ +/* + * stack.c +*/ diff --git a/source/lib/internal/kernel_module/entry.c b/source/lib/internal/kernel_module/entry.c new file mode 100644 index 0000000000000000000000000000000000000000..8491fc9c61df4138ca37cbc272c2fb21c35868a4 --- /dev/null +++ b/source/lib/internal/kernel_module/entry.c @@ -0,0 +1,68 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "proc.h" +#include "sysak_mods.h" + +void sysak_module_get(int *mod_ref) +{ + if (*mod_ref) + return; + try_module_get(THIS_MODULE); + *mod_ref = 1; +} + +void sysak_module_put(int *mod_ref) +{ + if (*mod_ref) { + *mod_ref = 0; + module_put(THIS_MODULE); + } +} + +static int sysak_mod_init(void) +{ + int i, ret; + + ret = sysak_bbox_init(); + if (ret) + return ret; + sysak_proc_init(); + sysak_dev_init(); + + for (i = 0; i < sysk_module_num; i++) { + if (sysak_modules[i].init()) + printk("WARN: module %s init failed", sysak_modules[i].name); + } + + printk("sysak module loaded.\n"); + return 0; +} + +static void sysak_mod_exit(void) +{ + int i; + + sysak_dev_uninit(); + sysak_bbox_exit(); + sysak_proc_exit(); + + for (i = 0; i < sysk_module_num; i++) + sysak_modules[i].exit(); + + printk("sysak module unloaded.\n"); +} + +module_init(sysak_mod_init) +module_exit(sysak_mod_exit) +MODULE_LICENSE("GPL v2"); diff --git a/source/lib/internal/kernel_module/include/blackbox.h b/source/lib/internal/kernel_module/include/blackbox.h new file mode 100644 index 0000000000000000000000000000000000000000..52d45e453d958e6659b32d2f4224378d4fe5f9c5 --- /dev/null +++ b/source/lib/internal/kernel_module/include/blackbox.h @@ -0,0 +1,62 @@ +/* + * Copyright (C) 2018 Alibaba Group + * All rights reserved. + * Written by Wetp Zhang + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ + +#ifndef BLACKBOX_H +#define BLACKBOX_H + +#include +#include + +#define BBOX_FLAG_MASK 0xffff0000 +#define BBOX_FLAG_SHIFT 16 + + +#define BBOX_TYPE_MASK 0x0000ffff +#define BBOX_TYPE_SHIFT 0 + +#define BBOX_TYPE_RING (0 << BBOX_TYPE_SHIFT) +#define BBOX_TYPE_RECORD (1 << BBOX_TYPE_SHIFT) + + +#define BBOX_DATA_TYPE_STRING 0x1 +#define BBOX_DATA_TYPE_TRACE 0x2 +#define BBOX_DATA_TYPE_DATA 0x3 + +#define BBOX_RECORD_DESC_LEN 16 + +#define BBOX_BUFF_MAGIC 0xe0e1e2e3e4e5e6e7ul + +struct bbox_data_info { + void *data; + unsigned int size; + unsigned int slot; + struct timespec64 mtime; + struct task_struct *task; +}; + +extern ssize_t bbox_write(unsigned int bbox_id, + struct bbox_data_info *data_info); +extern ssize_t bbox_read(unsigned int bbox_id, + struct bbox_data_info *data_info); +extern int bbox_alloc_record_slot(unsigned int bbox_id, unsigned int size, + unsigned int type); +extern void bbox_record_clear(unsigned int bbox_id, int slot_id); +extern int bbox_alloc(const char *name, int flags); +extern void bbox_free(unsigned int bbox_id); +extern int bbox_alloc_dynamic(const char *name, int flags, + unsigned int pages); +extern int bbox_ring_show(struct seq_file *seq, unsigned int bbox_id); +extern int bbox_record_show(struct seq_file *seq, + unsigned int bbox_id, int slot_id); +extern void bbox_set_record_desc(unsigned int bbox_id, + unsigned int slot, const char *desc); +extern void bbox_update_name(unsigned int bbox_id, const char *name); +#endif diff --git a/source/lib/internal/kernel_module/include/common.h b/source/lib/internal/kernel_module/include/common.h new file mode 100644 index 0000000000000000000000000000000000000000..836d6edae89eba4cf14810f566c9f66d2ccb7656 --- /dev/null +++ b/source/lib/internal/kernel_module/include/common.h @@ -0,0 +1,16 @@ +#ifndef __COMMON__ +#define __COMMON__ + +#define NAME_LEN (128) + +#undef TASK_COMM_LEN +#define TASK_COMM_LEN (16) + +#define CHR_NAME "sysak" + +enum SYSAK_IOCTL_CMD { + MEMLEAK_IOCTL_CMD = 1, + MEMHUNTER_IOCTL_CMD = 2, +}; + +#endif diff --git a/source/lib/internal/kernel_module/include/hook.h b/source/lib/internal/kernel_module/include/hook.h new file mode 100644 index 0000000000000000000000000000000000000000..4cad15bf345d331433bf4fc1bba025eba62e638d --- /dev/null +++ b/source/lib/internal/kernel_module/include/hook.h @@ -0,0 +1,17 @@ +#ifndef _KERNEL_COMMON_HOOK_H +#define _KERNEL_COMMON_HOOK_H + +#include +extern int hook_tracepoint(const char *name, void *probe, void *data); +extern int unhook_tracepoint(const char *name, void *probe, void *data); + +extern int hook_kprobe(struct kprobe *kp, const char *name, + kprobe_pre_handler_t pre, kprobe_post_handler_t post); +extern void unhook_kprobe(struct kprobe *kp); + +extern int hook_kretprobe(struct kretprobe *ptr_kretprobe, char *kretprobe_func, + kretprobe_handler_t kretprobe_entry_handler, + kretprobe_handler_t kretprobe_ret_handler, + size_t data_size); +extern void unhook_kretprobe(struct kretprobe *ptr_kretprobe); +#endif diff --git a/source/lib/internal/kernel_module/include/ksymbol.h b/source/lib/internal/kernel_module/include/ksymbol.h new file mode 100644 index 0000000000000000000000000000000000000000..7fe9a90e870b211eb26f3728c60da620f279f45e --- /dev/null +++ b/source/lib/internal/kernel_module/include/ksymbol.h @@ -0,0 +1,5 @@ +#ifndef _KERNEL_COMMON_SYMS_H +#define _KERNEL_COMMON_SYMS_H + +extern unsigned long get_func_syms_by_name(const char *name); +#endif diff --git a/source/lib/internal/kernel_module/include/memleak.h 
b/source/lib/internal/kernel_module/include/memleak.h new file mode 100644 index 0000000000000000000000000000000000000000..d0b8057b61a117bf5c0107657dcdf009f7b35753 --- /dev/null +++ b/source/lib/internal/kernel_module/include/memleak.h @@ -0,0 +1,21 @@ +#ifndef __MEMLEAK_IOCTL__ +#define __MEMLEAK_IOCTL__ +#include + +#include "common.h" + +#define MEMLEAK_CMD_ENALBE (0x0A) +#define MEMLEAK_CMD_SET (MEMLEAK_CMD_ENALBE + 1) +#define MEMLEAK_CMD_GET (MEMLEAK_CMD_SET + 1) +#define MEMLEAK_CMD_RESULT (MEMLEAK_CMD_GET + 1) +#define MEMLEAK_CMD_DISABLE (MEMLEAK_CMD_RESULT + 1) + +#define MEMLEAK_STATE_ON (1) +#define MEMLEAK_STATE_OFF (2) +#define MEMLEAK_STATE_INIT (3) + +#define MEMLEAK_ON _IOWR(MEMLEAK_IOCTL_CMD, MEMLEAK_CMD_ENALBE, struct memleak_settings) +#define MEMLEAK_OFF _IO(MEMLEAK_IOCTL_CMD, MEMLEAK_CMD_DISABLE) +#define MEMLEAK_RESULT _IOWR(MEMLEAK_IOCTL_CMD, MEMLEAK_CMD_RESULT, struct user_result) + +#endif diff --git a/source/lib/internal/kernel_module/include/proc.h b/source/lib/internal/kernel_module/include/proc.h new file mode 100644 index 0000000000000000000000000000000000000000..8200168633941d13c8466db0b3003b02a4821cfb --- /dev/null +++ b/source/lib/internal/kernel_module/include/proc.h @@ -0,0 +1,129 @@ +#ifndef _KERNEL_COMMON_PROC_H +#define _KERNEL_COMMON_PROC_H +#include +#include +#include + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) +#include +#include + +int __weak kstrtobool_from_user(const char __user *s, size_t count, bool *res) +{ + /* Longest string needed to differentiate, newline, terminator */ + char buf[4]; + + count = min(count, sizeof(buf) - 1); + if (copy_from_user(buf, s, count)) + return -EFAULT; + buf[count] = '\0'; + return strtobool(buf, res); +} +#endif + + +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 6, 0) +#define DEFINE_PROC_ATTRIBUTE(name, __write) \ + static int name##_open(struct inode *inode, struct file *file) \ + { \ + return single_open(file, name##_show, PDE_DATA(inode)); \ + } \ + \ + static const struct file_operations name##_fops = { \ + .owner = THIS_MODULE, \ + .open = name##_open, \ + .read = seq_read, \ + .write = __write, \ + .llseek = seq_lseek, \ + .release = single_release, \ + } + +#define DEFINE_PROC_ATTRIBUTE_RW(name) \ + static ssize_t name##_write(struct file *file, \ + const char __user *buf, \ + size_t count, loff_t *ppos) \ + { \ + return name##_store(PDE_DATA(file_inode(file)), buf, \ + count); \ + } \ + DEFINE_PROC_ATTRIBUTE(name, name##_write) + +#define DEFINE_PROC_ATTRIBUTE_RO(name) \ + DEFINE_PROC_ATTRIBUTE(name, NULL) +#elif LINUX_VERSION_CODE < KERNEL_VERSION(5, 10, 0) +#define DEFINE_PROC_ATTRIBUTE(name, __write) \ + static int name##_open(struct inode *inode, struct file *file) \ + { \ + return single_open(file, name##_show, PDE_DATA(inode)); \ + } \ + \ + static const struct file_operations name##_fops = { \ + .owner = THIS_MODULE, \ + .open = name##_open, \ + .read = seq_read, \ + .write = __write, \ + .llseek = seq_lseek, \ + .release = single_release, \ + } + +#define DEFINE_PROC_ATTRIBUTE_RW(name) \ + static ssize_t name##_write(struct file *file, \ + const char __user *buf, \ + size_t count, loff_t *ppos) \ + { \ + return name##_store(PDE_DATA(file_inode(file)), buf, \ + count); \ + } \ + DEFINE_PROC_ATTRIBUTE(name, name##_write) + +#define DEFINE_PROC_ATTRIBUTE_RO(name) \ + DEFINE_PROC_ATTRIBUTE(name, NULL) +#else +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 17, 0) +#define PDE_DATA pde_data +#endif + +#define DEFINE_PROC_ATTRIBUTE(name, __write) \ + static int name##_open(struct inode *inode, 
struct file *file) \ + { \ + return single_open(file, name##_show, PDE_DATA(inode)); \ + } \ + \ + static const struct proc_ops name##_fops = { \ + .proc_open = name##_open, \ + .proc_read = seq_read, \ + .proc_write = __write, \ + .proc_lseek = seq_lseek, \ + .proc_release = single_release, \ + } + +#define DEFINE_PROC_ATTRIBUTE_RW(name) \ + static ssize_t name##_write(struct file *file, \ + const char __user *buf, \ + size_t count, loff_t *ppos) \ + { \ + return name##_store(PDE_DATA(file_inode(file)), buf, \ + count); \ + } \ + DEFINE_PROC_ATTRIBUTE(name, name##_write) + +#define DEFINE_PROC_ATTRIBUTE_RO(name) \ + DEFINE_PROC_ATTRIBUTE(name, NULL) +#endif + + +extern struct proc_dir_entry *sysak_proc_mkdir(const char *name); + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 10, 0) +extern struct proc_dir_entry *sysak_proc_create(const char *name, + const struct proc_ops *proc_fops); +#else +extern struct proc_dir_entry *sysak_proc_create(const char *name, + const struct file_operations *proc_fops); +#endif +extern void sysak_remove_proc_entry(const char *name); +extern int sysak_remove_proc_subtree(const char *name); + +extern int sysak_proc_init(void); +extern void sysak_proc_exit(void); +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1062.1.1.el7.x86_64/nvme.h b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1062.1.1.el7.x86_64/nvme.h new file mode 100644 index 0000000000000000000000000000000000000000..3190b86c39f8138681b0b2caa25a08e7ed562646 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1062.1.1.el7.x86_64/nvme.h @@ -0,0 +1,60 @@ +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct nvme_queue { + struct device *q_dmadev; + void *nvme_dev; //struct nvme_dev *dev; + char irqname[24]; /* nvme4294967295-65535\0 */ + spinlock_t sq_lock; + struct nvme_command *sq_cmds; + struct nvme_command __iomem *sq_cmds_io; + spinlock_t cq_lock ____cacheline_aligned_in_smp; + volatile struct nvme_completion *cqes; + struct blk_mq_tags **tags; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_tail; + u16 cq_head; + u16 last_cq_head; + u16 qid; + u8 cq_phase; + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; +}; + +static int get_sq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int tail = nvmeq->sq_tail; + struct nvme_command cmd; + + do { + if (nvmeq->sq_cmds_io) { + memcpy_toio(&cmd, &nvmeq->sq_cmds_io[tail], sizeof(struct nvme_command)); + if (cmd.common.command_id == rq->tag) + return tail; + } + else if (nvmeq->sq_cmds[tail].common.command_id == rq->tag) + return tail; + } while (--tail >= 0); + return -1; +} + +static unsigned long get_cmd_ctx(struct nvme_queue *nvmeq, struct request *rq) +{ + //struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1062.1.1.el7.x86_64/virtio_blk.h b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1062.1.1.el7.x86_64/virtio_blk.h new file mode 100644 index 0000000000000000000000000000000000000000..faa22a329c3d776adf26ac8b12671b7485d6dedc --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1062.1.1.el7.x86_64/virtio_blk.h @@ -0,0 +1,175 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include +#include +#include +#include +#include +#include +#include 
+#include +#include "iosdiag.h" + +#define VQ_NAME_LEN 16 + +struct virtio_blk_vq { + struct virtqueue *vq; + spinlock_t lock; + char name[VQ_NAME_LEN]; +} ____cacheline_aligned_in_smp; + +struct virtio_blk { + struct virtio_device *vdev; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* Block layer tags. */ + struct blk_mq_tag_set tag_set; + + /* Process context for config space updates */ + struct work_struct config_work; + + /* What host tells us, plus 2 for header & tailer. */ + unsigned int sg_elems; + + /* Ida index - used to track minor number allocations. */ + int index; + + /* num of vqs */ + int num_vqs; + struct virtio_blk_vq *vqs; +}; + +struct virtblk_req { + struct request *req; + struct virtio_blk_outhdr out_hdr; + struct virtio_scsi_inhdr in_hdr; + u8 status; + struct scatterlist sg[]; +}; + +struct vring_desc_state { + void *data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ +}; + +struct vring_virtqueue { + struct virtqueue vq; + + /* Actual memory layout for this queue */ + struct vring vring; + + /* Can we use weak barriers? */ + bool weak_barriers; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Host publishes avail event idx */ + bool event; + + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. */ + unsigned int num_added; + + /* Last used index we've seen. */ + u16 last_used_idx; + + /* How to notify other side. FIXME: commonalize hcalls! */ + bool (*notify)(struct virtqueue *vq); + + /* DMA, allocation, and size information */ + bool we_own_ring; + size_t queue_size_in_bytes; + dma_addr_t queue_dma_addr; + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; +#endif + + /* Per-descriptor state. */ + struct vring_desc_state desc_state[]; +}; +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +struct blk_mq_ctx { + struct { + spinlock_t lock; + struct list_head rq_list; + } ____cacheline_aligned_in_smp; + + unsigned int cpu; + unsigned int index_hw; + + RH_KABI_DEPRECATE(unsigned int, ipi_redirect) + + /* incremented at dispatch time */ + unsigned long rq_dispatched[2]; + unsigned long rq_merged; + + /* incremented at completion time */ + unsigned long ____cacheline_aligned_in_smp rq_completed[2]; + + struct request_queue *queue; + struct kobject kobj; +} ____cacheline_aligned_in_smp; + +static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, + int cpu) +{ + return q->queue_hw_ctx[q->mq_map[cpu]]; +} + +static inline struct blk_mq_hw_ctx *blk_mq_get_hctx_byrq(struct request *rq) +{ + return blk_mq_map_queue(rq->q, rq->mq_ctx->cpu); +} + +static inline struct request *desc_state_data_to_req(struct virtqueue *vq, int head) +{ + void *data = to_vvq(vq)->desc_state[head].data; + return data ? 
((struct virtblk_req *)data)->req : NULL; +} + +static inline int get_rq_internal_tag(struct request *rq) +{ + return -1; +} + +static inline unsigned long get_issue_driver_ns(struct request *rq) +{ + if (!rq) + return 0; + if (rq->io_start_time_ns) + return rq->io_start_time_ns; + if (rq->timeout) + return jiffies_to_usecs(rq->deadline - rq->timeout) * 1000; + return 0; +} + +static inline u64 get_check_hang_time_ns(void) +{ + return sched_clock(); +} + +typedef void (*blk_mq_rq_iter)(struct request *, void *, bool); +static inline int iter_all_rq(struct request_queue *q, blk_mq_rq_iter fn, void *data) +{ + blk_mq_tagset_busy_iter(q->tag_set, fn, data); + return 0; +} + +void get_vq_info(struct vq_info *vq_i, struct request *rq); +#endif + diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1062.1.2.el7.x86_64/nvme.h b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1062.1.2.el7.x86_64/nvme.h new file mode 100644 index 0000000000000000000000000000000000000000..3190b86c39f8138681b0b2caa25a08e7ed562646 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1062.1.2.el7.x86_64/nvme.h @@ -0,0 +1,60 @@ +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct nvme_queue { + struct device *q_dmadev; + void *nvme_dev; //struct nvme_dev *dev; + char irqname[24]; /* nvme4294967295-65535\0 */ + spinlock_t sq_lock; + struct nvme_command *sq_cmds; + struct nvme_command __iomem *sq_cmds_io; + spinlock_t cq_lock ____cacheline_aligned_in_smp; + volatile struct nvme_completion *cqes; + struct blk_mq_tags **tags; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_tail; + u16 cq_head; + u16 last_cq_head; + u16 qid; + u8 cq_phase; + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; +}; + +static int get_sq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int tail = nvmeq->sq_tail; + struct nvme_command cmd; + + do { + if (nvmeq->sq_cmds_io) { + memcpy_toio(&cmd, &nvmeq->sq_cmds_io[tail], sizeof(struct nvme_command)); + if (cmd.common.command_id == rq->tag) + return tail; + } + else if (nvmeq->sq_cmds[tail].common.command_id == rq->tag) + return tail; + } while (--tail >= 0); + return -1; +} + +static unsigned long get_cmd_ctx(struct nvme_queue *nvmeq, struct request *rq) +{ + //struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1062.1.2.el7.x86_64/virtio_blk.h b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1062.1.2.el7.x86_64/virtio_blk.h new file mode 100644 index 0000000000000000000000000000000000000000..faa22a329c3d776adf26ac8b12671b7485d6dedc --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1062.1.2.el7.x86_64/virtio_blk.h @@ -0,0 +1,175 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +#define VQ_NAME_LEN 16 + +struct virtio_blk_vq { + struct virtqueue *vq; + spinlock_t lock; + char name[VQ_NAME_LEN]; +} ____cacheline_aligned_in_smp; + +struct virtio_blk { + struct virtio_device *vdev; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* Block layer tags. 
*/ + struct blk_mq_tag_set tag_set; + + /* Process context for config space updates */ + struct work_struct config_work; + + /* What host tells us, plus 2 for header & tailer. */ + unsigned int sg_elems; + + /* Ida index - used to track minor number allocations. */ + int index; + + /* num of vqs */ + int num_vqs; + struct virtio_blk_vq *vqs; +}; + +struct virtblk_req { + struct request *req; + struct virtio_blk_outhdr out_hdr; + struct virtio_scsi_inhdr in_hdr; + u8 status; + struct scatterlist sg[]; +}; + +struct vring_desc_state { + void *data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ +}; + +struct vring_virtqueue { + struct virtqueue vq; + + /* Actual memory layout for this queue */ + struct vring vring; + + /* Can we use weak barriers? */ + bool weak_barriers; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Host publishes avail event idx */ + bool event; + + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. */ + unsigned int num_added; + + /* Last used index we've seen. */ + u16 last_used_idx; + + /* How to notify other side. FIXME: commonalize hcalls! */ + bool (*notify)(struct virtqueue *vq); + + /* DMA, allocation, and size information */ + bool we_own_ring; + size_t queue_size_in_bytes; + dma_addr_t queue_dma_addr; + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; +#endif + + /* Per-descriptor state. */ + struct vring_desc_state desc_state[]; +}; +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +struct blk_mq_ctx { + struct { + spinlock_t lock; + struct list_head rq_list; + } ____cacheline_aligned_in_smp; + + unsigned int cpu; + unsigned int index_hw; + + RH_KABI_DEPRECATE(unsigned int, ipi_redirect) + + /* incremented at dispatch time */ + unsigned long rq_dispatched[2]; + unsigned long rq_merged; + + /* incremented at completion time */ + unsigned long ____cacheline_aligned_in_smp rq_completed[2]; + + struct request_queue *queue; + struct kobject kobj; +} ____cacheline_aligned_in_smp; + +static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, + int cpu) +{ + return q->queue_hw_ctx[q->mq_map[cpu]]; +} + +static inline struct blk_mq_hw_ctx *blk_mq_get_hctx_byrq(struct request *rq) +{ + return blk_mq_map_queue(rq->q, rq->mq_ctx->cpu); +} + +static inline struct request *desc_state_data_to_req(struct virtqueue *vq, int head) +{ + void *data = to_vvq(vq)->desc_state[head].data; + return data ? 
((struct virtblk_req *)data)->req : NULL; +} + +static inline int get_rq_internal_tag(struct request *rq) +{ + return -1; +} + +static inline unsigned long get_issue_driver_ns(struct request *rq) +{ + if (!rq) + return 0; + if (rq->io_start_time_ns) + return rq->io_start_time_ns; + if (rq->timeout) + return jiffies_to_usecs(rq->deadline - rq->timeout) * 1000; + return 0; +} + +static inline u64 get_check_hang_time_ns(void) +{ + return sched_clock(); +} + +typedef void (*blk_mq_rq_iter)(struct request *, void *, bool); +static inline int iter_all_rq(struct request_queue *q, blk_mq_rq_iter fn, void *data) +{ + blk_mq_tagset_busy_iter(q->tag_set, fn, data); + return 0; +} + +void get_vq_info(struct vq_info *vq_i, struct request *rq); +#endif + diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1062.12.1.el7.x86_64/nvme.h b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1062.12.1.el7.x86_64/nvme.h new file mode 100644 index 0000000000000000000000000000000000000000..3190b86c39f8138681b0b2caa25a08e7ed562646 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1062.12.1.el7.x86_64/nvme.h @@ -0,0 +1,60 @@ +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct nvme_queue { + struct device *q_dmadev; + void *nvme_dev; //struct nvme_dev *dev; + char irqname[24]; /* nvme4294967295-65535\0 */ + spinlock_t sq_lock; + struct nvme_command *sq_cmds; + struct nvme_command __iomem *sq_cmds_io; + spinlock_t cq_lock ____cacheline_aligned_in_smp; + volatile struct nvme_completion *cqes; + struct blk_mq_tags **tags; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_tail; + u16 cq_head; + u16 last_cq_head; + u16 qid; + u8 cq_phase; + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; +}; + +static int get_sq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int tail = nvmeq->sq_tail; + struct nvme_command cmd; + + do { + if (nvmeq->sq_cmds_io) { + memcpy_toio(&cmd, &nvmeq->sq_cmds_io[tail], sizeof(struct nvme_command)); + if (cmd.common.command_id == rq->tag) + return tail; + } + else if (nvmeq->sq_cmds[tail].common.command_id == rq->tag) + return tail; + } while (--tail >= 0); + return -1; +} + +static unsigned long get_cmd_ctx(struct nvme_queue *nvmeq, struct request *rq) +{ + //struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1062.12.1.el7.x86_64/virtio_blk.h b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1062.12.1.el7.x86_64/virtio_blk.h new file mode 100644 index 0000000000000000000000000000000000000000..faa22a329c3d776adf26ac8b12671b7485d6dedc --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1062.12.1.el7.x86_64/virtio_blk.h @@ -0,0 +1,175 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +#define VQ_NAME_LEN 16 + +struct virtio_blk_vq { + struct virtqueue *vq; + spinlock_t lock; + char name[VQ_NAME_LEN]; +} ____cacheline_aligned_in_smp; + +struct virtio_blk { + struct virtio_device *vdev; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* Block layer tags. 
*/ + struct blk_mq_tag_set tag_set; + + /* Process context for config space updates */ + struct work_struct config_work; + + /* What host tells us, plus 2 for header & tailer. */ + unsigned int sg_elems; + + /* Ida index - used to track minor number allocations. */ + int index; + + /* num of vqs */ + int num_vqs; + struct virtio_blk_vq *vqs; +}; + +struct virtblk_req { + struct request *req; + struct virtio_blk_outhdr out_hdr; + struct virtio_scsi_inhdr in_hdr; + u8 status; + struct scatterlist sg[]; +}; + +struct vring_desc_state { + void *data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ +}; + +struct vring_virtqueue { + struct virtqueue vq; + + /* Actual memory layout for this queue */ + struct vring vring; + + /* Can we use weak barriers? */ + bool weak_barriers; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Host publishes avail event idx */ + bool event; + + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. */ + unsigned int num_added; + + /* Last used index we've seen. */ + u16 last_used_idx; + + /* How to notify other side. FIXME: commonalize hcalls! */ + bool (*notify)(struct virtqueue *vq); + + /* DMA, allocation, and size information */ + bool we_own_ring; + size_t queue_size_in_bytes; + dma_addr_t queue_dma_addr; + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; +#endif + + /* Per-descriptor state. */ + struct vring_desc_state desc_state[]; +}; +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +struct blk_mq_ctx { + struct { + spinlock_t lock; + struct list_head rq_list; + } ____cacheline_aligned_in_smp; + + unsigned int cpu; + unsigned int index_hw; + + RH_KABI_DEPRECATE(unsigned int, ipi_redirect) + + /* incremented at dispatch time */ + unsigned long rq_dispatched[2]; + unsigned long rq_merged; + + /* incremented at completion time */ + unsigned long ____cacheline_aligned_in_smp rq_completed[2]; + + struct request_queue *queue; + struct kobject kobj; +} ____cacheline_aligned_in_smp; + +static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, + int cpu) +{ + return q->queue_hw_ctx[q->mq_map[cpu]]; +} + +static inline struct blk_mq_hw_ctx *blk_mq_get_hctx_byrq(struct request *rq) +{ + return blk_mq_map_queue(rq->q, rq->mq_ctx->cpu); +} + +static inline struct request *desc_state_data_to_req(struct virtqueue *vq, int head) +{ + void *data = to_vvq(vq)->desc_state[head].data; + return data ? 
((struct virtblk_req *)data)->req : NULL; +} + +static inline int get_rq_internal_tag(struct request *rq) +{ + return -1; +} + +static inline unsigned long get_issue_driver_ns(struct request *rq) +{ + if (!rq) + return 0; + if (rq->io_start_time_ns) + return rq->io_start_time_ns; + if (rq->timeout) + return jiffies_to_usecs(rq->deadline - rq->timeout) * 1000; + return 0; +} + +static inline u64 get_check_hang_time_ns(void) +{ + return sched_clock(); +} + +typedef void (*blk_mq_rq_iter)(struct request *, void *, bool); +static inline int iter_all_rq(struct request_queue *q, blk_mq_rq_iter fn, void *data) +{ + blk_mq_tagset_busy_iter(q->tag_set, fn, data); + return 0; +} + +void get_vq_info(struct vq_info *vq_i, struct request *rq); +#endif + diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1062.18.1.el7.x86_64/nvme.h b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1062.18.1.el7.x86_64/nvme.h new file mode 100644 index 0000000000000000000000000000000000000000..3190b86c39f8138681b0b2caa25a08e7ed562646 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1062.18.1.el7.x86_64/nvme.h @@ -0,0 +1,60 @@ +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct nvme_queue { + struct device *q_dmadev; + void *nvme_dev; //struct nvme_dev *dev; + char irqname[24]; /* nvme4294967295-65535\0 */ + spinlock_t sq_lock; + struct nvme_command *sq_cmds; + struct nvme_command __iomem *sq_cmds_io; + spinlock_t cq_lock ____cacheline_aligned_in_smp; + volatile struct nvme_completion *cqes; + struct blk_mq_tags **tags; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_tail; + u16 cq_head; + u16 last_cq_head; + u16 qid; + u8 cq_phase; + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; +}; + +static int get_sq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int tail = nvmeq->sq_tail; + struct nvme_command cmd; + + do { + if (nvmeq->sq_cmds_io) { + memcpy_toio(&cmd, &nvmeq->sq_cmds_io[tail], sizeof(struct nvme_command)); + if (cmd.common.command_id == rq->tag) + return tail; + } + else if (nvmeq->sq_cmds[tail].common.command_id == rq->tag) + return tail; + } while (--tail >= 0); + return -1; +} + +static unsigned long get_cmd_ctx(struct nvme_queue *nvmeq, struct request *rq) +{ + //struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1062.18.1.el7.x86_64/virtio_blk.h b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1062.18.1.el7.x86_64/virtio_blk.h new file mode 100644 index 0000000000000000000000000000000000000000..faa22a329c3d776adf26ac8b12671b7485d6dedc --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1062.18.1.el7.x86_64/virtio_blk.h @@ -0,0 +1,175 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +#define VQ_NAME_LEN 16 + +struct virtio_blk_vq { + struct virtqueue *vq; + spinlock_t lock; + char name[VQ_NAME_LEN]; +} ____cacheline_aligned_in_smp; + +struct virtio_blk { + struct virtio_device *vdev; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* Block layer tags. 
*/ + struct blk_mq_tag_set tag_set; + + /* Process context for config space updates */ + struct work_struct config_work; + + /* What host tells us, plus 2 for header & tailer. */ + unsigned int sg_elems; + + /* Ida index - used to track minor number allocations. */ + int index; + + /* num of vqs */ + int num_vqs; + struct virtio_blk_vq *vqs; +}; + +struct virtblk_req { + struct request *req; + struct virtio_blk_outhdr out_hdr; + struct virtio_scsi_inhdr in_hdr; + u8 status; + struct scatterlist sg[]; +}; + +struct vring_desc_state { + void *data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ +}; + +struct vring_virtqueue { + struct virtqueue vq; + + /* Actual memory layout for this queue */ + struct vring vring; + + /* Can we use weak barriers? */ + bool weak_barriers; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Host publishes avail event idx */ + bool event; + + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. */ + unsigned int num_added; + + /* Last used index we've seen. */ + u16 last_used_idx; + + /* How to notify other side. FIXME: commonalize hcalls! */ + bool (*notify)(struct virtqueue *vq); + + /* DMA, allocation, and size information */ + bool we_own_ring; + size_t queue_size_in_bytes; + dma_addr_t queue_dma_addr; + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; +#endif + + /* Per-descriptor state. */ + struct vring_desc_state desc_state[]; +}; +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +struct blk_mq_ctx { + struct { + spinlock_t lock; + struct list_head rq_list; + } ____cacheline_aligned_in_smp; + + unsigned int cpu; + unsigned int index_hw; + + RH_KABI_DEPRECATE(unsigned int, ipi_redirect) + + /* incremented at dispatch time */ + unsigned long rq_dispatched[2]; + unsigned long rq_merged; + + /* incremented at completion time */ + unsigned long ____cacheline_aligned_in_smp rq_completed[2]; + + struct request_queue *queue; + struct kobject kobj; +} ____cacheline_aligned_in_smp; + +static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, + int cpu) +{ + return q->queue_hw_ctx[q->mq_map[cpu]]; +} + +static inline struct blk_mq_hw_ctx *blk_mq_get_hctx_byrq(struct request *rq) +{ + return blk_mq_map_queue(rq->q, rq->mq_ctx->cpu); +} + +static inline struct request *desc_state_data_to_req(struct virtqueue *vq, int head) +{ + void *data = to_vvq(vq)->desc_state[head].data; + return data ? 
((struct virtblk_req *)data)->req : NULL; +} + +static inline int get_rq_internal_tag(struct request *rq) +{ + return -1; +} + +static inline unsigned long get_issue_driver_ns(struct request *rq) +{ + if (!rq) + return 0; + if (rq->io_start_time_ns) + return rq->io_start_time_ns; + if (rq->timeout) + return jiffies_to_usecs(rq->deadline - rq->timeout) * 1000; + return 0; +} + +static inline u64 get_check_hang_time_ns(void) +{ + return sched_clock(); +} + +typedef void (*blk_mq_rq_iter)(struct request *, void *, bool); +static inline int iter_all_rq(struct request_queue *q, blk_mq_rq_iter fn, void *data) +{ + blk_mq_tagset_busy_iter(q->tag_set, fn, data); + return 0; +} + +void get_vq_info(struct vq_info *vq_i, struct request *rq); +#endif + diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1062.4.1.el7.x86_64/nvme.h b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1062.4.1.el7.x86_64/nvme.h new file mode 100644 index 0000000000000000000000000000000000000000..3190b86c39f8138681b0b2caa25a08e7ed562646 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1062.4.1.el7.x86_64/nvme.h @@ -0,0 +1,60 @@ +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct nvme_queue { + struct device *q_dmadev; + void *nvme_dev; //struct nvme_dev *dev; + char irqname[24]; /* nvme4294967295-65535\0 */ + spinlock_t sq_lock; + struct nvme_command *sq_cmds; + struct nvme_command __iomem *sq_cmds_io; + spinlock_t cq_lock ____cacheline_aligned_in_smp; + volatile struct nvme_completion *cqes; + struct blk_mq_tags **tags; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_tail; + u16 cq_head; + u16 last_cq_head; + u16 qid; + u8 cq_phase; + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; +}; + +static int get_sq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int tail = nvmeq->sq_tail; + struct nvme_command cmd; + + do { + if (nvmeq->sq_cmds_io) { + memcpy_toio(&cmd, &nvmeq->sq_cmds_io[tail], sizeof(struct nvme_command)); + if (cmd.common.command_id == rq->tag) + return tail; + } + else if (nvmeq->sq_cmds[tail].common.command_id == rq->tag) + return tail; + } while (--tail >= 0); + return -1; +} + +static unsigned long get_cmd_ctx(struct nvme_queue *nvmeq, struct request *rq) +{ + //struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1062.4.1.el7.x86_64/virtio_blk.h b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1062.4.1.el7.x86_64/virtio_blk.h new file mode 100644 index 0000000000000000000000000000000000000000..faa22a329c3d776adf26ac8b12671b7485d6dedc --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1062.4.1.el7.x86_64/virtio_blk.h @@ -0,0 +1,175 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +#define VQ_NAME_LEN 16 + +struct virtio_blk_vq { + struct virtqueue *vq; + spinlock_t lock; + char name[VQ_NAME_LEN]; +} ____cacheline_aligned_in_smp; + +struct virtio_blk { + struct virtio_device *vdev; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* Block layer tags. 
*/ + struct blk_mq_tag_set tag_set; + + /* Process context for config space updates */ + struct work_struct config_work; + + /* What host tells us, plus 2 for header & tailer. */ + unsigned int sg_elems; + + /* Ida index - used to track minor number allocations. */ + int index; + + /* num of vqs */ + int num_vqs; + struct virtio_blk_vq *vqs; +}; + +struct virtblk_req { + struct request *req; + struct virtio_blk_outhdr out_hdr; + struct virtio_scsi_inhdr in_hdr; + u8 status; + struct scatterlist sg[]; +}; + +struct vring_desc_state { + void *data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ +}; + +struct vring_virtqueue { + struct virtqueue vq; + + /* Actual memory layout for this queue */ + struct vring vring; + + /* Can we use weak barriers? */ + bool weak_barriers; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Host publishes avail event idx */ + bool event; + + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. */ + unsigned int num_added; + + /* Last used index we've seen. */ + u16 last_used_idx; + + /* How to notify other side. FIXME: commonalize hcalls! */ + bool (*notify)(struct virtqueue *vq); + + /* DMA, allocation, and size information */ + bool we_own_ring; + size_t queue_size_in_bytes; + dma_addr_t queue_dma_addr; + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; +#endif + + /* Per-descriptor state. */ + struct vring_desc_state desc_state[]; +}; +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +struct blk_mq_ctx { + struct { + spinlock_t lock; + struct list_head rq_list; + } ____cacheline_aligned_in_smp; + + unsigned int cpu; + unsigned int index_hw; + + RH_KABI_DEPRECATE(unsigned int, ipi_redirect) + + /* incremented at dispatch time */ + unsigned long rq_dispatched[2]; + unsigned long rq_merged; + + /* incremented at completion time */ + unsigned long ____cacheline_aligned_in_smp rq_completed[2]; + + struct request_queue *queue; + struct kobject kobj; +} ____cacheline_aligned_in_smp; + +static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, + int cpu) +{ + return q->queue_hw_ctx[q->mq_map[cpu]]; +} + +static inline struct blk_mq_hw_ctx *blk_mq_get_hctx_byrq(struct request *rq) +{ + return blk_mq_map_queue(rq->q, rq->mq_ctx->cpu); +} + +static inline struct request *desc_state_data_to_req(struct virtqueue *vq, int head) +{ + void *data = to_vvq(vq)->desc_state[head].data; + return data ? 
((struct virtblk_req *)data)->req : NULL; +} + +static inline int get_rq_internal_tag(struct request *rq) +{ + return -1; +} + +static inline unsigned long get_issue_driver_ns(struct request *rq) +{ + if (!rq) + return 0; + if (rq->io_start_time_ns) + return rq->io_start_time_ns; + if (rq->timeout) + return jiffies_to_usecs(rq->deadline - rq->timeout) * 1000; + return 0; +} + +static inline u64 get_check_hang_time_ns(void) +{ + return sched_clock(); +} + +typedef void (*blk_mq_rq_iter)(struct request *, void *, bool); +static inline int iter_all_rq(struct request_queue *q, blk_mq_rq_iter fn, void *data) +{ + blk_mq_tagset_busy_iter(q->tag_set, fn, data); + return 0; +} + +void get_vq_info(struct vq_info *vq_i, struct request *rq); +#endif + diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1062.4.2.el7.x86_64/nvme.h b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1062.4.2.el7.x86_64/nvme.h new file mode 100644 index 0000000000000000000000000000000000000000..3190b86c39f8138681b0b2caa25a08e7ed562646 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1062.4.2.el7.x86_64/nvme.h @@ -0,0 +1,60 @@ +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct nvme_queue { + struct device *q_dmadev; + void *nvme_dev; //struct nvme_dev *dev; + char irqname[24]; /* nvme4294967295-65535\0 */ + spinlock_t sq_lock; + struct nvme_command *sq_cmds; + struct nvme_command __iomem *sq_cmds_io; + spinlock_t cq_lock ____cacheline_aligned_in_smp; + volatile struct nvme_completion *cqes; + struct blk_mq_tags **tags; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_tail; + u16 cq_head; + u16 last_cq_head; + u16 qid; + u8 cq_phase; + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; +}; + +static int get_sq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int tail = nvmeq->sq_tail; + struct nvme_command cmd; + + do { + if (nvmeq->sq_cmds_io) { + memcpy_toio(&cmd, &nvmeq->sq_cmds_io[tail], sizeof(struct nvme_command)); + if (cmd.common.command_id == rq->tag) + return tail; + } + else if (nvmeq->sq_cmds[tail].common.command_id == rq->tag) + return tail; + } while (--tail >= 0); + return -1; +} + +static unsigned long get_cmd_ctx(struct nvme_queue *nvmeq, struct request *rq) +{ + //struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1062.4.2.el7.x86_64/virtio_blk.h b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1062.4.2.el7.x86_64/virtio_blk.h new file mode 100644 index 0000000000000000000000000000000000000000..faa22a329c3d776adf26ac8b12671b7485d6dedc --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1062.4.2.el7.x86_64/virtio_blk.h @@ -0,0 +1,175 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +#define VQ_NAME_LEN 16 + +struct virtio_blk_vq { + struct virtqueue *vq; + spinlock_t lock; + char name[VQ_NAME_LEN]; +} ____cacheline_aligned_in_smp; + +struct virtio_blk { + struct virtio_device *vdev; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* Block layer tags. 
*/ + struct blk_mq_tag_set tag_set; + + /* Process context for config space updates */ + struct work_struct config_work; + + /* What host tells us, plus 2 for header & tailer. */ + unsigned int sg_elems; + + /* Ida index - used to track minor number allocations. */ + int index; + + /* num of vqs */ + int num_vqs; + struct virtio_blk_vq *vqs; +}; + +struct virtblk_req { + struct request *req; + struct virtio_blk_outhdr out_hdr; + struct virtio_scsi_inhdr in_hdr; + u8 status; + struct scatterlist sg[]; +}; + +struct vring_desc_state { + void *data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ +}; + +struct vring_virtqueue { + struct virtqueue vq; + + /* Actual memory layout for this queue */ + struct vring vring; + + /* Can we use weak barriers? */ + bool weak_barriers; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Host publishes avail event idx */ + bool event; + + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. */ + unsigned int num_added; + + /* Last used index we've seen. */ + u16 last_used_idx; + + /* How to notify other side. FIXME: commonalize hcalls! */ + bool (*notify)(struct virtqueue *vq); + + /* DMA, allocation, and size information */ + bool we_own_ring; + size_t queue_size_in_bytes; + dma_addr_t queue_dma_addr; + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; +#endif + + /* Per-descriptor state. */ + struct vring_desc_state desc_state[]; +}; +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +struct blk_mq_ctx { + struct { + spinlock_t lock; + struct list_head rq_list; + } ____cacheline_aligned_in_smp; + + unsigned int cpu; + unsigned int index_hw; + + RH_KABI_DEPRECATE(unsigned int, ipi_redirect) + + /* incremented at dispatch time */ + unsigned long rq_dispatched[2]; + unsigned long rq_merged; + + /* incremented at completion time */ + unsigned long ____cacheline_aligned_in_smp rq_completed[2]; + + struct request_queue *queue; + struct kobject kobj; +} ____cacheline_aligned_in_smp; + +static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, + int cpu) +{ + return q->queue_hw_ctx[q->mq_map[cpu]]; +} + +static inline struct blk_mq_hw_ctx *blk_mq_get_hctx_byrq(struct request *rq) +{ + return blk_mq_map_queue(rq->q, rq->mq_ctx->cpu); +} + +static inline struct request *desc_state_data_to_req(struct virtqueue *vq, int head) +{ + void *data = to_vvq(vq)->desc_state[head].data; + return data ? 
((struct virtblk_req *)data)->req : NULL; +} + +static inline int get_rq_internal_tag(struct request *rq) +{ + return -1; +} + +static inline unsigned long get_issue_driver_ns(struct request *rq) +{ + if (!rq) + return 0; + if (rq->io_start_time_ns) + return rq->io_start_time_ns; + if (rq->timeout) + return jiffies_to_usecs(rq->deadline - rq->timeout) * 1000; + return 0; +} + +static inline u64 get_check_hang_time_ns(void) +{ + return sched_clock(); +} + +typedef void (*blk_mq_rq_iter)(struct request *, void *, bool); +static inline int iter_all_rq(struct request_queue *q, blk_mq_rq_iter fn, void *data) +{ + blk_mq_tagset_busy_iter(q->tag_set, fn, data); + return 0; +} + +void get_vq_info(struct vq_info *vq_i, struct request *rq); +#endif + diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1062.4.3.el7.x86_64/nvme.h b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1062.4.3.el7.x86_64/nvme.h new file mode 100644 index 0000000000000000000000000000000000000000..3190b86c39f8138681b0b2caa25a08e7ed562646 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1062.4.3.el7.x86_64/nvme.h @@ -0,0 +1,60 @@ +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct nvme_queue { + struct device *q_dmadev; + void *nvme_dev; //struct nvme_dev *dev; + char irqname[24]; /* nvme4294967295-65535\0 */ + spinlock_t sq_lock; + struct nvme_command *sq_cmds; + struct nvme_command __iomem *sq_cmds_io; + spinlock_t cq_lock ____cacheline_aligned_in_smp; + volatile struct nvme_completion *cqes; + struct blk_mq_tags **tags; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_tail; + u16 cq_head; + u16 last_cq_head; + u16 qid; + u8 cq_phase; + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; +}; + +static int get_sq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int tail = nvmeq->sq_tail; + struct nvme_command cmd; + + do { + if (nvmeq->sq_cmds_io) { + memcpy_toio(&cmd, &nvmeq->sq_cmds_io[tail], sizeof(struct nvme_command)); + if (cmd.common.command_id == rq->tag) + return tail; + } + else if (nvmeq->sq_cmds[tail].common.command_id == rq->tag) + return tail; + } while (--tail >= 0); + return -1; +} + +static unsigned long get_cmd_ctx(struct nvme_queue *nvmeq, struct request *rq) +{ + //struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1062.4.3.el7.x86_64/virtio_blk.h b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1062.4.3.el7.x86_64/virtio_blk.h new file mode 100644 index 0000000000000000000000000000000000000000..faa22a329c3d776adf26ac8b12671b7485d6dedc --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1062.4.3.el7.x86_64/virtio_blk.h @@ -0,0 +1,175 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +#define VQ_NAME_LEN 16 + +struct virtio_blk_vq { + struct virtqueue *vq; + spinlock_t lock; + char name[VQ_NAME_LEN]; +} ____cacheline_aligned_in_smp; + +struct virtio_blk { + struct virtio_device *vdev; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* Block layer tags. 
*/ + struct blk_mq_tag_set tag_set; + + /* Process context for config space updates */ + struct work_struct config_work; + + /* What host tells us, plus 2 for header & tailer. */ + unsigned int sg_elems; + + /* Ida index - used to track minor number allocations. */ + int index; + + /* num of vqs */ + int num_vqs; + struct virtio_blk_vq *vqs; +}; + +struct virtblk_req { + struct request *req; + struct virtio_blk_outhdr out_hdr; + struct virtio_scsi_inhdr in_hdr; + u8 status; + struct scatterlist sg[]; +}; + +struct vring_desc_state { + void *data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ +}; + +struct vring_virtqueue { + struct virtqueue vq; + + /* Actual memory layout for this queue */ + struct vring vring; + + /* Can we use weak barriers? */ + bool weak_barriers; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Host publishes avail event idx */ + bool event; + + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. */ + unsigned int num_added; + + /* Last used index we've seen. */ + u16 last_used_idx; + + /* How to notify other side. FIXME: commonalize hcalls! */ + bool (*notify)(struct virtqueue *vq); + + /* DMA, allocation, and size information */ + bool we_own_ring; + size_t queue_size_in_bytes; + dma_addr_t queue_dma_addr; + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; +#endif + + /* Per-descriptor state. */ + struct vring_desc_state desc_state[]; +}; +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +struct blk_mq_ctx { + struct { + spinlock_t lock; + struct list_head rq_list; + } ____cacheline_aligned_in_smp; + + unsigned int cpu; + unsigned int index_hw; + + RH_KABI_DEPRECATE(unsigned int, ipi_redirect) + + /* incremented at dispatch time */ + unsigned long rq_dispatched[2]; + unsigned long rq_merged; + + /* incremented at completion time */ + unsigned long ____cacheline_aligned_in_smp rq_completed[2]; + + struct request_queue *queue; + struct kobject kobj; +} ____cacheline_aligned_in_smp; + +static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, + int cpu) +{ + return q->queue_hw_ctx[q->mq_map[cpu]]; +} + +static inline struct blk_mq_hw_ctx *blk_mq_get_hctx_byrq(struct request *rq) +{ + return blk_mq_map_queue(rq->q, rq->mq_ctx->cpu); +} + +static inline struct request *desc_state_data_to_req(struct virtqueue *vq, int head) +{ + void *data = to_vvq(vq)->desc_state[head].data; + return data ? 
((struct virtblk_req *)data)->req : NULL; +} + +static inline int get_rq_internal_tag(struct request *rq) +{ + return -1; +} + +static inline unsigned long get_issue_driver_ns(struct request *rq) +{ + if (!rq) + return 0; + if (rq->io_start_time_ns) + return rq->io_start_time_ns; + if (rq->timeout) + return jiffies_to_usecs(rq->deadline - rq->timeout) * 1000; + return 0; +} + +static inline u64 get_check_hang_time_ns(void) +{ + return sched_clock(); +} + +typedef void (*blk_mq_rq_iter)(struct request *, void *, bool); +static inline int iter_all_rq(struct request_queue *q, blk_mq_rq_iter fn, void *data) +{ + blk_mq_tagset_busy_iter(q->tag_set, fn, data); + return 0; +} + +void get_vq_info(struct vq_info *vq_i, struct request *rq); +#endif + diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1062.7.1.el7.x86_64/nvme.h b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1062.7.1.el7.x86_64/nvme.h new file mode 100644 index 0000000000000000000000000000000000000000..3190b86c39f8138681b0b2caa25a08e7ed562646 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1062.7.1.el7.x86_64/nvme.h @@ -0,0 +1,60 @@ +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct nvme_queue { + struct device *q_dmadev; + void *nvme_dev; //struct nvme_dev *dev; + char irqname[24]; /* nvme4294967295-65535\0 */ + spinlock_t sq_lock; + struct nvme_command *sq_cmds; + struct nvme_command __iomem *sq_cmds_io; + spinlock_t cq_lock ____cacheline_aligned_in_smp; + volatile struct nvme_completion *cqes; + struct blk_mq_tags **tags; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_tail; + u16 cq_head; + u16 last_cq_head; + u16 qid; + u8 cq_phase; + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; +}; + +static int get_sq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int tail = nvmeq->sq_tail; + struct nvme_command cmd; + + do { + if (nvmeq->sq_cmds_io) { + memcpy_toio(&cmd, &nvmeq->sq_cmds_io[tail], sizeof(struct nvme_command)); + if (cmd.common.command_id == rq->tag) + return tail; + } + else if (nvmeq->sq_cmds[tail].common.command_id == rq->tag) + return tail; + } while (--tail >= 0); + return -1; +} + +static unsigned long get_cmd_ctx(struct nvme_queue *nvmeq, struct request *rq) +{ + //struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1062.7.1.el7.x86_64/virtio_blk.h b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1062.7.1.el7.x86_64/virtio_blk.h new file mode 100644 index 0000000000000000000000000000000000000000..faa22a329c3d776adf26ac8b12671b7485d6dedc --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1062.7.1.el7.x86_64/virtio_blk.h @@ -0,0 +1,175 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +#define VQ_NAME_LEN 16 + +struct virtio_blk_vq { + struct virtqueue *vq; + spinlock_t lock; + char name[VQ_NAME_LEN]; +} ____cacheline_aligned_in_smp; + +struct virtio_blk { + struct virtio_device *vdev; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* Block layer tags. 
*/ + struct blk_mq_tag_set tag_set; + + /* Process context for config space updates */ + struct work_struct config_work; + + /* What host tells us, plus 2 for header & tailer. */ + unsigned int sg_elems; + + /* Ida index - used to track minor number allocations. */ + int index; + + /* num of vqs */ + int num_vqs; + struct virtio_blk_vq *vqs; +}; + +struct virtblk_req { + struct request *req; + struct virtio_blk_outhdr out_hdr; + struct virtio_scsi_inhdr in_hdr; + u8 status; + struct scatterlist sg[]; +}; + +struct vring_desc_state { + void *data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ +}; + +struct vring_virtqueue { + struct virtqueue vq; + + /* Actual memory layout for this queue */ + struct vring vring; + + /* Can we use weak barriers? */ + bool weak_barriers; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Host publishes avail event idx */ + bool event; + + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. */ + unsigned int num_added; + + /* Last used index we've seen. */ + u16 last_used_idx; + + /* How to notify other side. FIXME: commonalize hcalls! */ + bool (*notify)(struct virtqueue *vq); + + /* DMA, allocation, and size information */ + bool we_own_ring; + size_t queue_size_in_bytes; + dma_addr_t queue_dma_addr; + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; +#endif + + /* Per-descriptor state. */ + struct vring_desc_state desc_state[]; +}; +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +struct blk_mq_ctx { + struct { + spinlock_t lock; + struct list_head rq_list; + } ____cacheline_aligned_in_smp; + + unsigned int cpu; + unsigned int index_hw; + + RH_KABI_DEPRECATE(unsigned int, ipi_redirect) + + /* incremented at dispatch time */ + unsigned long rq_dispatched[2]; + unsigned long rq_merged; + + /* incremented at completion time */ + unsigned long ____cacheline_aligned_in_smp rq_completed[2]; + + struct request_queue *queue; + struct kobject kobj; +} ____cacheline_aligned_in_smp; + +static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, + int cpu) +{ + return q->queue_hw_ctx[q->mq_map[cpu]]; +} + +static inline struct blk_mq_hw_ctx *blk_mq_get_hctx_byrq(struct request *rq) +{ + return blk_mq_map_queue(rq->q, rq->mq_ctx->cpu); +} + +static inline struct request *desc_state_data_to_req(struct virtqueue *vq, int head) +{ + void *data = to_vvq(vq)->desc_state[head].data; + return data ? 
((struct virtblk_req *)data)->req : NULL; +} + +static inline int get_rq_internal_tag(struct request *rq) +{ + return -1; +} + +static inline unsigned long get_issue_driver_ns(struct request *rq) +{ + if (!rq) + return 0; + if (rq->io_start_time_ns) + return rq->io_start_time_ns; + if (rq->timeout) + return jiffies_to_usecs(rq->deadline - rq->timeout) * 1000; + return 0; +} + +static inline u64 get_check_hang_time_ns(void) +{ + return sched_clock(); +} + +typedef void (*blk_mq_rq_iter)(struct request *, void *, bool); +static inline int iter_all_rq(struct request_queue *q, blk_mq_rq_iter fn, void *data) +{ + blk_mq_tagset_busy_iter(q->tag_set, fn, data); + return 0; +} + +void get_vq_info(struct vq_info *vq_i, struct request *rq); +#endif + diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1062.9.1.el7.x86_64/nvme.h b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1062.9.1.el7.x86_64/nvme.h new file mode 100644 index 0000000000000000000000000000000000000000..3190b86c39f8138681b0b2caa25a08e7ed562646 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1062.9.1.el7.x86_64/nvme.h @@ -0,0 +1,60 @@ +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct nvme_queue { + struct device *q_dmadev; + void *nvme_dev; //struct nvme_dev *dev; + char irqname[24]; /* nvme4294967295-65535\0 */ + spinlock_t sq_lock; + struct nvme_command *sq_cmds; + struct nvme_command __iomem *sq_cmds_io; + spinlock_t cq_lock ____cacheline_aligned_in_smp; + volatile struct nvme_completion *cqes; + struct blk_mq_tags **tags; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_tail; + u16 cq_head; + u16 last_cq_head; + u16 qid; + u8 cq_phase; + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; +}; + +static int get_sq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int tail = nvmeq->sq_tail; + struct nvme_command cmd; + + do { + if (nvmeq->sq_cmds_io) { + memcpy_toio(&cmd, &nvmeq->sq_cmds_io[tail], sizeof(struct nvme_command)); + if (cmd.common.command_id == rq->tag) + return tail; + } + else if (nvmeq->sq_cmds[tail].common.command_id == rq->tag) + return tail; + } while (--tail >= 0); + return -1; +} + +static unsigned long get_cmd_ctx(struct nvme_queue *nvmeq, struct request *rq) +{ + //struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1062.9.1.el7.x86_64/virtio_blk.h b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1062.9.1.el7.x86_64/virtio_blk.h new file mode 100644 index 0000000000000000000000000000000000000000..faa22a329c3d776adf26ac8b12671b7485d6dedc --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1062.9.1.el7.x86_64/virtio_blk.h @@ -0,0 +1,175 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +#define VQ_NAME_LEN 16 + +struct virtio_blk_vq { + struct virtqueue *vq; + spinlock_t lock; + char name[VQ_NAME_LEN]; +} ____cacheline_aligned_in_smp; + +struct virtio_blk { + struct virtio_device *vdev; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* Block layer tags. 
*/ + struct blk_mq_tag_set tag_set; + + /* Process context for config space updates */ + struct work_struct config_work; + + /* What host tells us, plus 2 for header & tailer. */ + unsigned int sg_elems; + + /* Ida index - used to track minor number allocations. */ + int index; + + /* num of vqs */ + int num_vqs; + struct virtio_blk_vq *vqs; +}; + +struct virtblk_req { + struct request *req; + struct virtio_blk_outhdr out_hdr; + struct virtio_scsi_inhdr in_hdr; + u8 status; + struct scatterlist sg[]; +}; + +struct vring_desc_state { + void *data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ +}; + +struct vring_virtqueue { + struct virtqueue vq; + + /* Actual memory layout for this queue */ + struct vring vring; + + /* Can we use weak barriers? */ + bool weak_barriers; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Host publishes avail event idx */ + bool event; + + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. */ + unsigned int num_added; + + /* Last used index we've seen. */ + u16 last_used_idx; + + /* How to notify other side. FIXME: commonalize hcalls! */ + bool (*notify)(struct virtqueue *vq); + + /* DMA, allocation, and size information */ + bool we_own_ring; + size_t queue_size_in_bytes; + dma_addr_t queue_dma_addr; + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; +#endif + + /* Per-descriptor state. */ + struct vring_desc_state desc_state[]; +}; +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +struct blk_mq_ctx { + struct { + spinlock_t lock; + struct list_head rq_list; + } ____cacheline_aligned_in_smp; + + unsigned int cpu; + unsigned int index_hw; + + RH_KABI_DEPRECATE(unsigned int, ipi_redirect) + + /* incremented at dispatch time */ + unsigned long rq_dispatched[2]; + unsigned long rq_merged; + + /* incremented at completion time */ + unsigned long ____cacheline_aligned_in_smp rq_completed[2]; + + struct request_queue *queue; + struct kobject kobj; +} ____cacheline_aligned_in_smp; + +static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, + int cpu) +{ + return q->queue_hw_ctx[q->mq_map[cpu]]; +} + +static inline struct blk_mq_hw_ctx *blk_mq_get_hctx_byrq(struct request *rq) +{ + return blk_mq_map_queue(rq->q, rq->mq_ctx->cpu); +} + +static inline struct request *desc_state_data_to_req(struct virtqueue *vq, int head) +{ + void *data = to_vvq(vq)->desc_state[head].data; + return data ? 
((struct virtblk_req *)data)->req : NULL; +} + +static inline int get_rq_internal_tag(struct request *rq) +{ + return -1; +} + +static inline unsigned long get_issue_driver_ns(struct request *rq) +{ + if (!rq) + return 0; + if (rq->io_start_time_ns) + return rq->io_start_time_ns; + if (rq->timeout) + return jiffies_to_usecs(rq->deadline - rq->timeout) * 1000; + return 0; +} + +static inline u64 get_check_hang_time_ns(void) +{ + return sched_clock(); +} + +typedef void (*blk_mq_rq_iter)(struct request *, void *, bool); +static inline int iter_all_rq(struct request_queue *q, blk_mq_rq_iter fn, void *data) +{ + blk_mq_tagset_busy_iter(q->tag_set, fn, data); + return 0; +} + +void get_vq_info(struct vq_info *vq_i, struct request *rq); +#endif + diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1062.el7.x86_64/nvme.h b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1062.el7.x86_64/nvme.h new file mode 100644 index 0000000000000000000000000000000000000000..3190b86c39f8138681b0b2caa25a08e7ed562646 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1062.el7.x86_64/nvme.h @@ -0,0 +1,60 @@ +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct nvme_queue { + struct device *q_dmadev; + void *nvme_dev; //struct nvme_dev *dev; + char irqname[24]; /* nvme4294967295-65535\0 */ + spinlock_t sq_lock; + struct nvme_command *sq_cmds; + struct nvme_command __iomem *sq_cmds_io; + spinlock_t cq_lock ____cacheline_aligned_in_smp; + volatile struct nvme_completion *cqes; + struct blk_mq_tags **tags; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_tail; + u16 cq_head; + u16 last_cq_head; + u16 qid; + u8 cq_phase; + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; +}; + +static int get_sq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int tail = nvmeq->sq_tail; + struct nvme_command cmd; + + do { + if (nvmeq->sq_cmds_io) { + memcpy_toio(&cmd, &nvmeq->sq_cmds_io[tail], sizeof(struct nvme_command)); + if (cmd.common.command_id == rq->tag) + return tail; + } + else if (nvmeq->sq_cmds[tail].common.command_id == rq->tag) + return tail; + } while (--tail >= 0); + return -1; +} + +static unsigned long get_cmd_ctx(struct nvme_queue *nvmeq, struct request *rq) +{ + //struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1062.el7.x86_64/virtio_blk.h b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1062.el7.x86_64/virtio_blk.h new file mode 100644 index 0000000000000000000000000000000000000000..faa22a329c3d776adf26ac8b12671b7485d6dedc --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1062.el7.x86_64/virtio_blk.h @@ -0,0 +1,175 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +#define VQ_NAME_LEN 16 + +struct virtio_blk_vq { + struct virtqueue *vq; + spinlock_t lock; + char name[VQ_NAME_LEN]; +} ____cacheline_aligned_in_smp; + +struct virtio_blk { + struct virtio_device *vdev; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* Block layer tags. 
*/ + struct blk_mq_tag_set tag_set; + + /* Process context for config space updates */ + struct work_struct config_work; + + /* What host tells us, plus 2 for header & tailer. */ + unsigned int sg_elems; + + /* Ida index - used to track minor number allocations. */ + int index; + + /* num of vqs */ + int num_vqs; + struct virtio_blk_vq *vqs; +}; + +struct virtblk_req { + struct request *req; + struct virtio_blk_outhdr out_hdr; + struct virtio_scsi_inhdr in_hdr; + u8 status; + struct scatterlist sg[]; +}; + +struct vring_desc_state { + void *data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ +}; + +struct vring_virtqueue { + struct virtqueue vq; + + /* Actual memory layout for this queue */ + struct vring vring; + + /* Can we use weak barriers? */ + bool weak_barriers; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Host publishes avail event idx */ + bool event; + + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. */ + unsigned int num_added; + + /* Last used index we've seen. */ + u16 last_used_idx; + + /* How to notify other side. FIXME: commonalize hcalls! */ + bool (*notify)(struct virtqueue *vq); + + /* DMA, allocation, and size information */ + bool we_own_ring; + size_t queue_size_in_bytes; + dma_addr_t queue_dma_addr; + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; +#endif + + /* Per-descriptor state. */ + struct vring_desc_state desc_state[]; +}; +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +struct blk_mq_ctx { + struct { + spinlock_t lock; + struct list_head rq_list; + } ____cacheline_aligned_in_smp; + + unsigned int cpu; + unsigned int index_hw; + + RH_KABI_DEPRECATE(unsigned int, ipi_redirect) + + /* incremented at dispatch time */ + unsigned long rq_dispatched[2]; + unsigned long rq_merged; + + /* incremented at completion time */ + unsigned long ____cacheline_aligned_in_smp rq_completed[2]; + + struct request_queue *queue; + struct kobject kobj; +} ____cacheline_aligned_in_smp; + +static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, + int cpu) +{ + return q->queue_hw_ctx[q->mq_map[cpu]]; +} + +static inline struct blk_mq_hw_ctx *blk_mq_get_hctx_byrq(struct request *rq) +{ + return blk_mq_map_queue(rq->q, rq->mq_ctx->cpu); +} + +static inline struct request *desc_state_data_to_req(struct virtqueue *vq, int head) +{ + void *data = to_vvq(vq)->desc_state[head].data; + return data ? 
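+	/* the data token stored when the buffer was queued is the driver's virtblk_req; return its block-layer request (NULL if this slot has no pending buffer) */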
((struct virtblk_req *)data)->req : NULL; +} + +static inline int get_rq_internal_tag(struct request *rq) +{ + return -1; +} + +static inline unsigned long get_issue_driver_ns(struct request *rq) +{ + if (!rq) + return 0; + if (rq->io_start_time_ns) + return rq->io_start_time_ns; + if (rq->timeout) + return jiffies_to_usecs(rq->deadline - rq->timeout) * 1000; + return 0; +} + +static inline u64 get_check_hang_time_ns(void) +{ + return sched_clock(); +} + +typedef void (*blk_mq_rq_iter)(struct request *, void *, bool); +static inline int iter_all_rq(struct request_queue *q, blk_mq_rq_iter fn, void *data) +{ + blk_mq_tagset_busy_iter(q->tag_set, fn, data); + return 0; +} + +void get_vq_info(struct vq_info *vq_i, struct request *rq); +#endif + diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1127.10.1.el7.x86_64/nvme.h b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1127.10.1.el7.x86_64/nvme.h new file mode 100644 index 0000000000000000000000000000000000000000..3190b86c39f8138681b0b2caa25a08e7ed562646 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1127.10.1.el7.x86_64/nvme.h @@ -0,0 +1,60 @@ +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct nvme_queue { + struct device *q_dmadev; + void *nvme_dev; //struct nvme_dev *dev; + char irqname[24]; /* nvme4294967295-65535\0 */ + spinlock_t sq_lock; + struct nvme_command *sq_cmds; + struct nvme_command __iomem *sq_cmds_io; + spinlock_t cq_lock ____cacheline_aligned_in_smp; + volatile struct nvme_completion *cqes; + struct blk_mq_tags **tags; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_tail; + u16 cq_head; + u16 last_cq_head; + u16 qid; + u8 cq_phase; + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; +}; + +static int get_sq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int tail = nvmeq->sq_tail; + struct nvme_command cmd; + + do { + if (nvmeq->sq_cmds_io) { + memcpy_toio(&cmd, &nvmeq->sq_cmds_io[tail], sizeof(struct nvme_command)); + if (cmd.common.command_id == rq->tag) + return tail; + } + else if (nvmeq->sq_cmds[tail].common.command_id == rq->tag) + return tail; + } while (--tail >= 0); + return -1; +} + +static unsigned long get_cmd_ctx(struct nvme_queue *nvmeq, struct request *rq) +{ + //struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1127.10.1.el7.x86_64/virtio_blk.h b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1127.10.1.el7.x86_64/virtio_blk.h new file mode 100644 index 0000000000000000000000000000000000000000..faa22a329c3d776adf26ac8b12671b7485d6dedc --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1127.10.1.el7.x86_64/virtio_blk.h @@ -0,0 +1,175 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +#define VQ_NAME_LEN 16 + +struct virtio_blk_vq { + struct virtqueue *vq; + spinlock_t lock; + char name[VQ_NAME_LEN]; +} ____cacheline_aligned_in_smp; + +struct virtio_blk { + struct virtio_device *vdev; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* Block layer tags. 
*/ + struct blk_mq_tag_set tag_set; + + /* Process context for config space updates */ + struct work_struct config_work; + + /* What host tells us, plus 2 for header & tailer. */ + unsigned int sg_elems; + + /* Ida index - used to track minor number allocations. */ + int index; + + /* num of vqs */ + int num_vqs; + struct virtio_blk_vq *vqs; +}; + +struct virtblk_req { + struct request *req; + struct virtio_blk_outhdr out_hdr; + struct virtio_scsi_inhdr in_hdr; + u8 status; + struct scatterlist sg[]; +}; + +struct vring_desc_state { + void *data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ +}; + +struct vring_virtqueue { + struct virtqueue vq; + + /* Actual memory layout for this queue */ + struct vring vring; + + /* Can we use weak barriers? */ + bool weak_barriers; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Host publishes avail event idx */ + bool event; + + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. */ + unsigned int num_added; + + /* Last used index we've seen. */ + u16 last_used_idx; + + /* How to notify other side. FIXME: commonalize hcalls! */ + bool (*notify)(struct virtqueue *vq); + + /* DMA, allocation, and size information */ + bool we_own_ring; + size_t queue_size_in_bytes; + dma_addr_t queue_dma_addr; + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; +#endif + + /* Per-descriptor state. */ + struct vring_desc_state desc_state[]; +}; +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +struct blk_mq_ctx { + struct { + spinlock_t lock; + struct list_head rq_list; + } ____cacheline_aligned_in_smp; + + unsigned int cpu; + unsigned int index_hw; + + RH_KABI_DEPRECATE(unsigned int, ipi_redirect) + + /* incremented at dispatch time */ + unsigned long rq_dispatched[2]; + unsigned long rq_merged; + + /* incremented at completion time */ + unsigned long ____cacheline_aligned_in_smp rq_completed[2]; + + struct request_queue *queue; + struct kobject kobj; +} ____cacheline_aligned_in_smp; + +static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, + int cpu) +{ + return q->queue_hw_ctx[q->mq_map[cpu]]; +} + +static inline struct blk_mq_hw_ctx *blk_mq_get_hctx_byrq(struct request *rq) +{ + return blk_mq_map_queue(rq->q, rq->mq_ctx->cpu); +} + +static inline struct request *desc_state_data_to_req(struct virtqueue *vq, int head) +{ + void *data = to_vvq(vq)->desc_state[head].data; + return data ? 
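+	/* the data token stored when the buffer was queued is the driver's virtblk_req; return its block-layer request (NULL if this slot has no pending buffer) */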
((struct virtblk_req *)data)->req : NULL; +} + +static inline int get_rq_internal_tag(struct request *rq) +{ + return -1; +} + +static inline unsigned long get_issue_driver_ns(struct request *rq) +{ + if (!rq) + return 0; + if (rq->io_start_time_ns) + return rq->io_start_time_ns; + if (rq->timeout) + return jiffies_to_usecs(rq->deadline - rq->timeout) * 1000; + return 0; +} + +static inline u64 get_check_hang_time_ns(void) +{ + return sched_clock(); +} + +typedef void (*blk_mq_rq_iter)(struct request *, void *, bool); +static inline int iter_all_rq(struct request_queue *q, blk_mq_rq_iter fn, void *data) +{ + blk_mq_tagset_busy_iter(q->tag_set, fn, data); + return 0; +} + +void get_vq_info(struct vq_info *vq_i, struct request *rq); +#endif + diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1127.13.1.el7.x86_64/nvme.h b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1127.13.1.el7.x86_64/nvme.h new file mode 100644 index 0000000000000000000000000000000000000000..3190b86c39f8138681b0b2caa25a08e7ed562646 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1127.13.1.el7.x86_64/nvme.h @@ -0,0 +1,60 @@ +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct nvme_queue { + struct device *q_dmadev; + void *nvme_dev; //struct nvme_dev *dev; + char irqname[24]; /* nvme4294967295-65535\0 */ + spinlock_t sq_lock; + struct nvme_command *sq_cmds; + struct nvme_command __iomem *sq_cmds_io; + spinlock_t cq_lock ____cacheline_aligned_in_smp; + volatile struct nvme_completion *cqes; + struct blk_mq_tags **tags; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_tail; + u16 cq_head; + u16 last_cq_head; + u16 qid; + u8 cq_phase; + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; +}; + +static int get_sq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int tail = nvmeq->sq_tail; + struct nvme_command cmd; + + do { + if (nvmeq->sq_cmds_io) { + memcpy_toio(&cmd, &nvmeq->sq_cmds_io[tail], sizeof(struct nvme_command)); + if (cmd.common.command_id == rq->tag) + return tail; + } + else if (nvmeq->sq_cmds[tail].common.command_id == rq->tag) + return tail; + } while (--tail >= 0); + return -1; +} + +static unsigned long get_cmd_ctx(struct nvme_queue *nvmeq, struct request *rq) +{ + //struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1127.13.1.el7.x86_64/virtio_blk.h b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1127.13.1.el7.x86_64/virtio_blk.h new file mode 100644 index 0000000000000000000000000000000000000000..faa22a329c3d776adf26ac8b12671b7485d6dedc --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1127.13.1.el7.x86_64/virtio_blk.h @@ -0,0 +1,175 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +#define VQ_NAME_LEN 16 + +struct virtio_blk_vq { + struct virtqueue *vq; + spinlock_t lock; + char name[VQ_NAME_LEN]; +} ____cacheline_aligned_in_smp; + +struct virtio_blk { + struct virtio_device *vdev; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* Block layer tags. 
*/ + struct blk_mq_tag_set tag_set; + + /* Process context for config space updates */ + struct work_struct config_work; + + /* What host tells us, plus 2 for header & tailer. */ + unsigned int sg_elems; + + /* Ida index - used to track minor number allocations. */ + int index; + + /* num of vqs */ + int num_vqs; + struct virtio_blk_vq *vqs; +}; + +struct virtblk_req { + struct request *req; + struct virtio_blk_outhdr out_hdr; + struct virtio_scsi_inhdr in_hdr; + u8 status; + struct scatterlist sg[]; +}; + +struct vring_desc_state { + void *data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ +}; + +struct vring_virtqueue { + struct virtqueue vq; + + /* Actual memory layout for this queue */ + struct vring vring; + + /* Can we use weak barriers? */ + bool weak_barriers; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Host publishes avail event idx */ + bool event; + + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. */ + unsigned int num_added; + + /* Last used index we've seen. */ + u16 last_used_idx; + + /* How to notify other side. FIXME: commonalize hcalls! */ + bool (*notify)(struct virtqueue *vq); + + /* DMA, allocation, and size information */ + bool we_own_ring; + size_t queue_size_in_bytes; + dma_addr_t queue_dma_addr; + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; +#endif + + /* Per-descriptor state. */ + struct vring_desc_state desc_state[]; +}; +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +struct blk_mq_ctx { + struct { + spinlock_t lock; + struct list_head rq_list; + } ____cacheline_aligned_in_smp; + + unsigned int cpu; + unsigned int index_hw; + + RH_KABI_DEPRECATE(unsigned int, ipi_redirect) + + /* incremented at dispatch time */ + unsigned long rq_dispatched[2]; + unsigned long rq_merged; + + /* incremented at completion time */ + unsigned long ____cacheline_aligned_in_smp rq_completed[2]; + + struct request_queue *queue; + struct kobject kobj; +} ____cacheline_aligned_in_smp; + +static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, + int cpu) +{ + return q->queue_hw_ctx[q->mq_map[cpu]]; +} + +static inline struct blk_mq_hw_ctx *blk_mq_get_hctx_byrq(struct request *rq) +{ + return blk_mq_map_queue(rq->q, rq->mq_ctx->cpu); +} + +static inline struct request *desc_state_data_to_req(struct virtqueue *vq, int head) +{ + void *data = to_vvq(vq)->desc_state[head].data; + return data ? 
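+	/* the data token stored when the buffer was queued is the driver's virtblk_req; return its block-layer request (NULL if this slot has no pending buffer) */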
((struct virtblk_req *)data)->req : NULL; +} + +static inline int get_rq_internal_tag(struct request *rq) +{ + return -1; +} + +static inline unsigned long get_issue_driver_ns(struct request *rq) +{ + if (!rq) + return 0; + if (rq->io_start_time_ns) + return rq->io_start_time_ns; + if (rq->timeout) + return jiffies_to_usecs(rq->deadline - rq->timeout) * 1000; + return 0; +} + +static inline u64 get_check_hang_time_ns(void) +{ + return sched_clock(); +} + +typedef void (*blk_mq_rq_iter)(struct request *, void *, bool); +static inline int iter_all_rq(struct request_queue *q, blk_mq_rq_iter fn, void *data) +{ + blk_mq_tagset_busy_iter(q->tag_set, fn, data); + return 0; +} + +void get_vq_info(struct vq_info *vq_i, struct request *rq); +#endif + diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1127.18.2.el7.x86_64/nvme.h b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1127.18.2.el7.x86_64/nvme.h new file mode 100644 index 0000000000000000000000000000000000000000..3190b86c39f8138681b0b2caa25a08e7ed562646 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1127.18.2.el7.x86_64/nvme.h @@ -0,0 +1,60 @@ +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct nvme_queue { + struct device *q_dmadev; + void *nvme_dev; //struct nvme_dev *dev; + char irqname[24]; /* nvme4294967295-65535\0 */ + spinlock_t sq_lock; + struct nvme_command *sq_cmds; + struct nvme_command __iomem *sq_cmds_io; + spinlock_t cq_lock ____cacheline_aligned_in_smp; + volatile struct nvme_completion *cqes; + struct blk_mq_tags **tags; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_tail; + u16 cq_head; + u16 last_cq_head; + u16 qid; + u8 cq_phase; + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; +}; + +static int get_sq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int tail = nvmeq->sq_tail; + struct nvme_command cmd; + + do { + if (nvmeq->sq_cmds_io) { + memcpy_toio(&cmd, &nvmeq->sq_cmds_io[tail], sizeof(struct nvme_command)); + if (cmd.common.command_id == rq->tag) + return tail; + } + else if (nvmeq->sq_cmds[tail].common.command_id == rq->tag) + return tail; + } while (--tail >= 0); + return -1; +} + +static unsigned long get_cmd_ctx(struct nvme_queue *nvmeq, struct request *rq) +{ + //struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1127.18.2.el7.x86_64/virtio_blk.h b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1127.18.2.el7.x86_64/virtio_blk.h new file mode 100644 index 0000000000000000000000000000000000000000..faa22a329c3d776adf26ac8b12671b7485d6dedc --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1127.18.2.el7.x86_64/virtio_blk.h @@ -0,0 +1,175 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +#define VQ_NAME_LEN 16 + +struct virtio_blk_vq { + struct virtqueue *vq; + spinlock_t lock; + char name[VQ_NAME_LEN]; +} ____cacheline_aligned_in_smp; + +struct virtio_blk { + struct virtio_device *vdev; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* Block layer tags. 
*/ + struct blk_mq_tag_set tag_set; + + /* Process context for config space updates */ + struct work_struct config_work; + + /* What host tells us, plus 2 for header & tailer. */ + unsigned int sg_elems; + + /* Ida index - used to track minor number allocations. */ + int index; + + /* num of vqs */ + int num_vqs; + struct virtio_blk_vq *vqs; +}; + +struct virtblk_req { + struct request *req; + struct virtio_blk_outhdr out_hdr; + struct virtio_scsi_inhdr in_hdr; + u8 status; + struct scatterlist sg[]; +}; + +struct vring_desc_state { + void *data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ +}; + +struct vring_virtqueue { + struct virtqueue vq; + + /* Actual memory layout for this queue */ + struct vring vring; + + /* Can we use weak barriers? */ + bool weak_barriers; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Host publishes avail event idx */ + bool event; + + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. */ + unsigned int num_added; + + /* Last used index we've seen. */ + u16 last_used_idx; + + /* How to notify other side. FIXME: commonalize hcalls! */ + bool (*notify)(struct virtqueue *vq); + + /* DMA, allocation, and size information */ + bool we_own_ring; + size_t queue_size_in_bytes; + dma_addr_t queue_dma_addr; + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; +#endif + + /* Per-descriptor state. */ + struct vring_desc_state desc_state[]; +}; +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +struct blk_mq_ctx { + struct { + spinlock_t lock; + struct list_head rq_list; + } ____cacheline_aligned_in_smp; + + unsigned int cpu; + unsigned int index_hw; + + RH_KABI_DEPRECATE(unsigned int, ipi_redirect) + + /* incremented at dispatch time */ + unsigned long rq_dispatched[2]; + unsigned long rq_merged; + + /* incremented at completion time */ + unsigned long ____cacheline_aligned_in_smp rq_completed[2]; + + struct request_queue *queue; + struct kobject kobj; +} ____cacheline_aligned_in_smp; + +static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, + int cpu) +{ + return q->queue_hw_ctx[q->mq_map[cpu]]; +} + +static inline struct blk_mq_hw_ctx *blk_mq_get_hctx_byrq(struct request *rq) +{ + return blk_mq_map_queue(rq->q, rq->mq_ctx->cpu); +} + +static inline struct request *desc_state_data_to_req(struct virtqueue *vq, int head) +{ + void *data = to_vvq(vq)->desc_state[head].data; + return data ? 
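+	/* the data token stored when the buffer was queued is the driver's virtblk_req; return its block-layer request (NULL if this slot has no pending buffer) */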
((struct virtblk_req *)data)->req : NULL; +} + +static inline int get_rq_internal_tag(struct request *rq) +{ + return -1; +} + +static inline unsigned long get_issue_driver_ns(struct request *rq) +{ + if (!rq) + return 0; + if (rq->io_start_time_ns) + return rq->io_start_time_ns; + if (rq->timeout) + return jiffies_to_usecs(rq->deadline - rq->timeout) * 1000; + return 0; +} + +static inline u64 get_check_hang_time_ns(void) +{ + return sched_clock(); +} + +typedef void (*blk_mq_rq_iter)(struct request *, void *, bool); +static inline int iter_all_rq(struct request_queue *q, blk_mq_rq_iter fn, void *data) +{ + blk_mq_tagset_busy_iter(q->tag_set, fn, data); + return 0; +} + +void get_vq_info(struct vq_info *vq_i, struct request *rq); +#endif + diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1127.19.1.el7.x86_64/nvme.h b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1127.19.1.el7.x86_64/nvme.h new file mode 100644 index 0000000000000000000000000000000000000000..3190b86c39f8138681b0b2caa25a08e7ed562646 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1127.19.1.el7.x86_64/nvme.h @@ -0,0 +1,60 @@ +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct nvme_queue { + struct device *q_dmadev; + void *nvme_dev; //struct nvme_dev *dev; + char irqname[24]; /* nvme4294967295-65535\0 */ + spinlock_t sq_lock; + struct nvme_command *sq_cmds; + struct nvme_command __iomem *sq_cmds_io; + spinlock_t cq_lock ____cacheline_aligned_in_smp; + volatile struct nvme_completion *cqes; + struct blk_mq_tags **tags; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_tail; + u16 cq_head; + u16 last_cq_head; + u16 qid; + u8 cq_phase; + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; +}; + +static int get_sq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int tail = nvmeq->sq_tail; + struct nvme_command cmd; + + do { + if (nvmeq->sq_cmds_io) { + memcpy_toio(&cmd, &nvmeq->sq_cmds_io[tail], sizeof(struct nvme_command)); + if (cmd.common.command_id == rq->tag) + return tail; + } + else if (nvmeq->sq_cmds[tail].common.command_id == rq->tag) + return tail; + } while (--tail >= 0); + return -1; +} + +static unsigned long get_cmd_ctx(struct nvme_queue *nvmeq, struct request *rq) +{ + //struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1127.19.1.el7.x86_64/virtio_blk.h b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1127.19.1.el7.x86_64/virtio_blk.h new file mode 100644 index 0000000000000000000000000000000000000000..faa22a329c3d776adf26ac8b12671b7485d6dedc --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1127.19.1.el7.x86_64/virtio_blk.h @@ -0,0 +1,175 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +#define VQ_NAME_LEN 16 + +struct virtio_blk_vq { + struct virtqueue *vq; + spinlock_t lock; + char name[VQ_NAME_LEN]; +} ____cacheline_aligned_in_smp; + +struct virtio_blk { + struct virtio_device *vdev; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* Block layer tags. 
*/ + struct blk_mq_tag_set tag_set; + + /* Process context for config space updates */ + struct work_struct config_work; + + /* What host tells us, plus 2 for header & tailer. */ + unsigned int sg_elems; + + /* Ida index - used to track minor number allocations. */ + int index; + + /* num of vqs */ + int num_vqs; + struct virtio_blk_vq *vqs; +}; + +struct virtblk_req { + struct request *req; + struct virtio_blk_outhdr out_hdr; + struct virtio_scsi_inhdr in_hdr; + u8 status; + struct scatterlist sg[]; +}; + +struct vring_desc_state { + void *data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ +}; + +struct vring_virtqueue { + struct virtqueue vq; + + /* Actual memory layout for this queue */ + struct vring vring; + + /* Can we use weak barriers? */ + bool weak_barriers; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Host publishes avail event idx */ + bool event; + + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. */ + unsigned int num_added; + + /* Last used index we've seen. */ + u16 last_used_idx; + + /* How to notify other side. FIXME: commonalize hcalls! */ + bool (*notify)(struct virtqueue *vq); + + /* DMA, allocation, and size information */ + bool we_own_ring; + size_t queue_size_in_bytes; + dma_addr_t queue_dma_addr; + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; +#endif + + /* Per-descriptor state. */ + struct vring_desc_state desc_state[]; +}; +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +struct blk_mq_ctx { + struct { + spinlock_t lock; + struct list_head rq_list; + } ____cacheline_aligned_in_smp; + + unsigned int cpu; + unsigned int index_hw; + + RH_KABI_DEPRECATE(unsigned int, ipi_redirect) + + /* incremented at dispatch time */ + unsigned long rq_dispatched[2]; + unsigned long rq_merged; + + /* incremented at completion time */ + unsigned long ____cacheline_aligned_in_smp rq_completed[2]; + + struct request_queue *queue; + struct kobject kobj; +} ____cacheline_aligned_in_smp; + +static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, + int cpu) +{ + return q->queue_hw_ctx[q->mq_map[cpu]]; +} + +static inline struct blk_mq_hw_ctx *blk_mq_get_hctx_byrq(struct request *rq) +{ + return blk_mq_map_queue(rq->q, rq->mq_ctx->cpu); +} + +static inline struct request *desc_state_data_to_req(struct virtqueue *vq, int head) +{ + void *data = to_vvq(vq)->desc_state[head].data; + return data ? 
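+	/* the data token stored when the buffer was queued is the driver's virtblk_req; return its block-layer request (NULL if this slot has no pending buffer) */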
((struct virtblk_req *)data)->req : NULL; +} + +static inline int get_rq_internal_tag(struct request *rq) +{ + return -1; +} + +static inline unsigned long get_issue_driver_ns(struct request *rq) +{ + if (!rq) + return 0; + if (rq->io_start_time_ns) + return rq->io_start_time_ns; + if (rq->timeout) + return jiffies_to_usecs(rq->deadline - rq->timeout) * 1000; + return 0; +} + +static inline u64 get_check_hang_time_ns(void) +{ + return sched_clock(); +} + +typedef void (*blk_mq_rq_iter)(struct request *, void *, bool); +static inline int iter_all_rq(struct request_queue *q, blk_mq_rq_iter fn, void *data) +{ + blk_mq_tagset_busy_iter(q->tag_set, fn, data); + return 0; +} + +void get_vq_info(struct vq_info *vq_i, struct request *rq); +#endif + diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1127.8.2.el7.x86_64/nvme.h b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1127.8.2.el7.x86_64/nvme.h new file mode 100644 index 0000000000000000000000000000000000000000..3190b86c39f8138681b0b2caa25a08e7ed562646 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1127.8.2.el7.x86_64/nvme.h @@ -0,0 +1,60 @@ +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct nvme_queue { + struct device *q_dmadev; + void *nvme_dev; //struct nvme_dev *dev; + char irqname[24]; /* nvme4294967295-65535\0 */ + spinlock_t sq_lock; + struct nvme_command *sq_cmds; + struct nvme_command __iomem *sq_cmds_io; + spinlock_t cq_lock ____cacheline_aligned_in_smp; + volatile struct nvme_completion *cqes; + struct blk_mq_tags **tags; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_tail; + u16 cq_head; + u16 last_cq_head; + u16 qid; + u8 cq_phase; + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; +}; + +static int get_sq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int tail = nvmeq->sq_tail; + struct nvme_command cmd; + + do { + if (nvmeq->sq_cmds_io) { + memcpy_toio(&cmd, &nvmeq->sq_cmds_io[tail], sizeof(struct nvme_command)); + if (cmd.common.command_id == rq->tag) + return tail; + } + else if (nvmeq->sq_cmds[tail].common.command_id == rq->tag) + return tail; + } while (--tail >= 0); + return -1; +} + +static unsigned long get_cmd_ctx(struct nvme_queue *nvmeq, struct request *rq) +{ + //struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1127.8.2.el7.x86_64/virtio_blk.h b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1127.8.2.el7.x86_64/virtio_blk.h new file mode 100644 index 0000000000000000000000000000000000000000..faa22a329c3d776adf26ac8b12671b7485d6dedc --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1127.8.2.el7.x86_64/virtio_blk.h @@ -0,0 +1,175 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +#define VQ_NAME_LEN 16 + +struct virtio_blk_vq { + struct virtqueue *vq; + spinlock_t lock; + char name[VQ_NAME_LEN]; +} ____cacheline_aligned_in_smp; + +struct virtio_blk { + struct virtio_device *vdev; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* Block layer tags. 
*/ + struct blk_mq_tag_set tag_set; + + /* Process context for config space updates */ + struct work_struct config_work; + + /* What host tells us, plus 2 for header & tailer. */ + unsigned int sg_elems; + + /* Ida index - used to track minor number allocations. */ + int index; + + /* num of vqs */ + int num_vqs; + struct virtio_blk_vq *vqs; +}; + +struct virtblk_req { + struct request *req; + struct virtio_blk_outhdr out_hdr; + struct virtio_scsi_inhdr in_hdr; + u8 status; + struct scatterlist sg[]; +}; + +struct vring_desc_state { + void *data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ +}; + +struct vring_virtqueue { + struct virtqueue vq; + + /* Actual memory layout for this queue */ + struct vring vring; + + /* Can we use weak barriers? */ + bool weak_barriers; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Host publishes avail event idx */ + bool event; + + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. */ + unsigned int num_added; + + /* Last used index we've seen. */ + u16 last_used_idx; + + /* How to notify other side. FIXME: commonalize hcalls! */ + bool (*notify)(struct virtqueue *vq); + + /* DMA, allocation, and size information */ + bool we_own_ring; + size_t queue_size_in_bytes; + dma_addr_t queue_dma_addr; + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; +#endif + + /* Per-descriptor state. */ + struct vring_desc_state desc_state[]; +}; +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +struct blk_mq_ctx { + struct { + spinlock_t lock; + struct list_head rq_list; + } ____cacheline_aligned_in_smp; + + unsigned int cpu; + unsigned int index_hw; + + RH_KABI_DEPRECATE(unsigned int, ipi_redirect) + + /* incremented at dispatch time */ + unsigned long rq_dispatched[2]; + unsigned long rq_merged; + + /* incremented at completion time */ + unsigned long ____cacheline_aligned_in_smp rq_completed[2]; + + struct request_queue *queue; + struct kobject kobj; +} ____cacheline_aligned_in_smp; + +static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, + int cpu) +{ + return q->queue_hw_ctx[q->mq_map[cpu]]; +} + +static inline struct blk_mq_hw_ctx *blk_mq_get_hctx_byrq(struct request *rq) +{ + return blk_mq_map_queue(rq->q, rq->mq_ctx->cpu); +} + +static inline struct request *desc_state_data_to_req(struct virtqueue *vq, int head) +{ + void *data = to_vvq(vq)->desc_state[head].data; + return data ? 
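+	/* the data token stored when the buffer was queued is the driver's virtblk_req; return its block-layer request (NULL if this slot has no pending buffer) */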
((struct virtblk_req *)data)->req : NULL; +} + +static inline int get_rq_internal_tag(struct request *rq) +{ + return -1; +} + +static inline unsigned long get_issue_driver_ns(struct request *rq) +{ + if (!rq) + return 0; + if (rq->io_start_time_ns) + return rq->io_start_time_ns; + if (rq->timeout) + return jiffies_to_usecs(rq->deadline - rq->timeout) * 1000; + return 0; +} + +static inline u64 get_check_hang_time_ns(void) +{ + return sched_clock(); +} + +typedef void (*blk_mq_rq_iter)(struct request *, void *, bool); +static inline int iter_all_rq(struct request_queue *q, blk_mq_rq_iter fn, void *data) +{ + blk_mq_tagset_busy_iter(q->tag_set, fn, data); + return 0; +} + +void get_vq_info(struct vq_info *vq_i, struct request *rq); +#endif + diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1127.el7.x86_64/nvme.h b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1127.el7.x86_64/nvme.h new file mode 100644 index 0000000000000000000000000000000000000000..3190b86c39f8138681b0b2caa25a08e7ed562646 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1127.el7.x86_64/nvme.h @@ -0,0 +1,60 @@ +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct nvme_queue { + struct device *q_dmadev; + void *nvme_dev; //struct nvme_dev *dev; + char irqname[24]; /* nvme4294967295-65535\0 */ + spinlock_t sq_lock; + struct nvme_command *sq_cmds; + struct nvme_command __iomem *sq_cmds_io; + spinlock_t cq_lock ____cacheline_aligned_in_smp; + volatile struct nvme_completion *cqes; + struct blk_mq_tags **tags; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_tail; + u16 cq_head; + u16 last_cq_head; + u16 qid; + u8 cq_phase; + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; +}; + +static int get_sq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int tail = nvmeq->sq_tail; + struct nvme_command cmd; + + do { + if (nvmeq->sq_cmds_io) { + memcpy_toio(&cmd, &nvmeq->sq_cmds_io[tail], sizeof(struct nvme_command)); + if (cmd.common.command_id == rq->tag) + return tail; + } + else if (nvmeq->sq_cmds[tail].common.command_id == rq->tag) + return tail; + } while (--tail >= 0); + return -1; +} + +static unsigned long get_cmd_ctx(struct nvme_queue *nvmeq, struct request *rq) +{ + //struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1127.el7.x86_64/virtio_blk.h b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1127.el7.x86_64/virtio_blk.h new file mode 100644 index 0000000000000000000000000000000000000000..faa22a329c3d776adf26ac8b12671b7485d6dedc --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1127.el7.x86_64/virtio_blk.h @@ -0,0 +1,175 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +#define VQ_NAME_LEN 16 + +struct virtio_blk_vq { + struct virtqueue *vq; + spinlock_t lock; + char name[VQ_NAME_LEN]; +} ____cacheline_aligned_in_smp; + +struct virtio_blk { + struct virtio_device *vdev; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* Block layer tags. 
*/ + struct blk_mq_tag_set tag_set; + + /* Process context for config space updates */ + struct work_struct config_work; + + /* What host tells us, plus 2 for header & tailer. */ + unsigned int sg_elems; + + /* Ida index - used to track minor number allocations. */ + int index; + + /* num of vqs */ + int num_vqs; + struct virtio_blk_vq *vqs; +}; + +struct virtblk_req { + struct request *req; + struct virtio_blk_outhdr out_hdr; + struct virtio_scsi_inhdr in_hdr; + u8 status; + struct scatterlist sg[]; +}; + +struct vring_desc_state { + void *data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ +}; + +struct vring_virtqueue { + struct virtqueue vq; + + /* Actual memory layout for this queue */ + struct vring vring; + + /* Can we use weak barriers? */ + bool weak_barriers; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Host publishes avail event idx */ + bool event; + + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. */ + unsigned int num_added; + + /* Last used index we've seen. */ + u16 last_used_idx; + + /* How to notify other side. FIXME: commonalize hcalls! */ + bool (*notify)(struct virtqueue *vq); + + /* DMA, allocation, and size information */ + bool we_own_ring; + size_t queue_size_in_bytes; + dma_addr_t queue_dma_addr; + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; +#endif + + /* Per-descriptor state. */ + struct vring_desc_state desc_state[]; +}; +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +struct blk_mq_ctx { + struct { + spinlock_t lock; + struct list_head rq_list; + } ____cacheline_aligned_in_smp; + + unsigned int cpu; + unsigned int index_hw; + + RH_KABI_DEPRECATE(unsigned int, ipi_redirect) + + /* incremented at dispatch time */ + unsigned long rq_dispatched[2]; + unsigned long rq_merged; + + /* incremented at completion time */ + unsigned long ____cacheline_aligned_in_smp rq_completed[2]; + + struct request_queue *queue; + struct kobject kobj; +} ____cacheline_aligned_in_smp; + +static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, + int cpu) +{ + return q->queue_hw_ctx[q->mq_map[cpu]]; +} + +static inline struct blk_mq_hw_ctx *blk_mq_get_hctx_byrq(struct request *rq) +{ + return blk_mq_map_queue(rq->q, rq->mq_ctx->cpu); +} + +static inline struct request *desc_state_data_to_req(struct virtqueue *vq, int head) +{ + void *data = to_vvq(vq)->desc_state[head].data; + return data ? 
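+	/* the data token stored when the buffer was queued is the driver's virtblk_req; return its block-layer request (NULL if this slot has no pending buffer) */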
((struct virtblk_req *)data)->req : NULL; +} + +static inline int get_rq_internal_tag(struct request *rq) +{ + return -1; +} + +static inline unsigned long get_issue_driver_ns(struct request *rq) +{ + if (!rq) + return 0; + if (rq->io_start_time_ns) + return rq->io_start_time_ns; + if (rq->timeout) + return jiffies_to_usecs(rq->deadline - rq->timeout) * 1000; + return 0; +} + +static inline u64 get_check_hang_time_ns(void) +{ + return sched_clock(); +} + +typedef void (*blk_mq_rq_iter)(struct request *, void *, bool); +static inline int iter_all_rq(struct request_queue *q, blk_mq_rq_iter fn, void *data) +{ + blk_mq_tagset_busy_iter(q->tag_set, fn, data); + return 0; +} + +void get_vq_info(struct vq_info *vq_i, struct request *rq); +#endif + diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1160.11.1.el7.x86_64/nvme.h b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1160.11.1.el7.x86_64/nvme.h new file mode 100644 index 0000000000000000000000000000000000000000..3190b86c39f8138681b0b2caa25a08e7ed562646 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1160.11.1.el7.x86_64/nvme.h @@ -0,0 +1,60 @@ +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct nvme_queue { + struct device *q_dmadev; + void *nvme_dev; //struct nvme_dev *dev; + char irqname[24]; /* nvme4294967295-65535\0 */ + spinlock_t sq_lock; + struct nvme_command *sq_cmds; + struct nvme_command __iomem *sq_cmds_io; + spinlock_t cq_lock ____cacheline_aligned_in_smp; + volatile struct nvme_completion *cqes; + struct blk_mq_tags **tags; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_tail; + u16 cq_head; + u16 last_cq_head; + u16 qid; + u8 cq_phase; + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; +}; + +static int get_sq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int tail = nvmeq->sq_tail; + struct nvme_command cmd; + + do { + if (nvmeq->sq_cmds_io) { + memcpy_toio(&cmd, &nvmeq->sq_cmds_io[tail], sizeof(struct nvme_command)); + if (cmd.common.command_id == rq->tag) + return tail; + } + else if (nvmeq->sq_cmds[tail].common.command_id == rq->tag) + return tail; + } while (--tail >= 0); + return -1; +} + +static unsigned long get_cmd_ctx(struct nvme_queue *nvmeq, struct request *rq) +{ + //struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1160.11.1.el7.x86_64/virtio_blk.h b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1160.11.1.el7.x86_64/virtio_blk.h new file mode 100644 index 0000000000000000000000000000000000000000..faa22a329c3d776adf26ac8b12671b7485d6dedc --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1160.11.1.el7.x86_64/virtio_blk.h @@ -0,0 +1,175 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +#define VQ_NAME_LEN 16 + +struct virtio_blk_vq { + struct virtqueue *vq; + spinlock_t lock; + char name[VQ_NAME_LEN]; +} ____cacheline_aligned_in_smp; + +struct virtio_blk { + struct virtio_device *vdev; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* Block layer tags. 
*/ + struct blk_mq_tag_set tag_set; + + /* Process context for config space updates */ + struct work_struct config_work; + + /* What host tells us, plus 2 for header & tailer. */ + unsigned int sg_elems; + + /* Ida index - used to track minor number allocations. */ + int index; + + /* num of vqs */ + int num_vqs; + struct virtio_blk_vq *vqs; +}; + +struct virtblk_req { + struct request *req; + struct virtio_blk_outhdr out_hdr; + struct virtio_scsi_inhdr in_hdr; + u8 status; + struct scatterlist sg[]; +}; + +struct vring_desc_state { + void *data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ +}; + +struct vring_virtqueue { + struct virtqueue vq; + + /* Actual memory layout for this queue */ + struct vring vring; + + /* Can we use weak barriers? */ + bool weak_barriers; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Host publishes avail event idx */ + bool event; + + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. */ + unsigned int num_added; + + /* Last used index we've seen. */ + u16 last_used_idx; + + /* How to notify other side. FIXME: commonalize hcalls! */ + bool (*notify)(struct virtqueue *vq); + + /* DMA, allocation, and size information */ + bool we_own_ring; + size_t queue_size_in_bytes; + dma_addr_t queue_dma_addr; + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; +#endif + + /* Per-descriptor state. */ + struct vring_desc_state desc_state[]; +}; +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +struct blk_mq_ctx { + struct { + spinlock_t lock; + struct list_head rq_list; + } ____cacheline_aligned_in_smp; + + unsigned int cpu; + unsigned int index_hw; + + RH_KABI_DEPRECATE(unsigned int, ipi_redirect) + + /* incremented at dispatch time */ + unsigned long rq_dispatched[2]; + unsigned long rq_merged; + + /* incremented at completion time */ + unsigned long ____cacheline_aligned_in_smp rq_completed[2]; + + struct request_queue *queue; + struct kobject kobj; +} ____cacheline_aligned_in_smp; + +static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, + int cpu) +{ + return q->queue_hw_ctx[q->mq_map[cpu]]; +} + +static inline struct blk_mq_hw_ctx *blk_mq_get_hctx_byrq(struct request *rq) +{ + return blk_mq_map_queue(rq->q, rq->mq_ctx->cpu); +} + +static inline struct request *desc_state_data_to_req(struct virtqueue *vq, int head) +{ + void *data = to_vvq(vq)->desc_state[head].data; + return data ? 
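+	/* the data token stored when the buffer was queued is the driver's virtblk_req; return its block-layer request (NULL if this slot has no pending buffer) */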
((struct virtblk_req *)data)->req : NULL; +} + +static inline int get_rq_internal_tag(struct request *rq) +{ + return -1; +} + +static inline unsigned long get_issue_driver_ns(struct request *rq) +{ + if (!rq) + return 0; + if (rq->io_start_time_ns) + return rq->io_start_time_ns; + if (rq->timeout) + return jiffies_to_usecs(rq->deadline - rq->timeout) * 1000; + return 0; +} + +static inline u64 get_check_hang_time_ns(void) +{ + return sched_clock(); +} + +typedef void (*blk_mq_rq_iter)(struct request *, void *, bool); +static inline int iter_all_rq(struct request_queue *q, blk_mq_rq_iter fn, void *data) +{ + blk_mq_tagset_busy_iter(q->tag_set, fn, data); + return 0; +} + +void get_vq_info(struct vq_info *vq_i, struct request *rq); +#endif + diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1160.15.2.el7.x86_64/nvme.h b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1160.15.2.el7.x86_64/nvme.h new file mode 100644 index 0000000000000000000000000000000000000000..3190b86c39f8138681b0b2caa25a08e7ed562646 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1160.15.2.el7.x86_64/nvme.h @@ -0,0 +1,60 @@ +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct nvme_queue { + struct device *q_dmadev; + void *nvme_dev; //struct nvme_dev *dev; + char irqname[24]; /* nvme4294967295-65535\0 */ + spinlock_t sq_lock; + struct nvme_command *sq_cmds; + struct nvme_command __iomem *sq_cmds_io; + spinlock_t cq_lock ____cacheline_aligned_in_smp; + volatile struct nvme_completion *cqes; + struct blk_mq_tags **tags; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_tail; + u16 cq_head; + u16 last_cq_head; + u16 qid; + u8 cq_phase; + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; +}; + +static int get_sq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int tail = nvmeq->sq_tail; + struct nvme_command cmd; + + do { + if (nvmeq->sq_cmds_io) { + memcpy_toio(&cmd, &nvmeq->sq_cmds_io[tail], sizeof(struct nvme_command)); + if (cmd.common.command_id == rq->tag) + return tail; + } + else if (nvmeq->sq_cmds[tail].common.command_id == rq->tag) + return tail; + } while (--tail >= 0); + return -1; +} + +static unsigned long get_cmd_ctx(struct nvme_queue *nvmeq, struct request *rq) +{ + //struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1160.15.2.el7.x86_64/virtio_blk.h b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1160.15.2.el7.x86_64/virtio_blk.h new file mode 100644 index 0000000000000000000000000000000000000000..faa22a329c3d776adf26ac8b12671b7485d6dedc --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1160.15.2.el7.x86_64/virtio_blk.h @@ -0,0 +1,175 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +#define VQ_NAME_LEN 16 + +struct virtio_blk_vq { + struct virtqueue *vq; + spinlock_t lock; + char name[VQ_NAME_LEN]; +} ____cacheline_aligned_in_smp; + +struct virtio_blk { + struct virtio_device *vdev; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* Block layer tags. 
*/ + struct blk_mq_tag_set tag_set; + + /* Process context for config space updates */ + struct work_struct config_work; + + /* What host tells us, plus 2 for header & tailer. */ + unsigned int sg_elems; + + /* Ida index - used to track minor number allocations. */ + int index; + + /* num of vqs */ + int num_vqs; + struct virtio_blk_vq *vqs; +}; + +struct virtblk_req { + struct request *req; + struct virtio_blk_outhdr out_hdr; + struct virtio_scsi_inhdr in_hdr; + u8 status; + struct scatterlist sg[]; +}; + +struct vring_desc_state { + void *data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ +}; + +struct vring_virtqueue { + struct virtqueue vq; + + /* Actual memory layout for this queue */ + struct vring vring; + + /* Can we use weak barriers? */ + bool weak_barriers; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Host publishes avail event idx */ + bool event; + + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. */ + unsigned int num_added; + + /* Last used index we've seen. */ + u16 last_used_idx; + + /* How to notify other side. FIXME: commonalize hcalls! */ + bool (*notify)(struct virtqueue *vq); + + /* DMA, allocation, and size information */ + bool we_own_ring; + size_t queue_size_in_bytes; + dma_addr_t queue_dma_addr; + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; +#endif + + /* Per-descriptor state. */ + struct vring_desc_state desc_state[]; +}; +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +struct blk_mq_ctx { + struct { + spinlock_t lock; + struct list_head rq_list; + } ____cacheline_aligned_in_smp; + + unsigned int cpu; + unsigned int index_hw; + + RH_KABI_DEPRECATE(unsigned int, ipi_redirect) + + /* incremented at dispatch time */ + unsigned long rq_dispatched[2]; + unsigned long rq_merged; + + /* incremented at completion time */ + unsigned long ____cacheline_aligned_in_smp rq_completed[2]; + + struct request_queue *queue; + struct kobject kobj; +} ____cacheline_aligned_in_smp; + +static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, + int cpu) +{ + return q->queue_hw_ctx[q->mq_map[cpu]]; +} + +static inline struct blk_mq_hw_ctx *blk_mq_get_hctx_byrq(struct request *rq) +{ + return blk_mq_map_queue(rq->q, rq->mq_ctx->cpu); +} + +static inline struct request *desc_state_data_to_req(struct virtqueue *vq, int head) +{ + void *data = to_vvq(vq)->desc_state[head].data; + return data ? 
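+	/* the data token stored when the buffer was queued is the driver's virtblk_req; return its block-layer request (NULL if this slot has no pending buffer) */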
((struct virtblk_req *)data)->req : NULL; +} + +static inline int get_rq_internal_tag(struct request *rq) +{ + return -1; +} + +static inline unsigned long get_issue_driver_ns(struct request *rq) +{ + if (!rq) + return 0; + if (rq->io_start_time_ns) + return rq->io_start_time_ns; + if (rq->timeout) + return jiffies_to_usecs(rq->deadline - rq->timeout) * 1000; + return 0; +} + +static inline u64 get_check_hang_time_ns(void) +{ + return sched_clock(); +} + +typedef void (*blk_mq_rq_iter)(struct request *, void *, bool); +static inline int iter_all_rq(struct request_queue *q, blk_mq_rq_iter fn, void *data) +{ + blk_mq_tagset_busy_iter(q->tag_set, fn, data); + return 0; +} + +void get_vq_info(struct vq_info *vq_i, struct request *rq); +#endif + diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1160.2.1.el7.x86_64/nvme.h b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1160.2.1.el7.x86_64/nvme.h new file mode 100644 index 0000000000000000000000000000000000000000..3190b86c39f8138681b0b2caa25a08e7ed562646 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1160.2.1.el7.x86_64/nvme.h @@ -0,0 +1,60 @@ +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct nvme_queue { + struct device *q_dmadev; + void *nvme_dev; //struct nvme_dev *dev; + char irqname[24]; /* nvme4294967295-65535\0 */ + spinlock_t sq_lock; + struct nvme_command *sq_cmds; + struct nvme_command __iomem *sq_cmds_io; + spinlock_t cq_lock ____cacheline_aligned_in_smp; + volatile struct nvme_completion *cqes; + struct blk_mq_tags **tags; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_tail; + u16 cq_head; + u16 last_cq_head; + u16 qid; + u8 cq_phase; + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; +}; + +static int get_sq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int tail = nvmeq->sq_tail; + struct nvme_command cmd; + + do { + if (nvmeq->sq_cmds_io) { + memcpy_toio(&cmd, &nvmeq->sq_cmds_io[tail], sizeof(struct nvme_command)); + if (cmd.common.command_id == rq->tag) + return tail; + } + else if (nvmeq->sq_cmds[tail].common.command_id == rq->tag) + return tail; + } while (--tail >= 0); + return -1; +} + +static unsigned long get_cmd_ctx(struct nvme_queue *nvmeq, struct request *rq) +{ + //struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1160.2.1.el7.x86_64/virtio_blk.h b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1160.2.1.el7.x86_64/virtio_blk.h new file mode 100644 index 0000000000000000000000000000000000000000..faa22a329c3d776adf26ac8b12671b7485d6dedc --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1160.2.1.el7.x86_64/virtio_blk.h @@ -0,0 +1,175 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +#define VQ_NAME_LEN 16 + +struct virtio_blk_vq { + struct virtqueue *vq; + spinlock_t lock; + char name[VQ_NAME_LEN]; +} ____cacheline_aligned_in_smp; + +struct virtio_blk { + struct virtio_device *vdev; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* Block layer tags. 
*/ + struct blk_mq_tag_set tag_set; + + /* Process context for config space updates */ + struct work_struct config_work; + + /* What host tells us, plus 2 for header & tailer. */ + unsigned int sg_elems; + + /* Ida index - used to track minor number allocations. */ + int index; + + /* num of vqs */ + int num_vqs; + struct virtio_blk_vq *vqs; +}; + +struct virtblk_req { + struct request *req; + struct virtio_blk_outhdr out_hdr; + struct virtio_scsi_inhdr in_hdr; + u8 status; + struct scatterlist sg[]; +}; + +struct vring_desc_state { + void *data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ +}; + +struct vring_virtqueue { + struct virtqueue vq; + + /* Actual memory layout for this queue */ + struct vring vring; + + /* Can we use weak barriers? */ + bool weak_barriers; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Host publishes avail event idx */ + bool event; + + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. */ + unsigned int num_added; + + /* Last used index we've seen. */ + u16 last_used_idx; + + /* How to notify other side. FIXME: commonalize hcalls! */ + bool (*notify)(struct virtqueue *vq); + + /* DMA, allocation, and size information */ + bool we_own_ring; + size_t queue_size_in_bytes; + dma_addr_t queue_dma_addr; + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; +#endif + + /* Per-descriptor state. */ + struct vring_desc_state desc_state[]; +}; +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +struct blk_mq_ctx { + struct { + spinlock_t lock; + struct list_head rq_list; + } ____cacheline_aligned_in_smp; + + unsigned int cpu; + unsigned int index_hw; + + RH_KABI_DEPRECATE(unsigned int, ipi_redirect) + + /* incremented at dispatch time */ + unsigned long rq_dispatched[2]; + unsigned long rq_merged; + + /* incremented at completion time */ + unsigned long ____cacheline_aligned_in_smp rq_completed[2]; + + struct request_queue *queue; + struct kobject kobj; +} ____cacheline_aligned_in_smp; + +static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, + int cpu) +{ + return q->queue_hw_ctx[q->mq_map[cpu]]; +} + +static inline struct blk_mq_hw_ctx *blk_mq_get_hctx_byrq(struct request *rq) +{ + return blk_mq_map_queue(rq->q, rq->mq_ctx->cpu); +} + +static inline struct request *desc_state_data_to_req(struct virtqueue *vq, int head) +{ + void *data = to_vvq(vq)->desc_state[head].data; + return data ? 
((struct virtblk_req *)data)->req : NULL; +} + +static inline int get_rq_internal_tag(struct request *rq) +{ + return -1; +} + +static inline unsigned long get_issue_driver_ns(struct request *rq) +{ + if (!rq) + return 0; + if (rq->io_start_time_ns) + return rq->io_start_time_ns; + if (rq->timeout) + return jiffies_to_usecs(rq->deadline - rq->timeout) * 1000; + return 0; +} + +static inline u64 get_check_hang_time_ns(void) +{ + return sched_clock(); +} + +typedef void (*blk_mq_rq_iter)(struct request *, void *, bool); +static inline int iter_all_rq(struct request_queue *q, blk_mq_rq_iter fn, void *data) +{ + blk_mq_tagset_busy_iter(q->tag_set, fn, data); + return 0; +} + +void get_vq_info(struct vq_info *vq_i, struct request *rq); +#endif + diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1160.2.2.el7.x86_64/nvme.h b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1160.2.2.el7.x86_64/nvme.h new file mode 100644 index 0000000000000000000000000000000000000000..3190b86c39f8138681b0b2caa25a08e7ed562646 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1160.2.2.el7.x86_64/nvme.h @@ -0,0 +1,60 @@ +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct nvme_queue { + struct device *q_dmadev; + void *nvme_dev; //struct nvme_dev *dev; + char irqname[24]; /* nvme4294967295-65535\0 */ + spinlock_t sq_lock; + struct nvme_command *sq_cmds; + struct nvme_command __iomem *sq_cmds_io; + spinlock_t cq_lock ____cacheline_aligned_in_smp; + volatile struct nvme_completion *cqes; + struct blk_mq_tags **tags; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_tail; + u16 cq_head; + u16 last_cq_head; + u16 qid; + u8 cq_phase; + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; +}; + +static int get_sq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int tail = nvmeq->sq_tail; + struct nvme_command cmd; + + do { + if (nvmeq->sq_cmds_io) { + memcpy_toio(&cmd, &nvmeq->sq_cmds_io[tail], sizeof(struct nvme_command)); + if (cmd.common.command_id == rq->tag) + return tail; + } + else if (nvmeq->sq_cmds[tail].common.command_id == rq->tag) + return tail; + } while (--tail >= 0); + return -1; +} + +static unsigned long get_cmd_ctx(struct nvme_queue *nvmeq, struct request *rq) +{ + //struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1160.2.2.el7.x86_64/virtio_blk.h b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1160.2.2.el7.x86_64/virtio_blk.h new file mode 100644 index 0000000000000000000000000000000000000000..faa22a329c3d776adf26ac8b12671b7485d6dedc --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1160.2.2.el7.x86_64/virtio_blk.h @@ -0,0 +1,175 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +#define VQ_NAME_LEN 16 + +struct virtio_blk_vq { + struct virtqueue *vq; + spinlock_t lock; + char name[VQ_NAME_LEN]; +} ____cacheline_aligned_in_smp; + +struct virtio_blk { + struct virtio_device *vdev; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* Block layer tags. 
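Set up once at probe time via blk_mq_alloc_tag_set().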
*/ + struct blk_mq_tag_set tag_set; + + /* Process context for config space updates */ + struct work_struct config_work; + + /* What host tells us, plus 2 for header & tailer. */ + unsigned int sg_elems; + + /* Ida index - used to track minor number allocations. */ + int index; + + /* num of vqs */ + int num_vqs; + struct virtio_blk_vq *vqs; +}; + +struct virtblk_req { + struct request *req; + struct virtio_blk_outhdr out_hdr; + struct virtio_scsi_inhdr in_hdr; + u8 status; + struct scatterlist sg[]; +}; + +struct vring_desc_state { + void *data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ +}; + +struct vring_virtqueue { + struct virtqueue vq; + + /* Actual memory layout for this queue */ + struct vring vring; + + /* Can we use weak barriers? */ + bool weak_barriers; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Host publishes avail event idx */ + bool event; + + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. */ + unsigned int num_added; + + /* Last used index we've seen. */ + u16 last_used_idx; + + /* How to notify other side. FIXME: commonalize hcalls! */ + bool (*notify)(struct virtqueue *vq); + + /* DMA, allocation, and size information */ + bool we_own_ring; + size_t queue_size_in_bytes; + dma_addr_t queue_dma_addr; + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; +#endif + + /* Per-descriptor state. */ + struct vring_desc_state desc_state[]; +}; +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +struct blk_mq_ctx { + struct { + spinlock_t lock; + struct list_head rq_list; + } ____cacheline_aligned_in_smp; + + unsigned int cpu; + unsigned int index_hw; + + RH_KABI_DEPRECATE(unsigned int, ipi_redirect) + + /* incremented at dispatch time */ + unsigned long rq_dispatched[2]; + unsigned long rq_merged; + + /* incremented at completion time */ + unsigned long ____cacheline_aligned_in_smp rq_completed[2]; + + struct request_queue *queue; + struct kobject kobj; +} ____cacheline_aligned_in_smp; + +static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, + int cpu) +{ + return q->queue_hw_ctx[q->mq_map[cpu]]; +} + +static inline struct blk_mq_hw_ctx *blk_mq_get_hctx_byrq(struct request *rq) +{ + return blk_mq_map_queue(rq->q, rq->mq_ctx->cpu); +} + +static inline struct request *desc_state_data_to_req(struct virtqueue *vq, int head) +{ + void *data = to_vvq(vq)->desc_state[head].data; + return data ? 
((struct virtblk_req *)data)->req : NULL; +} + +static inline int get_rq_internal_tag(struct request *rq) +{ + return -1; +} + +static inline unsigned long get_issue_driver_ns(struct request *rq) +{ + if (!rq) + return 0; + if (rq->io_start_time_ns) + return rq->io_start_time_ns; + if (rq->timeout) + return jiffies_to_usecs(rq->deadline - rq->timeout) * 1000; + return 0; +} + +static inline u64 get_check_hang_time_ns(void) +{ + return sched_clock(); +} + +typedef void (*blk_mq_rq_iter)(struct request *, void *, bool); +static inline int iter_all_rq(struct request_queue *q, blk_mq_rq_iter fn, void *data) +{ + blk_mq_tagset_busy_iter(q->tag_set, fn, data); + return 0; +} + +void get_vq_info(struct vq_info *vq_i, struct request *rq); +#endif + diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1160.6.1.el7.x86_64/nvme.h b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1160.6.1.el7.x86_64/nvme.h new file mode 100644 index 0000000000000000000000000000000000000000..3190b86c39f8138681b0b2caa25a08e7ed562646 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1160.6.1.el7.x86_64/nvme.h @@ -0,0 +1,60 @@ +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct nvme_queue { + struct device *q_dmadev; + void *nvme_dev; //struct nvme_dev *dev; + char irqname[24]; /* nvme4294967295-65535\0 */ + spinlock_t sq_lock; + struct nvme_command *sq_cmds; + struct nvme_command __iomem *sq_cmds_io; + spinlock_t cq_lock ____cacheline_aligned_in_smp; + volatile struct nvme_completion *cqes; + struct blk_mq_tags **tags; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_tail; + u16 cq_head; + u16 last_cq_head; + u16 qid; + u8 cq_phase; + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; +}; + +static int get_sq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int tail = nvmeq->sq_tail; + struct nvme_command cmd; + + do { + if (nvmeq->sq_cmds_io) { + memcpy_toio(&cmd, &nvmeq->sq_cmds_io[tail], sizeof(struct nvme_command)); + if (cmd.common.command_id == rq->tag) + return tail; + } + else if (nvmeq->sq_cmds[tail].common.command_id == rq->tag) + return tail; + } while (--tail >= 0); + return -1; +} + +static unsigned long get_cmd_ctx(struct nvme_queue *nvmeq, struct request *rq) +{ + //struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1160.6.1.el7.x86_64/virtio_blk.h b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1160.6.1.el7.x86_64/virtio_blk.h new file mode 100644 index 0000000000000000000000000000000000000000..faa22a329c3d776adf26ac8b12671b7485d6dedc --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1160.6.1.el7.x86_64/virtio_blk.h @@ -0,0 +1,175 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +#define VQ_NAME_LEN 16 + +struct virtio_blk_vq { + struct virtqueue *vq; + spinlock_t lock; + char name[VQ_NAME_LEN]; +} ____cacheline_aligned_in_smp; + +struct virtio_blk { + struct virtio_device *vdev; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* Block layer tags. 
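iter_all_rq() below walks this tag set (through q->tag_set) to find in-flight requests.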
*/ + struct blk_mq_tag_set tag_set; + + /* Process context for config space updates */ + struct work_struct config_work; + + /* What host tells us, plus 2 for header & tailer. */ + unsigned int sg_elems; + + /* Ida index - used to track minor number allocations. */ + int index; + + /* num of vqs */ + int num_vqs; + struct virtio_blk_vq *vqs; +}; + +struct virtblk_req { + struct request *req; + struct virtio_blk_outhdr out_hdr; + struct virtio_scsi_inhdr in_hdr; + u8 status; + struct scatterlist sg[]; +}; + +struct vring_desc_state { + void *data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ +}; + +struct vring_virtqueue { + struct virtqueue vq; + + /* Actual memory layout for this queue */ + struct vring vring; + + /* Can we use weak barriers? */ + bool weak_barriers; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Host publishes avail event idx */ + bool event; + + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. */ + unsigned int num_added; + + /* Last used index we've seen. */ + u16 last_used_idx; + + /* How to notify other side. FIXME: commonalize hcalls! */ + bool (*notify)(struct virtqueue *vq); + + /* DMA, allocation, and size information */ + bool we_own_ring; + size_t queue_size_in_bytes; + dma_addr_t queue_dma_addr; + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; +#endif + + /* Per-descriptor state. */ + struct vring_desc_state desc_state[]; +}; +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +struct blk_mq_ctx { + struct { + spinlock_t lock; + struct list_head rq_list; + } ____cacheline_aligned_in_smp; + + unsigned int cpu; + unsigned int index_hw; + + RH_KABI_DEPRECATE(unsigned int, ipi_redirect) + + /* incremented at dispatch time */ + unsigned long rq_dispatched[2]; + unsigned long rq_merged; + + /* incremented at completion time */ + unsigned long ____cacheline_aligned_in_smp rq_completed[2]; + + struct request_queue *queue; + struct kobject kobj; +} ____cacheline_aligned_in_smp; + +static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, + int cpu) +{ + return q->queue_hw_ctx[q->mq_map[cpu]]; +} + +static inline struct blk_mq_hw_ctx *blk_mq_get_hctx_byrq(struct request *rq) +{ + return blk_mq_map_queue(rq->q, rq->mq_ctx->cpu); +} + +static inline struct request *desc_state_data_to_req(struct virtqueue *vq, int head) +{ + void *data = to_vvq(vq)->desc_state[head].data; + return data ? 
((struct virtblk_req *)data)->req : NULL; +} + +static inline int get_rq_internal_tag(struct request *rq) +{ + return -1; +} + +static inline unsigned long get_issue_driver_ns(struct request *rq) +{ + if (!rq) + return 0; + if (rq->io_start_time_ns) + return rq->io_start_time_ns; + if (rq->timeout) + return jiffies_to_usecs(rq->deadline - rq->timeout) * 1000; + return 0; +} + +static inline u64 get_check_hang_time_ns(void) +{ + return sched_clock(); +} + +typedef void (*blk_mq_rq_iter)(struct request *, void *, bool); +static inline int iter_all_rq(struct request_queue *q, blk_mq_rq_iter fn, void *data) +{ + blk_mq_tagset_busy_iter(q->tag_set, fn, data); + return 0; +} + +void get_vq_info(struct vq_info *vq_i, struct request *rq); +#endif + diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1160.el7.x86_64/nvme.h b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1160.el7.x86_64/nvme.h new file mode 100644 index 0000000000000000000000000000000000000000..3190b86c39f8138681b0b2caa25a08e7ed562646 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1160.el7.x86_64/nvme.h @@ -0,0 +1,60 @@ +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct nvme_queue { + struct device *q_dmadev; + void *nvme_dev; //struct nvme_dev *dev; + char irqname[24]; /* nvme4294967295-65535\0 */ + spinlock_t sq_lock; + struct nvme_command *sq_cmds; + struct nvme_command __iomem *sq_cmds_io; + spinlock_t cq_lock ____cacheline_aligned_in_smp; + volatile struct nvme_completion *cqes; + struct blk_mq_tags **tags; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_tail; + u16 cq_head; + u16 last_cq_head; + u16 qid; + u8 cq_phase; + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; +}; + +static int get_sq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int tail = nvmeq->sq_tail; + struct nvme_command cmd; + + do { + if (nvmeq->sq_cmds_io) { + memcpy_toio(&cmd, &nvmeq->sq_cmds_io[tail], sizeof(struct nvme_command)); + if (cmd.common.command_id == rq->tag) + return tail; + } + else if (nvmeq->sq_cmds[tail].common.command_id == rq->tag) + return tail; + } while (--tail >= 0); + return -1; +} + +static unsigned long get_cmd_ctx(struct nvme_queue *nvmeq, struct request *rq) +{ + //struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1160.el7.x86_64/virtio_blk.h b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1160.el7.x86_64/virtio_blk.h new file mode 100644 index 0000000000000000000000000000000000000000..faa22a329c3d776adf26ac8b12671b7485d6dedc --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-1160.el7.x86_64/virtio_blk.h @@ -0,0 +1,175 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +#define VQ_NAME_LEN 16 + +struct virtio_blk_vq { + struct virtqueue *vq; + spinlock_t lock; + char name[VQ_NAME_LEN]; +} ____cacheline_aligned_in_smp; + +struct virtio_blk { + struct virtio_device *vdev; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* Block layer tags. 
*/ + struct blk_mq_tag_set tag_set; + + /* Process context for config space updates */ + struct work_struct config_work; + + /* What host tells us, plus 2 for header & tailer. */ + unsigned int sg_elems; + + /* Ida index - used to track minor number allocations. */ + int index; + + /* num of vqs */ + int num_vqs; + struct virtio_blk_vq *vqs; +}; + +struct virtblk_req { + struct request *req; + struct virtio_blk_outhdr out_hdr; + struct virtio_scsi_inhdr in_hdr; + u8 status; + struct scatterlist sg[]; +}; + +struct vring_desc_state { + void *data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ +}; + +struct vring_virtqueue { + struct virtqueue vq; + + /* Actual memory layout for this queue */ + struct vring vring; + + /* Can we use weak barriers? */ + bool weak_barriers; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Host publishes avail event idx */ + bool event; + + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. */ + unsigned int num_added; + + /* Last used index we've seen. */ + u16 last_used_idx; + + /* How to notify other side. FIXME: commonalize hcalls! */ + bool (*notify)(struct virtqueue *vq); + + /* DMA, allocation, and size information */ + bool we_own_ring; + size_t queue_size_in_bytes; + dma_addr_t queue_dma_addr; + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; +#endif + + /* Per-descriptor state. */ + struct vring_desc_state desc_state[]; +}; +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +struct blk_mq_ctx { + struct { + spinlock_t lock; + struct list_head rq_list; + } ____cacheline_aligned_in_smp; + + unsigned int cpu; + unsigned int index_hw; + + RH_KABI_DEPRECATE(unsigned int, ipi_redirect) + + /* incremented at dispatch time */ + unsigned long rq_dispatched[2]; + unsigned long rq_merged; + + /* incremented at completion time */ + unsigned long ____cacheline_aligned_in_smp rq_completed[2]; + + struct request_queue *queue; + struct kobject kobj; +} ____cacheline_aligned_in_smp; + +static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, + int cpu) +{ + return q->queue_hw_ctx[q->mq_map[cpu]]; +} + +static inline struct blk_mq_hw_ctx *blk_mq_get_hctx_byrq(struct request *rq) +{ + return blk_mq_map_queue(rq->q, rq->mq_ctx->cpu); +} + +static inline struct request *desc_state_data_to_req(struct virtqueue *vq, int head) +{ + void *data = to_vvq(vq)->desc_state[head].data; + return data ? 
((struct virtblk_req *)data)->req : NULL; +} + +static inline int get_rq_internal_tag(struct request *rq) +{ + return -1; +} + +static inline unsigned long get_issue_driver_ns(struct request *rq) +{ + if (!rq) + return 0; + if (rq->io_start_time_ns) + return rq->io_start_time_ns; + if (rq->timeout) + return jiffies_to_usecs(rq->deadline - rq->timeout) * 1000; + return 0; +} + +static inline u64 get_check_hang_time_ns(void) +{ + return sched_clock(); +} + +typedef void (*blk_mq_rq_iter)(struct request *, void *, bool); +static inline int iter_all_rq(struct request_queue *q, blk_mq_rq_iter fn, void *data) +{ + blk_mq_tagset_busy_iter(q->tag_set, fn, data); + return 0; +} + +void get_vq_info(struct vq_info *vq_i, struct request *rq); +#endif + diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-862.14.4.el7.x86_64/nvme.h b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-862.14.4.el7.x86_64/nvme.h new file mode 100644 index 0000000000000000000000000000000000000000..c0ff764861874ce8b36327a95a4b550d492b0c43 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-862.14.4.el7.x86_64/nvme.h @@ -0,0 +1,59 @@ +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct nvme_queue { + struct device *q_dmadev; + void *nvme_dev; //struct nvme_dev *dev; + char irqname[24]; /* nvme4294967295-65535\0 */ + spinlock_t sq_lock; + struct nvme_command *sq_cmds; + struct nvme_command __iomem *sq_cmds_io; + volatile struct nvme_completion *cqes; + struct blk_mq_tags **tags; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_tail; + u16 cq_head; + u16 qid; + u8 cq_phase; + u8 cqe_seen; + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; +}; + +static int get_sq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int tail = nvmeq->sq_tail; + struct nvme_command cmd; + + do { + if (nvmeq->sq_cmds_io) { + memcpy_toio(&cmd, &nvmeq->sq_cmds_io[tail], sizeof(struct nvme_command)); + if (cmd.common.command_id == rq->tag) + return tail; + } + else if (nvmeq->sq_cmds[tail].common.command_id == rq->tag) + return tail; + } while (--tail >= 0); + return -1; +} + +static unsigned long get_cmd_ctx(struct nvme_queue *nvmeq, struct request *rq) +{ + //struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-862.14.4.el7.x86_64/virtio_blk.h b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-862.14.4.el7.x86_64/virtio_blk.h new file mode 100644 index 0000000000000000000000000000000000000000..faa22a329c3d776adf26ac8b12671b7485d6dedc --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-862.14.4.el7.x86_64/virtio_blk.h @@ -0,0 +1,175 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +#define VQ_NAME_LEN 16 + +struct virtio_blk_vq { + struct virtqueue *vq; + spinlock_t lock; + char name[VQ_NAME_LEN]; +} ____cacheline_aligned_in_smp; + +struct virtio_blk { + struct virtio_device *vdev; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* Block layer tags. */ + struct blk_mq_tag_set tag_set; + + /* Process context for config space updates */ + struct work_struct config_work; + + /* What host tells us, plus 2 for header & tailer. 
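The two extra entries cover the virtio request header and the trailing status byte.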
*/ + unsigned int sg_elems; + + /* Ida index - used to track minor number allocations. */ + int index; + + /* num of vqs */ + int num_vqs; + struct virtio_blk_vq *vqs; +}; + +struct virtblk_req { + struct request *req; + struct virtio_blk_outhdr out_hdr; + struct virtio_scsi_inhdr in_hdr; + u8 status; + struct scatterlist sg[]; +}; + +struct vring_desc_state { + void *data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ +}; + +struct vring_virtqueue { + struct virtqueue vq; + + /* Actual memory layout for this queue */ + struct vring vring; + + /* Can we use weak barriers? */ + bool weak_barriers; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Host publishes avail event idx */ + bool event; + + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. */ + unsigned int num_added; + + /* Last used index we've seen. */ + u16 last_used_idx; + + /* How to notify other side. FIXME: commonalize hcalls! */ + bool (*notify)(struct virtqueue *vq); + + /* DMA, allocation, and size information */ + bool we_own_ring; + size_t queue_size_in_bytes; + dma_addr_t queue_dma_addr; + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; +#endif + + /* Per-descriptor state. */ + struct vring_desc_state desc_state[]; +}; +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +struct blk_mq_ctx { + struct { + spinlock_t lock; + struct list_head rq_list; + } ____cacheline_aligned_in_smp; + + unsigned int cpu; + unsigned int index_hw; + + RH_KABI_DEPRECATE(unsigned int, ipi_redirect) + + /* incremented at dispatch time */ + unsigned long rq_dispatched[2]; + unsigned long rq_merged; + + /* incremented at completion time */ + unsigned long ____cacheline_aligned_in_smp rq_completed[2]; + + struct request_queue *queue; + struct kobject kobj; +} ____cacheline_aligned_in_smp; + +static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, + int cpu) +{ + return q->queue_hw_ctx[q->mq_map[cpu]]; +} + +static inline struct blk_mq_hw_ctx *blk_mq_get_hctx_byrq(struct request *rq) +{ + return blk_mq_map_queue(rq->q, rq->mq_ctx->cpu); +} + +static inline struct request *desc_state_data_to_req(struct virtqueue *vq, int head) +{ + void *data = to_vvq(vq)->desc_state[head].data; + return data ? 
((struct virtblk_req *)data)->req : NULL; +} + +static inline int get_rq_internal_tag(struct request *rq) +{ + return -1; +} + +static inline unsigned long get_issue_driver_ns(struct request *rq) +{ + if (!rq) + return 0; + if (rq->io_start_time_ns) + return rq->io_start_time_ns; + if (rq->timeout) + return jiffies_to_usecs(rq->deadline - rq->timeout) * 1000; + return 0; +} + +static inline u64 get_check_hang_time_ns(void) +{ + return sched_clock(); +} + +typedef void (*blk_mq_rq_iter)(struct request *, void *, bool); +static inline int iter_all_rq(struct request_queue *q, blk_mq_rq_iter fn, void *data) +{ + blk_mq_tagset_busy_iter(q->tag_set, fn, data); + return 0; +} + +void get_vq_info(struct vq_info *vq_i, struct request *rq); +#endif + diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-957.21.3.el7.x86_64/nvme.h b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-957.21.3.el7.x86_64/nvme.h new file mode 100644 index 0000000000000000000000000000000000000000..c0ff764861874ce8b36327a95a4b550d492b0c43 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-957.21.3.el7.x86_64/nvme.h @@ -0,0 +1,59 @@ +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct nvme_queue { + struct device *q_dmadev; + void *nvme_dev; //struct nvme_dev *dev; + char irqname[24]; /* nvme4294967295-65535\0 */ + spinlock_t sq_lock; + struct nvme_command *sq_cmds; + struct nvme_command __iomem *sq_cmds_io; + volatile struct nvme_completion *cqes; + struct blk_mq_tags **tags; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_tail; + u16 cq_head; + u16 qid; + u8 cq_phase; + u8 cqe_seen; + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; +}; + +static int get_sq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int tail = nvmeq->sq_tail; + struct nvme_command cmd; + + do { + if (nvmeq->sq_cmds_io) { + memcpy_toio(&cmd, &nvmeq->sq_cmds_io[tail], sizeof(struct nvme_command)); + if (cmd.common.command_id == rq->tag) + return tail; + } + else if (nvmeq->sq_cmds[tail].common.command_id == rq->tag) + return tail; + } while (--tail >= 0); + return -1; +} + +static unsigned long get_cmd_ctx(struct nvme_queue *nvmeq, struct request *rq) +{ + //struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-957.21.3.el7.x86_64/virtio_blk.h b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-957.21.3.el7.x86_64/virtio_blk.h new file mode 100644 index 0000000000000000000000000000000000000000..faa22a329c3d776adf26ac8b12671b7485d6dedc --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/3.10.0-957.21.3.el7.x86_64/virtio_blk.h @@ -0,0 +1,175 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +#define VQ_NAME_LEN 16 + +struct virtio_blk_vq { + struct virtqueue *vq; + spinlock_t lock; + char name[VQ_NAME_LEN]; +} ____cacheline_aligned_in_smp; + +struct virtio_blk { + struct virtio_device *vdev; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* Block layer tags. */ + struct blk_mq_tag_set tag_set; + + /* Process context for config space updates */ + struct work_struct config_work; + + /* What host tells us, plus 2 for header & tailer. 
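Also caps the scatterlist array allocated per virtblk_req.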
*/ + unsigned int sg_elems; + + /* Ida index - used to track minor number allocations. */ + int index; + + /* num of vqs */ + int num_vqs; + struct virtio_blk_vq *vqs; +}; + +struct virtblk_req { + struct request *req; + struct virtio_blk_outhdr out_hdr; + struct virtio_scsi_inhdr in_hdr; + u8 status; + struct scatterlist sg[]; +}; + +struct vring_desc_state { + void *data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ +}; + +struct vring_virtqueue { + struct virtqueue vq; + + /* Actual memory layout for this queue */ + struct vring vring; + + /* Can we use weak barriers? */ + bool weak_barriers; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Host publishes avail event idx */ + bool event; + + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. */ + unsigned int num_added; + + /* Last used index we've seen. */ + u16 last_used_idx; + + /* How to notify other side. FIXME: commonalize hcalls! */ + bool (*notify)(struct virtqueue *vq); + + /* DMA, allocation, and size information */ + bool we_own_ring; + size_t queue_size_in_bytes; + dma_addr_t queue_dma_addr; + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; +#endif + + /* Per-descriptor state. */ + struct vring_desc_state desc_state[]; +}; +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +struct blk_mq_ctx { + struct { + spinlock_t lock; + struct list_head rq_list; + } ____cacheline_aligned_in_smp; + + unsigned int cpu; + unsigned int index_hw; + + RH_KABI_DEPRECATE(unsigned int, ipi_redirect) + + /* incremented at dispatch time */ + unsigned long rq_dispatched[2]; + unsigned long rq_merged; + + /* incremented at completion time */ + unsigned long ____cacheline_aligned_in_smp rq_completed[2]; + + struct request_queue *queue; + struct kobject kobj; +} ____cacheline_aligned_in_smp; + +static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, + int cpu) +{ + return q->queue_hw_ctx[q->mq_map[cpu]]; +} + +static inline struct blk_mq_hw_ctx *blk_mq_get_hctx_byrq(struct request *rq) +{ + return blk_mq_map_queue(rq->q, rq->mq_ctx->cpu); +} + +static inline struct request *desc_state_data_to_req(struct virtqueue *vq, int head) +{ + void *data = to_vvq(vq)->desc_state[head].data; + return data ? 
((struct virtblk_req *)data)->req : NULL; +} + +static inline int get_rq_internal_tag(struct request *rq) +{ + return -1; +} + +static inline unsigned long get_issue_driver_ns(struct request *rq) +{ + if (!rq) + return 0; + if (rq->io_start_time_ns) + return rq->io_start_time_ns; + if (rq->timeout) + return jiffies_to_usecs(rq->deadline - rq->timeout) * 1000; + return 0; +} + +static inline u64 get_check_hang_time_ns(void) +{ + return sched_clock(); +} + +typedef void (*blk_mq_rq_iter)(struct request *, void *, bool); +static inline int iter_all_rq(struct request_queue *q, blk_mq_rq_iter fn, void *data) +{ + blk_mq_tagset_busy_iter(q->tag_set, fn, data); + return 0; +} + +void get_vq_info(struct vq_info *vq_i, struct request *rq); +#endif + diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.24-9.al7.x86_64/nvme.h b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.24-9.al7.x86_64/nvme.h new file mode 100644 index 0000000000000000000000000000000000000000..b0aaeba4301bdf74de1321993550fe1519f7df6b --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.24-9.al7.x86_64/nvme.h @@ -0,0 +1,59 @@ +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct nvme_queue { + struct device *q_dmadev; + void *nvme_dev; //struct nvme_dev *dev; + spinlock_t sq_lock; + struct nvme_command *sq_cmds; + struct nvme_command __iomem *sq_cmds_io; + spinlock_t cq_lock ____cacheline_aligned_in_smp; + volatile struct nvme_completion *cqes; + struct blk_mq_tags **tags; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_tail; + u16 cq_head; + u16 last_cq_head; + u16 qid; + u8 cq_phase; + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; +}; + +static int get_sq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int tail = nvmeq->sq_tail; + struct nvme_command cmd; + + do { + if (nvmeq->sq_cmds_io) { + memcpy_toio(&cmd, &nvmeq->sq_cmds_io[tail], sizeof(struct nvme_command)); + if (cmd.common.command_id == rq->tag) + return tail; + } + else if (nvmeq->sq_cmds[tail].common.command_id == rq->tag) + return tail; + } while (--tail >= 0); + return -1; +} + +static unsigned long get_cmd_ctx(struct nvme_queue *nvmeq, struct request *rq) +{ + //struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.24-9.al7.x86_64/virtio_blk.h b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.24-9.al7.x86_64/virtio_blk.h new file mode 100644 index 0000000000000000000000000000000000000000..fb79209c31ca392440c674c8b203f1ec034178c6 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.24-9.al7.x86_64/virtio_blk.h @@ -0,0 +1,219 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +#define VQ_NAME_LEN 16 + +struct virtio_blk_vq { + struct virtqueue *vq; + spinlock_t lock; + char name[VQ_NAME_LEN]; +} ____cacheline_aligned_in_smp; + +struct vring_desc_state { + void *data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ +}; + +struct vring_virtqueue { + struct virtqueue vq; + + /* Actual memory layout for this queue */ + struct vring vring; + + /* Can we use weak barriers? 
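True when the other side is software (a hypervisor) rather than real hardware, so the cheaper SMP-style barriers are sufficient.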
*/ + bool weak_barriers; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Host publishes avail event idx */ + bool event; + + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. */ + unsigned int num_added; + + /* Last used index we've seen. */ + u16 last_used_idx; + + /* Last written value to avail->flags */ + u16 avail_flags_shadow; + + /* Last written value to avail->idx in guest byte order */ + u16 avail_idx_shadow; + + /* How to notify other side. FIXME: commonalize hcalls! */ + bool (*notify)(struct virtqueue *vq); + + /* DMA, allocation, and size information */ + bool we_own_ring; + size_t queue_size_in_bytes; + dma_addr_t queue_dma_addr; + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; +#endif + + /* Per-descriptor state. */ + struct vring_desc_state desc_state[]; +}; + +struct virtio_blk { + struct virtio_device *vdev; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* Block layer tags. */ + struct blk_mq_tag_set tag_set; + + /* Process context for config space updates */ + struct work_struct config_work; + + /* What host tells us, plus 2 for header & tailer. */ + unsigned int sg_elems; + + /* Ida index - used to track minor number allocations. */ + int index; + + /* num of vqs */ + int num_vqs; + struct virtio_blk_vq *vqs; +}; + +struct virtblk_req { +#ifdef CONFIG_VIRTIO_BLK_SCSI + struct scsi_request sreq; /* for SCSI passthrough, must be first */ + u8 sense[SCSI_SENSE_BUFFERSIZE]; + struct virtio_scsi_inhdr in_hdr; +#endif + struct virtio_blk_outhdr out_hdr; + u8 status; + struct scatterlist sg[]; +}; +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +struct blk_mq_ctx { + struct { + spinlock_t lock; + struct list_head rq_list; + } ____cacheline_aligned_in_smp; + + unsigned int cpu; + unsigned int index_hw; + + /* incremented at dispatch time */ + unsigned long rq_dispatched[2]; + unsigned long rq_merged; + + /* incremented at completion time */ + unsigned long ____cacheline_aligned_in_smp rq_completed[2]; + + struct request_queue *queue; + struct kobject kobj; +} ____cacheline_aligned_in_smp; + +struct blk_flush_queue { + unsigned int flush_queue_delayed:1; + unsigned int flush_pending_idx:1; + unsigned int flush_running_idx:1; + unsigned long flush_pending_since; + struct list_head flush_queue[2]; + struct list_head flush_data_in_flight; + struct request *flush_rq; + + /* + * flush_rq shares tag with this rq, both can't be active + * at the same time + */ + struct request *orig_rq; + spinlock_t mq_flush_lock; +}; + +static inline int enable_detect_flush_rq(void) +{ + return 0; +} + +static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, + int cpu) +{ + return q->queue_hw_ctx[q->mq_map[cpu]]; +} + +static inline struct blk_mq_hw_ctx *blk_mq_get_hctx_byrq(struct request *rq) +{ + return blk_mq_map_queue(rq->q, rq->mq_ctx->cpu); +} + +static inline struct request *desc_state_data_to_req(struct virtqueue *vq, int head) +{ + void *data = to_vvq(vq)->desc_state[head].data; + return data ? blk_mq_rq_from_pdu(data) : NULL; +} + +static inline int get_rq_internal_tag(struct request *rq) +{ + return rq ? 
rq->internal_tag : -1; +} + +static inline unsigned long get_issue_driver_ns(struct request *rq) +{ + if (!rq) + return 0; + if (rq->io_start_time_ns) + return rq->io_start_time_ns; + if (rq->__deadline > rq->timeout) + return jiffies_to_usecs(rq->__deadline - rq->timeout) * 1000; + return 0; +} + +/* + * LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0) + */ +static inline u64 get_check_hang_time_ns(void) +{ + return ktime_get_ns(); +} + +//#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0) +extern fn_queue_tag_busy_iter sym_blk_mq_queue_tag_busy_iter; +//#endif +typedef void (*blk_mq_rq_iter)(struct request *, void *, bool); +static blk_mq_rq_iter fn_blk_mq_check_hang = NULL; +static void blk_mq_check_rq_hang(struct blk_mq_hw_ctx *hctx, + struct request *rq, void *priv, bool reserved) +{ + if (fn_blk_mq_check_hang) + fn_blk_mq_check_hang(rq, priv, reserved); +} + +static inline int iter_all_rq(struct request_queue *q, blk_mq_rq_iter fn, void *data) +{ + fn_blk_mq_check_hang = fn; + sym_blk_mq_queue_tag_busy_iter(q, blk_mq_check_rq_hang, data); + return 0; +} +void get_vq_info(struct vq_info *vq_i, struct request *rq); +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.81-17.1.al7.x86_64/nvme.h b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.81-17.1.al7.x86_64/nvme.h new file mode 100644 index 0000000000000000000000000000000000000000..b0aaeba4301bdf74de1321993550fe1519f7df6b --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.81-17.1.al7.x86_64/nvme.h @@ -0,0 +1,59 @@ +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct nvme_queue { + struct device *q_dmadev; + void *nvme_dev; //struct nvme_dev *dev; + spinlock_t sq_lock; + struct nvme_command *sq_cmds; + struct nvme_command __iomem *sq_cmds_io; + spinlock_t cq_lock ____cacheline_aligned_in_smp; + volatile struct nvme_completion *cqes; + struct blk_mq_tags **tags; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_tail; + u16 cq_head; + u16 last_cq_head; + u16 qid; + u8 cq_phase; + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; +}; + +static int get_sq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int tail = nvmeq->sq_tail; + struct nvme_command cmd; + + do { + if (nvmeq->sq_cmds_io) { + memcpy_toio(&cmd, &nvmeq->sq_cmds_io[tail], sizeof(struct nvme_command)); + if (cmd.common.command_id == rq->tag) + return tail; + } + else if (nvmeq->sq_cmds[tail].common.command_id == rq->tag) + return tail; + } while (--tail >= 0); + return -1; +} + +static unsigned long get_cmd_ctx(struct nvme_queue *nvmeq, struct request *rq) +{ + //struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.81-17.1.al7.x86_64/virtio_blk.h b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.81-17.1.al7.x86_64/virtio_blk.h new file mode 100644 index 0000000000000000000000000000000000000000..34e5cb77f404928b15eba189dfcdc4d7658feb3c --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.81-17.1.al7.x86_64/virtio_blk.h @@ -0,0 +1,219 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +#define VQ_NAME_LEN 16 + +struct virtio_blk_vq { + struct virtqueue *vq; + spinlock_t 
lock; + char name[VQ_NAME_LEN]; +} ____cacheline_aligned_in_smp; + +struct vring_desc_state { + void *data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ +}; + +struct vring_virtqueue { + struct virtqueue vq; + + /* Actual memory layout for this queue */ + struct vring vring; + + /* Can we use weak barriers? */ + bool weak_barriers; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Host publishes avail event idx */ + bool event; + + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. */ + unsigned int num_added; + + /* Last used index we've seen. */ + u16 last_used_idx; + + /* Last written value to avail->flags */ + u16 avail_flags_shadow; + + /* Last written value to avail->idx in guest byte order */ + u16 avail_idx_shadow; + + /* How to notify other side. FIXME: commonalize hcalls! */ + bool (*notify)(struct virtqueue *vq); + + /* DMA, allocation, and size information */ + bool we_own_ring; + size_t queue_size_in_bytes; + dma_addr_t queue_dma_addr; + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; +#endif + + /* Per-descriptor state. */ + struct vring_desc_state desc_state[]; +}; + +struct virtio_blk { + struct virtio_device *vdev; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* Block layer tags. */ + struct blk_mq_tag_set tag_set; + + /* Process context for config space updates */ + struct work_struct config_work; + + /* What host tells us, plus 2 for header & tailer. */ + unsigned int sg_elems; + + /* Ida index - used to track minor number allocations. 
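Released back to the ida when the device is removed.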
*/ + int index; + + /* num of vqs */ + int num_vqs; + struct virtio_blk_vq *vqs; +}; + +struct virtblk_req { +#ifdef CONFIG_VIRTIO_BLK_SCSI + struct scsi_request sreq; /* for SCSI passthrough, must be first */ + u8 sense[SCSI_SENSE_BUFFERSIZE]; + struct virtio_scsi_inhdr in_hdr; +#endif + struct virtio_blk_outhdr out_hdr; + u8 status; + struct scatterlist sg[]; +}; +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +struct blk_mq_ctx { + struct { + spinlock_t lock; + struct list_head rq_list; + } ____cacheline_aligned_in_smp; + + unsigned int cpu; + unsigned int index_hw; + /* incremented at dispatch time */ + unsigned long rq_dispatched[2]; + unsigned long rq_merged; + + /* incremented at completion time */ + unsigned long ____cacheline_aligned_in_smp rq_completed[2]; + + struct request_queue *queue; + struct kobject kobj; +} ____cacheline_aligned_in_smp; + +struct blk_flush_queue { + unsigned int flush_queue_delayed:1; + unsigned int flush_pending_idx:1; + unsigned int flush_running_idx:1; + blk_status_t rq_status; + unsigned long flush_pending_since; + struct list_head flush_queue[2]; + struct list_head flush_data_in_flight; + struct request *flush_rq; + + /* + * flush_rq shares tag with this rq, both can't be active + * at the same time + */ + struct request *orig_rq; + spinlock_t mq_flush_lock; +}; + +static inline int enable_detect_flush_rq(void) +{ + return 1; +} + +static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, + int cpu) +{ + return q->queue_hw_ctx[q->mq_map[cpu]]; +} + +static inline struct blk_mq_hw_ctx *blk_mq_get_hctx_byrq(struct request *rq) +{ + return blk_mq_map_queue(rq->q, rq->mq_ctx->cpu); +} + +static inline struct request *desc_state_data_to_req(struct virtqueue *vq, int head) +{ + void *data = to_vvq(vq)->desc_state[head].data; + return data ? blk_mq_rq_from_pdu(data) : NULL; +} + +static inline int get_rq_internal_tag(struct request *rq) +{ + return rq ? 
rq->internal_tag : -1; +} + +static inline unsigned long get_issue_driver_ns(struct request *rq) +{ + if (!rq) + return 0; + if (rq->io_start_time_ns) + return rq->io_start_time_ns; + if (rq->__deadline > rq->timeout) + return jiffies_to_usecs(rq->__deadline - rq->timeout) * 1000; + return 0; +} + +/* + * LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0) + */ +static inline u64 get_check_hang_time_ns(void) +{ + return ktime_get_ns(); +} + +//#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0) +extern fn_queue_tag_busy_iter sym_blk_mq_queue_tag_busy_iter; +//#endif +typedef void (*blk_mq_rq_iter)(struct request *, void *, bool); +static blk_mq_rq_iter fn_blk_mq_check_hang = NULL; +static void blk_mq_check_rq_hang(struct blk_mq_hw_ctx *hctx, + struct request *rq, void *priv, bool reserved) +{ + if (fn_blk_mq_check_hang) + fn_blk_mq_check_hang(rq, priv, reserved); +} + +static inline int iter_all_rq(struct request_queue *q, blk_mq_rq_iter fn, void *data) +{ + fn_blk_mq_check_hang = fn; + + sym_blk_mq_queue_tag_busy_iter(q, blk_mq_check_rq_hang, data); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.81-17.2.al7.x86_64/nvme.h b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.81-17.2.al7.x86_64/nvme.h new file mode 100644 index 0000000000000000000000000000000000000000..b0aaeba4301bdf74de1321993550fe1519f7df6b --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.81-17.2.al7.x86_64/nvme.h @@ -0,0 +1,59 @@ +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct nvme_queue { + struct device *q_dmadev; + void *nvme_dev; //struct nvme_dev *dev; + spinlock_t sq_lock; + struct nvme_command *sq_cmds; + struct nvme_command __iomem *sq_cmds_io; + spinlock_t cq_lock ____cacheline_aligned_in_smp; + volatile struct nvme_completion *cqes; + struct blk_mq_tags **tags; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_tail; + u16 cq_head; + u16 last_cq_head; + u16 qid; + u8 cq_phase; + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; +}; + +static int get_sq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int tail = nvmeq->sq_tail; + struct nvme_command cmd; + + do { + if (nvmeq->sq_cmds_io) { + memcpy_toio(&cmd, &nvmeq->sq_cmds_io[tail], sizeof(struct nvme_command)); + if (cmd.common.command_id == rq->tag) + return tail; + } + else if (nvmeq->sq_cmds[tail].common.command_id == rq->tag) + return tail; + } while (--tail >= 0); + return -1; +} + +static unsigned long get_cmd_ctx(struct nvme_queue *nvmeq, struct request *rq) +{ + //struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.81-17.2.al7.x86_64/virtio_blk.h b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.81-17.2.al7.x86_64/virtio_blk.h new file mode 100644 index 0000000000000000000000000000000000000000..34e5cb77f404928b15eba189dfcdc4d7658feb3c --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.81-17.2.al7.x86_64/virtio_blk.h @@ -0,0 +1,219 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +#define VQ_NAME_LEN 16 + +struct virtio_blk_vq { + struct virtqueue *vq; + spinlock_t lock; + char name[VQ_NAME_LEN]; +} 
____cacheline_aligned_in_smp; + +struct vring_desc_state { + void *data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ +}; + +struct vring_virtqueue { + struct virtqueue vq; + + /* Actual memory layout for this queue */ + struct vring vring; + + /* Can we use weak barriers? */ + bool weak_barriers; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Host publishes avail event idx */ + bool event; + + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. */ + unsigned int num_added; + + /* Last used index we've seen. */ + u16 last_used_idx; + + /* Last written value to avail->flags */ + u16 avail_flags_shadow; + + /* Last written value to avail->idx in guest byte order */ + u16 avail_idx_shadow; + + /* How to notify other side. FIXME: commonalize hcalls! */ + bool (*notify)(struct virtqueue *vq); + + /* DMA, allocation, and size information */ + bool we_own_ring; + size_t queue_size_in_bytes; + dma_addr_t queue_dma_addr; + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; +#endif + + /* Per-descriptor state. */ + struct vring_desc_state desc_state[]; +}; + +struct virtio_blk { + struct virtio_device *vdev; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* Block layer tags. */ + struct blk_mq_tag_set tag_set; + + /* Process context for config space updates */ + struct work_struct config_work; + + /* What host tells us, plus 2 for header & tailer. */ + unsigned int sg_elems; + + /* Ida index - used to track minor number allocations. 
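Also determines the disk name (vda, vdb, ...) and its first minor.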
*/ + int index; + + /* num of vqs */ + int num_vqs; + struct virtio_blk_vq *vqs; +}; + +struct virtblk_req { +#ifdef CONFIG_VIRTIO_BLK_SCSI + struct scsi_request sreq; /* for SCSI passthrough, must be first */ + u8 sense[SCSI_SENSE_BUFFERSIZE]; + struct virtio_scsi_inhdr in_hdr; +#endif + struct virtio_blk_outhdr out_hdr; + u8 status; + struct scatterlist sg[]; +}; +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +struct blk_mq_ctx { + struct { + spinlock_t lock; + struct list_head rq_list; + } ____cacheline_aligned_in_smp; + + unsigned int cpu; + unsigned int index_hw; + /* incremented at dispatch time */ + unsigned long rq_dispatched[2]; + unsigned long rq_merged; + + /* incremented at completion time */ + unsigned long ____cacheline_aligned_in_smp rq_completed[2]; + + struct request_queue *queue; + struct kobject kobj; +} ____cacheline_aligned_in_smp; + +struct blk_flush_queue { + unsigned int flush_queue_delayed:1; + unsigned int flush_pending_idx:1; + unsigned int flush_running_idx:1; + blk_status_t rq_status; + unsigned long flush_pending_since; + struct list_head flush_queue[2]; + struct list_head flush_data_in_flight; + struct request *flush_rq; + + /* + * flush_rq shares tag with this rq, both can't be active + * at the same time + */ + struct request *orig_rq; + spinlock_t mq_flush_lock; +}; + +static inline int enable_detect_flush_rq(void) +{ + return 1; +} + +static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, + int cpu) +{ + return q->queue_hw_ctx[q->mq_map[cpu]]; +} + +static inline struct blk_mq_hw_ctx *blk_mq_get_hctx_byrq(struct request *rq) +{ + return blk_mq_map_queue(rq->q, rq->mq_ctx->cpu); +} + +static inline struct request *desc_state_data_to_req(struct virtqueue *vq, int head) +{ + void *data = to_vvq(vq)->desc_state[head].data; + return data ? blk_mq_rq_from_pdu(data) : NULL; +} + +static inline int get_rq_internal_tag(struct request *rq) +{ + return rq ? 
rq->internal_tag : -1; +} + +static inline unsigned long get_issue_driver_ns(struct request *rq) +{ + if (!rq) + return 0; + if (rq->io_start_time_ns) + return rq->io_start_time_ns; + if (rq->__deadline > rq->timeout) + return jiffies_to_usecs(rq->__deadline - rq->timeout) * 1000; + return 0; +} + +/* + * LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0) + */ +static inline u64 get_check_hang_time_ns(void) +{ + return ktime_get_ns(); +} + +//#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0) +extern fn_queue_tag_busy_iter sym_blk_mq_queue_tag_busy_iter; +//#endif +typedef void (*blk_mq_rq_iter)(struct request *, void *, bool); +static blk_mq_rq_iter fn_blk_mq_check_hang = NULL; +static void blk_mq_check_rq_hang(struct blk_mq_hw_ctx *hctx, + struct request *rq, void *priv, bool reserved) +{ + if (fn_blk_mq_check_hang) + fn_blk_mq_check_hang(rq, priv, reserved); +} + +static inline int iter_all_rq(struct request_queue *q, blk_mq_rq_iter fn, void *data) +{ + fn_blk_mq_check_hang = fn; + + sym_blk_mq_queue_tag_busy_iter(q, blk_mq_check_rq_hang, data); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-013.ali4000.an7.x86_64/nvme.h b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-013.ali4000.an7.x86_64/nvme.h new file mode 100755 index 0000000000000000000000000000000000000000..9f3286080f831945117862601f668da7376e10c4 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-013.ali4000.an7.x86_64/nvme.h @@ -0,0 +1,61 @@ +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct nvme_queue { + struct device *q_dmadev; + void *nvme_dev; //struct nvme_dev *dev; + spinlock_t sq_lock; + struct nvme_command *sq_cmds; + struct nvme_command __iomem *sq_cmds_io; + spinlock_t cq_lock ____cacheline_aligned_in_smp; + volatile struct nvme_completion *cqes; + struct blk_mq_tags **tags; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_tail; + u16 last_sq_tail; + u16 cq_head; + u16 last_cq_head; + u16 qid; + u8 cq_phase; + u8 polled; + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; +}; + +static int get_sq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int tail = nvmeq->sq_tail; + struct nvme_command cmd; + + do { + if (nvmeq->sq_cmds_io) { + memcpy_toio(&cmd, &nvmeq->sq_cmds_io[tail], sizeof(struct nvme_command)); + if (cmd.common.command_id == rq->tag) + return tail; + } + else if (nvmeq->sq_cmds[tail].common.command_id == rq->tag) + return tail; + } while (--tail >= 0); + return -1; +} + +static unsigned long get_cmd_ctx(struct nvme_queue *nvmeq, struct request *rq) +{ + //struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-013.ali4000.an7.x86_64/virtio_blk.h b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-013.ali4000.an7.x86_64/virtio_blk.h new file mode 100755 index 0000000000000000000000000000000000000000..0842a73551aa7f524a01d0db8a6b92571b2b0d8b --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-013.ali4000.an7.x86_64/virtio_blk.h @@ -0,0 +1,268 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +#define VQ_NAME_LEN 16 + +struct virtio_blk_vq { + struct virtqueue *vq; 
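+	/* serializes adding buffers and reaping completions on this virtqueue */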
+ spinlock_t lock; + char name[VQ_NAME_LEN]; +} ____cacheline_aligned_in_smp; + +struct vring_desc_state { + void *data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ +}; + +struct vring_virtqueue { + struct virtqueue vq; + + /* Actual memory layout for this queue */ + struct vring vring; + + /* Can we use weak barriers? */ + bool weak_barriers; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Host publishes avail event idx */ + bool event; + + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. */ + unsigned int num_added; + + /* Last used index we've seen. */ + u16 last_used_idx; + + /* Last written value to avail->flags */ + u16 avail_flags_shadow; + + /* Last written value to avail->idx in guest byte order */ + u16 avail_idx_shadow; + + /* How to notify other side. FIXME: commonalize hcalls! */ + bool (*notify)(struct virtqueue *vq); + + /* DMA, allocation, and size information */ + bool we_own_ring; + size_t queue_size_in_bytes; + dma_addr_t queue_dma_addr; + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; +#endif + + /* Per-descriptor state. */ + struct vring_desc_state desc_state[]; +}; + +struct virtio_blk { + /* + * This mutex must be held by anything that may run after + * virtblk_remove() sets vblk->vdev to NULL. + * + * blk-mq, virtqueue processing, and sysfs attribute code paths are + * shut down before vblk->vdev is set to NULL and therefore do not need + * to hold this mutex. + */ + struct mutex vdev_mutex; + struct virtio_device *vdev; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* Block layer tags. */ + struct blk_mq_tag_set tag_set; + + /* Process context for config space updates */ + struct work_struct config_work; + + /* + * Tracks references from block_device_operations open/release and + * virtio_driver probe/remove so this object can be freed once no + * longer in use. + */ + refcount_t refs; + + /* What host tells us, plus 2 for header & tailer. */ + unsigned int sg_elems; + + /* Ida index - used to track minor number allocations. 
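+	 * In the stock 4.19 virtio_blk driver this index is allocated with
+	 * ida_simple_get() at probe time and determines the vdX device name
+	 * and minor number range.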
*/ + int index; + + /* num of vqs */ + int num_vqs; + struct virtio_blk_vq *vqs; +}; + +struct virtblk_req { +#ifdef CONFIG_VIRTIO_BLK_SCSI + struct scsi_request sreq; /* for SCSI passthrough, must be first */ + u8 sense[SCSI_SENSE_BUFFERSIZE]; + struct virtio_scsi_inhdr in_hdr; +#endif + struct virtio_blk_outhdr out_hdr; + u8 status; + struct scatterlist sg[]; +}; +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +struct blk_mq_ctxs { + struct kobject kobj; + struct blk_mq_ctx __percpu *queue_ctx; +}; + +struct blk_mq_ctx { + struct { + spinlock_t lock; + struct list_head rq_list; + } ____cacheline_aligned_in_smp; + + unsigned int cpu; + unsigned short index_hw[HCTX_MAX_TYPES]; + + /* incremented at dispatch time */ + unsigned long rq_dispatched[2]; + unsigned long rq_merged; + + /* incremented at completion time */ + unsigned long ____cacheline_aligned_in_smp rq_completed[2]; + + struct request_queue *queue; + struct blk_mq_ctxs *ctxs; + struct kobject kobj; +} ____cacheline_aligned_in_smp; + +struct blk_flush_queue { + unsigned int flush_queue_delayed:1; + unsigned int flush_pending_idx:1; + unsigned int flush_running_idx:1; + blk_status_t rq_status; + unsigned long flush_pending_since; + struct list_head flush_queue[2]; + struct list_head flush_data_in_flight; + struct request *flush_rq; + + /* + * flush_rq shares tag with this rq, both can't be active + * at the same time + */ + struct request *orig_rq; + spinlock_t mq_flush_lock; +}; + +static inline int enable_detect_flush_rq(void) +{ + return 1; +} + +static inline struct blk_mq_hw_ctx *blk_mq_map_queue_type(struct request_queue *q, + enum hctx_type type, + unsigned int cpu) +{ + return q->queue_hw_ctx[q->tag_set->map[type].mq_map[cpu]]; +} + +/* + * blk_mq_map_queue() - map (cmd_flags,type) to hardware queue + * @q: request queue + * @flags: request command flags + * @cpu: CPU + */ +static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, + unsigned int flags, + unsigned int cpu) +{ + enum hctx_type type = HCTX_TYPE_DEFAULT; + + if ((flags & REQ_HIPRI) && + q->tag_set->nr_maps > HCTX_TYPE_POLL && + q->tag_set->map[HCTX_TYPE_POLL].nr_queues && + test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) + type = HCTX_TYPE_POLL; + + else if (((flags & REQ_OP_MASK) == REQ_OP_READ) && + q->tag_set->nr_maps > HCTX_TYPE_READ && + q->tag_set->map[HCTX_TYPE_READ].nr_queues) + type = HCTX_TYPE_READ; + + return blk_mq_map_queue_type(q, type, cpu); +} + +static inline struct blk_mq_hw_ctx *blk_mq_get_hctx_byrq(struct request *rq) +{ + return blk_mq_map_queue(rq->q, rq->cmd_flags, rq->mq_ctx->cpu); +} + +static inline struct request *desc_state_data_to_req(struct virtqueue *vq, int head) +{ + void *data = to_vvq(vq)->desc_state[head].data; + return data ? blk_mq_rq_from_pdu(data) : NULL; +} + +static inline int get_rq_internal_tag(struct request *rq) +{ + return rq ? 
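+			/* blk-mq internal (scheduler) tag, or -1 when no request is given */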
rq->internal_tag : -1; +} + +static inline unsigned long get_issue_driver_ns(struct request *rq) +{ + if (!rq) + return 0; + if (rq->io_start_time_ns) + return rq->io_start_time_ns; + if (rq->__deadline > rq->timeout) + return jiffies_to_usecs(rq->__deadline - rq->timeout) * 1000; + return 0; +} + +/* + * LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0) + */ +static inline u64 get_check_hang_time_ns(void) +{ + return ktime_get_ns(); +} + +//#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0) +extern fn_queue_tag_busy_iter sym_blk_mq_queue_tag_busy_iter; +//#endif +typedef void (*blk_mq_rq_iter)(struct request *, void *, bool); +static blk_mq_rq_iter fn_blk_mq_check_hang = NULL; +static void blk_mq_check_rq_hang(struct blk_mq_hw_ctx *hctx, + struct request *rq, void *priv, bool reserved) +{ + if (fn_blk_mq_check_hang) + fn_blk_mq_check_hang(rq, priv, reserved); +} + +static inline int iter_all_rq(struct request_queue *q, blk_mq_rq_iter fn, void *data) +{ + fn_blk_mq_check_hang = fn; + sym_blk_mq_queue_tag_busy_iter(q, blk_mq_check_rq_hang, data); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-18.al7.x86_64/nvme.h b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-18.al7.x86_64/nvme.h new file mode 100644 index 0000000000000000000000000000000000000000..b0aaeba4301bdf74de1321993550fe1519f7df6b --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-18.al7.x86_64/nvme.h @@ -0,0 +1,59 @@ +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct nvme_queue { + struct device *q_dmadev; + void *nvme_dev; //struct nvme_dev *dev; + spinlock_t sq_lock; + struct nvme_command *sq_cmds; + struct nvme_command __iomem *sq_cmds_io; + spinlock_t cq_lock ____cacheline_aligned_in_smp; + volatile struct nvme_completion *cqes; + struct blk_mq_tags **tags; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_tail; + u16 cq_head; + u16 last_cq_head; + u16 qid; + u8 cq_phase; + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; +}; + +static int get_sq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int tail = nvmeq->sq_tail; + struct nvme_command cmd; + + do { + if (nvmeq->sq_cmds_io) { + memcpy_toio(&cmd, &nvmeq->sq_cmds_io[tail], sizeof(struct nvme_command)); + if (cmd.common.command_id == rq->tag) + return tail; + } + else if (nvmeq->sq_cmds[tail].common.command_id == rq->tag) + return tail; + } while (--tail >= 0); + return -1; +} + +static unsigned long get_cmd_ctx(struct nvme_queue *nvmeq, struct request *rq) +{ + //struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-18.al7.x86_64/virtio_blk.h b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-18.al7.x86_64/virtio_blk.h new file mode 100644 index 0000000000000000000000000000000000000000..04f7518036eb69bce794a2991dd148a8f7e81bb5 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-18.al7.x86_64/virtio_blk.h @@ -0,0 +1,226 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +#define VQ_NAME_LEN 16 + +struct virtio_blk_vq { + struct virtqueue *vq; + spinlock_t lock; + char name[VQ_NAME_LEN]; +} ____cacheline_aligned_in_smp; + +struct 
vring_desc_state { + void *data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ +}; + +struct vring_virtqueue { + struct virtqueue vq; + + /* Actual memory layout for this queue */ + struct vring vring; + + /* Can we use weak barriers? */ + bool weak_barriers; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Host publishes avail event idx */ + bool event; + + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. */ + unsigned int num_added; + + /* Last used index we've seen. */ + u16 last_used_idx; + + /* Last written value to avail->flags */ + u16 avail_flags_shadow; + + /* Last written value to avail->idx in guest byte order */ + u16 avail_idx_shadow; + + /* How to notify other side. FIXME: commonalize hcalls! */ + bool (*notify)(struct virtqueue *vq); + + /* DMA, allocation, and size information */ + bool we_own_ring; + size_t queue_size_in_bytes; + dma_addr_t queue_dma_addr; + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; +#endif + + /* Per-descriptor state. */ + struct vring_desc_state desc_state[]; +}; + +struct virtio_blk { + struct virtio_device *vdev; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* Block layer tags. */ + struct blk_mq_tag_set tag_set; + + /* Process context for config space updates */ + struct work_struct config_work; + + /* What host tells us, plus 2 for header & tailer. */ + unsigned int sg_elems; + + /* Ida index - used to track minor number allocations. */ + int index; + + /* num of vqs */ + int num_vqs; + struct virtio_blk_vq *vqs; +}; + +struct virtblk_req { +#ifdef CONFIG_VIRTIO_BLK_SCSI + struct scsi_request sreq; /* for SCSI passthrough, must be first */ + u8 sense[SCSI_SENSE_BUFFERSIZE]; + struct virtio_scsi_inhdr in_hdr; +#endif + struct virtio_blk_outhdr out_hdr; + u8 status; + struct scatterlist sg[]; +}; +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +struct blk_mq_ctxs { + struct kobject kobj; + struct blk_mq_ctx __percpu *queue_ctx; +}; + +struct blk_mq_ctx { + struct { + spinlock_t lock; + struct list_head rq_list; + } ____cacheline_aligned_in_smp; + + unsigned int cpu; + unsigned int index_hw; + /* incremented at dispatch time */ + unsigned long rq_dispatched[2]; + unsigned long rq_merged; + + /* incremented at completion time */ + unsigned long ____cacheline_aligned_in_smp rq_completed[2]; + + struct request_queue *queue; + struct blk_mq_ctxs *ctxs; + struct kobject kobj; +} ____cacheline_aligned_in_smp; + +struct blk_flush_queue { + unsigned int flush_queue_delayed:1; + unsigned int flush_pending_idx:1; + unsigned int flush_running_idx:1; + blk_status_t rq_status; + unsigned long flush_pending_since; + struct list_head flush_queue[2]; + struct list_head flush_data_in_flight; + struct request *flush_rq; + + /* + * flush_rq shares tag with this rq, both can't be active + * at the same time + */ + struct request *orig_rq; + spinlock_t mq_flush_lock; +}; + +static inline int enable_detect_flush_rq(void) +{ + return 1; +} + +static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, + int cpu) +{ + return q->queue_hw_ctx[q->mq_map[cpu]]; +} + +static inline struct blk_mq_hw_ctx *blk_mq_get_hctx_byrq(struct request *rq) +{ + return blk_mq_map_queue(rq->q, 
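+				 /* CPU of the software queue (blk_mq_ctx) the request was queued on */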
rq->mq_ctx->cpu); +} + +static inline struct request *desc_state_data_to_req(struct virtqueue *vq, int head) +{ + void *data = to_vvq(vq)->desc_state[head].data; + return data ? blk_mq_rq_from_pdu(data) : NULL; +} + +static inline int get_rq_internal_tag(struct request *rq) +{ + return rq ? rq->internal_tag : -1; +} + +static inline unsigned long get_issue_driver_ns(struct request *rq) +{ + if (!rq) + return 0; + if (rq->io_start_time_ns) + return rq->io_start_time_ns; + if (rq->__deadline > rq->timeout) + return jiffies_to_usecs(rq->__deadline - rq->timeout) * 1000; + return 0; +} + +/* + * LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0) + */ +static inline u64 get_check_hang_time_ns(void) +{ + return ktime_get_ns(); +} + +//#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0) +extern fn_queue_tag_busy_iter sym_blk_mq_queue_tag_busy_iter; +//#endif +typedef void (*blk_mq_rq_iter)(struct request *, void *, bool); +static blk_mq_rq_iter fn_blk_mq_check_hang = NULL; +static void blk_mq_check_rq_hang(struct blk_mq_hw_ctx *hctx, + struct request *rq, void *priv, bool reserved) +{ + if (fn_blk_mq_check_hang) + fn_blk_mq_check_hang(rq, priv, reserved); +} + +static inline int iter_all_rq(struct request_queue *q, blk_mq_rq_iter fn, void *data) +{ + fn_blk_mq_check_hang = fn; + + sym_blk_mq_queue_tag_busy_iter(q, blk_mq_check_rq_hang, data); + return 0; +} +void get_vq_info(struct vq_info *vq_i, struct request *rq); +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-19.1.al7.x86_64/nvme.h b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-19.1.al7.x86_64/nvme.h new file mode 100644 index 0000000000000000000000000000000000000000..b0aaeba4301bdf74de1321993550fe1519f7df6b --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-19.1.al7.x86_64/nvme.h @@ -0,0 +1,59 @@ +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct nvme_queue { + struct device *q_dmadev; + void *nvme_dev; //struct nvme_dev *dev; + spinlock_t sq_lock; + struct nvme_command *sq_cmds; + struct nvme_command __iomem *sq_cmds_io; + spinlock_t cq_lock ____cacheline_aligned_in_smp; + volatile struct nvme_completion *cqes; + struct blk_mq_tags **tags; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_tail; + u16 cq_head; + u16 last_cq_head; + u16 qid; + u8 cq_phase; + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; +}; + +static int get_sq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int tail = nvmeq->sq_tail; + struct nvme_command cmd; + + do { + if (nvmeq->sq_cmds_io) { + memcpy_toio(&cmd, &nvmeq->sq_cmds_io[tail], sizeof(struct nvme_command)); + if (cmd.common.command_id == rq->tag) + return tail; + } + else if (nvmeq->sq_cmds[tail].common.command_id == rq->tag) + return tail; + } while (--tail >= 0); + return -1; +} + +static unsigned long get_cmd_ctx(struct nvme_queue *nvmeq, struct request *rq) +{ + //struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-19.1.al7.x86_64/virtio_blk.h b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-19.1.al7.x86_64/virtio_blk.h new file mode 100644 index 0000000000000000000000000000000000000000..1a18919694b513e3b94a06f7de82fa064b94e5b7 --- /dev/null +++ 
b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-19.1.al7.x86_64/virtio_blk.h @@ -0,0 +1,225 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +#define VQ_NAME_LEN 16 + +struct virtio_blk_vq { + struct virtqueue *vq; + spinlock_t lock; + char name[VQ_NAME_LEN]; +} ____cacheline_aligned_in_smp; + +struct vring_desc_state { + void *data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ +}; + +struct vring_virtqueue { + struct virtqueue vq; + + /* Actual memory layout for this queue */ + struct vring vring; + + /* Can we use weak barriers? */ + bool weak_barriers; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Host publishes avail event idx */ + bool event; + + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. */ + unsigned int num_added; + + /* Last used index we've seen. */ + u16 last_used_idx; + + /* Last written value to avail->flags */ + u16 avail_flags_shadow; + + /* Last written value to avail->idx in guest byte order */ + u16 avail_idx_shadow; + + /* How to notify other side. FIXME: commonalize hcalls! */ + bool (*notify)(struct virtqueue *vq); + + /* DMA, allocation, and size information */ + bool we_own_ring; + size_t queue_size_in_bytes; + dma_addr_t queue_dma_addr; + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; +#endif + + /* Per-descriptor state. */ + struct vring_desc_state desc_state[]; +}; + +struct virtio_blk { + struct virtio_device *vdev; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* Block layer tags. */ + struct blk_mq_tag_set tag_set; + + /* Process context for config space updates */ + struct work_struct config_work; + + /* What host tells us, plus 2 for header & tailer. */ + unsigned int sg_elems; + + /* Ida index - used to track minor number allocations. 
*/ + int index; + + /* num of vqs */ + int num_vqs; + struct virtio_blk_vq *vqs; +}; + +struct virtblk_req { +#ifdef CONFIG_VIRTIO_BLK_SCSI + struct scsi_request sreq; /* for SCSI passthrough, must be first */ + u8 sense[SCSI_SENSE_BUFFERSIZE]; + struct virtio_scsi_inhdr in_hdr; +#endif + struct virtio_blk_outhdr out_hdr; + u8 status; + struct scatterlist sg[]; +}; +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +struct blk_mq_ctxs { + struct kobject kobj; + struct blk_mq_ctx __percpu *queue_ctx; +}; + +struct blk_mq_ctx { + struct { + spinlock_t lock; + struct list_head rq_list; + } ____cacheline_aligned_in_smp; + + unsigned int cpu; + unsigned int index_hw; + /* incremented at dispatch time */ + unsigned long rq_dispatched[2]; + unsigned long rq_merged; + + /* incremented at completion time */ + unsigned long ____cacheline_aligned_in_smp rq_completed[2]; + + struct request_queue *queue; + struct blk_mq_ctxs *ctxs; + struct kobject kobj; +} ____cacheline_aligned_in_smp; + +struct blk_flush_queue { + unsigned int flush_queue_delayed:1; + unsigned int flush_pending_idx:1; + unsigned int flush_running_idx:1; + blk_status_t rq_status; + unsigned long flush_pending_since; + struct list_head flush_queue[2]; + struct list_head flush_data_in_flight; + struct request *flush_rq; + + /* + * flush_rq shares tag with this rq, both can't be active + * at the same time + */ + struct request *orig_rq; + spinlock_t mq_flush_lock; +}; + +static inline int enable_detect_flush_rq(void) +{ + return 1; +} + +static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, + int cpu) +{ + return q->queue_hw_ctx[q->mq_map[cpu]]; +} + +static inline struct blk_mq_hw_ctx *blk_mq_get_hctx_byrq(struct request *rq) +{ + return blk_mq_map_queue(rq->q, rq->mq_ctx->cpu); +} + +static inline struct request *desc_state_data_to_req(struct virtqueue *vq, int head) +{ + void *data = to_vvq(vq)->desc_state[head].data; + return data ? blk_mq_rq_from_pdu(data) : NULL; +} + +static inline int get_rq_internal_tag(struct request *rq) +{ + return rq ? 
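+			/* scheduler tag; distinct from the driver tag kept in rq->tag */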
rq->internal_tag : -1; +} + +static inline unsigned long get_issue_driver_ns(struct request *rq) +{ + if (!rq) + return 0; + if (rq->io_start_time_ns) + return rq->io_start_time_ns; + if (rq->__deadline > rq->timeout) + return jiffies_to_usecs(rq->__deadline - rq->timeout) * 1000; + return 0; +} + +/* + * LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0) + */ +static inline u64 get_check_hang_time_ns(void) +{ + return ktime_get_ns(); +} + +//#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0) +extern fn_queue_tag_busy_iter sym_blk_mq_queue_tag_busy_iter; +//#endif +typedef void (*blk_mq_rq_iter)(struct request *, void *, bool); +static blk_mq_rq_iter fn_blk_mq_check_hang = NULL; +static void blk_mq_check_rq_hang(struct blk_mq_hw_ctx *hctx, + struct request *rq, void *priv, bool reserved) +{ + if (fn_blk_mq_check_hang) + fn_blk_mq_check_hang(rq, priv, reserved); +} + +static inline int iter_all_rq(struct request_queue *q, blk_mq_rq_iter fn, void *data) +{ + fn_blk_mq_check_hang = fn; + + sym_blk_mq_queue_tag_busy_iter(q, blk_mq_check_rq_hang, data); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-19.2.al7.x86_64/nvme.h b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-19.2.al7.x86_64/nvme.h new file mode 100644 index 0000000000000000000000000000000000000000..b0aaeba4301bdf74de1321993550fe1519f7df6b --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-19.2.al7.x86_64/nvme.h @@ -0,0 +1,59 @@ +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct nvme_queue { + struct device *q_dmadev; + void *nvme_dev; //struct nvme_dev *dev; + spinlock_t sq_lock; + struct nvme_command *sq_cmds; + struct nvme_command __iomem *sq_cmds_io; + spinlock_t cq_lock ____cacheline_aligned_in_smp; + volatile struct nvme_completion *cqes; + struct blk_mq_tags **tags; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_tail; + u16 cq_head; + u16 last_cq_head; + u16 qid; + u8 cq_phase; + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; +}; + +static int get_sq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int tail = nvmeq->sq_tail; + struct nvme_command cmd; + + do { + if (nvmeq->sq_cmds_io) { + memcpy_toio(&cmd, &nvmeq->sq_cmds_io[tail], sizeof(struct nvme_command)); + if (cmd.common.command_id == rq->tag) + return tail; + } + else if (nvmeq->sq_cmds[tail].common.command_id == rq->tag) + return tail; + } while (--tail >= 0); + return -1; +} + +static unsigned long get_cmd_ctx(struct nvme_queue *nvmeq, struct request *rq) +{ + //struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-19.2.al7.x86_64/virtio_blk.h b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-19.2.al7.x86_64/virtio_blk.h new file mode 100644 index 0000000000000000000000000000000000000000..1a18919694b513e3b94a06f7de82fa064b94e5b7 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-19.2.al7.x86_64/virtio_blk.h @@ -0,0 +1,225 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +#define VQ_NAME_LEN 16 + +struct virtio_blk_vq { + struct virtqueue *vq; + spinlock_t lock; + char name[VQ_NAME_LEN]; +} 
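+/* cacheline-aligned so each queue's lock sits on its own cache line */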
____cacheline_aligned_in_smp; + +struct vring_desc_state { + void *data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ +}; + +struct vring_virtqueue { + struct virtqueue vq; + + /* Actual memory layout for this queue */ + struct vring vring; + + /* Can we use weak barriers? */ + bool weak_barriers; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Host publishes avail event idx */ + bool event; + + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. */ + unsigned int num_added; + + /* Last used index we've seen. */ + u16 last_used_idx; + + /* Last written value to avail->flags */ + u16 avail_flags_shadow; + + /* Last written value to avail->idx in guest byte order */ + u16 avail_idx_shadow; + + /* How to notify other side. FIXME: commonalize hcalls! */ + bool (*notify)(struct virtqueue *vq); + + /* DMA, allocation, and size information */ + bool we_own_ring; + size_t queue_size_in_bytes; + dma_addr_t queue_dma_addr; + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; +#endif + + /* Per-descriptor state. */ + struct vring_desc_state desc_state[]; +}; + +struct virtio_blk { + struct virtio_device *vdev; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* Block layer tags. */ + struct blk_mq_tag_set tag_set; + + /* Process context for config space updates */ + struct work_struct config_work; + + /* What host tells us, plus 2 for header & tailer. */ + unsigned int sg_elems; + + /* Ida index - used to track minor number allocations. 
*/ + int index; + + /* num of vqs */ + int num_vqs; + struct virtio_blk_vq *vqs; +}; + +struct virtblk_req { +#ifdef CONFIG_VIRTIO_BLK_SCSI + struct scsi_request sreq; /* for SCSI passthrough, must be first */ + u8 sense[SCSI_SENSE_BUFFERSIZE]; + struct virtio_scsi_inhdr in_hdr; +#endif + struct virtio_blk_outhdr out_hdr; + u8 status; + struct scatterlist sg[]; +}; +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +struct blk_mq_ctxs { + struct kobject kobj; + struct blk_mq_ctx __percpu *queue_ctx; +}; + +struct blk_mq_ctx { + struct { + spinlock_t lock; + struct list_head rq_list; + } ____cacheline_aligned_in_smp; + + unsigned int cpu; + unsigned int index_hw; + /* incremented at dispatch time */ + unsigned long rq_dispatched[2]; + unsigned long rq_merged; + + /* incremented at completion time */ + unsigned long ____cacheline_aligned_in_smp rq_completed[2]; + + struct request_queue *queue; + struct blk_mq_ctxs *ctxs; + struct kobject kobj; +} ____cacheline_aligned_in_smp; + +struct blk_flush_queue { + unsigned int flush_queue_delayed:1; + unsigned int flush_pending_idx:1; + unsigned int flush_running_idx:1; + blk_status_t rq_status; + unsigned long flush_pending_since; + struct list_head flush_queue[2]; + struct list_head flush_data_in_flight; + struct request *flush_rq; + + /* + * flush_rq shares tag with this rq, both can't be active + * at the same time + */ + struct request *orig_rq; + spinlock_t mq_flush_lock; +}; + +static inline int enable_detect_flush_rq(void) +{ + return 1; +} + +static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, + int cpu) +{ + return q->queue_hw_ctx[q->mq_map[cpu]]; +} + +static inline struct blk_mq_hw_ctx *blk_mq_get_hctx_byrq(struct request *rq) +{ + return blk_mq_map_queue(rq->q, rq->mq_ctx->cpu); +} + +static inline struct request *desc_state_data_to_req(struct virtqueue *vq, int head) +{ + void *data = to_vvq(vq)->desc_state[head].data; + return data ? blk_mq_rq_from_pdu(data) : NULL; +} + +static inline int get_rq_internal_tag(struct request *rq) +{ + return rq ? 
rq->internal_tag : -1; +} + +static inline unsigned long get_issue_driver_ns(struct request *rq) +{ + if (!rq) + return 0; + if (rq->io_start_time_ns) + return rq->io_start_time_ns; + if (rq->__deadline > rq->timeout) + return jiffies_to_usecs(rq->__deadline - rq->timeout) * 1000; + return 0; +} + +/* + * LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0) + */ +static inline u64 get_check_hang_time_ns(void) +{ + return ktime_get_ns(); +} + +//#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0) +extern fn_queue_tag_busy_iter sym_blk_mq_queue_tag_busy_iter; +//#endif +typedef void (*blk_mq_rq_iter)(struct request *, void *, bool); +static blk_mq_rq_iter fn_blk_mq_check_hang = NULL; +static void blk_mq_check_rq_hang(struct blk_mq_hw_ctx *hctx, + struct request *rq, void *priv, bool reserved) +{ + if (fn_blk_mq_check_hang) + fn_blk_mq_check_hang(rq, priv, reserved); +} + +static inline int iter_all_rq(struct request_queue *q, blk_mq_rq_iter fn, void *data) +{ + fn_blk_mq_check_hang = fn; + + sym_blk_mq_queue_tag_busy_iter(q, blk_mq_check_rq_hang, data); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-21.2.al7.x86_64/nvme.h b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-21.2.al7.x86_64/nvme.h new file mode 100644 index 0000000000000000000000000000000000000000..9f3286080f831945117862601f668da7376e10c4 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-21.2.al7.x86_64/nvme.h @@ -0,0 +1,61 @@ +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct nvme_queue { + struct device *q_dmadev; + void *nvme_dev; //struct nvme_dev *dev; + spinlock_t sq_lock; + struct nvme_command *sq_cmds; + struct nvme_command __iomem *sq_cmds_io; + spinlock_t cq_lock ____cacheline_aligned_in_smp; + volatile struct nvme_completion *cqes; + struct blk_mq_tags **tags; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_tail; + u16 last_sq_tail; + u16 cq_head; + u16 last_cq_head; + u16 qid; + u8 cq_phase; + u8 polled; + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; +}; + +static int get_sq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int tail = nvmeq->sq_tail; + struct nvme_command cmd; + + do { + if (nvmeq->sq_cmds_io) { + memcpy_toio(&cmd, &nvmeq->sq_cmds_io[tail], sizeof(struct nvme_command)); + if (cmd.common.command_id == rq->tag) + return tail; + } + else if (nvmeq->sq_cmds[tail].common.command_id == rq->tag) + return tail; + } while (--tail >= 0); + return -1; +} + +static unsigned long get_cmd_ctx(struct nvme_queue *nvmeq, struct request *rq) +{ + //struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-21.2.al7.x86_64/virtio_blk.h b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-21.2.al7.x86_64/virtio_blk.h new file mode 100644 index 0000000000000000000000000000000000000000..853fa75e283274035567b387eb25b4f8e56a74cc --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-21.2.al7.x86_64/virtio_blk.h @@ -0,0 +1,268 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +#define VQ_NAME_LEN 16 + +struct virtio_blk_vq { + struct virtqueue *vq; + spinlock_t lock; + char 
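+			    /* queue name, typically "req.<n>" as set by the stock driver */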
name[VQ_NAME_LEN]; +} ____cacheline_aligned_in_smp; + +struct vring_desc_state { + void *data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ +}; + +struct vring_virtqueue { + struct virtqueue vq; + + /* Actual memory layout for this queue */ + struct vring vring; + + /* Can we use weak barriers? */ + bool weak_barriers; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Host publishes avail event idx */ + bool event; + + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. */ + unsigned int num_added; + + /* Last used index we've seen. */ + u16 last_used_idx; + + /* Last written value to avail->flags */ + u16 avail_flags_shadow; + + /* Last written value to avail->idx in guest byte order */ + u16 avail_idx_shadow; + + /* How to notify other side. FIXME: commonalize hcalls! */ + bool (*notify)(struct virtqueue *vq); + + /* DMA, allocation, and size information */ + bool we_own_ring; + size_t queue_size_in_bytes; + dma_addr_t queue_dma_addr; + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; +#endif + + /* Per-descriptor state. */ + struct vring_desc_state desc_state[]; +}; + +struct virtio_blk { + /* + * This mutex must be held by anything that may run after + * virtblk_remove() sets vblk->vdev to NULL. + * + * blk-mq, virtqueue processing, and sysfs attribute code paths are + * shut down before vblk->vdev is set to NULL and therefore do not need + * to hold this mutex. + */ + struct mutex vdev_mutex; + struct virtio_device *vdev; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* Block layer tags. */ + struct blk_mq_tag_set tag_set; + + /* Process context for config space updates */ + struct work_struct config_work; + + /* + * Tracks references from block_device_operations open/release and + * virtio_driver probe/remove so this object can be freed once no + * longer in use. + */ + refcount_t refs; + + /* What host tells us, plus 2 for header & tailer. */ + unsigned int sg_elems; + + /* Ida index - used to track minor number allocations. 
*/ + int index; + + /* num of vqs */ + int num_vqs; + struct virtio_blk_vq *vqs; +}; + +struct virtblk_req { +#ifdef CONFIG_VIRTIO_BLK_SCSI + struct scsi_request sreq; /* for SCSI passthrough, must be first */ + u8 sense[SCSI_SENSE_BUFFERSIZE]; + struct virtio_scsi_inhdr in_hdr; +#endif + struct virtio_blk_outhdr out_hdr; + u8 status; + struct scatterlist sg[]; +}; +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +struct blk_mq_ctxs { + struct kobject kobj; + struct blk_mq_ctx __percpu *queue_ctx; +}; + +struct blk_mq_ctx { + struct { + spinlock_t lock; + struct list_head rq_list; + } ____cacheline_aligned_in_smp; + + unsigned int cpu; + unsigned short index_hw[HCTX_MAX_TYPES]; + /* incremented at dispatch time */ + unsigned long rq_dispatched[2]; + unsigned long rq_merged; + + /* incremented at completion time */ + unsigned long ____cacheline_aligned_in_smp rq_completed[2]; + + struct request_queue *queue; + struct blk_mq_ctxs *ctxs; + struct kobject kobj; +} ____cacheline_aligned_in_smp; + +struct blk_flush_queue { + unsigned int flush_queue_delayed:1; + unsigned int flush_pending_idx:1; + unsigned int flush_running_idx:1; + blk_status_t rq_status; + unsigned long flush_pending_since; + struct list_head flush_queue[2]; + struct list_head flush_data_in_flight; + struct request *flush_rq; + + /* + * flush_rq shares tag with this rq, both can't be active + * at the same time + */ + struct request *orig_rq; + spinlock_t mq_flush_lock; +}; + +static inline int enable_detect_flush_rq(void) +{ + return 1; +} + +static inline struct blk_mq_hw_ctx *blk_mq_map_queue_type(struct request_queue *q, + enum hctx_type type, + unsigned int cpu) +{ + return q->queue_hw_ctx[q->tag_set->map[type].mq_map[cpu]]; +} + +/* + * blk_mq_map_queue() - map (cmd_flags,type) to hardware queue + * @q: request queue + * @flags: request command flags + * @cpu: CPU + */ +static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, + unsigned int flags, + unsigned int cpu) +{ + enum hctx_type type = HCTX_TYPE_DEFAULT; + + if ((flags & REQ_HIPRI) && + q->tag_set->nr_maps > HCTX_TYPE_POLL && + q->tag_set->map[HCTX_TYPE_POLL].nr_queues && + test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) + type = HCTX_TYPE_POLL; + + else if (((flags & REQ_OP_MASK) == REQ_OP_READ) && + q->tag_set->nr_maps > HCTX_TYPE_READ && + q->tag_set->map[HCTX_TYPE_READ].nr_queues) + type = HCTX_TYPE_READ; + + return blk_mq_map_queue_type(q, type, cpu); +} + +static inline struct blk_mq_hw_ctx *blk_mq_get_hctx_byrq(struct request *rq) +{ + return blk_mq_map_queue(rq->q, rq->cmd_flags, rq->mq_ctx->cpu); +} + +static inline struct request *desc_state_data_to_req(struct virtqueue *vq, int head) +{ + void *data = to_vvq(vq)->desc_state[head].data; + return data ? blk_mq_rq_from_pdu(data) : NULL; +} + +static inline int get_rq_internal_tag(struct request *rq) +{ + return rq ? 
rq->internal_tag : -1; +} + +static inline unsigned long get_issue_driver_ns(struct request *rq) +{ + if (!rq) + return 0; + if (rq->io_start_time_ns) + return rq->io_start_time_ns; + if (rq->__deadline > rq->timeout) + return jiffies_to_usecs(rq->__deadline - rq->timeout) * 1000; + return 0; +} + +/* + * LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0) + */ +static inline u64 get_check_hang_time_ns(void) +{ + return ktime_get_ns(); +} + +//#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0) +extern fn_queue_tag_busy_iter sym_blk_mq_queue_tag_busy_iter; +//#endif +typedef void (*blk_mq_rq_iter)(struct request *, void *, bool); +static blk_mq_rq_iter fn_blk_mq_check_hang = NULL; +static void blk_mq_check_rq_hang(struct blk_mq_hw_ctx *hctx, + struct request *rq, void *priv, bool reserved) +{ + if (fn_blk_mq_check_hang) + fn_blk_mq_check_hang(rq, priv, reserved); +} + +static inline int iter_all_rq(struct request_queue *q, blk_mq_rq_iter fn, void *data) +{ + fn_blk_mq_check_hang = fn; + + sym_blk_mq_queue_tag_busy_iter(q, blk_mq_check_rq_hang, data); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-21.al7.x86_64/nvme.h b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-21.al7.x86_64/nvme.h new file mode 100644 index 0000000000000000000000000000000000000000..9f3286080f831945117862601f668da7376e10c4 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-21.al7.x86_64/nvme.h @@ -0,0 +1,61 @@ +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct nvme_queue { + struct device *q_dmadev; + void *nvme_dev; //struct nvme_dev *dev; + spinlock_t sq_lock; + struct nvme_command *sq_cmds; + struct nvme_command __iomem *sq_cmds_io; + spinlock_t cq_lock ____cacheline_aligned_in_smp; + volatile struct nvme_completion *cqes; + struct blk_mq_tags **tags; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_tail; + u16 last_sq_tail; + u16 cq_head; + u16 last_cq_head; + u16 qid; + u8 cq_phase; + u8 polled; + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; +}; + +static int get_sq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int tail = nvmeq->sq_tail; + struct nvme_command cmd; + + do { + if (nvmeq->sq_cmds_io) { + memcpy_toio(&cmd, &nvmeq->sq_cmds_io[tail], sizeof(struct nvme_command)); + if (cmd.common.command_id == rq->tag) + return tail; + } + else if (nvmeq->sq_cmds[tail].common.command_id == rq->tag) + return tail; + } while (--tail >= 0); + return -1; +} + +static unsigned long get_cmd_ctx(struct nvme_queue *nvmeq, struct request *rq) +{ + //struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-21.al7.x86_64/virtio_blk.h b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-21.al7.x86_64/virtio_blk.h new file mode 100644 index 0000000000000000000000000000000000000000..853fa75e283274035567b387eb25b4f8e56a74cc --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-21.al7.x86_64/virtio_blk.h @@ -0,0 +1,268 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +#define VQ_NAME_LEN 16 + +struct virtio_blk_vq { + struct virtqueue *vq; + spinlock_t lock; + char name[VQ_NAME_LEN]; +} 
____cacheline_aligned_in_smp; + +struct vring_desc_state { + void *data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ +}; + +struct vring_virtqueue { + struct virtqueue vq; + + /* Actual memory layout for this queue */ + struct vring vring; + + /* Can we use weak barriers? */ + bool weak_barriers; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Host publishes avail event idx */ + bool event; + + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. */ + unsigned int num_added; + + /* Last used index we've seen. */ + u16 last_used_idx; + + /* Last written value to avail->flags */ + u16 avail_flags_shadow; + + /* Last written value to avail->idx in guest byte order */ + u16 avail_idx_shadow; + + /* How to notify other side. FIXME: commonalize hcalls! */ + bool (*notify)(struct virtqueue *vq); + + /* DMA, allocation, and size information */ + bool we_own_ring; + size_t queue_size_in_bytes; + dma_addr_t queue_dma_addr; + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; +#endif + + /* Per-descriptor state. */ + struct vring_desc_state desc_state[]; +}; + +struct virtio_blk { + /* + * This mutex must be held by anything that may run after + * virtblk_remove() sets vblk->vdev to NULL. + * + * blk-mq, virtqueue processing, and sysfs attribute code paths are + * shut down before vblk->vdev is set to NULL and therefore do not need + * to hold this mutex. + */ + struct mutex vdev_mutex; + struct virtio_device *vdev; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* Block layer tags. */ + struct blk_mq_tag_set tag_set; + + /* Process context for config space updates */ + struct work_struct config_work; + + /* + * Tracks references from block_device_operations open/release and + * virtio_driver probe/remove so this object can be freed once no + * longer in use. + */ + refcount_t refs; + + /* What host tells us, plus 2 for header & tailer. */ + unsigned int sg_elems; + + /* Ida index - used to track minor number allocations. 
*/ + int index; + + /* num of vqs */ + int num_vqs; + struct virtio_blk_vq *vqs; +}; + +struct virtblk_req { +#ifdef CONFIG_VIRTIO_BLK_SCSI + struct scsi_request sreq; /* for SCSI passthrough, must be first */ + u8 sense[SCSI_SENSE_BUFFERSIZE]; + struct virtio_scsi_inhdr in_hdr; +#endif + struct virtio_blk_outhdr out_hdr; + u8 status; + struct scatterlist sg[]; +}; +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +struct blk_mq_ctxs { + struct kobject kobj; + struct blk_mq_ctx __percpu *queue_ctx; +}; + +struct blk_mq_ctx { + struct { + spinlock_t lock; + struct list_head rq_list; + } ____cacheline_aligned_in_smp; + + unsigned int cpu; + unsigned short index_hw[HCTX_MAX_TYPES]; + /* incremented at dispatch time */ + unsigned long rq_dispatched[2]; + unsigned long rq_merged; + + /* incremented at completion time */ + unsigned long ____cacheline_aligned_in_smp rq_completed[2]; + + struct request_queue *queue; + struct blk_mq_ctxs *ctxs; + struct kobject kobj; +} ____cacheline_aligned_in_smp; + +struct blk_flush_queue { + unsigned int flush_queue_delayed:1; + unsigned int flush_pending_idx:1; + unsigned int flush_running_idx:1; + blk_status_t rq_status; + unsigned long flush_pending_since; + struct list_head flush_queue[2]; + struct list_head flush_data_in_flight; + struct request *flush_rq; + + /* + * flush_rq shares tag with this rq, both can't be active + * at the same time + */ + struct request *orig_rq; + spinlock_t mq_flush_lock; +}; + +static inline int enable_detect_flush_rq(void) +{ + return 1; +} + +static inline struct blk_mq_hw_ctx *blk_mq_map_queue_type(struct request_queue *q, + enum hctx_type type, + unsigned int cpu) +{ + return q->queue_hw_ctx[q->tag_set->map[type].mq_map[cpu]]; +} + +/* + * blk_mq_map_queue() - map (cmd_flags,type) to hardware queue + * @q: request queue + * @flags: request command flags + * @cpu: CPU + */ +static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, + unsigned int flags, + unsigned int cpu) +{ + enum hctx_type type = HCTX_TYPE_DEFAULT; + + if ((flags & REQ_HIPRI) && + q->tag_set->nr_maps > HCTX_TYPE_POLL && + q->tag_set->map[HCTX_TYPE_POLL].nr_queues && + test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) + type = HCTX_TYPE_POLL; + + else if (((flags & REQ_OP_MASK) == REQ_OP_READ) && + q->tag_set->nr_maps > HCTX_TYPE_READ && + q->tag_set->map[HCTX_TYPE_READ].nr_queues) + type = HCTX_TYPE_READ; + + return blk_mq_map_queue_type(q, type, cpu); +} + +static inline struct blk_mq_hw_ctx *blk_mq_get_hctx_byrq(struct request *rq) +{ + return blk_mq_map_queue(rq->q, rq->cmd_flags, rq->mq_ctx->cpu); +} + +static inline struct request *desc_state_data_to_req(struct virtqueue *vq, int head) +{ + void *data = to_vvq(vq)->desc_state[head].data; + return data ? blk_mq_rq_from_pdu(data) : NULL; +} + +static inline int get_rq_internal_tag(struct request *rq) +{ + return rq ? 
rq->internal_tag : -1; +} + +static inline unsigned long get_issue_driver_ns(struct request *rq) +{ + if (!rq) + return 0; + if (rq->io_start_time_ns) + return rq->io_start_time_ns; + if (rq->__deadline > rq->timeout) + return jiffies_to_usecs(rq->__deadline - rq->timeout) * 1000; + return 0; +} + +/* + * LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0) + */ +static inline u64 get_check_hang_time_ns(void) +{ + return ktime_get_ns(); +} + +//#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0) +extern fn_queue_tag_busy_iter sym_blk_mq_queue_tag_busy_iter; +//#endif +typedef void (*blk_mq_rq_iter)(struct request *, void *, bool); +static blk_mq_rq_iter fn_blk_mq_check_hang = NULL; +static void blk_mq_check_rq_hang(struct blk_mq_hw_ctx *hctx, + struct request *rq, void *priv, bool reserved) +{ + if (fn_blk_mq_check_hang) + fn_blk_mq_check_hang(rq, priv, reserved); +} + +static inline int iter_all_rq(struct request_queue *q, blk_mq_rq_iter fn, void *data) +{ + fn_blk_mq_check_hang = fn; + + sym_blk_mq_queue_tag_busy_iter(q, blk_mq_check_rq_hang, data); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-22.1.al7.x86_64/nvme.h b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-22.1.al7.x86_64/nvme.h new file mode 100644 index 0000000000000000000000000000000000000000..9f3286080f831945117862601f668da7376e10c4 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-22.1.al7.x86_64/nvme.h @@ -0,0 +1,61 @@ +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct nvme_queue { + struct device *q_dmadev; + void *nvme_dev; //struct nvme_dev *dev; + spinlock_t sq_lock; + struct nvme_command *sq_cmds; + struct nvme_command __iomem *sq_cmds_io; + spinlock_t cq_lock ____cacheline_aligned_in_smp; + volatile struct nvme_completion *cqes; + struct blk_mq_tags **tags; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_tail; + u16 last_sq_tail; + u16 cq_head; + u16 last_cq_head; + u16 qid; + u8 cq_phase; + u8 polled; + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; +}; + +static int get_sq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int tail = nvmeq->sq_tail; + struct nvme_command cmd; + + do { + if (nvmeq->sq_cmds_io) { + memcpy_toio(&cmd, &nvmeq->sq_cmds_io[tail], sizeof(struct nvme_command)); + if (cmd.common.command_id == rq->tag) + return tail; + } + else if (nvmeq->sq_cmds[tail].common.command_id == rq->tag) + return tail; + } while (--tail >= 0); + return -1; +} + +static unsigned long get_cmd_ctx(struct nvme_queue *nvmeq, struct request *rq) +{ + //struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-22.1.al7.x86_64/virtio_blk.h b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-22.1.al7.x86_64/virtio_blk.h new file mode 100644 index 0000000000000000000000000000000000000000..853fa75e283274035567b387eb25b4f8e56a74cc --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-22.1.al7.x86_64/virtio_blk.h @@ -0,0 +1,268 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +#define VQ_NAME_LEN 16 + +struct virtio_blk_vq { + struct virtqueue *vq; + spinlock_t lock; + char 
name[VQ_NAME_LEN]; +} ____cacheline_aligned_in_smp; + +struct vring_desc_state { + void *data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ +}; + +struct vring_virtqueue { + struct virtqueue vq; + + /* Actual memory layout for this queue */ + struct vring vring; + + /* Can we use weak barriers? */ + bool weak_barriers; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Host publishes avail event idx */ + bool event; + + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. */ + unsigned int num_added; + + /* Last used index we've seen. */ + u16 last_used_idx; + + /* Last written value to avail->flags */ + u16 avail_flags_shadow; + + /* Last written value to avail->idx in guest byte order */ + u16 avail_idx_shadow; + + /* How to notify other side. FIXME: commonalize hcalls! */ + bool (*notify)(struct virtqueue *vq); + + /* DMA, allocation, and size information */ + bool we_own_ring; + size_t queue_size_in_bytes; + dma_addr_t queue_dma_addr; + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; +#endif + + /* Per-descriptor state. */ + struct vring_desc_state desc_state[]; +}; + +struct virtio_blk { + /* + * This mutex must be held by anything that may run after + * virtblk_remove() sets vblk->vdev to NULL. + * + * blk-mq, virtqueue processing, and sysfs attribute code paths are + * shut down before vblk->vdev is set to NULL and therefore do not need + * to hold this mutex. + */ + struct mutex vdev_mutex; + struct virtio_device *vdev; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* Block layer tags. */ + struct blk_mq_tag_set tag_set; + + /* Process context for config space updates */ + struct work_struct config_work; + + /* + * Tracks references from block_device_operations open/release and + * virtio_driver probe/remove so this object can be freed once no + * longer in use. + */ + refcount_t refs; + + /* What host tells us, plus 2 for header & tailer. */ + unsigned int sg_elems; + + /* Ida index - used to track minor number allocations. 
*/ + int index; + + /* num of vqs */ + int num_vqs; + struct virtio_blk_vq *vqs; +}; + +struct virtblk_req { +#ifdef CONFIG_VIRTIO_BLK_SCSI + struct scsi_request sreq; /* for SCSI passthrough, must be first */ + u8 sense[SCSI_SENSE_BUFFERSIZE]; + struct virtio_scsi_inhdr in_hdr; +#endif + struct virtio_blk_outhdr out_hdr; + u8 status; + struct scatterlist sg[]; +}; +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +struct blk_mq_ctxs { + struct kobject kobj; + struct blk_mq_ctx __percpu *queue_ctx; +}; + +struct blk_mq_ctx { + struct { + spinlock_t lock; + struct list_head rq_list; + } ____cacheline_aligned_in_smp; + + unsigned int cpu; + unsigned short index_hw[HCTX_MAX_TYPES]; + /* incremented at dispatch time */ + unsigned long rq_dispatched[2]; + unsigned long rq_merged; + + /* incremented at completion time */ + unsigned long ____cacheline_aligned_in_smp rq_completed[2]; + + struct request_queue *queue; + struct blk_mq_ctxs *ctxs; + struct kobject kobj; +} ____cacheline_aligned_in_smp; + +struct blk_flush_queue { + unsigned int flush_queue_delayed:1; + unsigned int flush_pending_idx:1; + unsigned int flush_running_idx:1; + blk_status_t rq_status; + unsigned long flush_pending_since; + struct list_head flush_queue[2]; + struct list_head flush_data_in_flight; + struct request *flush_rq; + + /* + * flush_rq shares tag with this rq, both can't be active + * at the same time + */ + struct request *orig_rq; + spinlock_t mq_flush_lock; +}; + +static inline int enable_detect_flush_rq(void) +{ + return 1; +} + +static inline struct blk_mq_hw_ctx *blk_mq_map_queue_type(struct request_queue *q, + enum hctx_type type, + unsigned int cpu) +{ + return q->queue_hw_ctx[q->tag_set->map[type].mq_map[cpu]]; +} + +/* + * blk_mq_map_queue() - map (cmd_flags,type) to hardware queue + * @q: request queue + * @flags: request command flags + * @cpu: CPU + */ +static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, + unsigned int flags, + unsigned int cpu) +{ + enum hctx_type type = HCTX_TYPE_DEFAULT; + + if ((flags & REQ_HIPRI) && + q->tag_set->nr_maps > HCTX_TYPE_POLL && + q->tag_set->map[HCTX_TYPE_POLL].nr_queues && + test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) + type = HCTX_TYPE_POLL; + + else if (((flags & REQ_OP_MASK) == REQ_OP_READ) && + q->tag_set->nr_maps > HCTX_TYPE_READ && + q->tag_set->map[HCTX_TYPE_READ].nr_queues) + type = HCTX_TYPE_READ; + + return blk_mq_map_queue_type(q, type, cpu); +} + +static inline struct blk_mq_hw_ctx *blk_mq_get_hctx_byrq(struct request *rq) +{ + return blk_mq_map_queue(rq->q, rq->cmd_flags, rq->mq_ctx->cpu); +} + +static inline struct request *desc_state_data_to_req(struct virtqueue *vq, int head) +{ + void *data = to_vvq(vq)->desc_state[head].data; + return data ? blk_mq_rq_from_pdu(data) : NULL; +} + +static inline int get_rq_internal_tag(struct request *rq) +{ + return rq ? 
rq->internal_tag : -1; +} + +static inline unsigned long get_issue_driver_ns(struct request *rq) +{ + if (!rq) + return 0; + if (rq->io_start_time_ns) + return rq->io_start_time_ns; + if (rq->__deadline > rq->timeout) + return jiffies_to_usecs(rq->__deadline - rq->timeout) * 1000; + return 0; +} + +/* + * LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0) + */ +static inline u64 get_check_hang_time_ns(void) +{ + return ktime_get_ns(); +} + +//#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0) +extern fn_queue_tag_busy_iter sym_blk_mq_queue_tag_busy_iter; +//#endif +typedef void (*blk_mq_rq_iter)(struct request *, void *, bool); +static blk_mq_rq_iter fn_blk_mq_check_hang = NULL; +static void blk_mq_check_rq_hang(struct blk_mq_hw_ctx *hctx, + struct request *rq, void *priv, bool reserved) +{ + if (fn_blk_mq_check_hang) + fn_blk_mq_check_hang(rq, priv, reserved); +} + +static inline int iter_all_rq(struct request_queue *q, blk_mq_rq_iter fn, void *data) +{ + fn_blk_mq_check_hang = fn; + + sym_blk_mq_queue_tag_busy_iter(q, blk_mq_check_rq_hang, data); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-22.2.al7.x86_64/nvme.h b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-22.2.al7.x86_64/nvme.h new file mode 100644 index 0000000000000000000000000000000000000000..9f3286080f831945117862601f668da7376e10c4 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-22.2.al7.x86_64/nvme.h @@ -0,0 +1,61 @@ +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct nvme_queue { + struct device *q_dmadev; + void *nvme_dev; //struct nvme_dev *dev; + spinlock_t sq_lock; + struct nvme_command *sq_cmds; + struct nvme_command __iomem *sq_cmds_io; + spinlock_t cq_lock ____cacheline_aligned_in_smp; + volatile struct nvme_completion *cqes; + struct blk_mq_tags **tags; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_tail; + u16 last_sq_tail; + u16 cq_head; + u16 last_cq_head; + u16 qid; + u8 cq_phase; + u8 polled; + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; +}; + +static int get_sq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int tail = nvmeq->sq_tail; + struct nvme_command cmd; + + do { + if (nvmeq->sq_cmds_io) { + memcpy_toio(&cmd, &nvmeq->sq_cmds_io[tail], sizeof(struct nvme_command)); + if (cmd.common.command_id == rq->tag) + return tail; + } + else if (nvmeq->sq_cmds[tail].common.command_id == rq->tag) + return tail; + } while (--tail >= 0); + return -1; +} + +static unsigned long get_cmd_ctx(struct nvme_queue *nvmeq, struct request *rq) +{ + //struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-22.2.al7.x86_64/virtio_blk.h b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-22.2.al7.x86_64/virtio_blk.h new file mode 100644 index 0000000000000000000000000000000000000000..853fa75e283274035567b387eb25b4f8e56a74cc --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-22.2.al7.x86_64/virtio_blk.h @@ -0,0 +1,268 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +#define VQ_NAME_LEN 16 + +struct virtio_blk_vq { + struct virtqueue *vq; + spinlock_t lock; + char 
name[VQ_NAME_LEN]; +} ____cacheline_aligned_in_smp; + +struct vring_desc_state { + void *data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ +}; + +struct vring_virtqueue { + struct virtqueue vq; + + /* Actual memory layout for this queue */ + struct vring vring; + + /* Can we use weak barriers? */ + bool weak_barriers; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Host publishes avail event idx */ + bool event; + + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. */ + unsigned int num_added; + + /* Last used index we've seen. */ + u16 last_used_idx; + + /* Last written value to avail->flags */ + u16 avail_flags_shadow; + + /* Last written value to avail->idx in guest byte order */ + u16 avail_idx_shadow; + + /* How to notify other side. FIXME: commonalize hcalls! */ + bool (*notify)(struct virtqueue *vq); + + /* DMA, allocation, and size information */ + bool we_own_ring; + size_t queue_size_in_bytes; + dma_addr_t queue_dma_addr; + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; +#endif + + /* Per-descriptor state. */ + struct vring_desc_state desc_state[]; +}; + +struct virtio_blk { + /* + * This mutex must be held by anything that may run after + * virtblk_remove() sets vblk->vdev to NULL. + * + * blk-mq, virtqueue processing, and sysfs attribute code paths are + * shut down before vblk->vdev is set to NULL and therefore do not need + * to hold this mutex. + */ + struct mutex vdev_mutex; + struct virtio_device *vdev; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* Block layer tags. */ + struct blk_mq_tag_set tag_set; + + /* Process context for config space updates */ + struct work_struct config_work; + + /* + * Tracks references from block_device_operations open/release and + * virtio_driver probe/remove so this object can be freed once no + * longer in use. + */ + refcount_t refs; + + /* What host tells us, plus 2 for header & tailer. */ + unsigned int sg_elems; + + /* Ida index - used to track minor number allocations. 
*/ + int index; + + /* num of vqs */ + int num_vqs; + struct virtio_blk_vq *vqs; +}; + +struct virtblk_req { +#ifdef CONFIG_VIRTIO_BLK_SCSI + struct scsi_request sreq; /* for SCSI passthrough, must be first */ + u8 sense[SCSI_SENSE_BUFFERSIZE]; + struct virtio_scsi_inhdr in_hdr; +#endif + struct virtio_blk_outhdr out_hdr; + u8 status; + struct scatterlist sg[]; +}; +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +struct blk_mq_ctxs { + struct kobject kobj; + struct blk_mq_ctx __percpu *queue_ctx; +}; + +struct blk_mq_ctx { + struct { + spinlock_t lock; + struct list_head rq_list; + } ____cacheline_aligned_in_smp; + + unsigned int cpu; + unsigned short index_hw[HCTX_MAX_TYPES]; + /* incremented at dispatch time */ + unsigned long rq_dispatched[2]; + unsigned long rq_merged; + + /* incremented at completion time */ + unsigned long ____cacheline_aligned_in_smp rq_completed[2]; + + struct request_queue *queue; + struct blk_mq_ctxs *ctxs; + struct kobject kobj; +} ____cacheline_aligned_in_smp; + +struct blk_flush_queue { + unsigned int flush_queue_delayed:1; + unsigned int flush_pending_idx:1; + unsigned int flush_running_idx:1; + blk_status_t rq_status; + unsigned long flush_pending_since; + struct list_head flush_queue[2]; + struct list_head flush_data_in_flight; + struct request *flush_rq; + + /* + * flush_rq shares tag with this rq, both can't be active + * at the same time + */ + struct request *orig_rq; + spinlock_t mq_flush_lock; +}; + +static inline int enable_detect_flush_rq(void) +{ + return 1; +} + +static inline struct blk_mq_hw_ctx *blk_mq_map_queue_type(struct request_queue *q, + enum hctx_type type, + unsigned int cpu) +{ + return q->queue_hw_ctx[q->tag_set->map[type].mq_map[cpu]]; +} + +/* + * blk_mq_map_queue() - map (cmd_flags,type) to hardware queue + * @q: request queue + * @flags: request command flags + * @cpu: CPU + */ +static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, + unsigned int flags, + unsigned int cpu) +{ + enum hctx_type type = HCTX_TYPE_DEFAULT; + + if ((flags & REQ_HIPRI) && + q->tag_set->nr_maps > HCTX_TYPE_POLL && + q->tag_set->map[HCTX_TYPE_POLL].nr_queues && + test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) + type = HCTX_TYPE_POLL; + + else if (((flags & REQ_OP_MASK) == REQ_OP_READ) && + q->tag_set->nr_maps > HCTX_TYPE_READ && + q->tag_set->map[HCTX_TYPE_READ].nr_queues) + type = HCTX_TYPE_READ; + + return blk_mq_map_queue_type(q, type, cpu); +} + +static inline struct blk_mq_hw_ctx *blk_mq_get_hctx_byrq(struct request *rq) +{ + return blk_mq_map_queue(rq->q, rq->cmd_flags, rq->mq_ctx->cpu); +} + +static inline struct request *desc_state_data_to_req(struct virtqueue *vq, int head) +{ + void *data = to_vvq(vq)->desc_state[head].data; + return data ? blk_mq_rq_from_pdu(data) : NULL; +} + +static inline int get_rq_internal_tag(struct request *rq) +{ + return rq ? 
rq->internal_tag : -1; +} + +static inline unsigned long get_issue_driver_ns(struct request *rq) +{ + if (!rq) + return 0; + if (rq->io_start_time_ns) + return rq->io_start_time_ns; + if (rq->__deadline > rq->timeout) + return jiffies_to_usecs(rq->__deadline - rq->timeout) * 1000; + return 0; +} + +/* + * LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0) + */ +static inline u64 get_check_hang_time_ns(void) +{ + return ktime_get_ns(); +} + +//#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0) +extern fn_queue_tag_busy_iter sym_blk_mq_queue_tag_busy_iter; +//#endif +typedef void (*blk_mq_rq_iter)(struct request *, void *, bool); +static blk_mq_rq_iter fn_blk_mq_check_hang = NULL; +static void blk_mq_check_rq_hang(struct blk_mq_hw_ctx *hctx, + struct request *rq, void *priv, bool reserved) +{ + if (fn_blk_mq_check_hang) + fn_blk_mq_check_hang(rq, priv, reserved); +} + +static inline int iter_all_rq(struct request_queue *q, blk_mq_rq_iter fn, void *data) +{ + fn_blk_mq_check_hang = fn; + + sym_blk_mq_queue_tag_busy_iter(q, blk_mq_check_rq_hang, data); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-23.4.an8.x86_64/nvme.h b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-23.4.an8.x86_64/nvme.h new file mode 100644 index 0000000000000000000000000000000000000000..9f3286080f831945117862601f668da7376e10c4 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-23.4.an8.x86_64/nvme.h @@ -0,0 +1,61 @@ +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct nvme_queue { + struct device *q_dmadev; + void *nvme_dev; //struct nvme_dev *dev; + spinlock_t sq_lock; + struct nvme_command *sq_cmds; + struct nvme_command __iomem *sq_cmds_io; + spinlock_t cq_lock ____cacheline_aligned_in_smp; + volatile struct nvme_completion *cqes; + struct blk_mq_tags **tags; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_tail; + u16 last_sq_tail; + u16 cq_head; + u16 last_cq_head; + u16 qid; + u8 cq_phase; + u8 polled; + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; +}; + +static int get_sq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int tail = nvmeq->sq_tail; + struct nvme_command cmd; + + do { + if (nvmeq->sq_cmds_io) { + memcpy_toio(&cmd, &nvmeq->sq_cmds_io[tail], sizeof(struct nvme_command)); + if (cmd.common.command_id == rq->tag) + return tail; + } + else if (nvmeq->sq_cmds[tail].common.command_id == rq->tag) + return tail; + } while (--tail >= 0); + return -1; +} + +static unsigned long get_cmd_ctx(struct nvme_queue *nvmeq, struct request *rq) +{ + //struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-23.4.an8.x86_64/virtio_blk.h b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-23.4.an8.x86_64/virtio_blk.h new file mode 100644 index 0000000000000000000000000000000000000000..853fa75e283274035567b387eb25b4f8e56a74cc --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-23.4.an8.x86_64/virtio_blk.h @@ -0,0 +1,268 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +#define VQ_NAME_LEN 16 + +struct virtio_blk_vq { + struct virtqueue *vq; + spinlock_t lock; + char 
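+/*
+ * Queue name filled in by virtblk_probe(), e.g. "req.0", "req.1", ...
+ * (one entry per request virtqueue).
+ */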
name[VQ_NAME_LEN]; +} ____cacheline_aligned_in_smp; + +struct vring_desc_state { + void *data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ +}; + +struct vring_virtqueue { + struct virtqueue vq; + + /* Actual memory layout for this queue */ + struct vring vring; + + /* Can we use weak barriers? */ + bool weak_barriers; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Host publishes avail event idx */ + bool event; + + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. */ + unsigned int num_added; + + /* Last used index we've seen. */ + u16 last_used_idx; + + /* Last written value to avail->flags */ + u16 avail_flags_shadow; + + /* Last written value to avail->idx in guest byte order */ + u16 avail_idx_shadow; + + /* How to notify other side. FIXME: commonalize hcalls! */ + bool (*notify)(struct virtqueue *vq); + + /* DMA, allocation, and size information */ + bool we_own_ring; + size_t queue_size_in_bytes; + dma_addr_t queue_dma_addr; + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; +#endif + + /* Per-descriptor state. */ + struct vring_desc_state desc_state[]; +}; + +struct virtio_blk { + /* + * This mutex must be held by anything that may run after + * virtblk_remove() sets vblk->vdev to NULL. + * + * blk-mq, virtqueue processing, and sysfs attribute code paths are + * shut down before vblk->vdev is set to NULL and therefore do not need + * to hold this mutex. + */ + struct mutex vdev_mutex; + struct virtio_device *vdev; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* Block layer tags. */ + struct blk_mq_tag_set tag_set; + + /* Process context for config space updates */ + struct work_struct config_work; + + /* + * Tracks references from block_device_operations open/release and + * virtio_driver probe/remove so this object can be freed once no + * longer in use. + */ + refcount_t refs; + + /* What host tells us, plus 2 for header & tailer. */ + unsigned int sg_elems; + + /* Ida index - used to track minor number allocations. 
*/ + int index; + + /* num of vqs */ + int num_vqs; + struct virtio_blk_vq *vqs; +}; + +struct virtblk_req { +#ifdef CONFIG_VIRTIO_BLK_SCSI + struct scsi_request sreq; /* for SCSI passthrough, must be first */ + u8 sense[SCSI_SENSE_BUFFERSIZE]; + struct virtio_scsi_inhdr in_hdr; +#endif + struct virtio_blk_outhdr out_hdr; + u8 status; + struct scatterlist sg[]; +}; +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +struct blk_mq_ctxs { + struct kobject kobj; + struct blk_mq_ctx __percpu *queue_ctx; +}; + +struct blk_mq_ctx { + struct { + spinlock_t lock; + struct list_head rq_list; + } ____cacheline_aligned_in_smp; + + unsigned int cpu; + unsigned short index_hw[HCTX_MAX_TYPES]; + /* incremented at dispatch time */ + unsigned long rq_dispatched[2]; + unsigned long rq_merged; + + /* incremented at completion time */ + unsigned long ____cacheline_aligned_in_smp rq_completed[2]; + + struct request_queue *queue; + struct blk_mq_ctxs *ctxs; + struct kobject kobj; +} ____cacheline_aligned_in_smp; + +struct blk_flush_queue { + unsigned int flush_queue_delayed:1; + unsigned int flush_pending_idx:1; + unsigned int flush_running_idx:1; + blk_status_t rq_status; + unsigned long flush_pending_since; + struct list_head flush_queue[2]; + struct list_head flush_data_in_flight; + struct request *flush_rq; + + /* + * flush_rq shares tag with this rq, both can't be active + * at the same time + */ + struct request *orig_rq; + spinlock_t mq_flush_lock; +}; + +static inline int enable_detect_flush_rq(void) +{ + return 1; +} + +static inline struct blk_mq_hw_ctx *blk_mq_map_queue_type(struct request_queue *q, + enum hctx_type type, + unsigned int cpu) +{ + return q->queue_hw_ctx[q->tag_set->map[type].mq_map[cpu]]; +} + +/* + * blk_mq_map_queue() - map (cmd_flags,type) to hardware queue + * @q: request queue + * @flags: request command flags + * @cpu: CPU + */ +static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, + unsigned int flags, + unsigned int cpu) +{ + enum hctx_type type = HCTX_TYPE_DEFAULT; + + if ((flags & REQ_HIPRI) && + q->tag_set->nr_maps > HCTX_TYPE_POLL && + q->tag_set->map[HCTX_TYPE_POLL].nr_queues && + test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) + type = HCTX_TYPE_POLL; + + else if (((flags & REQ_OP_MASK) == REQ_OP_READ) && + q->tag_set->nr_maps > HCTX_TYPE_READ && + q->tag_set->map[HCTX_TYPE_READ].nr_queues) + type = HCTX_TYPE_READ; + + return blk_mq_map_queue_type(q, type, cpu); +} + +static inline struct blk_mq_hw_ctx *blk_mq_get_hctx_byrq(struct request *rq) +{ + return blk_mq_map_queue(rq->q, rq->cmd_flags, rq->mq_ctx->cpu); +} + +static inline struct request *desc_state_data_to_req(struct virtqueue *vq, int head) +{ + void *data = to_vvq(vq)->desc_state[head].data; + return data ? blk_mq_rq_from_pdu(data) : NULL; +} + +static inline int get_rq_internal_tag(struct request *rq) +{ + return rq ? 
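+/*
+ * Usage sketch for the hang-check helpers defined below (the callback,
+ * context struct and field names here are illustrative only, not part of
+ * this header):
+ *
+ *	static void check_rq(struct request *rq, void *priv, bool reserved)
+ *	{
+ *		struct my_hang_ctx *ctx = priv;		/* hypothetical context */
+ *		unsigned long issue_ns = get_issue_driver_ns(rq);
+ *
+ *		if (issue_ns &&
+ *		    get_check_hang_time_ns() - issue_ns > ctx->thresh_ns)
+ *			ctx->nr_hang++;
+ *	}
+ *
+ *	iter_all_rq(q, check_rq, &my_ctx);
+ */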
rq->internal_tag : -1; +} + +static inline unsigned long get_issue_driver_ns(struct request *rq) +{ + if (!rq) + return 0; + if (rq->io_start_time_ns) + return rq->io_start_time_ns; + if (rq->__deadline > rq->timeout) + return jiffies_to_usecs(rq->__deadline - rq->timeout) * 1000; + return 0; +} + +/* + * LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0) + */ +static inline u64 get_check_hang_time_ns(void) +{ + return ktime_get_ns(); +} + +//#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0) +extern fn_queue_tag_busy_iter sym_blk_mq_queue_tag_busy_iter; +//#endif +typedef void (*blk_mq_rq_iter)(struct request *, void *, bool); +static blk_mq_rq_iter fn_blk_mq_check_hang = NULL; +static void blk_mq_check_rq_hang(struct blk_mq_hw_ctx *hctx, + struct request *rq, void *priv, bool reserved) +{ + if (fn_blk_mq_check_hang) + fn_blk_mq_check_hang(rq, priv, reserved); +} + +static inline int iter_all_rq(struct request_queue *q, blk_mq_rq_iter fn, void *data) +{ + fn_blk_mq_check_hang = fn; + + sym_blk_mq_queue_tag_busy_iter(q, blk_mq_check_rq_hang, data); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-23.al7.x86_64/nvme.h b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-23.al7.x86_64/nvme.h new file mode 100644 index 0000000000000000000000000000000000000000..9f3286080f831945117862601f668da7376e10c4 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-23.al7.x86_64/nvme.h @@ -0,0 +1,61 @@ +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct nvme_queue { + struct device *q_dmadev; + void *nvme_dev; //struct nvme_dev *dev; + spinlock_t sq_lock; + struct nvme_command *sq_cmds; + struct nvme_command __iomem *sq_cmds_io; + spinlock_t cq_lock ____cacheline_aligned_in_smp; + volatile struct nvme_completion *cqes; + struct blk_mq_tags **tags; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_tail; + u16 last_sq_tail; + u16 cq_head; + u16 last_cq_head; + u16 qid; + u8 cq_phase; + u8 polled; + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; +}; + +static int get_sq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int tail = nvmeq->sq_tail; + struct nvme_command cmd; + + do { + if (nvmeq->sq_cmds_io) { + memcpy_toio(&cmd, &nvmeq->sq_cmds_io[tail], sizeof(struct nvme_command)); + if (cmd.common.command_id == rq->tag) + return tail; + } + else if (nvmeq->sq_cmds[tail].common.command_id == rq->tag) + return tail; + } while (--tail >= 0); + return -1; +} + +static unsigned long get_cmd_ctx(struct nvme_queue *nvmeq, struct request *rq) +{ + //struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-23.al7.x86_64/virtio_blk.h b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-23.al7.x86_64/virtio_blk.h new file mode 100644 index 0000000000000000000000000000000000000000..853fa75e283274035567b387eb25b4f8e56a74cc --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-23.al7.x86_64/virtio_blk.h @@ -0,0 +1,268 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +#define VQ_NAME_LEN 16 + +struct virtio_blk_vq { + struct virtqueue *vq; + spinlock_t lock; + char name[VQ_NAME_LEN]; +} 
____cacheline_aligned_in_smp; + +struct vring_desc_state { + void *data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ +}; + +struct vring_virtqueue { + struct virtqueue vq; + + /* Actual memory layout for this queue */ + struct vring vring; + + /* Can we use weak barriers? */ + bool weak_barriers; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Host publishes avail event idx */ + bool event; + + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. */ + unsigned int num_added; + + /* Last used index we've seen. */ + u16 last_used_idx; + + /* Last written value to avail->flags */ + u16 avail_flags_shadow; + + /* Last written value to avail->idx in guest byte order */ + u16 avail_idx_shadow; + + /* How to notify other side. FIXME: commonalize hcalls! */ + bool (*notify)(struct virtqueue *vq); + + /* DMA, allocation, and size information */ + bool we_own_ring; + size_t queue_size_in_bytes; + dma_addr_t queue_dma_addr; + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; +#endif + + /* Per-descriptor state. */ + struct vring_desc_state desc_state[]; +}; + +struct virtio_blk { + /* + * This mutex must be held by anything that may run after + * virtblk_remove() sets vblk->vdev to NULL. + * + * blk-mq, virtqueue processing, and sysfs attribute code paths are + * shut down before vblk->vdev is set to NULL and therefore do not need + * to hold this mutex. + */ + struct mutex vdev_mutex; + struct virtio_device *vdev; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* Block layer tags. */ + struct blk_mq_tag_set tag_set; + + /* Process context for config space updates */ + struct work_struct config_work; + + /* + * Tracks references from block_device_operations open/release and + * virtio_driver probe/remove so this object can be freed once no + * longer in use. + */ + refcount_t refs; + + /* What host tells us, plus 2 for header & tailer. */ + unsigned int sg_elems; + + /* Ida index - used to track minor number allocations. 
*/ + int index; + + /* num of vqs */ + int num_vqs; + struct virtio_blk_vq *vqs; +}; + +struct virtblk_req { +#ifdef CONFIG_VIRTIO_BLK_SCSI + struct scsi_request sreq; /* for SCSI passthrough, must be first */ + u8 sense[SCSI_SENSE_BUFFERSIZE]; + struct virtio_scsi_inhdr in_hdr; +#endif + struct virtio_blk_outhdr out_hdr; + u8 status; + struct scatterlist sg[]; +}; +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +struct blk_mq_ctxs { + struct kobject kobj; + struct blk_mq_ctx __percpu *queue_ctx; +}; + +struct blk_mq_ctx { + struct { + spinlock_t lock; + struct list_head rq_list; + } ____cacheline_aligned_in_smp; + + unsigned int cpu; + unsigned short index_hw[HCTX_MAX_TYPES]; + /* incremented at dispatch time */ + unsigned long rq_dispatched[2]; + unsigned long rq_merged; + + /* incremented at completion time */ + unsigned long ____cacheline_aligned_in_smp rq_completed[2]; + + struct request_queue *queue; + struct blk_mq_ctxs *ctxs; + struct kobject kobj; +} ____cacheline_aligned_in_smp; + +struct blk_flush_queue { + unsigned int flush_queue_delayed:1; + unsigned int flush_pending_idx:1; + unsigned int flush_running_idx:1; + blk_status_t rq_status; + unsigned long flush_pending_since; + struct list_head flush_queue[2]; + struct list_head flush_data_in_flight; + struct request *flush_rq; + + /* + * flush_rq shares tag with this rq, both can't be active + * at the same time + */ + struct request *orig_rq; + spinlock_t mq_flush_lock; +}; + +static inline int enable_detect_flush_rq(void) +{ + return 1; +} + +static inline struct blk_mq_hw_ctx *blk_mq_map_queue_type(struct request_queue *q, + enum hctx_type type, + unsigned int cpu) +{ + return q->queue_hw_ctx[q->tag_set->map[type].mq_map[cpu]]; +} + +/* + * blk_mq_map_queue() - map (cmd_flags,type) to hardware queue + * @q: request queue + * @flags: request command flags + * @cpu: CPU + */ +static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, + unsigned int flags, + unsigned int cpu) +{ + enum hctx_type type = HCTX_TYPE_DEFAULT; + + if ((flags & REQ_HIPRI) && + q->tag_set->nr_maps > HCTX_TYPE_POLL && + q->tag_set->map[HCTX_TYPE_POLL].nr_queues && + test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) + type = HCTX_TYPE_POLL; + + else if (((flags & REQ_OP_MASK) == REQ_OP_READ) && + q->tag_set->nr_maps > HCTX_TYPE_READ && + q->tag_set->map[HCTX_TYPE_READ].nr_queues) + type = HCTX_TYPE_READ; + + return blk_mq_map_queue_type(q, type, cpu); +} + +static inline struct blk_mq_hw_ctx *blk_mq_get_hctx_byrq(struct request *rq) +{ + return blk_mq_map_queue(rq->q, rq->cmd_flags, rq->mq_ctx->cpu); +} + +static inline struct request *desc_state_data_to_req(struct virtqueue *vq, int head) +{ + void *data = to_vvq(vq)->desc_state[head].data; + return data ? blk_mq_rq_from_pdu(data) : NULL; +} + +static inline int get_rq_internal_tag(struct request *rq) +{ + return rq ? 
rq->internal_tag : -1; +} + +static inline unsigned long get_issue_driver_ns(struct request *rq) +{ + if (!rq) + return 0; + if (rq->io_start_time_ns) + return rq->io_start_time_ns; + if (rq->__deadline > rq->timeout) + return jiffies_to_usecs(rq->__deadline - rq->timeout) * 1000; + return 0; +} + +/* + * LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0) + */ +static inline u64 get_check_hang_time_ns(void) +{ + return ktime_get_ns(); +} + +//#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0) +extern fn_queue_tag_busy_iter sym_blk_mq_queue_tag_busy_iter; +//#endif +typedef void (*blk_mq_rq_iter)(struct request *, void *, bool); +static blk_mq_rq_iter fn_blk_mq_check_hang = NULL; +static void blk_mq_check_rq_hang(struct blk_mq_hw_ctx *hctx, + struct request *rq, void *priv, bool reserved) +{ + if (fn_blk_mq_check_hang) + fn_blk_mq_check_hang(rq, priv, reserved); +} + +static inline int iter_all_rq(struct request_queue *q, blk_mq_rq_iter fn, void *data) +{ + fn_blk_mq_check_hang = fn; + + sym_blk_mq_queue_tag_busy_iter(q, blk_mq_check_rq_hang, data); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-24.1.al7.x86_64/nvme.h b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-24.1.al7.x86_64/nvme.h new file mode 100644 index 0000000000000000000000000000000000000000..9f3286080f831945117862601f668da7376e10c4 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-24.1.al7.x86_64/nvme.h @@ -0,0 +1,61 @@ +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct nvme_queue { + struct device *q_dmadev; + void *nvme_dev; //struct nvme_dev *dev; + spinlock_t sq_lock; + struct nvme_command *sq_cmds; + struct nvme_command __iomem *sq_cmds_io; + spinlock_t cq_lock ____cacheline_aligned_in_smp; + volatile struct nvme_completion *cqes; + struct blk_mq_tags **tags; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_tail; + u16 last_sq_tail; + u16 cq_head; + u16 last_cq_head; + u16 qid; + u8 cq_phase; + u8 polled; + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; +}; + +static int get_sq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int tail = nvmeq->sq_tail; + struct nvme_command cmd; + + do { + if (nvmeq->sq_cmds_io) { + memcpy_toio(&cmd, &nvmeq->sq_cmds_io[tail], sizeof(struct nvme_command)); + if (cmd.common.command_id == rq->tag) + return tail; + } + else if (nvmeq->sq_cmds[tail].common.command_id == rq->tag) + return tail; + } while (--tail >= 0); + return -1; +} + +static unsigned long get_cmd_ctx(struct nvme_queue *nvmeq, struct request *rq) +{ + //struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-24.1.al7.x86_64/virtio_blk.h b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-24.1.al7.x86_64/virtio_blk.h new file mode 100644 index 0000000000000000000000000000000000000000..853fa75e283274035567b387eb25b4f8e56a74cc --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-24.1.al7.x86_64/virtio_blk.h @@ -0,0 +1,268 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +#define VQ_NAME_LEN 16 + +struct virtio_blk_vq { + struct virtqueue *vq; + spinlock_t lock; + char 
name[VQ_NAME_LEN]; +} ____cacheline_aligned_in_smp; + +struct vring_desc_state { + void *data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ +}; + +struct vring_virtqueue { + struct virtqueue vq; + + /* Actual memory layout for this queue */ + struct vring vring; + + /* Can we use weak barriers? */ + bool weak_barriers; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Host publishes avail event idx */ + bool event; + + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. */ + unsigned int num_added; + + /* Last used index we've seen. */ + u16 last_used_idx; + + /* Last written value to avail->flags */ + u16 avail_flags_shadow; + + /* Last written value to avail->idx in guest byte order */ + u16 avail_idx_shadow; + + /* How to notify other side. FIXME: commonalize hcalls! */ + bool (*notify)(struct virtqueue *vq); + + /* DMA, allocation, and size information */ + bool we_own_ring; + size_t queue_size_in_bytes; + dma_addr_t queue_dma_addr; + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; +#endif + + /* Per-descriptor state. */ + struct vring_desc_state desc_state[]; +}; + +struct virtio_blk { + /* + * This mutex must be held by anything that may run after + * virtblk_remove() sets vblk->vdev to NULL. + * + * blk-mq, virtqueue processing, and sysfs attribute code paths are + * shut down before vblk->vdev is set to NULL and therefore do not need + * to hold this mutex. + */ + struct mutex vdev_mutex; + struct virtio_device *vdev; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* Block layer tags. */ + struct blk_mq_tag_set tag_set; + + /* Process context for config space updates */ + struct work_struct config_work; + + /* + * Tracks references from block_device_operations open/release and + * virtio_driver probe/remove so this object can be freed once no + * longer in use. + */ + refcount_t refs; + + /* What host tells us, plus 2 for header & tailer. */ + unsigned int sg_elems; + + /* Ida index - used to track minor number allocations. 
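+	 * The same index also encodes the vdX disk name and the base minor number.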
*/ + int index; + + /* num of vqs */ + int num_vqs; + struct virtio_blk_vq *vqs; +}; + +struct virtblk_req { +#ifdef CONFIG_VIRTIO_BLK_SCSI + struct scsi_request sreq; /* for SCSI passthrough, must be first */ + u8 sense[SCSI_SENSE_BUFFERSIZE]; + struct virtio_scsi_inhdr in_hdr; +#endif + struct virtio_blk_outhdr out_hdr; + u8 status; + struct scatterlist sg[]; +}; +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +struct blk_mq_ctxs { + struct kobject kobj; + struct blk_mq_ctx __percpu *queue_ctx; +}; + +struct blk_mq_ctx { + struct { + spinlock_t lock; + struct list_head rq_list; + } ____cacheline_aligned_in_smp; + + unsigned int cpu; + unsigned short index_hw[HCTX_MAX_TYPES]; + /* incremented at dispatch time */ + unsigned long rq_dispatched[2]; + unsigned long rq_merged; + + /* incremented at completion time */ + unsigned long ____cacheline_aligned_in_smp rq_completed[2]; + + struct request_queue *queue; + struct blk_mq_ctxs *ctxs; + struct kobject kobj; +} ____cacheline_aligned_in_smp; + +struct blk_flush_queue { + unsigned int flush_queue_delayed:1; + unsigned int flush_pending_idx:1; + unsigned int flush_running_idx:1; + blk_status_t rq_status; + unsigned long flush_pending_since; + struct list_head flush_queue[2]; + struct list_head flush_data_in_flight; + struct request *flush_rq; + + /* + * flush_rq shares tag with this rq, both can't be active + * at the same time + */ + struct request *orig_rq; + spinlock_t mq_flush_lock; +}; + +static inline int enable_detect_flush_rq(void) +{ + return 1; +} + +static inline struct blk_mq_hw_ctx *blk_mq_map_queue_type(struct request_queue *q, + enum hctx_type type, + unsigned int cpu) +{ + return q->queue_hw_ctx[q->tag_set->map[type].mq_map[cpu]]; +} + +/* + * blk_mq_map_queue() - map (cmd_flags,type) to hardware queue + * @q: request queue + * @flags: request command flags + * @cpu: CPU + */ +static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, + unsigned int flags, + unsigned int cpu) +{ + enum hctx_type type = HCTX_TYPE_DEFAULT; + + if ((flags & REQ_HIPRI) && + q->tag_set->nr_maps > HCTX_TYPE_POLL && + q->tag_set->map[HCTX_TYPE_POLL].nr_queues && + test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) + type = HCTX_TYPE_POLL; + + else if (((flags & REQ_OP_MASK) == REQ_OP_READ) && + q->tag_set->nr_maps > HCTX_TYPE_READ && + q->tag_set->map[HCTX_TYPE_READ].nr_queues) + type = HCTX_TYPE_READ; + + return blk_mq_map_queue_type(q, type, cpu); +} + +static inline struct blk_mq_hw_ctx *blk_mq_get_hctx_byrq(struct request *rq) +{ + return blk_mq_map_queue(rq->q, rq->cmd_flags, rq->mq_ctx->cpu); +} + +static inline struct request *desc_state_data_to_req(struct virtqueue *vq, int head) +{ + void *data = to_vvq(vq)->desc_state[head].data; + return data ? blk_mq_rq_from_pdu(data) : NULL; +} + +static inline int get_rq_internal_tag(struct request *rq) +{ + return rq ? 
rq->internal_tag : -1; +} + +static inline unsigned long get_issue_driver_ns(struct request *rq) +{ + if (!rq) + return 0; + if (rq->io_start_time_ns) + return rq->io_start_time_ns; + if (rq->__deadline > rq->timeout) + return jiffies_to_usecs(rq->__deadline - rq->timeout) * 1000; + return 0; +} + +/* + * LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0) + */ +static inline u64 get_check_hang_time_ns(void) +{ + return ktime_get_ns(); +} + +//#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0) +extern fn_queue_tag_busy_iter sym_blk_mq_queue_tag_busy_iter; +//#endif +typedef void (*blk_mq_rq_iter)(struct request *, void *, bool); +static blk_mq_rq_iter fn_blk_mq_check_hang = NULL; +static void blk_mq_check_rq_hang(struct blk_mq_hw_ctx *hctx, + struct request *rq, void *priv, bool reserved) +{ + if (fn_blk_mq_check_hang) + fn_blk_mq_check_hang(rq, priv, reserved); +} + +static inline int iter_all_rq(struct request_queue *q, blk_mq_rq_iter fn, void *data) +{ + fn_blk_mq_check_hang = fn; + + sym_blk_mq_queue_tag_busy_iter(q, blk_mq_check_rq_hang, data); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-24.8.an8.x86_64/nvme.h b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-24.8.an8.x86_64/nvme.h new file mode 100644 index 0000000000000000000000000000000000000000..9f3286080f831945117862601f668da7376e10c4 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-24.8.an8.x86_64/nvme.h @@ -0,0 +1,61 @@ +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct nvme_queue { + struct device *q_dmadev; + void *nvme_dev; //struct nvme_dev *dev; + spinlock_t sq_lock; + struct nvme_command *sq_cmds; + struct nvme_command __iomem *sq_cmds_io; + spinlock_t cq_lock ____cacheline_aligned_in_smp; + volatile struct nvme_completion *cqes; + struct blk_mq_tags **tags; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_tail; + u16 last_sq_tail; + u16 cq_head; + u16 last_cq_head; + u16 qid; + u8 cq_phase; + u8 polled; + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; +}; + +static int get_sq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int tail = nvmeq->sq_tail; + struct nvme_command cmd; + + do { + if (nvmeq->sq_cmds_io) { + memcpy_toio(&cmd, &nvmeq->sq_cmds_io[tail], sizeof(struct nvme_command)); + if (cmd.common.command_id == rq->tag) + return tail; + } + else if (nvmeq->sq_cmds[tail].common.command_id == rq->tag) + return tail; + } while (--tail >= 0); + return -1; +} + +static unsigned long get_cmd_ctx(struct nvme_queue *nvmeq, struct request *rq) +{ + //struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-24.8.an8.x86_64/virtio_blk.h b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-24.8.an8.x86_64/virtio_blk.h new file mode 100644 index 0000000000000000000000000000000000000000..853fa75e283274035567b387eb25b4f8e56a74cc --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-24.8.an8.x86_64/virtio_blk.h @@ -0,0 +1,268 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +#define VQ_NAME_LEN 16 + +struct virtio_blk_vq { + struct virtqueue *vq; + spinlock_t lock; + char 
name[VQ_NAME_LEN]; +} ____cacheline_aligned_in_smp; + +struct vring_desc_state { + void *data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ +}; + +struct vring_virtqueue { + struct virtqueue vq; + + /* Actual memory layout for this queue */ + struct vring vring; + + /* Can we use weak barriers? */ + bool weak_barriers; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Host publishes avail event idx */ + bool event; + + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. */ + unsigned int num_added; + + /* Last used index we've seen. */ + u16 last_used_idx; + + /* Last written value to avail->flags */ + u16 avail_flags_shadow; + + /* Last written value to avail->idx in guest byte order */ + u16 avail_idx_shadow; + + /* How to notify other side. FIXME: commonalize hcalls! */ + bool (*notify)(struct virtqueue *vq); + + /* DMA, allocation, and size information */ + bool we_own_ring; + size_t queue_size_in_bytes; + dma_addr_t queue_dma_addr; + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; +#endif + + /* Per-descriptor state. */ + struct vring_desc_state desc_state[]; +}; + +struct virtio_blk { + /* + * This mutex must be held by anything that may run after + * virtblk_remove() sets vblk->vdev to NULL. + * + * blk-mq, virtqueue processing, and sysfs attribute code paths are + * shut down before vblk->vdev is set to NULL and therefore do not need + * to hold this mutex. + */ + struct mutex vdev_mutex; + struct virtio_device *vdev; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* Block layer tags. */ + struct blk_mq_tag_set tag_set; + + /* Process context for config space updates */ + struct work_struct config_work; + + /* + * Tracks references from block_device_operations open/release and + * virtio_driver probe/remove so this object can be freed once no + * longer in use. + */ + refcount_t refs; + + /* What host tells us, plus 2 for header & tailer. */ + unsigned int sg_elems; + + /* Ida index - used to track minor number allocations. 
*/ + int index; + + /* num of vqs */ + int num_vqs; + struct virtio_blk_vq *vqs; +}; + +struct virtblk_req { +#ifdef CONFIG_VIRTIO_BLK_SCSI + struct scsi_request sreq; /* for SCSI passthrough, must be first */ + u8 sense[SCSI_SENSE_BUFFERSIZE]; + struct virtio_scsi_inhdr in_hdr; +#endif + struct virtio_blk_outhdr out_hdr; + u8 status; + struct scatterlist sg[]; +}; +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +struct blk_mq_ctxs { + struct kobject kobj; + struct blk_mq_ctx __percpu *queue_ctx; +}; + +struct blk_mq_ctx { + struct { + spinlock_t lock; + struct list_head rq_list; + } ____cacheline_aligned_in_smp; + + unsigned int cpu; + unsigned short index_hw[HCTX_MAX_TYPES]; + /* incremented at dispatch time */ + unsigned long rq_dispatched[2]; + unsigned long rq_merged; + + /* incremented at completion time */ + unsigned long ____cacheline_aligned_in_smp rq_completed[2]; + + struct request_queue *queue; + struct blk_mq_ctxs *ctxs; + struct kobject kobj; +} ____cacheline_aligned_in_smp; + +struct blk_flush_queue { + unsigned int flush_queue_delayed:1; + unsigned int flush_pending_idx:1; + unsigned int flush_running_idx:1; + blk_status_t rq_status; + unsigned long flush_pending_since; + struct list_head flush_queue[2]; + struct list_head flush_data_in_flight; + struct request *flush_rq; + + /* + * flush_rq shares tag with this rq, both can't be active + * at the same time + */ + struct request *orig_rq; + spinlock_t mq_flush_lock; +}; + +static inline int enable_detect_flush_rq(void) +{ + return 1; +} + +static inline struct blk_mq_hw_ctx *blk_mq_map_queue_type(struct request_queue *q, + enum hctx_type type, + unsigned int cpu) +{ + return q->queue_hw_ctx[q->tag_set->map[type].mq_map[cpu]]; +} + +/* + * blk_mq_map_queue() - map (cmd_flags,type) to hardware queue + * @q: request queue + * @flags: request command flags + * @cpu: CPU + */ +static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, + unsigned int flags, + unsigned int cpu) +{ + enum hctx_type type = HCTX_TYPE_DEFAULT; + + if ((flags & REQ_HIPRI) && + q->tag_set->nr_maps > HCTX_TYPE_POLL && + q->tag_set->map[HCTX_TYPE_POLL].nr_queues && + test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) + type = HCTX_TYPE_POLL; + + else if (((flags & REQ_OP_MASK) == REQ_OP_READ) && + q->tag_set->nr_maps > HCTX_TYPE_READ && + q->tag_set->map[HCTX_TYPE_READ].nr_queues) + type = HCTX_TYPE_READ; + + return blk_mq_map_queue_type(q, type, cpu); +} + +static inline struct blk_mq_hw_ctx *blk_mq_get_hctx_byrq(struct request *rq) +{ + return blk_mq_map_queue(rq->q, rq->cmd_flags, rq->mq_ctx->cpu); +} + +static inline struct request *desc_state_data_to_req(struct virtqueue *vq, int head) +{ + void *data = to_vvq(vq)->desc_state[head].data; + return data ? blk_mq_rq_from_pdu(data) : NULL; +} + +static inline int get_rq_internal_tag(struct request *rq) +{ + return rq ? 
rq->internal_tag : -1; +} + +static inline unsigned long get_issue_driver_ns(struct request *rq) +{ + if (!rq) + return 0; + if (rq->io_start_time_ns) + return rq->io_start_time_ns; + if (rq->__deadline > rq->timeout) + return jiffies_to_usecs(rq->__deadline - rq->timeout) * 1000; + return 0; +} + +/* + * LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0) + */ +static inline u64 get_check_hang_time_ns(void) +{ + return ktime_get_ns(); +} + +//#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0) +extern fn_queue_tag_busy_iter sym_blk_mq_queue_tag_busy_iter; +//#endif +typedef void (*blk_mq_rq_iter)(struct request *, void *, bool); +static blk_mq_rq_iter fn_blk_mq_check_hang = NULL; +static void blk_mq_check_rq_hang(struct blk_mq_hw_ctx *hctx, + struct request *rq, void *priv, bool reserved) +{ + if (fn_blk_mq_check_hang) + fn_blk_mq_check_hang(rq, priv, reserved); +} + +static inline int iter_all_rq(struct request_queue *q, blk_mq_rq_iter fn, void *data) +{ + fn_blk_mq_check_hang = fn; + + sym_blk_mq_queue_tag_busy_iter(q, blk_mq_check_rq_hang, data); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-24.al7.x86_64/nvme.h b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-24.al7.x86_64/nvme.h new file mode 100644 index 0000000000000000000000000000000000000000..9f3286080f831945117862601f668da7376e10c4 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-24.al7.x86_64/nvme.h @@ -0,0 +1,61 @@ +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct nvme_queue { + struct device *q_dmadev; + void *nvme_dev; //struct nvme_dev *dev; + spinlock_t sq_lock; + struct nvme_command *sq_cmds; + struct nvme_command __iomem *sq_cmds_io; + spinlock_t cq_lock ____cacheline_aligned_in_smp; + volatile struct nvme_completion *cqes; + struct blk_mq_tags **tags; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_tail; + u16 last_sq_tail; + u16 cq_head; + u16 last_cq_head; + u16 qid; + u8 cq_phase; + u8 polled; + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; +}; + +static int get_sq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int tail = nvmeq->sq_tail; + struct nvme_command cmd; + + do { + if (nvmeq->sq_cmds_io) { + memcpy_toio(&cmd, &nvmeq->sq_cmds_io[tail], sizeof(struct nvme_command)); + if (cmd.common.command_id == rq->tag) + return tail; + } + else if (nvmeq->sq_cmds[tail].common.command_id == rq->tag) + return tail; + } while (--tail >= 0); + return -1; +} + +static unsigned long get_cmd_ctx(struct nvme_queue *nvmeq, struct request *rq) +{ + //struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-24.al7.x86_64/virtio_blk.h b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-24.al7.x86_64/virtio_blk.h new file mode 100644 index 0000000000000000000000000000000000000000..42192d6dd9d4f8863943e9a40e1d00e979b937f2 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-24.al7.x86_64/virtio_blk.h @@ -0,0 +1,265 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +#define VQ_NAME_LEN 16 + +struct virtio_blk_vq { + struct virtqueue *vq; + spinlock_t lock; + char name[VQ_NAME_LEN]; +} 
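+/*
+ * Per-vq state is cacheline aligned so that queues serviced on different
+ * CPUs do not false-share the lock.
+ */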
____cacheline_aligned_in_smp; + +struct vring_desc_state { + void *data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ +}; + +struct vring_virtqueue { + struct virtqueue vq; + + /* Actual memory layout for this queue */ + struct vring vring; + + /* Can we use weak barriers? */ + bool weak_barriers; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Host publishes avail event idx */ + bool event; + + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. */ + unsigned int num_added; + + /* Last used index we've seen. */ + u16 last_used_idx; + + /* Last written value to avail->flags */ + u16 avail_flags_shadow; + + /* Last written value to avail->idx in guest byte order */ + u16 avail_idx_shadow; + + /* How to notify other side. FIXME: commonalize hcalls! */ + bool (*notify)(struct virtqueue *vq); + + /* DMA, allocation, and size information */ + bool we_own_ring; + size_t queue_size_in_bytes; + dma_addr_t queue_dma_addr; + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; +#endif + + /* Per-descriptor state. */ + struct vring_desc_state desc_state[]; +}; + +struct virtio_blk { + /* + * This mutex must be held by anything that may run after + * virtblk_remove() sets vblk->vdev to NULL. + * + * blk-mq, virtqueue processing, and sysfs attribute code paths are + * shut down before vblk->vdev is set to NULL and therefore do not need + * to hold this mutex. + */ + struct mutex vdev_mutex; + struct virtio_device *vdev; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* Block layer tags. */ + struct blk_mq_tag_set tag_set; + + /* Process context for config space updates */ + struct work_struct config_work; + + /* + * Tracks references from block_device_operations open/release and + * virtio_driver probe/remove so this object can be freed once no + * longer in use. + */ + refcount_t refs; + + /* What host tells us, plus 2 for header & tailer. */ + unsigned int sg_elems; + + /* Ida index - used to track minor number allocations. 
*/ + int index; + + /* num of vqs */ + int num_vqs; + struct virtio_blk_vq *vqs; +}; + +struct virtblk_req { +#ifdef CONFIG_VIRTIO_BLK_SCSI + struct scsi_request sreq; /* for SCSI passthrough, must be first */ + u8 sense[SCSI_SENSE_BUFFERSIZE]; + struct virtio_scsi_inhdr in_hdr; +#endif + struct virtio_blk_outhdr out_hdr; + u8 status; + struct scatterlist sg[]; +}; +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +struct blk_mq_ctxs { + struct kobject kobj; + struct blk_mq_ctx __percpu *queue_ctx; +}; + +struct blk_mq_ctx { + struct { + spinlock_t lock; + struct list_head rq_list; + } ____cacheline_aligned_in_smp; + + unsigned int cpu; + unsigned short index_hw[HCTX_MAX_TYPES]; + /* incremented at dispatch time */ + unsigned long rq_dispatched[2]; + unsigned long rq_merged; + + /* incremented at completion time */ + unsigned long ____cacheline_aligned_in_smp rq_completed[2]; + + struct request_queue *queue; + struct blk_mq_ctxs *ctxs; + struct kobject kobj; +} ____cacheline_aligned_in_smp; + +struct blk_flush_queue { + unsigned int flush_queue_delayed:1; + unsigned int flush_pending_idx:1; + unsigned int flush_running_idx:1; + blk_status_t rq_status; + unsigned long flush_pending_since; + struct list_head flush_queue[2]; + struct list_head flush_data_in_flight; + struct request *flush_rq; + + /* + * flush_rq shares tag with this rq, both can't be active + * at the same time + */ + struct request *orig_rq; + spinlock_t mq_flush_lock; +}; + +static inline int enable_detect_flush_rq(void) +{ + return 1; +} + +static inline struct blk_mq_hw_ctx *blk_mq_map_queue_type(struct request_queue *q, + enum hctx_type type, + unsigned int cpu) +{ + return q->queue_hw_ctx[q->tag_set->map[type].mq_map[cpu]]; +} + +/* + * blk_mq_map_queue() - map (cmd_flags,type) to hardware queue + * @q: request queue + * @flags: request command flags + * @cpu: CPU + */ +static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, + unsigned int flags, + unsigned int cpu) +{ + enum hctx_type type = HCTX_TYPE_DEFAULT; + + if ((flags & REQ_HIPRI) && + q->tag_set->nr_maps > HCTX_TYPE_POLL && + q->tag_set->map[HCTX_TYPE_POLL].nr_queues && + test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) + type = HCTX_TYPE_POLL; + + else if (((flags & REQ_OP_MASK) == REQ_OP_READ) && + q->tag_set->nr_maps > HCTX_TYPE_READ && + q->tag_set->map[HCTX_TYPE_READ].nr_queues) + type = HCTX_TYPE_READ; + + return blk_mq_map_queue_type(q, type, cpu); +} + +static inline struct blk_mq_hw_ctx *blk_mq_get_hctx_byrq(struct request *rq) +{ + return blk_mq_map_queue(rq->q, rq->cmd_flags, rq->mq_ctx->cpu); +} + +static inline struct request *desc_state_data_to_req(struct virtqueue *vq, int head) +{ + void *data = to_vvq(vq)->desc_state[head].data; + return data ? blk_mq_rq_from_pdu(data) : NULL; +} + +static inline int get_rq_internal_tag(struct request *rq) +{ + return rq ? 
rq->internal_tag : -1; +} + +static inline unsigned long get_issue_driver_ns(struct request *rq) +{ + if (!rq) + return 0; + if (rq->io_start_time_ns) + return rq->io_start_time_ns; + if (rq->__deadline > rq->timeout) + return jiffies_to_usecs(rq->__deadline - rq->timeout) * 1000; + return 0; +} + +/* + * LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0) + */ +static inline u64 get_check_hang_time_ns(void) +{ + return ktime_get_ns(); +} + +extern fn_queue_tag_busy_iter sym_blk_mq_queue_tag_busy_iter; +typedef void (*blk_mq_rq_iter)(struct request *, void *, bool); +static blk_mq_rq_iter fn_blk_mq_check_hang = NULL; +static void blk_mq_check_rq_hang(struct blk_mq_hw_ctx *hctx, + struct request *rq, void *priv, bool reserved) +{ + if (fn_blk_mq_check_hang) + fn_blk_mq_check_hang(rq, priv, reserved); +} + +static inline int iter_all_rq(struct request_queue *q, blk_mq_rq_iter fn, void *data) +{ + fn_blk_mq_check_hang = fn; + sym_blk_mq_queue_tag_busy_iter(q, blk_mq_check_rq_hang, data); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-25.an8.x86_64/nvme.h b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-25.an8.x86_64/nvme.h new file mode 100644 index 0000000000000000000000000000000000000000..9f3286080f831945117862601f668da7376e10c4 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-25.an8.x86_64/nvme.h @@ -0,0 +1,61 @@ +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct nvme_queue { + struct device *q_dmadev; + void *nvme_dev; //struct nvme_dev *dev; + spinlock_t sq_lock; + struct nvme_command *sq_cmds; + struct nvme_command __iomem *sq_cmds_io; + spinlock_t cq_lock ____cacheline_aligned_in_smp; + volatile struct nvme_completion *cqes; + struct blk_mq_tags **tags; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_tail; + u16 last_sq_tail; + u16 cq_head; + u16 last_cq_head; + u16 qid; + u8 cq_phase; + u8 polled; + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; +}; + +static int get_sq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int tail = nvmeq->sq_tail; + struct nvme_command cmd; + + do { + if (nvmeq->sq_cmds_io) { + memcpy_toio(&cmd, &nvmeq->sq_cmds_io[tail], sizeof(struct nvme_command)); + if (cmd.common.command_id == rq->tag) + return tail; + } + else if (nvmeq->sq_cmds[tail].common.command_id == rq->tag) + return tail; + } while (--tail >= 0); + return -1; +} + +static unsigned long get_cmd_ctx(struct nvme_queue *nvmeq, struct request *rq) +{ + //struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-25.an8.x86_64/virtio_blk.h b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-25.an8.x86_64/virtio_blk.h new file mode 100644 index 0000000000000000000000000000000000000000..853fa75e283274035567b387eb25b4f8e56a74cc --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-25.an8.x86_64/virtio_blk.h @@ -0,0 +1,268 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +#define VQ_NAME_LEN 16 + +struct virtio_blk_vq { + struct virtqueue *vq; + spinlock_t lock; + char name[VQ_NAME_LEN]; +} ____cacheline_aligned_in_smp; + +struct vring_desc_state { + void 
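+/*
+ * "data" is the token the driver passed to virtqueue_add_sgs(); virtio-blk
+ * passes its per-request PDU, which is why desc_state_data_to_req() below
+ * can recover the struct request via blk_mq_rq_from_pdu().
+ */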
*data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ +}; + +struct vring_virtqueue { + struct virtqueue vq; + + /* Actual memory layout for this queue */ + struct vring vring; + + /* Can we use weak barriers? */ + bool weak_barriers; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Host publishes avail event idx */ + bool event; + + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. */ + unsigned int num_added; + + /* Last used index we've seen. */ + u16 last_used_idx; + + /* Last written value to avail->flags */ + u16 avail_flags_shadow; + + /* Last written value to avail->idx in guest byte order */ + u16 avail_idx_shadow; + + /* How to notify other side. FIXME: commonalize hcalls! */ + bool (*notify)(struct virtqueue *vq); + + /* DMA, allocation, and size information */ + bool we_own_ring; + size_t queue_size_in_bytes; + dma_addr_t queue_dma_addr; + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; +#endif + + /* Per-descriptor state. */ + struct vring_desc_state desc_state[]; +}; + +struct virtio_blk { + /* + * This mutex must be held by anything that may run after + * virtblk_remove() sets vblk->vdev to NULL. + * + * blk-mq, virtqueue processing, and sysfs attribute code paths are + * shut down before vblk->vdev is set to NULL and therefore do not need + * to hold this mutex. + */ + struct mutex vdev_mutex; + struct virtio_device *vdev; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* Block layer tags. */ + struct blk_mq_tag_set tag_set; + + /* Process context for config space updates */ + struct work_struct config_work; + + /* + * Tracks references from block_device_operations open/release and + * virtio_driver probe/remove so this object can be freed once no + * longer in use. + */ + refcount_t refs; + + /* What host tells us, plus 2 for header & tailer. */ + unsigned int sg_elems; + + /* Ida index - used to track minor number allocations. 
*/ + int index; + + /* num of vqs */ + int num_vqs; + struct virtio_blk_vq *vqs; +}; + +struct virtblk_req { +#ifdef CONFIG_VIRTIO_BLK_SCSI + struct scsi_request sreq; /* for SCSI passthrough, must be first */ + u8 sense[SCSI_SENSE_BUFFERSIZE]; + struct virtio_scsi_inhdr in_hdr; +#endif + struct virtio_blk_outhdr out_hdr; + u8 status; + struct scatterlist sg[]; +}; +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +struct blk_mq_ctxs { + struct kobject kobj; + struct blk_mq_ctx __percpu *queue_ctx; +}; + +struct blk_mq_ctx { + struct { + spinlock_t lock; + struct list_head rq_list; + } ____cacheline_aligned_in_smp; + + unsigned int cpu; + unsigned short index_hw[HCTX_MAX_TYPES]; + /* incremented at dispatch time */ + unsigned long rq_dispatched[2]; + unsigned long rq_merged; + + /* incremented at completion time */ + unsigned long ____cacheline_aligned_in_smp rq_completed[2]; + + struct request_queue *queue; + struct blk_mq_ctxs *ctxs; + struct kobject kobj; +} ____cacheline_aligned_in_smp; + +struct blk_flush_queue { + unsigned int flush_queue_delayed:1; + unsigned int flush_pending_idx:1; + unsigned int flush_running_idx:1; + blk_status_t rq_status; + unsigned long flush_pending_since; + struct list_head flush_queue[2]; + struct list_head flush_data_in_flight; + struct request *flush_rq; + + /* + * flush_rq shares tag with this rq, both can't be active + * at the same time + */ + struct request *orig_rq; + spinlock_t mq_flush_lock; +}; + +static inline int enable_detect_flush_rq(void) +{ + return 1; +} + +static inline struct blk_mq_hw_ctx *blk_mq_map_queue_type(struct request_queue *q, + enum hctx_type type, + unsigned int cpu) +{ + return q->queue_hw_ctx[q->tag_set->map[type].mq_map[cpu]]; +} + +/* + * blk_mq_map_queue() - map (cmd_flags,type) to hardware queue + * @q: request queue + * @flags: request command flags + * @cpu: CPU + */ +static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, + unsigned int flags, + unsigned int cpu) +{ + enum hctx_type type = HCTX_TYPE_DEFAULT; + + if ((flags & REQ_HIPRI) && + q->tag_set->nr_maps > HCTX_TYPE_POLL && + q->tag_set->map[HCTX_TYPE_POLL].nr_queues && + test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) + type = HCTX_TYPE_POLL; + + else if (((flags & REQ_OP_MASK) == REQ_OP_READ) && + q->tag_set->nr_maps > HCTX_TYPE_READ && + q->tag_set->map[HCTX_TYPE_READ].nr_queues) + type = HCTX_TYPE_READ; + + return blk_mq_map_queue_type(q, type, cpu); +} + +static inline struct blk_mq_hw_ctx *blk_mq_get_hctx_byrq(struct request *rq) +{ + return blk_mq_map_queue(rq->q, rq->cmd_flags, rq->mq_ctx->cpu); +} + +static inline struct request *desc_state_data_to_req(struct virtqueue *vq, int head) +{ + void *data = to_vvq(vq)->desc_state[head].data; + return data ? blk_mq_rq_from_pdu(data) : NULL; +} + +static inline int get_rq_internal_tag(struct request *rq) +{ + return rq ? 
rq->internal_tag : -1; +} + +static inline unsigned long get_issue_driver_ns(struct request *rq) +{ + if (!rq) + return 0; + if (rq->io_start_time_ns) + return rq->io_start_time_ns; + if (rq->__deadline > rq->timeout) + return jiffies_to_usecs(rq->__deadline - rq->timeout) * 1000; + return 0; +} + +/* + * LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0) + */ +static inline u64 get_check_hang_time_ns(void) +{ + return ktime_get_ns(); +} + +//#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0) +extern fn_queue_tag_busy_iter sym_blk_mq_queue_tag_busy_iter; +//#endif +typedef void (*blk_mq_rq_iter)(struct request *, void *, bool); +static blk_mq_rq_iter fn_blk_mq_check_hang = NULL; +static void blk_mq_check_rq_hang(struct blk_mq_hw_ctx *hctx, + struct request *rq, void *priv, bool reserved) +{ + if (fn_blk_mq_check_hang) + fn_blk_mq_check_hang(rq, priv, reserved); +} + +static inline int iter_all_rq(struct request_queue *q, blk_mq_rq_iter fn, void *data) +{ + fn_blk_mq_check_hang = fn; + + sym_blk_mq_queue_tag_busy_iter(q, blk_mq_check_rq_hang, data); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-26.1.al7.x86_64/nvme.h b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-26.1.al7.x86_64/nvme.h new file mode 100644 index 0000000000000000000000000000000000000000..9f3286080f831945117862601f668da7376e10c4 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-26.1.al7.x86_64/nvme.h @@ -0,0 +1,61 @@ +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct nvme_queue { + struct device *q_dmadev; + void *nvme_dev; //struct nvme_dev *dev; + spinlock_t sq_lock; + struct nvme_command *sq_cmds; + struct nvme_command __iomem *sq_cmds_io; + spinlock_t cq_lock ____cacheline_aligned_in_smp; + volatile struct nvme_completion *cqes; + struct blk_mq_tags **tags; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_tail; + u16 last_sq_tail; + u16 cq_head; + u16 last_cq_head; + u16 qid; + u8 cq_phase; + u8 polled; + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; +}; + +static int get_sq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int tail = nvmeq->sq_tail; + struct nvme_command cmd; + + do { + if (nvmeq->sq_cmds_io) { + memcpy_toio(&cmd, &nvmeq->sq_cmds_io[tail], sizeof(struct nvme_command)); + if (cmd.common.command_id == rq->tag) + return tail; + } + else if (nvmeq->sq_cmds[tail].common.command_id == rq->tag) + return tail; + } while (--tail >= 0); + return -1; +} + +static unsigned long get_cmd_ctx(struct nvme_queue *nvmeq, struct request *rq) +{ + //struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-26.1.al7.x86_64/virtio_blk.h b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-26.1.al7.x86_64/virtio_blk.h new file mode 100644 index 0000000000000000000000000000000000000000..42192d6dd9d4f8863943e9a40e1d00e979b937f2 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-26.1.al7.x86_64/virtio_blk.h @@ -0,0 +1,265 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +#define VQ_NAME_LEN 16 + +struct virtio_blk_vq { + struct virtqueue *vq; + spinlock_t lock; + char 
name[VQ_NAME_LEN]; +} ____cacheline_aligned_in_smp; + +struct vring_desc_state { + void *data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ +}; + +struct vring_virtqueue { + struct virtqueue vq; + + /* Actual memory layout for this queue */ + struct vring vring; + + /* Can we use weak barriers? */ + bool weak_barriers; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Host publishes avail event idx */ + bool event; + + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. */ + unsigned int num_added; + + /* Last used index we've seen. */ + u16 last_used_idx; + + /* Last written value to avail->flags */ + u16 avail_flags_shadow; + + /* Last written value to avail->idx in guest byte order */ + u16 avail_idx_shadow; + + /* How to notify other side. FIXME: commonalize hcalls! */ + bool (*notify)(struct virtqueue *vq); + + /* DMA, allocation, and size information */ + bool we_own_ring; + size_t queue_size_in_bytes; + dma_addr_t queue_dma_addr; + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; +#endif + + /* Per-descriptor state. */ + struct vring_desc_state desc_state[]; +}; + +struct virtio_blk { + /* + * This mutex must be held by anything that may run after + * virtblk_remove() sets vblk->vdev to NULL. + * + * blk-mq, virtqueue processing, and sysfs attribute code paths are + * shut down before vblk->vdev is set to NULL and therefore do not need + * to hold this mutex. + */ + struct mutex vdev_mutex; + struct virtio_device *vdev; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* Block layer tags. */ + struct blk_mq_tag_set tag_set; + + /* Process context for config space updates */ + struct work_struct config_work; + + /* + * Tracks references from block_device_operations open/release and + * virtio_driver probe/remove so this object can be freed once no + * longer in use. + */ + refcount_t refs; + + /* What host tells us, plus 2 for header & tailer. */ + unsigned int sg_elems; + + /* Ida index - used to track minor number allocations. 
*/ + int index; + + /* num of vqs */ + int num_vqs; + struct virtio_blk_vq *vqs; +}; + +struct virtblk_req { +#ifdef CONFIG_VIRTIO_BLK_SCSI + struct scsi_request sreq; /* for SCSI passthrough, must be first */ + u8 sense[SCSI_SENSE_BUFFERSIZE]; + struct virtio_scsi_inhdr in_hdr; +#endif + struct virtio_blk_outhdr out_hdr; + u8 status; + struct scatterlist sg[]; +}; +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +struct blk_mq_ctxs { + struct kobject kobj; + struct blk_mq_ctx __percpu *queue_ctx; +}; + +struct blk_mq_ctx { + struct { + spinlock_t lock; + struct list_head rq_list; + } ____cacheline_aligned_in_smp; + + unsigned int cpu; + unsigned short index_hw[HCTX_MAX_TYPES]; + /* incremented at dispatch time */ + unsigned long rq_dispatched[2]; + unsigned long rq_merged; + + /* incremented at completion time */ + unsigned long ____cacheline_aligned_in_smp rq_completed[2]; + + struct request_queue *queue; + struct blk_mq_ctxs *ctxs; + struct kobject kobj; +} ____cacheline_aligned_in_smp; + +struct blk_flush_queue { + unsigned int flush_queue_delayed:1; + unsigned int flush_pending_idx:1; + unsigned int flush_running_idx:1; + blk_status_t rq_status; + unsigned long flush_pending_since; + struct list_head flush_queue[2]; + struct list_head flush_data_in_flight; + struct request *flush_rq; + + /* + * flush_rq shares tag with this rq, both can't be active + * at the same time + */ + struct request *orig_rq; + spinlock_t mq_flush_lock; +}; + +static inline int enable_detect_flush_rq(void) +{ + return 1; +} + +static inline struct blk_mq_hw_ctx *blk_mq_map_queue_type(struct request_queue *q, + enum hctx_type type, + unsigned int cpu) +{ + return q->queue_hw_ctx[q->tag_set->map[type].mq_map[cpu]]; +} + +/* + * blk_mq_map_queue() - map (cmd_flags,type) to hardware queue + * @q: request queue + * @flags: request command flags + * @cpu: CPU + */ +static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, + unsigned int flags, + unsigned int cpu) +{ + enum hctx_type type = HCTX_TYPE_DEFAULT; + + if ((flags & REQ_HIPRI) && + q->tag_set->nr_maps > HCTX_TYPE_POLL && + q->tag_set->map[HCTX_TYPE_POLL].nr_queues && + test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) + type = HCTX_TYPE_POLL; + + else if (((flags & REQ_OP_MASK) == REQ_OP_READ) && + q->tag_set->nr_maps > HCTX_TYPE_READ && + q->tag_set->map[HCTX_TYPE_READ].nr_queues) + type = HCTX_TYPE_READ; + + return blk_mq_map_queue_type(q, type, cpu); +} + +static inline struct blk_mq_hw_ctx *blk_mq_get_hctx_byrq(struct request *rq) +{ + return blk_mq_map_queue(rq->q, rq->cmd_flags, rq->mq_ctx->cpu); +} + +static inline struct request *desc_state_data_to_req(struct virtqueue *vq, int head) +{ + void *data = to_vvq(vq)->desc_state[head].data; + return data ? blk_mq_rq_from_pdu(data) : NULL; +} + +static inline int get_rq_internal_tag(struct request *rq) +{ + return rq ? 
rq->internal_tag : -1; +} + +static inline unsigned long get_issue_driver_ns(struct request *rq) +{ + if (!rq) + return 0; + if (rq->io_start_time_ns) + return rq->io_start_time_ns; + if (rq->__deadline > rq->timeout) + return jiffies_to_usecs(rq->__deadline - rq->timeout) * 1000; + return 0; +} + +/* + * LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0) + */ +static inline u64 get_check_hang_time_ns(void) +{ + return ktime_get_ns(); +} + +extern fn_queue_tag_busy_iter sym_blk_mq_queue_tag_busy_iter; +typedef void (*blk_mq_rq_iter)(struct request *, void *, bool); +static blk_mq_rq_iter fn_blk_mq_check_hang = NULL; +static void blk_mq_check_rq_hang(struct blk_mq_hw_ctx *hctx, + struct request *rq, void *priv, bool reserved) +{ + if (fn_blk_mq_check_hang) + fn_blk_mq_check_hang(rq, priv, reserved); +} + +static inline int iter_all_rq(struct request_queue *q, blk_mq_rq_iter fn, void *data) +{ + fn_blk_mq_check_hang = fn; + sym_blk_mq_queue_tag_busy_iter(q, blk_mq_check_rq_hang, data); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-26.6.al7.x86_64/nvme.h b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-26.6.al7.x86_64/nvme.h new file mode 100644 index 0000000000000000000000000000000000000000..9f3286080f831945117862601f668da7376e10c4 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-26.6.al7.x86_64/nvme.h @@ -0,0 +1,61 @@ +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct nvme_queue { + struct device *q_dmadev; + void *nvme_dev; //struct nvme_dev *dev; + spinlock_t sq_lock; + struct nvme_command *sq_cmds; + struct nvme_command __iomem *sq_cmds_io; + spinlock_t cq_lock ____cacheline_aligned_in_smp; + volatile struct nvme_completion *cqes; + struct blk_mq_tags **tags; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_tail; + u16 last_sq_tail; + u16 cq_head; + u16 last_cq_head; + u16 qid; + u8 cq_phase; + u8 polled; + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; +}; + +static int get_sq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int tail = nvmeq->sq_tail; + struct nvme_command cmd; + + do { + if (nvmeq->sq_cmds_io) { + memcpy_toio(&cmd, &nvmeq->sq_cmds_io[tail], sizeof(struct nvme_command)); + if (cmd.common.command_id == rq->tag) + return tail; + } + else if (nvmeq->sq_cmds[tail].common.command_id == rq->tag) + return tail; + } while (--tail >= 0); + return -1; +} + +static unsigned long get_cmd_ctx(struct nvme_queue *nvmeq, struct request *rq) +{ + //struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-26.6.al7.x86_64/virtio_blk.h b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-26.6.al7.x86_64/virtio_blk.h new file mode 100644 index 0000000000000000000000000000000000000000..42192d6dd9d4f8863943e9a40e1d00e979b937f2 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-26.6.al7.x86_64/virtio_blk.h @@ -0,0 +1,265 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +#define VQ_NAME_LEN 16 + +struct virtio_blk_vq { + struct virtqueue *vq; + spinlock_t lock; + char name[VQ_NAME_LEN]; +} ____cacheline_aligned_in_smp; + +struct vring_desc_state { 
+ void *data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ +}; + +struct vring_virtqueue { + struct virtqueue vq; + + /* Actual memory layout for this queue */ + struct vring vring; + + /* Can we use weak barriers? */ + bool weak_barriers; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Host publishes avail event idx */ + bool event; + + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. */ + unsigned int num_added; + + /* Last used index we've seen. */ + u16 last_used_idx; + + /* Last written value to avail->flags */ + u16 avail_flags_shadow; + + /* Last written value to avail->idx in guest byte order */ + u16 avail_idx_shadow; + + /* How to notify other side. FIXME: commonalize hcalls! */ + bool (*notify)(struct virtqueue *vq); + + /* DMA, allocation, and size information */ + bool we_own_ring; + size_t queue_size_in_bytes; + dma_addr_t queue_dma_addr; + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; +#endif + + /* Per-descriptor state. */ + struct vring_desc_state desc_state[]; +}; + +struct virtio_blk { + /* + * This mutex must be held by anything that may run after + * virtblk_remove() sets vblk->vdev to NULL. + * + * blk-mq, virtqueue processing, and sysfs attribute code paths are + * shut down before vblk->vdev is set to NULL and therefore do not need + * to hold this mutex. + */ + struct mutex vdev_mutex; + struct virtio_device *vdev; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* Block layer tags. */ + struct blk_mq_tag_set tag_set; + + /* Process context for config space updates */ + struct work_struct config_work; + + /* + * Tracks references from block_device_operations open/release and + * virtio_driver probe/remove so this object can be freed once no + * longer in use. + */ + refcount_t refs; + + /* What host tells us, plus 2 for header & tailer. */ + unsigned int sg_elems; + + /* Ida index - used to track minor number allocations. 
*/ + int index; + + /* num of vqs */ + int num_vqs; + struct virtio_blk_vq *vqs; +}; + +struct virtblk_req { +#ifdef CONFIG_VIRTIO_BLK_SCSI + struct scsi_request sreq; /* for SCSI passthrough, must be first */ + u8 sense[SCSI_SENSE_BUFFERSIZE]; + struct virtio_scsi_inhdr in_hdr; +#endif + struct virtio_blk_outhdr out_hdr; + u8 status; + struct scatterlist sg[]; +}; +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +struct blk_mq_ctxs { + struct kobject kobj; + struct blk_mq_ctx __percpu *queue_ctx; +}; + +struct blk_mq_ctx { + struct { + spinlock_t lock; + struct list_head rq_list; + } ____cacheline_aligned_in_smp; + + unsigned int cpu; + unsigned short index_hw[HCTX_MAX_TYPES]; + /* incremented at dispatch time */ + unsigned long rq_dispatched[2]; + unsigned long rq_merged; + + /* incremented at completion time */ + unsigned long ____cacheline_aligned_in_smp rq_completed[2]; + + struct request_queue *queue; + struct blk_mq_ctxs *ctxs; + struct kobject kobj; +} ____cacheline_aligned_in_smp; + +struct blk_flush_queue { + unsigned int flush_queue_delayed:1; + unsigned int flush_pending_idx:1; + unsigned int flush_running_idx:1; + blk_status_t rq_status; + unsigned long flush_pending_since; + struct list_head flush_queue[2]; + struct list_head flush_data_in_flight; + struct request *flush_rq; + + /* + * flush_rq shares tag with this rq, both can't be active + * at the same time + */ + struct request *orig_rq; + spinlock_t mq_flush_lock; +}; + +static inline int enable_detect_flush_rq(void) +{ + return 1; +} + +static inline struct blk_mq_hw_ctx *blk_mq_map_queue_type(struct request_queue *q, + enum hctx_type type, + unsigned int cpu) +{ + return q->queue_hw_ctx[q->tag_set->map[type].mq_map[cpu]]; +} + +/* + * blk_mq_map_queue() - map (cmd_flags,type) to hardware queue + * @q: request queue + * @flags: request command flags + * @cpu: CPU + */ +static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, + unsigned int flags, + unsigned int cpu) +{ + enum hctx_type type = HCTX_TYPE_DEFAULT; + + if ((flags & REQ_HIPRI) && + q->tag_set->nr_maps > HCTX_TYPE_POLL && + q->tag_set->map[HCTX_TYPE_POLL].nr_queues && + test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) + type = HCTX_TYPE_POLL; + + else if (((flags & REQ_OP_MASK) == REQ_OP_READ) && + q->tag_set->nr_maps > HCTX_TYPE_READ && + q->tag_set->map[HCTX_TYPE_READ].nr_queues) + type = HCTX_TYPE_READ; + + return blk_mq_map_queue_type(q, type, cpu); +} + +static inline struct blk_mq_hw_ctx *blk_mq_get_hctx_byrq(struct request *rq) +{ + return blk_mq_map_queue(rq->q, rq->cmd_flags, rq->mq_ctx->cpu); +} + +static inline struct request *desc_state_data_to_req(struct virtqueue *vq, int head) +{ + void *data = to_vvq(vq)->desc_state[head].data; + return data ? blk_mq_rq_from_pdu(data) : NULL; +} + +static inline int get_rq_internal_tag(struct request *rq) +{ + return rq ? 
rq->internal_tag : -1; +} + +static inline unsigned long get_issue_driver_ns(struct request *rq) +{ + if (!rq) + return 0; + if (rq->io_start_time_ns) + return rq->io_start_time_ns; + if (rq->__deadline > rq->timeout) + return jiffies_to_usecs(rq->__deadline - rq->timeout) * 1000; + return 0; +} + +/* + * LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0) + */ +static inline u64 get_check_hang_time_ns(void) +{ + return ktime_get_ns(); +} + +extern fn_queue_tag_busy_iter sym_blk_mq_queue_tag_busy_iter; +typedef void (*blk_mq_rq_iter)(struct request *, void *, bool); +static blk_mq_rq_iter fn_blk_mq_check_hang = NULL; +static void blk_mq_check_rq_hang(struct blk_mq_hw_ctx *hctx, + struct request *rq, void *priv, bool reserved) +{ + if (fn_blk_mq_check_hang) + fn_blk_mq_check_hang(rq, priv, reserved); +} + +static inline int iter_all_rq(struct request_queue *q, blk_mq_rq_iter fn, void *data) +{ + fn_blk_mq_check_hang = fn; + sym_blk_mq_queue_tag_busy_iter(q, blk_mq_check_rq_hang, data); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-26.al7.x86_64/nvme.h b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-26.al7.x86_64/nvme.h new file mode 100644 index 0000000000000000000000000000000000000000..9f3286080f831945117862601f668da7376e10c4 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-26.al7.x86_64/nvme.h @@ -0,0 +1,61 @@ +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct nvme_queue { + struct device *q_dmadev; + void *nvme_dev; //struct nvme_dev *dev; + spinlock_t sq_lock; + struct nvme_command *sq_cmds; + struct nvme_command __iomem *sq_cmds_io; + spinlock_t cq_lock ____cacheline_aligned_in_smp; + volatile struct nvme_completion *cqes; + struct blk_mq_tags **tags; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_tail; + u16 last_sq_tail; + u16 cq_head; + u16 last_cq_head; + u16 qid; + u8 cq_phase; + u8 polled; + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; +}; + +static int get_sq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int tail = nvmeq->sq_tail; + struct nvme_command cmd; + + do { + if (nvmeq->sq_cmds_io) { + memcpy_toio(&cmd, &nvmeq->sq_cmds_io[tail], sizeof(struct nvme_command)); + if (cmd.common.command_id == rq->tag) + return tail; + } + else if (nvmeq->sq_cmds[tail].common.command_id == rq->tag) + return tail; + } while (--tail >= 0); + return -1; +} + +static unsigned long get_cmd_ctx(struct nvme_queue *nvmeq, struct request *rq) +{ + //struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-26.al7.x86_64/virtio_blk.h b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-26.al7.x86_64/virtio_blk.h new file mode 100644 index 0000000000000000000000000000000000000000..42192d6dd9d4f8863943e9a40e1d00e979b937f2 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-26.al7.x86_64/virtio_blk.h @@ -0,0 +1,265 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +#define VQ_NAME_LEN 16 + +struct virtio_blk_vq { + struct virtqueue *vq; + spinlock_t lock; + char name[VQ_NAME_LEN]; +} ____cacheline_aligned_in_smp; + +struct vring_desc_state { + void 
*data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ +}; + +struct vring_virtqueue { + struct virtqueue vq; + + /* Actual memory layout for this queue */ + struct vring vring; + + /* Can we use weak barriers? */ + bool weak_barriers; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Host publishes avail event idx */ + bool event; + + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. */ + unsigned int num_added; + + /* Last used index we've seen. */ + u16 last_used_idx; + + /* Last written value to avail->flags */ + u16 avail_flags_shadow; + + /* Last written value to avail->idx in guest byte order */ + u16 avail_idx_shadow; + + /* How to notify other side. FIXME: commonalize hcalls! */ + bool (*notify)(struct virtqueue *vq); + + /* DMA, allocation, and size information */ + bool we_own_ring; + size_t queue_size_in_bytes; + dma_addr_t queue_dma_addr; + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; +#endif + + /* Per-descriptor state. */ + struct vring_desc_state desc_state[]; +}; + +struct virtio_blk { + /* + * This mutex must be held by anything that may run after + * virtblk_remove() sets vblk->vdev to NULL. + * + * blk-mq, virtqueue processing, and sysfs attribute code paths are + * shut down before vblk->vdev is set to NULL and therefore do not need + * to hold this mutex. + */ + struct mutex vdev_mutex; + struct virtio_device *vdev; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* Block layer tags. */ + struct blk_mq_tag_set tag_set; + + /* Process context for config space updates */ + struct work_struct config_work; + + /* + * Tracks references from block_device_operations open/release and + * virtio_driver probe/remove so this object can be freed once no + * longer in use. + */ + refcount_t refs; + + /* What host tells us, plus 2 for header & tailer. */ + unsigned int sg_elems; + + /* Ida index - used to track minor number allocations. 
*/ + int index; + + /* num of vqs */ + int num_vqs; + struct virtio_blk_vq *vqs; +}; + +struct virtblk_req { +#ifdef CONFIG_VIRTIO_BLK_SCSI + struct scsi_request sreq; /* for SCSI passthrough, must be first */ + u8 sense[SCSI_SENSE_BUFFERSIZE]; + struct virtio_scsi_inhdr in_hdr; +#endif + struct virtio_blk_outhdr out_hdr; + u8 status; + struct scatterlist sg[]; +}; +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +struct blk_mq_ctxs { + struct kobject kobj; + struct blk_mq_ctx __percpu *queue_ctx; +}; + +struct blk_mq_ctx { + struct { + spinlock_t lock; + struct list_head rq_list; + } ____cacheline_aligned_in_smp; + + unsigned int cpu; + unsigned short index_hw[HCTX_MAX_TYPES]; + /* incremented at dispatch time */ + unsigned long rq_dispatched[2]; + unsigned long rq_merged; + + /* incremented at completion time */ + unsigned long ____cacheline_aligned_in_smp rq_completed[2]; + + struct request_queue *queue; + struct blk_mq_ctxs *ctxs; + struct kobject kobj; +} ____cacheline_aligned_in_smp; + +struct blk_flush_queue { + unsigned int flush_queue_delayed:1; + unsigned int flush_pending_idx:1; + unsigned int flush_running_idx:1; + blk_status_t rq_status; + unsigned long flush_pending_since; + struct list_head flush_queue[2]; + struct list_head flush_data_in_flight; + struct request *flush_rq; + + /* + * flush_rq shares tag with this rq, both can't be active + * at the same time + */ + struct request *orig_rq; + spinlock_t mq_flush_lock; +}; + +static inline int enable_detect_flush_rq(void) +{ + return 1; +} + +static inline struct blk_mq_hw_ctx *blk_mq_map_queue_type(struct request_queue *q, + enum hctx_type type, + unsigned int cpu) +{ + return q->queue_hw_ctx[q->tag_set->map[type].mq_map[cpu]]; +} + +/* + * blk_mq_map_queue() - map (cmd_flags,type) to hardware queue + * @q: request queue + * @flags: request command flags + * @cpu: CPU + */ +static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, + unsigned int flags, + unsigned int cpu) +{ + enum hctx_type type = HCTX_TYPE_DEFAULT; + + if ((flags & REQ_HIPRI) && + q->tag_set->nr_maps > HCTX_TYPE_POLL && + q->tag_set->map[HCTX_TYPE_POLL].nr_queues && + test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) + type = HCTX_TYPE_POLL; + + else if (((flags & REQ_OP_MASK) == REQ_OP_READ) && + q->tag_set->nr_maps > HCTX_TYPE_READ && + q->tag_set->map[HCTX_TYPE_READ].nr_queues) + type = HCTX_TYPE_READ; + + return blk_mq_map_queue_type(q, type, cpu); +} + +static inline struct blk_mq_hw_ctx *blk_mq_get_hctx_byrq(struct request *rq) +{ + return blk_mq_map_queue(rq->q, rq->cmd_flags, rq->mq_ctx->cpu); +} + +static inline struct request *desc_state_data_to_req(struct virtqueue *vq, int head) +{ + void *data = to_vvq(vq)->desc_state[head].data; + return data ? blk_mq_rq_from_pdu(data) : NULL; +} + +static inline int get_rq_internal_tag(struct request *rq) +{ + return rq ? 
rq->internal_tag : -1; +} + +static inline unsigned long get_issue_driver_ns(struct request *rq) +{ + if (!rq) + return 0; + if (rq->io_start_time_ns) + return rq->io_start_time_ns; + if (rq->__deadline > rq->timeout) + return jiffies_to_usecs(rq->__deadline - rq->timeout) * 1000; + return 0; +} + +/* + * LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0) + */ +static inline u64 get_check_hang_time_ns(void) +{ + return ktime_get_ns(); +} + +extern fn_queue_tag_busy_iter sym_blk_mq_queue_tag_busy_iter; +typedef void (*blk_mq_rq_iter)(struct request *, void *, bool); +static blk_mq_rq_iter fn_blk_mq_check_hang = NULL; +static void blk_mq_check_rq_hang(struct blk_mq_hw_ctx *hctx, + struct request *rq, void *priv, bool reserved) +{ + if (fn_blk_mq_check_hang) + fn_blk_mq_check_hang(rq, priv, reserved); +} + +static inline int iter_all_rq(struct request_queue *q, blk_mq_rq_iter fn, void *data) +{ + fn_blk_mq_check_hang = fn; + sym_blk_mq_queue_tag_busy_iter(q, blk_mq_check_rq_hang, data); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-26.an8.x86_64/nvme.h b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-26.an8.x86_64/nvme.h new file mode 100644 index 0000000000000000000000000000000000000000..9f3286080f831945117862601f668da7376e10c4 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-26.an8.x86_64/nvme.h @@ -0,0 +1,61 @@ +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct nvme_queue { + struct device *q_dmadev; + void *nvme_dev; //struct nvme_dev *dev; + spinlock_t sq_lock; + struct nvme_command *sq_cmds; + struct nvme_command __iomem *sq_cmds_io; + spinlock_t cq_lock ____cacheline_aligned_in_smp; + volatile struct nvme_completion *cqes; + struct blk_mq_tags **tags; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_tail; + u16 last_sq_tail; + u16 cq_head; + u16 last_cq_head; + u16 qid; + u8 cq_phase; + u8 polled; + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; +}; + +static int get_sq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int tail = nvmeq->sq_tail; + struct nvme_command cmd; + + do { + if (nvmeq->sq_cmds_io) { + memcpy_toio(&cmd, &nvmeq->sq_cmds_io[tail], sizeof(struct nvme_command)); + if (cmd.common.command_id == rq->tag) + return tail; + } + else if (nvmeq->sq_cmds[tail].common.command_id == rq->tag) + return tail; + } while (--tail >= 0); + return -1; +} + +static unsigned long get_cmd_ctx(struct nvme_queue *nvmeq, struct request *rq) +{ + //struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-26.an8.x86_64/virtio_blk.h b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-26.an8.x86_64/virtio_blk.h new file mode 100644 index 0000000000000000000000000000000000000000..853fa75e283274035567b387eb25b4f8e56a74cc --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-26.an8.x86_64/virtio_blk.h @@ -0,0 +1,268 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +#define VQ_NAME_LEN 16 + +struct virtio_blk_vq { + struct virtqueue *vq; + spinlock_t lock; + char name[VQ_NAME_LEN]; +} ____cacheline_aligned_in_smp; + +struct vring_desc_state { + void 
*data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ +}; + +struct vring_virtqueue { + struct virtqueue vq; + + /* Actual memory layout for this queue */ + struct vring vring; + + /* Can we use weak barriers? */ + bool weak_barriers; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Host publishes avail event idx */ + bool event; + + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. */ + unsigned int num_added; + + /* Last used index we've seen. */ + u16 last_used_idx; + + /* Last written value to avail->flags */ + u16 avail_flags_shadow; + + /* Last written value to avail->idx in guest byte order */ + u16 avail_idx_shadow; + + /* How to notify other side. FIXME: commonalize hcalls! */ + bool (*notify)(struct virtqueue *vq); + + /* DMA, allocation, and size information */ + bool we_own_ring; + size_t queue_size_in_bytes; + dma_addr_t queue_dma_addr; + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; +#endif + + /* Per-descriptor state. */ + struct vring_desc_state desc_state[]; +}; + +struct virtio_blk { + /* + * This mutex must be held by anything that may run after + * virtblk_remove() sets vblk->vdev to NULL. + * + * blk-mq, virtqueue processing, and sysfs attribute code paths are + * shut down before vblk->vdev is set to NULL and therefore do not need + * to hold this mutex. + */ + struct mutex vdev_mutex; + struct virtio_device *vdev; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* Block layer tags. */ + struct blk_mq_tag_set tag_set; + + /* Process context for config space updates */ + struct work_struct config_work; + + /* + * Tracks references from block_device_operations open/release and + * virtio_driver probe/remove so this object can be freed once no + * longer in use. + */ + refcount_t refs; + + /* What host tells us, plus 2 for header & tailer. */ + unsigned int sg_elems; + + /* Ida index - used to track minor number allocations. 
*/ + int index; + + /* num of vqs */ + int num_vqs; + struct virtio_blk_vq *vqs; +}; + +struct virtblk_req { +#ifdef CONFIG_VIRTIO_BLK_SCSI + struct scsi_request sreq; /* for SCSI passthrough, must be first */ + u8 sense[SCSI_SENSE_BUFFERSIZE]; + struct virtio_scsi_inhdr in_hdr; +#endif + struct virtio_blk_outhdr out_hdr; + u8 status; + struct scatterlist sg[]; +}; +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +struct blk_mq_ctxs { + struct kobject kobj; + struct blk_mq_ctx __percpu *queue_ctx; +}; + +struct blk_mq_ctx { + struct { + spinlock_t lock; + struct list_head rq_list; + } ____cacheline_aligned_in_smp; + + unsigned int cpu; + unsigned short index_hw[HCTX_MAX_TYPES]; + /* incremented at dispatch time */ + unsigned long rq_dispatched[2]; + unsigned long rq_merged; + + /* incremented at completion time */ + unsigned long ____cacheline_aligned_in_smp rq_completed[2]; + + struct request_queue *queue; + struct blk_mq_ctxs *ctxs; + struct kobject kobj; +} ____cacheline_aligned_in_smp; + +struct blk_flush_queue { + unsigned int flush_queue_delayed:1; + unsigned int flush_pending_idx:1; + unsigned int flush_running_idx:1; + blk_status_t rq_status; + unsigned long flush_pending_since; + struct list_head flush_queue[2]; + struct list_head flush_data_in_flight; + struct request *flush_rq; + + /* + * flush_rq shares tag with this rq, both can't be active + * at the same time + */ + struct request *orig_rq; + spinlock_t mq_flush_lock; +}; + +static inline int enable_detect_flush_rq(void) +{ + return 1; +} + +static inline struct blk_mq_hw_ctx *blk_mq_map_queue_type(struct request_queue *q, + enum hctx_type type, + unsigned int cpu) +{ + return q->queue_hw_ctx[q->tag_set->map[type].mq_map[cpu]]; +} + +/* + * blk_mq_map_queue() - map (cmd_flags,type) to hardware queue + * @q: request queue + * @flags: request command flags + * @cpu: CPU + */ +static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, + unsigned int flags, + unsigned int cpu) +{ + enum hctx_type type = HCTX_TYPE_DEFAULT; + + if ((flags & REQ_HIPRI) && + q->tag_set->nr_maps > HCTX_TYPE_POLL && + q->tag_set->map[HCTX_TYPE_POLL].nr_queues && + test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) + type = HCTX_TYPE_POLL; + + else if (((flags & REQ_OP_MASK) == REQ_OP_READ) && + q->tag_set->nr_maps > HCTX_TYPE_READ && + q->tag_set->map[HCTX_TYPE_READ].nr_queues) + type = HCTX_TYPE_READ; + + return blk_mq_map_queue_type(q, type, cpu); +} + +static inline struct blk_mq_hw_ctx *blk_mq_get_hctx_byrq(struct request *rq) +{ + return blk_mq_map_queue(rq->q, rq->cmd_flags, rq->mq_ctx->cpu); +} + +static inline struct request *desc_state_data_to_req(struct virtqueue *vq, int head) +{ + void *data = to_vvq(vq)->desc_state[head].data; + return data ? blk_mq_rq_from_pdu(data) : NULL; +} + +static inline int get_rq_internal_tag(struct request *rq) +{ + return rq ? 
rq->internal_tag : -1; +} + +static inline unsigned long get_issue_driver_ns(struct request *rq) +{ + if (!rq) + return 0; + if (rq->io_start_time_ns) + return rq->io_start_time_ns; + if (rq->__deadline > rq->timeout) + return jiffies_to_usecs(rq->__deadline - rq->timeout) * 1000; + return 0; +} + +/* + * LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0) + */ +static inline u64 get_check_hang_time_ns(void) +{ + return ktime_get_ns(); +} + +//#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0) +extern fn_queue_tag_busy_iter sym_blk_mq_queue_tag_busy_iter; +//#endif +typedef void (*blk_mq_rq_iter)(struct request *, void *, bool); +static blk_mq_rq_iter fn_blk_mq_check_hang = NULL; +static void blk_mq_check_rq_hang(struct blk_mq_hw_ctx *hctx, + struct request *rq, void *priv, bool reserved) +{ + if (fn_blk_mq_check_hang) + fn_blk_mq_check_hang(rq, priv, reserved); +} + +static inline int iter_all_rq(struct request_queue *q, blk_mq_rq_iter fn, void *data) +{ + fn_blk_mq_check_hang = fn; + + sym_blk_mq_queue_tag_busy_iter(q, blk_mq_check_rq_hang, data); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-27.al7.x86_64/nvme.h b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-27.al7.x86_64/nvme.h new file mode 100644 index 0000000000000000000000000000000000000000..9f3286080f831945117862601f668da7376e10c4 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-27.al7.x86_64/nvme.h @@ -0,0 +1,61 @@ +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct nvme_queue { + struct device *q_dmadev; + void *nvme_dev; //struct nvme_dev *dev; + spinlock_t sq_lock; + struct nvme_command *sq_cmds; + struct nvme_command __iomem *sq_cmds_io; + spinlock_t cq_lock ____cacheline_aligned_in_smp; + volatile struct nvme_completion *cqes; + struct blk_mq_tags **tags; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_tail; + u16 last_sq_tail; + u16 cq_head; + u16 last_cq_head; + u16 qid; + u8 cq_phase; + u8 polled; + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; +}; + +static int get_sq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int tail = nvmeq->sq_tail; + struct nvme_command cmd; + + do { + if (nvmeq->sq_cmds_io) { + memcpy_toio(&cmd, &nvmeq->sq_cmds_io[tail], sizeof(struct nvme_command)); + if (cmd.common.command_id == rq->tag) + return tail; + } + else if (nvmeq->sq_cmds[tail].common.command_id == rq->tag) + return tail; + } while (--tail >= 0); + return -1; +} + +static unsigned long get_cmd_ctx(struct nvme_queue *nvmeq, struct request *rq) +{ + //struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-27.al7.x86_64/virtio_blk.h b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-27.al7.x86_64/virtio_blk.h new file mode 100644 index 0000000000000000000000000000000000000000..42192d6dd9d4f8863943e9a40e1d00e979b937f2 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/4.19.91-27.al7.x86_64/virtio_blk.h @@ -0,0 +1,265 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +#define VQ_NAME_LEN 16 + +struct virtio_blk_vq { + struct virtqueue *vq; + spinlock_t lock; + char name[VQ_NAME_LEN]; +} 
____cacheline_aligned_in_smp; + +struct vring_desc_state { + void *data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ +}; + +struct vring_virtqueue { + struct virtqueue vq; + + /* Actual memory layout for this queue */ + struct vring vring; + + /* Can we use weak barriers? */ + bool weak_barriers; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Host publishes avail event idx */ + bool event; + + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. */ + unsigned int num_added; + + /* Last used index we've seen. */ + u16 last_used_idx; + + /* Last written value to avail->flags */ + u16 avail_flags_shadow; + + /* Last written value to avail->idx in guest byte order */ + u16 avail_idx_shadow; + + /* How to notify other side. FIXME: commonalize hcalls! */ + bool (*notify)(struct virtqueue *vq); + + /* DMA, allocation, and size information */ + bool we_own_ring; + size_t queue_size_in_bytes; + dma_addr_t queue_dma_addr; + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; +#endif + + /* Per-descriptor state. */ + struct vring_desc_state desc_state[]; +}; + +struct virtio_blk { + /* + * This mutex must be held by anything that may run after + * virtblk_remove() sets vblk->vdev to NULL. + * + * blk-mq, virtqueue processing, and sysfs attribute code paths are + * shut down before vblk->vdev is set to NULL and therefore do not need + * to hold this mutex. + */ + struct mutex vdev_mutex; + struct virtio_device *vdev; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* Block layer tags. */ + struct blk_mq_tag_set tag_set; + + /* Process context for config space updates */ + struct work_struct config_work; + + /* + * Tracks references from block_device_operations open/release and + * virtio_driver probe/remove so this object can be freed once no + * longer in use. + */ + refcount_t refs; + + /* What host tells us, plus 2 for header & tailer. */ + unsigned int sg_elems; + + /* Ida index - used to track minor number allocations. 
*/ + int index; + + /* num of vqs */ + int num_vqs; + struct virtio_blk_vq *vqs; +}; + +struct virtblk_req { +#ifdef CONFIG_VIRTIO_BLK_SCSI + struct scsi_request sreq; /* for SCSI passthrough, must be first */ + u8 sense[SCSI_SENSE_BUFFERSIZE]; + struct virtio_scsi_inhdr in_hdr; +#endif + struct virtio_blk_outhdr out_hdr; + u8 status; + struct scatterlist sg[]; +}; +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +struct blk_mq_ctxs { + struct kobject kobj; + struct blk_mq_ctx __percpu *queue_ctx; +}; + +struct blk_mq_ctx { + struct { + spinlock_t lock; + struct list_head rq_list; + } ____cacheline_aligned_in_smp; + + unsigned int cpu; + unsigned short index_hw[HCTX_MAX_TYPES]; + /* incremented at dispatch time */ + unsigned long rq_dispatched[2]; + unsigned long rq_merged; + + /* incremented at completion time */ + unsigned long ____cacheline_aligned_in_smp rq_completed[2]; + + struct request_queue *queue; + struct blk_mq_ctxs *ctxs; + struct kobject kobj; +} ____cacheline_aligned_in_smp; + +struct blk_flush_queue { + unsigned int flush_queue_delayed:1; + unsigned int flush_pending_idx:1; + unsigned int flush_running_idx:1; + blk_status_t rq_status; + unsigned long flush_pending_since; + struct list_head flush_queue[2]; + struct list_head flush_data_in_flight; + struct request *flush_rq; + + /* + * flush_rq shares tag with this rq, both can't be active + * at the same time + */ + struct request *orig_rq; + spinlock_t mq_flush_lock; +}; + +static inline int enable_detect_flush_rq(void) +{ + return 1; +} + +static inline struct blk_mq_hw_ctx *blk_mq_map_queue_type(struct request_queue *q, + enum hctx_type type, + unsigned int cpu) +{ + return q->queue_hw_ctx[q->tag_set->map[type].mq_map[cpu]]; +} + +/* + * blk_mq_map_queue() - map (cmd_flags,type) to hardware queue + * @q: request queue + * @flags: request command flags + * @cpu: CPU + */ +static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, + unsigned int flags, + unsigned int cpu) +{ + enum hctx_type type = HCTX_TYPE_DEFAULT; + + if ((flags & REQ_HIPRI) && + q->tag_set->nr_maps > HCTX_TYPE_POLL && + q->tag_set->map[HCTX_TYPE_POLL].nr_queues && + test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) + type = HCTX_TYPE_POLL; + + else if (((flags & REQ_OP_MASK) == REQ_OP_READ) && + q->tag_set->nr_maps > HCTX_TYPE_READ && + q->tag_set->map[HCTX_TYPE_READ].nr_queues) + type = HCTX_TYPE_READ; + + return blk_mq_map_queue_type(q, type, cpu); +} + +static inline struct blk_mq_hw_ctx *blk_mq_get_hctx_byrq(struct request *rq) +{ + return blk_mq_map_queue(rq->q, rq->cmd_flags, rq->mq_ctx->cpu); +} + +static inline struct request *desc_state_data_to_req(struct virtqueue *vq, int head) +{ + void *data = to_vvq(vq)->desc_state[head].data; + return data ? blk_mq_rq_from_pdu(data) : NULL; +} + +static inline int get_rq_internal_tag(struct request *rq) +{ + return rq ? 
rq->internal_tag : -1; +} + +static inline unsigned long get_issue_driver_ns(struct request *rq) +{ + if (!rq) + return 0; + if (rq->io_start_time_ns) + return rq->io_start_time_ns; + if (rq->__deadline > rq->timeout) + return jiffies_to_usecs(rq->__deadline - rq->timeout) * 1000; + return 0; +} + +/* + * LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0) + */ +static inline u64 get_check_hang_time_ns(void) +{ + return ktime_get_ns(); +} + +extern fn_queue_tag_busy_iter sym_blk_mq_queue_tag_busy_iter; +typedef void (*blk_mq_rq_iter)(struct request *, void *, bool); +static blk_mq_rq_iter fn_blk_mq_check_hang = NULL; +static void blk_mq_check_rq_hang(struct blk_mq_hw_ctx *hctx, + struct request *rq, void *priv, bool reserved) +{ + if (fn_blk_mq_check_hang) + fn_blk_mq_check_hang(rq, priv, reserved); +} + +static inline int iter_all_rq(struct request_queue *q, blk_mq_rq_iter fn, void *data) +{ + fn_blk_mq_check_hang = fn; + sym_blk_mq_queue_tag_busy_iter(q, blk_mq_check_rq_hang, data); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.112-11.1.al8.x86_64/nvme.h b/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.112-11.1.al8.x86_64/nvme.h new file mode 100644 index 0000000000000000000000000000000000000000..acda33af65e3da6b3a3a10b4ab9561a1720bd840 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.112-11.1.al8.x86_64/nvme.h @@ -0,0 +1,63 @@ +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct nvme_queue { + void *nvme_dev; //struct nvme_dev *dev; + spinlock_t sq_lock; + void *sq_cmds; + /* only used for poll queues: */ + spinlock_t cq_poll_lock ____cacheline_aligned_in_smp; + struct nvme_completion *cqes; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_tail; + u16 last_sq_tail; + u16 cq_head; + u16 qid; + u8 cq_phase; + u8 sqes; + unsigned long flags; +#define NVMEQ_ENABLED 0 +#define NVMEQ_SQ_CMB 1 +#define NVMEQ_DELETE_ERROR 2 +#define NVMEQ_POLLED 3 + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; + struct completion delete_done; +}; + +static int get_sq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int tail = nvmeq->sq_tail; + struct nvme_command cmd; + + do { + if (tail >= nvmeq->q_depth) + break; + memcpy(&cmd, nvmeq->sq_cmds + (tail << nvmeq->sqes), + sizeof(cmd)); + if (cmd.common.command_id == rq->tag) + return tail; + } while (--tail >= 0); + return -1; +} + +static unsigned long get_cmd_ctx(struct nvme_queue *nvmeq, struct request *rq) +{ + //struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.112-11.1.al8.x86_64/virtio_blk.h b/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.112-11.1.al8.x86_64/virtio_blk.h new file mode 100644 index 0000000000000000000000000000000000000000..ed80e59fddcf9f45323f0aafb79163521fb96e62 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.112-11.1.al8.x86_64/virtio_blk.h @@ -0,0 +1,310 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct vring_desc_state_split { + void *data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. 
*/ +}; + +struct vring_desc_state_packed { + void *data; /* Data for callback. */ + struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */ + u16 num; /* Descriptor list length. */ + u16 next; /* The next desc state in a list. */ + u16 last; /* The last desc state in a list. */ +}; + +struct vring_desc_extra_packed { + dma_addr_t addr; /* Buffer DMA addr. */ + u32 len; /* Buffer length. */ + u16 flags; /* Descriptor flags. */ +}; + +struct vring_virtqueue { + struct virtqueue vq; + + /* Is this a packed ring? */ + bool packed_ring; + + /* Is DMA API used? */ + bool use_dma_api; + + /* Can we use weak barriers? */ + bool weak_barriers; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Host publishes avail event idx */ + bool event; + + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. */ + unsigned int num_added; + + /* Last used index we've seen. */ + u16 last_used_idx; + + union { + /* Available for split ring */ + struct { + /* Actual memory layout for this queue. */ + struct vring vring; + + /* Last written value to avail->flags */ + u16 avail_flags_shadow; + + /* + * Last written value to avail->idx in + * guest byte order. + */ + u16 avail_idx_shadow; + + /* Per-descriptor state. */ + struct vring_desc_state_split *desc_state; + + /* DMA address and size information */ + dma_addr_t queue_dma_addr; + size_t queue_size_in_bytes; + } split; + + /* Available for packed ring */ + struct { + /* Actual memory layout for this queue. */ + struct { + unsigned int num; + struct vring_packed_desc *desc; + struct vring_packed_desc_event *driver; + struct vring_packed_desc_event *device; + } vring; + + /* Driver ring wrap counter. */ + bool avail_wrap_counter; + + /* Device ring wrap counter. */ + bool used_wrap_counter; + + /* Avail used flags. */ + u16 avail_used_flags; + + /* Index of the next avail descriptor. */ + u16 next_avail_idx; + + /* + * Last written value to driver->flags in + * guest byte order. + */ + u16 event_flags_shadow; + + /* Per-descriptor state. */ + struct vring_desc_state_packed *desc_state; + struct vring_desc_state_packed *desc_extra; + + /* DMA address and size information */ + dma_addr_t ring_dma_addr; + dma_addr_t driver_event_dma_addr; + dma_addr_t device_event_dma_addr; + size_t ring_size_in_bytes; + size_t event_size_in_bytes; + } packed; + }; + + /* How to notify other side. FIXME: commonalize hcalls! */ + bool (*notify)(struct virtqueue *vq); + + /* DMA, allocation, and size information */ + bool we_own_ring; + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; +#endif +}; + +#define VQ_NAME_LEN 16 +struct virtio_blk_vq { + struct virtqueue *vq; + spinlock_t lock; + char name[VQ_NAME_LEN]; +} ____cacheline_aligned_in_smp; + +struct virtio_blk { + /* + * This mutex must be held by anything that may run after + * virtblk_remove() sets vblk->vdev to NULL. + * + * blk-mq, virtqueue processing, and sysfs attribute code paths are + * shut down before vblk->vdev is set to NULL and therefore do not need + * to hold this mutex. + */ + struct mutex vdev_mutex; + struct virtio_device *vdev; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* Block layer tags. 
*/ + struct blk_mq_tag_set tag_set; + + /* Process context for config space updates */ + struct work_struct config_work; + + /* + * Tracks references from block_device_operations open/release and + * virtio_driver probe/remove so this object can be freed once no + * longer in use. + */ + refcount_t refs; + + /* What host tells us, plus 2 for header & tailer. */ + unsigned int sg_elems; + + /* Ida index - used to track minor number allocations. */ + int index; + + /* num of vqs */ + int num_vqs; + struct virtio_blk_vq *vqs; +}; + +struct virtblk_req { + struct virtio_blk_outhdr out_hdr; + u8 status; + struct scatterlist sg[]; +}; +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +struct blk_mq_ctxs { + struct kobject kobj; + struct blk_mq_ctx __percpu *queue_ctx; +}; + +struct blk_mq_ctx { + struct { + spinlock_t lock; + struct list_head rq_lists[HCTX_MAX_TYPES]; + } ____cacheline_aligned_in_smp; + + unsigned int cpu; + unsigned short index_hw[HCTX_MAX_TYPES]; + struct blk_mq_hw_ctx *hctxs[HCTX_MAX_TYPES]; + + /* incremented at dispatch time */ + unsigned long rq_dispatched[2]; + unsigned long rq_merged; + + /* incremented at completion time */ + unsigned long ____cacheline_aligned_in_smp rq_completed[2]; + + struct request_queue *queue; + struct blk_mq_ctxs *ctxs; + struct kobject kobj; +} ____cacheline_aligned_in_smp; + +struct blk_flush_queue { + unsigned int flush_pending_idx:1; + unsigned int flush_running_idx:1; + blk_status_t rq_status; + unsigned long flush_pending_since; + struct list_head flush_queue[2]; + struct list_head flush_data_in_flight; + struct request *flush_rq; + + struct lock_class_key key; + spinlock_t mq_flush_lock; +}; + +static inline int enable_detect_flush_rq(void) +{ + return 1; +} + +static inline enum hctx_type blk_mq_get_hctx_type(unsigned int flags) +{ + enum hctx_type type = HCTX_TYPE_DEFAULT; + + /* + * The caller ensure that if REQ_HIPRI, poll must be enabled. + */ + if (flags & REQ_HIPRI) + type = HCTX_TYPE_POLL; + else if ((flags & REQ_OP_MASK) == REQ_OP_READ) + type = HCTX_TYPE_READ; + return type; +} + +static inline struct blk_mq_hw_ctx *blk_mq_get_hctx_byrq(struct request *rq) +{ + return rq->mq_ctx->hctxs[blk_mq_get_hctx_type(rq->cmd_flags)]; +} + +static inline struct request *desc_state_data_to_req(struct virtqueue *vq, int head) +{ + void *data = (to_vvq(vq)->packed_ring ? to_vvq(vq)->packed.desc_state[head].data : + to_vvq(vq)->split.desc_state[head].data); + return data ? blk_mq_rq_from_pdu(data) : NULL; +} + +static inline int get_rq_internal_tag(struct request *rq) +{ + return rq ? 
rq->internal_tag : -1; +} + +static inline unsigned long get_issue_driver_ns(struct request *rq) +{ + if (!rq) + return 0; + if (rq->io_start_time_ns) + return rq->io_start_time_ns; + if (rq->deadline > rq->timeout) + return jiffies_to_usecs(rq->deadline - rq->timeout) * 1000; + return 0; +} + +/* + * LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0) + */ +static inline u64 get_check_hang_time_ns(void) +{ + return ktime_get_ns(); +} + +extern fn_queue_tag_busy_iter sym_blk_mq_queue_tag_busy_iter; +typedef void (*blk_mq_rq_iter)(struct request *, void *, bool); +static blk_mq_rq_iter fn_blk_mq_check_hang = NULL; +static bool blk_mq_check_rq_hang(struct blk_mq_hw_ctx *hctx, + struct request *rq, void *priv, bool reserved) +{ + if (fn_blk_mq_check_hang) + fn_blk_mq_check_hang(rq, priv, reserved); + return true; +} + +static inline int iter_all_rq(struct request_queue *q, blk_mq_rq_iter fn, void *data) +{ + fn_blk_mq_check_hang = fn; + + sym_blk_mq_queue_tag_busy_iter(q, blk_mq_check_rq_hang, data); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.112-11.2.al8.x86_64/nvme.h b/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.112-11.2.al8.x86_64/nvme.h new file mode 100644 index 0000000000000000000000000000000000000000..acda33af65e3da6b3a3a10b4ab9561a1720bd840 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.112-11.2.al8.x86_64/nvme.h @@ -0,0 +1,63 @@ +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct nvme_queue { + void *nvme_dev; //struct nvme_dev *dev; + spinlock_t sq_lock; + void *sq_cmds; + /* only used for poll queues: */ + spinlock_t cq_poll_lock ____cacheline_aligned_in_smp; + struct nvme_completion *cqes; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_tail; + u16 last_sq_tail; + u16 cq_head; + u16 qid; + u8 cq_phase; + u8 sqes; + unsigned long flags; +#define NVMEQ_ENABLED 0 +#define NVMEQ_SQ_CMB 1 +#define NVMEQ_DELETE_ERROR 2 +#define NVMEQ_POLLED 3 + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; + struct completion delete_done; +}; + +static int get_sq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int tail = nvmeq->sq_tail; + struct nvme_command cmd; + + do { + if (tail >= nvmeq->q_depth) + break; + memcpy(&cmd, nvmeq->sq_cmds + (tail << nvmeq->sqes), + sizeof(cmd)); + if (cmd.common.command_id == rq->tag) + return tail; + } while (--tail >= 0); + return -1; +} + +static unsigned long get_cmd_ctx(struct nvme_queue *nvmeq, struct request *rq) +{ + //struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.112-11.2.al8.x86_64/virtio_blk.h b/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.112-11.2.al8.x86_64/virtio_blk.h new file mode 100644 index 0000000000000000000000000000000000000000..ed80e59fddcf9f45323f0aafb79163521fb96e62 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.112-11.2.al8.x86_64/virtio_blk.h @@ -0,0 +1,310 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct vring_desc_state_split { + void *data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. 
*/ +}; + +struct vring_desc_state_packed { + void *data; /* Data for callback. */ + struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */ + u16 num; /* Descriptor list length. */ + u16 next; /* The next desc state in a list. */ + u16 last; /* The last desc state in a list. */ +}; + +struct vring_desc_extra_packed { + dma_addr_t addr; /* Buffer DMA addr. */ + u32 len; /* Buffer length. */ + u16 flags; /* Descriptor flags. */ +}; + +struct vring_virtqueue { + struct virtqueue vq; + + /* Is this a packed ring? */ + bool packed_ring; + + /* Is DMA API used? */ + bool use_dma_api; + + /* Can we use weak barriers? */ + bool weak_barriers; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Host publishes avail event idx */ + bool event; + + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. */ + unsigned int num_added; + + /* Last used index we've seen. */ + u16 last_used_idx; + + union { + /* Available for split ring */ + struct { + /* Actual memory layout for this queue. */ + struct vring vring; + + /* Last written value to avail->flags */ + u16 avail_flags_shadow; + + /* + * Last written value to avail->idx in + * guest byte order. + */ + u16 avail_idx_shadow; + + /* Per-descriptor state. */ + struct vring_desc_state_split *desc_state; + + /* DMA address and size information */ + dma_addr_t queue_dma_addr; + size_t queue_size_in_bytes; + } split; + + /* Available for packed ring */ + struct { + /* Actual memory layout for this queue. */ + struct { + unsigned int num; + struct vring_packed_desc *desc; + struct vring_packed_desc_event *driver; + struct vring_packed_desc_event *device; + } vring; + + /* Driver ring wrap counter. */ + bool avail_wrap_counter; + + /* Device ring wrap counter. */ + bool used_wrap_counter; + + /* Avail used flags. */ + u16 avail_used_flags; + + /* Index of the next avail descriptor. */ + u16 next_avail_idx; + + /* + * Last written value to driver->flags in + * guest byte order. + */ + u16 event_flags_shadow; + + /* Per-descriptor state. */ + struct vring_desc_state_packed *desc_state; + struct vring_desc_state_packed *desc_extra; + + /* DMA address and size information */ + dma_addr_t ring_dma_addr; + dma_addr_t driver_event_dma_addr; + dma_addr_t device_event_dma_addr; + size_t ring_size_in_bytes; + size_t event_size_in_bytes; + } packed; + }; + + /* How to notify other side. FIXME: commonalize hcalls! */ + bool (*notify)(struct virtqueue *vq); + + /* DMA, allocation, and size information */ + bool we_own_ring; + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; +#endif +}; + +#define VQ_NAME_LEN 16 +struct virtio_blk_vq { + struct virtqueue *vq; + spinlock_t lock; + char name[VQ_NAME_LEN]; +} ____cacheline_aligned_in_smp; + +struct virtio_blk { + /* + * This mutex must be held by anything that may run after + * virtblk_remove() sets vblk->vdev to NULL. + * + * blk-mq, virtqueue processing, and sysfs attribute code paths are + * shut down before vblk->vdev is set to NULL and therefore do not need + * to hold this mutex. + */ + struct mutex vdev_mutex; + struct virtio_device *vdev; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* Block layer tags. 
*/ + struct blk_mq_tag_set tag_set; + + /* Process context for config space updates */ + struct work_struct config_work; + + /* + * Tracks references from block_device_operations open/release and + * virtio_driver probe/remove so this object can be freed once no + * longer in use. + */ + refcount_t refs; + + /* What host tells us, plus 2 for header & tailer. */ + unsigned int sg_elems; + + /* Ida index - used to track minor number allocations. */ + int index; + + /* num of vqs */ + int num_vqs; + struct virtio_blk_vq *vqs; +}; + +struct virtblk_req { + struct virtio_blk_outhdr out_hdr; + u8 status; + struct scatterlist sg[]; +}; +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +struct blk_mq_ctxs { + struct kobject kobj; + struct blk_mq_ctx __percpu *queue_ctx; +}; + +struct blk_mq_ctx { + struct { + spinlock_t lock; + struct list_head rq_lists[HCTX_MAX_TYPES]; + } ____cacheline_aligned_in_smp; + + unsigned int cpu; + unsigned short index_hw[HCTX_MAX_TYPES]; + struct blk_mq_hw_ctx *hctxs[HCTX_MAX_TYPES]; + + /* incremented at dispatch time */ + unsigned long rq_dispatched[2]; + unsigned long rq_merged; + + /* incremented at completion time */ + unsigned long ____cacheline_aligned_in_smp rq_completed[2]; + + struct request_queue *queue; + struct blk_mq_ctxs *ctxs; + struct kobject kobj; +} ____cacheline_aligned_in_smp; + +struct blk_flush_queue { + unsigned int flush_pending_idx:1; + unsigned int flush_running_idx:1; + blk_status_t rq_status; + unsigned long flush_pending_since; + struct list_head flush_queue[2]; + struct list_head flush_data_in_flight; + struct request *flush_rq; + + struct lock_class_key key; + spinlock_t mq_flush_lock; +}; + +static inline int enable_detect_flush_rq(void) +{ + return 1; +} + +static inline enum hctx_type blk_mq_get_hctx_type(unsigned int flags) +{ + enum hctx_type type = HCTX_TYPE_DEFAULT; + + /* + * The caller ensure that if REQ_HIPRI, poll must be enabled. + */ + if (flags & REQ_HIPRI) + type = HCTX_TYPE_POLL; + else if ((flags & REQ_OP_MASK) == REQ_OP_READ) + type = HCTX_TYPE_READ; + return type; +} + +static inline struct blk_mq_hw_ctx *blk_mq_get_hctx_byrq(struct request *rq) +{ + return rq->mq_ctx->hctxs[blk_mq_get_hctx_type(rq->cmd_flags)]; +} + +static inline struct request *desc_state_data_to_req(struct virtqueue *vq, int head) +{ + void *data = (to_vvq(vq)->packed_ring ? to_vvq(vq)->packed.desc_state[head].data : + to_vvq(vq)->split.desc_state[head].data); + return data ? blk_mq_rq_from_pdu(data) : NULL; +} + +static inline int get_rq_internal_tag(struct request *rq) +{ + return rq ? 
rq->internal_tag : -1; +} + +static inline unsigned long get_issue_driver_ns(struct request *rq) +{ + if (!rq) + return 0; + if (rq->io_start_time_ns) + return rq->io_start_time_ns; + if (rq->deadline > rq->timeout) + return jiffies_to_usecs(rq->deadline - rq->timeout) * 1000; + return 0; +} + +/* + * LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0) + */ +static inline u64 get_check_hang_time_ns(void) +{ + return ktime_get_ns(); +} + +extern fn_queue_tag_busy_iter sym_blk_mq_queue_tag_busy_iter; +typedef void (*blk_mq_rq_iter)(struct request *, void *, bool); +static blk_mq_rq_iter fn_blk_mq_check_hang = NULL; +static bool blk_mq_check_rq_hang(struct blk_mq_hw_ctx *hctx, + struct request *rq, void *priv, bool reserved) +{ + if (fn_blk_mq_check_hang) + fn_blk_mq_check_hang(rq, priv, reserved); + return true; +} + +static inline int iter_all_rq(struct request_queue *q, blk_mq_rq_iter fn, void *data) +{ + fn_blk_mq_check_hang = fn; + + sym_blk_mq_queue_tag_busy_iter(q, blk_mq_check_rq_hang, data); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.112-11.al8.x86_64/nvme.h b/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.112-11.al8.x86_64/nvme.h new file mode 100644 index 0000000000000000000000000000000000000000..acda33af65e3da6b3a3a10b4ab9561a1720bd840 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.112-11.al8.x86_64/nvme.h @@ -0,0 +1,63 @@ +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct nvme_queue { + void *nvme_dev; //struct nvme_dev *dev; + spinlock_t sq_lock; + void *sq_cmds; + /* only used for poll queues: */ + spinlock_t cq_poll_lock ____cacheline_aligned_in_smp; + struct nvme_completion *cqes; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_tail; + u16 last_sq_tail; + u16 cq_head; + u16 qid; + u8 cq_phase; + u8 sqes; + unsigned long flags; +#define NVMEQ_ENABLED 0 +#define NVMEQ_SQ_CMB 1 +#define NVMEQ_DELETE_ERROR 2 +#define NVMEQ_POLLED 3 + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; + struct completion delete_done; +}; + +static int get_sq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int tail = nvmeq->sq_tail; + struct nvme_command cmd; + + do { + if (tail >= nvmeq->q_depth) + break; + memcpy(&cmd, nvmeq->sq_cmds + (tail << nvmeq->sqes), + sizeof(cmd)); + if (cmd.common.command_id == rq->tag) + return tail; + } while (--tail >= 0); + return -1; +} + +static unsigned long get_cmd_ctx(struct nvme_queue *nvmeq, struct request *rq) +{ + //struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.112-11.al8.x86_64/virtio_blk.h b/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.112-11.al8.x86_64/virtio_blk.h new file mode 100644 index 0000000000000000000000000000000000000000..ed80e59fddcf9f45323f0aafb79163521fb96e62 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.112-11.al8.x86_64/virtio_blk.h @@ -0,0 +1,310 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct vring_desc_state_split { + void *data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. 
*/ +}; + +struct vring_desc_state_packed { + void *data; /* Data for callback. */ + struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */ + u16 num; /* Descriptor list length. */ + u16 next; /* The next desc state in a list. */ + u16 last; /* The last desc state in a list. */ +}; + +struct vring_desc_extra_packed { + dma_addr_t addr; /* Buffer DMA addr. */ + u32 len; /* Buffer length. */ + u16 flags; /* Descriptor flags. */ +}; + +struct vring_virtqueue { + struct virtqueue vq; + + /* Is this a packed ring? */ + bool packed_ring; + + /* Is DMA API used? */ + bool use_dma_api; + + /* Can we use weak barriers? */ + bool weak_barriers; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Host publishes avail event idx */ + bool event; + + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. */ + unsigned int num_added; + + /* Last used index we've seen. */ + u16 last_used_idx; + + union { + /* Available for split ring */ + struct { + /* Actual memory layout for this queue. */ + struct vring vring; + + /* Last written value to avail->flags */ + u16 avail_flags_shadow; + + /* + * Last written value to avail->idx in + * guest byte order. + */ + u16 avail_idx_shadow; + + /* Per-descriptor state. */ + struct vring_desc_state_split *desc_state; + + /* DMA address and size information */ + dma_addr_t queue_dma_addr; + size_t queue_size_in_bytes; + } split; + + /* Available for packed ring */ + struct { + /* Actual memory layout for this queue. */ + struct { + unsigned int num; + struct vring_packed_desc *desc; + struct vring_packed_desc_event *driver; + struct vring_packed_desc_event *device; + } vring; + + /* Driver ring wrap counter. */ + bool avail_wrap_counter; + + /* Device ring wrap counter. */ + bool used_wrap_counter; + + /* Avail used flags. */ + u16 avail_used_flags; + + /* Index of the next avail descriptor. */ + u16 next_avail_idx; + + /* + * Last written value to driver->flags in + * guest byte order. + */ + u16 event_flags_shadow; + + /* Per-descriptor state. */ + struct vring_desc_state_packed *desc_state; + struct vring_desc_state_packed *desc_extra; + + /* DMA address and size information */ + dma_addr_t ring_dma_addr; + dma_addr_t driver_event_dma_addr; + dma_addr_t device_event_dma_addr; + size_t ring_size_in_bytes; + size_t event_size_in_bytes; + } packed; + }; + + /* How to notify other side. FIXME: commonalize hcalls! */ + bool (*notify)(struct virtqueue *vq); + + /* DMA, allocation, and size information */ + bool we_own_ring; + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; +#endif +}; + +#define VQ_NAME_LEN 16 +struct virtio_blk_vq { + struct virtqueue *vq; + spinlock_t lock; + char name[VQ_NAME_LEN]; +} ____cacheline_aligned_in_smp; + +struct virtio_blk { + /* + * This mutex must be held by anything that may run after + * virtblk_remove() sets vblk->vdev to NULL. + * + * blk-mq, virtqueue processing, and sysfs attribute code paths are + * shut down before vblk->vdev is set to NULL and therefore do not need + * to hold this mutex. + */ + struct mutex vdev_mutex; + struct virtio_device *vdev; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* Block layer tags. 
*/ + struct blk_mq_tag_set tag_set; + + /* Process context for config space updates */ + struct work_struct config_work; + + /* + * Tracks references from block_device_operations open/release and + * virtio_driver probe/remove so this object can be freed once no + * longer in use. + */ + refcount_t refs; + + /* What host tells us, plus 2 for header & tailer. */ + unsigned int sg_elems; + + /* Ida index - used to track minor number allocations. */ + int index; + + /* num of vqs */ + int num_vqs; + struct virtio_blk_vq *vqs; +}; + +struct virtblk_req { + struct virtio_blk_outhdr out_hdr; + u8 status; + struct scatterlist sg[]; +}; +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +struct blk_mq_ctxs { + struct kobject kobj; + struct blk_mq_ctx __percpu *queue_ctx; +}; + +struct blk_mq_ctx { + struct { + spinlock_t lock; + struct list_head rq_lists[HCTX_MAX_TYPES]; + } ____cacheline_aligned_in_smp; + + unsigned int cpu; + unsigned short index_hw[HCTX_MAX_TYPES]; + struct blk_mq_hw_ctx *hctxs[HCTX_MAX_TYPES]; + + /* incremented at dispatch time */ + unsigned long rq_dispatched[2]; + unsigned long rq_merged; + + /* incremented at completion time */ + unsigned long ____cacheline_aligned_in_smp rq_completed[2]; + + struct request_queue *queue; + struct blk_mq_ctxs *ctxs; + struct kobject kobj; +} ____cacheline_aligned_in_smp; + +struct blk_flush_queue { + unsigned int flush_pending_idx:1; + unsigned int flush_running_idx:1; + blk_status_t rq_status; + unsigned long flush_pending_since; + struct list_head flush_queue[2]; + struct list_head flush_data_in_flight; + struct request *flush_rq; + + struct lock_class_key key; + spinlock_t mq_flush_lock; +}; + +static inline int enable_detect_flush_rq(void) +{ + return 1; +} + +static inline enum hctx_type blk_mq_get_hctx_type(unsigned int flags) +{ + enum hctx_type type = HCTX_TYPE_DEFAULT; + + /* + * The caller ensure that if REQ_HIPRI, poll must be enabled. + */ + if (flags & REQ_HIPRI) + type = HCTX_TYPE_POLL; + else if ((flags & REQ_OP_MASK) == REQ_OP_READ) + type = HCTX_TYPE_READ; + return type; +} + +static inline struct blk_mq_hw_ctx *blk_mq_get_hctx_byrq(struct request *rq) +{ + return rq->mq_ctx->hctxs[blk_mq_get_hctx_type(rq->cmd_flags)]; +} + +static inline struct request *desc_state_data_to_req(struct virtqueue *vq, int head) +{ + void *data = (to_vvq(vq)->packed_ring ? to_vvq(vq)->packed.desc_state[head].data : + to_vvq(vq)->split.desc_state[head].data); + return data ? blk_mq_rq_from_pdu(data) : NULL; +} + +static inline int get_rq_internal_tag(struct request *rq) +{ + return rq ? 
rq->internal_tag : -1; +} + +static inline unsigned long get_issue_driver_ns(struct request *rq) +{ + if (!rq) + return 0; + if (rq->io_start_time_ns) + return rq->io_start_time_ns; + if (rq->deadline > rq->timeout) + return jiffies_to_usecs(rq->deadline - rq->timeout) * 1000; + return 0; +} + +/* + * LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0) + */ +static inline u64 get_check_hang_time_ns(void) +{ + return ktime_get_ns(); +} + +extern fn_queue_tag_busy_iter sym_blk_mq_queue_tag_busy_iter; +typedef void (*blk_mq_rq_iter)(struct request *, void *, bool); +static blk_mq_rq_iter fn_blk_mq_check_hang = NULL; +static bool blk_mq_check_rq_hang(struct blk_mq_hw_ctx *hctx, + struct request *rq, void *priv, bool reserved) +{ + if (fn_blk_mq_check_hang) + fn_blk_mq_check_hang(rq, priv, reserved); + return true; +} + +static inline int iter_all_rq(struct request_queue *q, blk_mq_rq_iter fn, void *data) +{ + fn_blk_mq_check_hang = fn; + + sym_blk_mq_queue_tag_busy_iter(q, blk_mq_check_rq_hang, data); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.134-12.1.al8.x86_64/nvme.h b/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.134-12.1.al8.x86_64/nvme.h new file mode 100644 index 0000000000000000000000000000000000000000..acda33af65e3da6b3a3a10b4ab9561a1720bd840 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.134-12.1.al8.x86_64/nvme.h @@ -0,0 +1,63 @@ +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct nvme_queue { + void *nvme_dev; //struct nvme_dev *dev; + spinlock_t sq_lock; + void *sq_cmds; + /* only used for poll queues: */ + spinlock_t cq_poll_lock ____cacheline_aligned_in_smp; + struct nvme_completion *cqes; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_tail; + u16 last_sq_tail; + u16 cq_head; + u16 qid; + u8 cq_phase; + u8 sqes; + unsigned long flags; +#define NVMEQ_ENABLED 0 +#define NVMEQ_SQ_CMB 1 +#define NVMEQ_DELETE_ERROR 2 +#define NVMEQ_POLLED 3 + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; + struct completion delete_done; +}; + +static int get_sq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int tail = nvmeq->sq_tail; + struct nvme_command cmd; + + do { + if (tail >= nvmeq->q_depth) + break; + memcpy(&cmd, nvmeq->sq_cmds + (tail << nvmeq->sqes), + sizeof(cmd)); + if (cmd.common.command_id == rq->tag) + return tail; + } while (--tail >= 0); + return -1; +} + +static unsigned long get_cmd_ctx(struct nvme_queue *nvmeq, struct request *rq) +{ + //struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.134-12.1.al8.x86_64/virtio_blk.h b/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.134-12.1.al8.x86_64/virtio_blk.h new file mode 100644 index 0000000000000000000000000000000000000000..ed80e59fddcf9f45323f0aafb79163521fb96e62 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.134-12.1.al8.x86_64/virtio_blk.h @@ -0,0 +1,310 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct vring_desc_state_split { + void *data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. 
*/ +}; + +struct vring_desc_state_packed { + void *data; /* Data for callback. */ + struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */ + u16 num; /* Descriptor list length. */ + u16 next; /* The next desc state in a list. */ + u16 last; /* The last desc state in a list. */ +}; + +struct vring_desc_extra_packed { + dma_addr_t addr; /* Buffer DMA addr. */ + u32 len; /* Buffer length. */ + u16 flags; /* Descriptor flags. */ +}; + +struct vring_virtqueue { + struct virtqueue vq; + + /* Is this a packed ring? */ + bool packed_ring; + + /* Is DMA API used? */ + bool use_dma_api; + + /* Can we use weak barriers? */ + bool weak_barriers; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Host publishes avail event idx */ + bool event; + + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. */ + unsigned int num_added; + + /* Last used index we've seen. */ + u16 last_used_idx; + + union { + /* Available for split ring */ + struct { + /* Actual memory layout for this queue. */ + struct vring vring; + + /* Last written value to avail->flags */ + u16 avail_flags_shadow; + + /* + * Last written value to avail->idx in + * guest byte order. + */ + u16 avail_idx_shadow; + + /* Per-descriptor state. */ + struct vring_desc_state_split *desc_state; + + /* DMA address and size information */ + dma_addr_t queue_dma_addr; + size_t queue_size_in_bytes; + } split; + + /* Available for packed ring */ + struct { + /* Actual memory layout for this queue. */ + struct { + unsigned int num; + struct vring_packed_desc *desc; + struct vring_packed_desc_event *driver; + struct vring_packed_desc_event *device; + } vring; + + /* Driver ring wrap counter. */ + bool avail_wrap_counter; + + /* Device ring wrap counter. */ + bool used_wrap_counter; + + /* Avail used flags. */ + u16 avail_used_flags; + + /* Index of the next avail descriptor. */ + u16 next_avail_idx; + + /* + * Last written value to driver->flags in + * guest byte order. + */ + u16 event_flags_shadow; + + /* Per-descriptor state. */ + struct vring_desc_state_packed *desc_state; + struct vring_desc_state_packed *desc_extra; + + /* DMA address and size information */ + dma_addr_t ring_dma_addr; + dma_addr_t driver_event_dma_addr; + dma_addr_t device_event_dma_addr; + size_t ring_size_in_bytes; + size_t event_size_in_bytes; + } packed; + }; + + /* How to notify other side. FIXME: commonalize hcalls! */ + bool (*notify)(struct virtqueue *vq); + + /* DMA, allocation, and size information */ + bool we_own_ring; + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; +#endif +}; + +#define VQ_NAME_LEN 16 +struct virtio_blk_vq { + struct virtqueue *vq; + spinlock_t lock; + char name[VQ_NAME_LEN]; +} ____cacheline_aligned_in_smp; + +struct virtio_blk { + /* + * This mutex must be held by anything that may run after + * virtblk_remove() sets vblk->vdev to NULL. + * + * blk-mq, virtqueue processing, and sysfs attribute code paths are + * shut down before vblk->vdev is set to NULL and therefore do not need + * to hold this mutex. + */ + struct mutex vdev_mutex; + struct virtio_device *vdev; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* Block layer tags. 
*/ + struct blk_mq_tag_set tag_set; + + /* Process context for config space updates */ + struct work_struct config_work; + + /* + * Tracks references from block_device_operations open/release and + * virtio_driver probe/remove so this object can be freed once no + * longer in use. + */ + refcount_t refs; + + /* What host tells us, plus 2 for header & tailer. */ + unsigned int sg_elems; + + /* Ida index - used to track minor number allocations. */ + int index; + + /* num of vqs */ + int num_vqs; + struct virtio_blk_vq *vqs; +}; + +struct virtblk_req { + struct virtio_blk_outhdr out_hdr; + u8 status; + struct scatterlist sg[]; +}; +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +struct blk_mq_ctxs { + struct kobject kobj; + struct blk_mq_ctx __percpu *queue_ctx; +}; + +struct blk_mq_ctx { + struct { + spinlock_t lock; + struct list_head rq_lists[HCTX_MAX_TYPES]; + } ____cacheline_aligned_in_smp; + + unsigned int cpu; + unsigned short index_hw[HCTX_MAX_TYPES]; + struct blk_mq_hw_ctx *hctxs[HCTX_MAX_TYPES]; + + /* incremented at dispatch time */ + unsigned long rq_dispatched[2]; + unsigned long rq_merged; + + /* incremented at completion time */ + unsigned long ____cacheline_aligned_in_smp rq_completed[2]; + + struct request_queue *queue; + struct blk_mq_ctxs *ctxs; + struct kobject kobj; +} ____cacheline_aligned_in_smp; + +struct blk_flush_queue { + unsigned int flush_pending_idx:1; + unsigned int flush_running_idx:1; + blk_status_t rq_status; + unsigned long flush_pending_since; + struct list_head flush_queue[2]; + struct list_head flush_data_in_flight; + struct request *flush_rq; + + struct lock_class_key key; + spinlock_t mq_flush_lock; +}; + +static inline int enable_detect_flush_rq(void) +{ + return 1; +} + +static inline enum hctx_type blk_mq_get_hctx_type(unsigned int flags) +{ + enum hctx_type type = HCTX_TYPE_DEFAULT; + + /* + * The caller ensure that if REQ_HIPRI, poll must be enabled. + */ + if (flags & REQ_HIPRI) + type = HCTX_TYPE_POLL; + else if ((flags & REQ_OP_MASK) == REQ_OP_READ) + type = HCTX_TYPE_READ; + return type; +} + +static inline struct blk_mq_hw_ctx *blk_mq_get_hctx_byrq(struct request *rq) +{ + return rq->mq_ctx->hctxs[blk_mq_get_hctx_type(rq->cmd_flags)]; +} + +static inline struct request *desc_state_data_to_req(struct virtqueue *vq, int head) +{ + void *data = (to_vvq(vq)->packed_ring ? to_vvq(vq)->packed.desc_state[head].data : + to_vvq(vq)->split.desc_state[head].data); + return data ? blk_mq_rq_from_pdu(data) : NULL; +} + +static inline int get_rq_internal_tag(struct request *rq) +{ + return rq ? 
rq->internal_tag : -1; +} + +static inline unsigned long get_issue_driver_ns(struct request *rq) +{ + if (!rq) + return 0; + if (rq->io_start_time_ns) + return rq->io_start_time_ns; + if (rq->deadline > rq->timeout) + return jiffies_to_usecs(rq->deadline - rq->timeout) * 1000; + return 0; +} + +/* + * LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0) + */ +static inline u64 get_check_hang_time_ns(void) +{ + return ktime_get_ns(); +} + +extern fn_queue_tag_busy_iter sym_blk_mq_queue_tag_busy_iter; +typedef void (*blk_mq_rq_iter)(struct request *, void *, bool); +static blk_mq_rq_iter fn_blk_mq_check_hang = NULL; +static bool blk_mq_check_rq_hang(struct blk_mq_hw_ctx *hctx, + struct request *rq, void *priv, bool reserved) +{ + if (fn_blk_mq_check_hang) + fn_blk_mq_check_hang(rq, priv, reserved); + return true; +} + +static inline int iter_all_rq(struct request_queue *q, blk_mq_rq_iter fn, void *data) +{ + fn_blk_mq_check_hang = fn; + + sym_blk_mq_queue_tag_busy_iter(q, blk_mq_check_rq_hang, data); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.134-12.2.al8.x86_64/nvme.h b/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.134-12.2.al8.x86_64/nvme.h new file mode 100644 index 0000000000000000000000000000000000000000..acda33af65e3da6b3a3a10b4ab9561a1720bd840 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.134-12.2.al8.x86_64/nvme.h @@ -0,0 +1,63 @@ +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct nvme_queue { + void *nvme_dev; //struct nvme_dev *dev; + spinlock_t sq_lock; + void *sq_cmds; + /* only used for poll queues: */ + spinlock_t cq_poll_lock ____cacheline_aligned_in_smp; + struct nvme_completion *cqes; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_tail; + u16 last_sq_tail; + u16 cq_head; + u16 qid; + u8 cq_phase; + u8 sqes; + unsigned long flags; +#define NVMEQ_ENABLED 0 +#define NVMEQ_SQ_CMB 1 +#define NVMEQ_DELETE_ERROR 2 +#define NVMEQ_POLLED 3 + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; + struct completion delete_done; +}; + +static int get_sq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int tail = nvmeq->sq_tail; + struct nvme_command cmd; + + do { + if (tail >= nvmeq->q_depth) + break; + memcpy(&cmd, nvmeq->sq_cmds + (tail << nvmeq->sqes), + sizeof(cmd)); + if (cmd.common.command_id == rq->tag) + return tail; + } while (--tail >= 0); + return -1; +} + +static unsigned long get_cmd_ctx(struct nvme_queue *nvmeq, struct request *rq) +{ + //struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.134-12.2.al8.x86_64/virtio_blk.h b/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.134-12.2.al8.x86_64/virtio_blk.h new file mode 100644 index 0000000000000000000000000000000000000000..ed80e59fddcf9f45323f0aafb79163521fb96e62 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.134-12.2.al8.x86_64/virtio_blk.h @@ -0,0 +1,310 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct vring_desc_state_split { + void *data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. 
*/ +}; + +struct vring_desc_state_packed { + void *data; /* Data for callback. */ + struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */ + u16 num; /* Descriptor list length. */ + u16 next; /* The next desc state in a list. */ + u16 last; /* The last desc state in a list. */ +}; + +struct vring_desc_extra_packed { + dma_addr_t addr; /* Buffer DMA addr. */ + u32 len; /* Buffer length. */ + u16 flags; /* Descriptor flags. */ +}; + +struct vring_virtqueue { + struct virtqueue vq; + + /* Is this a packed ring? */ + bool packed_ring; + + /* Is DMA API used? */ + bool use_dma_api; + + /* Can we use weak barriers? */ + bool weak_barriers; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Host publishes avail event idx */ + bool event; + + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. */ + unsigned int num_added; + + /* Last used index we've seen. */ + u16 last_used_idx; + + union { + /* Available for split ring */ + struct { + /* Actual memory layout for this queue. */ + struct vring vring; + + /* Last written value to avail->flags */ + u16 avail_flags_shadow; + + /* + * Last written value to avail->idx in + * guest byte order. + */ + u16 avail_idx_shadow; + + /* Per-descriptor state. */ + struct vring_desc_state_split *desc_state; + + /* DMA address and size information */ + dma_addr_t queue_dma_addr; + size_t queue_size_in_bytes; + } split; + + /* Available for packed ring */ + struct { + /* Actual memory layout for this queue. */ + struct { + unsigned int num; + struct vring_packed_desc *desc; + struct vring_packed_desc_event *driver; + struct vring_packed_desc_event *device; + } vring; + + /* Driver ring wrap counter. */ + bool avail_wrap_counter; + + /* Device ring wrap counter. */ + bool used_wrap_counter; + + /* Avail used flags. */ + u16 avail_used_flags; + + /* Index of the next avail descriptor. */ + u16 next_avail_idx; + + /* + * Last written value to driver->flags in + * guest byte order. + */ + u16 event_flags_shadow; + + /* Per-descriptor state. */ + struct vring_desc_state_packed *desc_state; + struct vring_desc_state_packed *desc_extra; + + /* DMA address and size information */ + dma_addr_t ring_dma_addr; + dma_addr_t driver_event_dma_addr; + dma_addr_t device_event_dma_addr; + size_t ring_size_in_bytes; + size_t event_size_in_bytes; + } packed; + }; + + /* How to notify other side. FIXME: commonalize hcalls! */ + bool (*notify)(struct virtqueue *vq); + + /* DMA, allocation, and size information */ + bool we_own_ring; + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; +#endif +}; + +#define VQ_NAME_LEN 16 +struct virtio_blk_vq { + struct virtqueue *vq; + spinlock_t lock; + char name[VQ_NAME_LEN]; +} ____cacheline_aligned_in_smp; + +struct virtio_blk { + /* + * This mutex must be held by anything that may run after + * virtblk_remove() sets vblk->vdev to NULL. + * + * blk-mq, virtqueue processing, and sysfs attribute code paths are + * shut down before vblk->vdev is set to NULL and therefore do not need + * to hold this mutex. + */ + struct mutex vdev_mutex; + struct virtio_device *vdev; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* Block layer tags. 
*/ + struct blk_mq_tag_set tag_set; + + /* Process context for config space updates */ + struct work_struct config_work; + + /* + * Tracks references from block_device_operations open/release and + * virtio_driver probe/remove so this object can be freed once no + * longer in use. + */ + refcount_t refs; + + /* What host tells us, plus 2 for header & tailer. */ + unsigned int sg_elems; + + /* Ida index - used to track minor number allocations. */ + int index; + + /* num of vqs */ + int num_vqs; + struct virtio_blk_vq *vqs; +}; + +struct virtblk_req { + struct virtio_blk_outhdr out_hdr; + u8 status; + struct scatterlist sg[]; +}; +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +struct blk_mq_ctxs { + struct kobject kobj; + struct blk_mq_ctx __percpu *queue_ctx; +}; + +struct blk_mq_ctx { + struct { + spinlock_t lock; + struct list_head rq_lists[HCTX_MAX_TYPES]; + } ____cacheline_aligned_in_smp; + + unsigned int cpu; + unsigned short index_hw[HCTX_MAX_TYPES]; + struct blk_mq_hw_ctx *hctxs[HCTX_MAX_TYPES]; + + /* incremented at dispatch time */ + unsigned long rq_dispatched[2]; + unsigned long rq_merged; + + /* incremented at completion time */ + unsigned long ____cacheline_aligned_in_smp rq_completed[2]; + + struct request_queue *queue; + struct blk_mq_ctxs *ctxs; + struct kobject kobj; +} ____cacheline_aligned_in_smp; + +struct blk_flush_queue { + unsigned int flush_pending_idx:1; + unsigned int flush_running_idx:1; + blk_status_t rq_status; + unsigned long flush_pending_since; + struct list_head flush_queue[2]; + struct list_head flush_data_in_flight; + struct request *flush_rq; + + struct lock_class_key key; + spinlock_t mq_flush_lock; +}; + +static inline int enable_detect_flush_rq(void) +{ + return 1; +} + +static inline enum hctx_type blk_mq_get_hctx_type(unsigned int flags) +{ + enum hctx_type type = HCTX_TYPE_DEFAULT; + + /* + * The caller ensure that if REQ_HIPRI, poll must be enabled. + */ + if (flags & REQ_HIPRI) + type = HCTX_TYPE_POLL; + else if ((flags & REQ_OP_MASK) == REQ_OP_READ) + type = HCTX_TYPE_READ; + return type; +} + +static inline struct blk_mq_hw_ctx *blk_mq_get_hctx_byrq(struct request *rq) +{ + return rq->mq_ctx->hctxs[blk_mq_get_hctx_type(rq->cmd_flags)]; +} + +static inline struct request *desc_state_data_to_req(struct virtqueue *vq, int head) +{ + void *data = (to_vvq(vq)->packed_ring ? to_vvq(vq)->packed.desc_state[head].data : + to_vvq(vq)->split.desc_state[head].data); + return data ? blk_mq_rq_from_pdu(data) : NULL; +} + +static inline int get_rq_internal_tag(struct request *rq) +{ + return rq ? 
rq->internal_tag : -1; +} + +static inline unsigned long get_issue_driver_ns(struct request *rq) +{ + if (!rq) + return 0; + if (rq->io_start_time_ns) + return rq->io_start_time_ns; + if (rq->deadline > rq->timeout) + return jiffies_to_usecs(rq->deadline - rq->timeout) * 1000; + return 0; +} + +/* + * LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0) + */ +static inline u64 get_check_hang_time_ns(void) +{ + return ktime_get_ns(); +} + +extern fn_queue_tag_busy_iter sym_blk_mq_queue_tag_busy_iter; +typedef void (*blk_mq_rq_iter)(struct request *, void *, bool); +static blk_mq_rq_iter fn_blk_mq_check_hang = NULL; +static bool blk_mq_check_rq_hang(struct blk_mq_hw_ctx *hctx, + struct request *rq, void *priv, bool reserved) +{ + if (fn_blk_mq_check_hang) + fn_blk_mq_check_hang(rq, priv, reserved); + return true; +} + +static inline int iter_all_rq(struct request_queue *q, blk_mq_rq_iter fn, void *data) +{ + fn_blk_mq_check_hang = fn; + + sym_blk_mq_queue_tag_busy_iter(q, blk_mq_check_rq_hang, data); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.134-12.al8.x86_64/nvme.h b/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.134-12.al8.x86_64/nvme.h new file mode 100644 index 0000000000000000000000000000000000000000..acda33af65e3da6b3a3a10b4ab9561a1720bd840 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.134-12.al8.x86_64/nvme.h @@ -0,0 +1,63 @@ +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct nvme_queue { + void *nvme_dev; //struct nvme_dev *dev; + spinlock_t sq_lock; + void *sq_cmds; + /* only used for poll queues: */ + spinlock_t cq_poll_lock ____cacheline_aligned_in_smp; + struct nvme_completion *cqes; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_tail; + u16 last_sq_tail; + u16 cq_head; + u16 qid; + u8 cq_phase; + u8 sqes; + unsigned long flags; +#define NVMEQ_ENABLED 0 +#define NVMEQ_SQ_CMB 1 +#define NVMEQ_DELETE_ERROR 2 +#define NVMEQ_POLLED 3 + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; + struct completion delete_done; +}; + +static int get_sq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int tail = nvmeq->sq_tail; + struct nvme_command cmd; + + do { + if (tail >= nvmeq->q_depth) + break; + memcpy(&cmd, nvmeq->sq_cmds + (tail << nvmeq->sqes), + sizeof(cmd)); + if (cmd.common.command_id == rq->tag) + return tail; + } while (--tail >= 0); + return -1; +} + +static unsigned long get_cmd_ctx(struct nvme_queue *nvmeq, struct request *rq) +{ + //struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.134-12.al8.x86_64/virtio_blk.h b/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.134-12.al8.x86_64/virtio_blk.h new file mode 100644 index 0000000000000000000000000000000000000000..ed80e59fddcf9f45323f0aafb79163521fb96e62 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.134-12.al8.x86_64/virtio_blk.h @@ -0,0 +1,310 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct vring_desc_state_split { + void *data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. 
*/ +}; + +struct vring_desc_state_packed { + void *data; /* Data for callback. */ + struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */ + u16 num; /* Descriptor list length. */ + u16 next; /* The next desc state in a list. */ + u16 last; /* The last desc state in a list. */ +}; + +struct vring_desc_extra_packed { + dma_addr_t addr; /* Buffer DMA addr. */ + u32 len; /* Buffer length. */ + u16 flags; /* Descriptor flags. */ +}; + +struct vring_virtqueue { + struct virtqueue vq; + + /* Is this a packed ring? */ + bool packed_ring; + + /* Is DMA API used? */ + bool use_dma_api; + + /* Can we use weak barriers? */ + bool weak_barriers; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Host publishes avail event idx */ + bool event; + + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. */ + unsigned int num_added; + + /* Last used index we've seen. */ + u16 last_used_idx; + + union { + /* Available for split ring */ + struct { + /* Actual memory layout for this queue. */ + struct vring vring; + + /* Last written value to avail->flags */ + u16 avail_flags_shadow; + + /* + * Last written value to avail->idx in + * guest byte order. + */ + u16 avail_idx_shadow; + + /* Per-descriptor state. */ + struct vring_desc_state_split *desc_state; + + /* DMA address and size information */ + dma_addr_t queue_dma_addr; + size_t queue_size_in_bytes; + } split; + + /* Available for packed ring */ + struct { + /* Actual memory layout for this queue. */ + struct { + unsigned int num; + struct vring_packed_desc *desc; + struct vring_packed_desc_event *driver; + struct vring_packed_desc_event *device; + } vring; + + /* Driver ring wrap counter. */ + bool avail_wrap_counter; + + /* Device ring wrap counter. */ + bool used_wrap_counter; + + /* Avail used flags. */ + u16 avail_used_flags; + + /* Index of the next avail descriptor. */ + u16 next_avail_idx; + + /* + * Last written value to driver->flags in + * guest byte order. + */ + u16 event_flags_shadow; + + /* Per-descriptor state. */ + struct vring_desc_state_packed *desc_state; + struct vring_desc_state_packed *desc_extra; + + /* DMA address and size information */ + dma_addr_t ring_dma_addr; + dma_addr_t driver_event_dma_addr; + dma_addr_t device_event_dma_addr; + size_t ring_size_in_bytes; + size_t event_size_in_bytes; + } packed; + }; + + /* How to notify other side. FIXME: commonalize hcalls! */ + bool (*notify)(struct virtqueue *vq); + + /* DMA, allocation, and size information */ + bool we_own_ring; + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; +#endif +}; + +#define VQ_NAME_LEN 16 +struct virtio_blk_vq { + struct virtqueue *vq; + spinlock_t lock; + char name[VQ_NAME_LEN]; +} ____cacheline_aligned_in_smp; + +struct virtio_blk { + /* + * This mutex must be held by anything that may run after + * virtblk_remove() sets vblk->vdev to NULL. + * + * blk-mq, virtqueue processing, and sysfs attribute code paths are + * shut down before vblk->vdev is set to NULL and therefore do not need + * to hold this mutex. + */ + struct mutex vdev_mutex; + struct virtio_device *vdev; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* Block layer tags. 
*/ + struct blk_mq_tag_set tag_set; + + /* Process context for config space updates */ + struct work_struct config_work; + + /* + * Tracks references from block_device_operations open/release and + * virtio_driver probe/remove so this object can be freed once no + * longer in use. + */ + refcount_t refs; + + /* What host tells us, plus 2 for header & tailer. */ + unsigned int sg_elems; + + /* Ida index - used to track minor number allocations. */ + int index; + + /* num of vqs */ + int num_vqs; + struct virtio_blk_vq *vqs; +}; + +struct virtblk_req { + struct virtio_blk_outhdr out_hdr; + u8 status; + struct scatterlist sg[]; +}; +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +struct blk_mq_ctxs { + struct kobject kobj; + struct blk_mq_ctx __percpu *queue_ctx; +}; + +struct blk_mq_ctx { + struct { + spinlock_t lock; + struct list_head rq_lists[HCTX_MAX_TYPES]; + } ____cacheline_aligned_in_smp; + + unsigned int cpu; + unsigned short index_hw[HCTX_MAX_TYPES]; + struct blk_mq_hw_ctx *hctxs[HCTX_MAX_TYPES]; + + /* incremented at dispatch time */ + unsigned long rq_dispatched[2]; + unsigned long rq_merged; + + /* incremented at completion time */ + unsigned long ____cacheline_aligned_in_smp rq_completed[2]; + + struct request_queue *queue; + struct blk_mq_ctxs *ctxs; + struct kobject kobj; +} ____cacheline_aligned_in_smp; + +struct blk_flush_queue { + unsigned int flush_pending_idx:1; + unsigned int flush_running_idx:1; + blk_status_t rq_status; + unsigned long flush_pending_since; + struct list_head flush_queue[2]; + struct list_head flush_data_in_flight; + struct request *flush_rq; + + struct lock_class_key key; + spinlock_t mq_flush_lock; +}; + +static inline int enable_detect_flush_rq(void) +{ + return 1; +} + +static inline enum hctx_type blk_mq_get_hctx_type(unsigned int flags) +{ + enum hctx_type type = HCTX_TYPE_DEFAULT; + + /* + * The caller ensure that if REQ_HIPRI, poll must be enabled. + */ + if (flags & REQ_HIPRI) + type = HCTX_TYPE_POLL; + else if ((flags & REQ_OP_MASK) == REQ_OP_READ) + type = HCTX_TYPE_READ; + return type; +} + +static inline struct blk_mq_hw_ctx *blk_mq_get_hctx_byrq(struct request *rq) +{ + return rq->mq_ctx->hctxs[blk_mq_get_hctx_type(rq->cmd_flags)]; +} + +static inline struct request *desc_state_data_to_req(struct virtqueue *vq, int head) +{ + void *data = (to_vvq(vq)->packed_ring ? to_vvq(vq)->packed.desc_state[head].data : + to_vvq(vq)->split.desc_state[head].data); + return data ? blk_mq_rq_from_pdu(data) : NULL; +} + +static inline int get_rq_internal_tag(struct request *rq) +{ + return rq ? 
rq->internal_tag : -1; +} + +static inline unsigned long get_issue_driver_ns(struct request *rq) +{ + if (!rq) + return 0; + if (rq->io_start_time_ns) + return rq->io_start_time_ns; + if (rq->deadline > rq->timeout) + return jiffies_to_usecs(rq->deadline - rq->timeout) * 1000; + return 0; +} + +/* + * LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0) + */ +static inline u64 get_check_hang_time_ns(void) +{ + return ktime_get_ns(); +} + +extern fn_queue_tag_busy_iter sym_blk_mq_queue_tag_busy_iter; +typedef void (*blk_mq_rq_iter)(struct request *, void *, bool); +static blk_mq_rq_iter fn_blk_mq_check_hang = NULL; +static bool blk_mq_check_rq_hang(struct blk_mq_hw_ctx *hctx, + struct request *rq, void *priv, bool reserved) +{ + if (fn_blk_mq_check_hang) + fn_blk_mq_check_hang(rq, priv, reserved); + return true; +} + +static inline int iter_all_rq(struct request_queue *q, blk_mq_rq_iter fn, void *data) +{ + fn_blk_mq_check_hang = fn; + + sym_blk_mq_queue_tag_busy_iter(q, blk_mq_check_rq_hang, data); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.134-13.1.al8.x86_64/nvme.h b/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.134-13.1.al8.x86_64/nvme.h new file mode 100644 index 0000000000000000000000000000000000000000..acda33af65e3da6b3a3a10b4ab9561a1720bd840 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.134-13.1.al8.x86_64/nvme.h @@ -0,0 +1,63 @@ +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct nvme_queue { + void *nvme_dev; //struct nvme_dev *dev; + spinlock_t sq_lock; + void *sq_cmds; + /* only used for poll queues: */ + spinlock_t cq_poll_lock ____cacheline_aligned_in_smp; + struct nvme_completion *cqes; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_tail; + u16 last_sq_tail; + u16 cq_head; + u16 qid; + u8 cq_phase; + u8 sqes; + unsigned long flags; +#define NVMEQ_ENABLED 0 +#define NVMEQ_SQ_CMB 1 +#define NVMEQ_DELETE_ERROR 2 +#define NVMEQ_POLLED 3 + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; + struct completion delete_done; +}; + +static int get_sq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int tail = nvmeq->sq_tail; + struct nvme_command cmd; + + do { + if (tail >= nvmeq->q_depth) + break; + memcpy(&cmd, nvmeq->sq_cmds + (tail << nvmeq->sqes), + sizeof(cmd)); + if (cmd.common.command_id == rq->tag) + return tail; + } while (--tail >= 0); + return -1; +} + +static unsigned long get_cmd_ctx(struct nvme_queue *nvmeq, struct request *rq) +{ + //struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.134-13.1.al8.x86_64/virtio_blk.h b/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.134-13.1.al8.x86_64/virtio_blk.h new file mode 100644 index 0000000000000000000000000000000000000000..ed80e59fddcf9f45323f0aafb79163521fb96e62 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.134-13.1.al8.x86_64/virtio_blk.h @@ -0,0 +1,310 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct vring_desc_state_split { + void *data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. 
*/ +}; + +struct vring_desc_state_packed { + void *data; /* Data for callback. */ + struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */ + u16 num; /* Descriptor list length. */ + u16 next; /* The next desc state in a list. */ + u16 last; /* The last desc state in a list. */ +}; + +struct vring_desc_extra_packed { + dma_addr_t addr; /* Buffer DMA addr. */ + u32 len; /* Buffer length. */ + u16 flags; /* Descriptor flags. */ +}; + +struct vring_virtqueue { + struct virtqueue vq; + + /* Is this a packed ring? */ + bool packed_ring; + + /* Is DMA API used? */ + bool use_dma_api; + + /* Can we use weak barriers? */ + bool weak_barriers; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Host publishes avail event idx */ + bool event; + + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. */ + unsigned int num_added; + + /* Last used index we've seen. */ + u16 last_used_idx; + + union { + /* Available for split ring */ + struct { + /* Actual memory layout for this queue. */ + struct vring vring; + + /* Last written value to avail->flags */ + u16 avail_flags_shadow; + + /* + * Last written value to avail->idx in + * guest byte order. + */ + u16 avail_idx_shadow; + + /* Per-descriptor state. */ + struct vring_desc_state_split *desc_state; + + /* DMA address and size information */ + dma_addr_t queue_dma_addr; + size_t queue_size_in_bytes; + } split; + + /* Available for packed ring */ + struct { + /* Actual memory layout for this queue. */ + struct { + unsigned int num; + struct vring_packed_desc *desc; + struct vring_packed_desc_event *driver; + struct vring_packed_desc_event *device; + } vring; + + /* Driver ring wrap counter. */ + bool avail_wrap_counter; + + /* Device ring wrap counter. */ + bool used_wrap_counter; + + /* Avail used flags. */ + u16 avail_used_flags; + + /* Index of the next avail descriptor. */ + u16 next_avail_idx; + + /* + * Last written value to driver->flags in + * guest byte order. + */ + u16 event_flags_shadow; + + /* Per-descriptor state. */ + struct vring_desc_state_packed *desc_state; + struct vring_desc_state_packed *desc_extra; + + /* DMA address and size information */ + dma_addr_t ring_dma_addr; + dma_addr_t driver_event_dma_addr; + dma_addr_t device_event_dma_addr; + size_t ring_size_in_bytes; + size_t event_size_in_bytes; + } packed; + }; + + /* How to notify other side. FIXME: commonalize hcalls! */ + bool (*notify)(struct virtqueue *vq); + + /* DMA, allocation, and size information */ + bool we_own_ring; + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; +#endif +}; + +#define VQ_NAME_LEN 16 +struct virtio_blk_vq { + struct virtqueue *vq; + spinlock_t lock; + char name[VQ_NAME_LEN]; +} ____cacheline_aligned_in_smp; + +struct virtio_blk { + /* + * This mutex must be held by anything that may run after + * virtblk_remove() sets vblk->vdev to NULL. + * + * blk-mq, virtqueue processing, and sysfs attribute code paths are + * shut down before vblk->vdev is set to NULL and therefore do not need + * to hold this mutex. + */ + struct mutex vdev_mutex; + struct virtio_device *vdev; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* Block layer tags. 
*/ + struct blk_mq_tag_set tag_set; + + /* Process context for config space updates */ + struct work_struct config_work; + + /* + * Tracks references from block_device_operations open/release and + * virtio_driver probe/remove so this object can be freed once no + * longer in use. + */ + refcount_t refs; + + /* What host tells us, plus 2 for header & tailer. */ + unsigned int sg_elems; + + /* Ida index - used to track minor number allocations. */ + int index; + + /* num of vqs */ + int num_vqs; + struct virtio_blk_vq *vqs; +}; + +struct virtblk_req { + struct virtio_blk_outhdr out_hdr; + u8 status; + struct scatterlist sg[]; +}; +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +struct blk_mq_ctxs { + struct kobject kobj; + struct blk_mq_ctx __percpu *queue_ctx; +}; + +struct blk_mq_ctx { + struct { + spinlock_t lock; + struct list_head rq_lists[HCTX_MAX_TYPES]; + } ____cacheline_aligned_in_smp; + + unsigned int cpu; + unsigned short index_hw[HCTX_MAX_TYPES]; + struct blk_mq_hw_ctx *hctxs[HCTX_MAX_TYPES]; + + /* incremented at dispatch time */ + unsigned long rq_dispatched[2]; + unsigned long rq_merged; + + /* incremented at completion time */ + unsigned long ____cacheline_aligned_in_smp rq_completed[2]; + + struct request_queue *queue; + struct blk_mq_ctxs *ctxs; + struct kobject kobj; +} ____cacheline_aligned_in_smp; + +struct blk_flush_queue { + unsigned int flush_pending_idx:1; + unsigned int flush_running_idx:1; + blk_status_t rq_status; + unsigned long flush_pending_since; + struct list_head flush_queue[2]; + struct list_head flush_data_in_flight; + struct request *flush_rq; + + struct lock_class_key key; + spinlock_t mq_flush_lock; +}; + +static inline int enable_detect_flush_rq(void) +{ + return 1; +} + +static inline enum hctx_type blk_mq_get_hctx_type(unsigned int flags) +{ + enum hctx_type type = HCTX_TYPE_DEFAULT; + + /* + * The caller ensure that if REQ_HIPRI, poll must be enabled. + */ + if (flags & REQ_HIPRI) + type = HCTX_TYPE_POLL; + else if ((flags & REQ_OP_MASK) == REQ_OP_READ) + type = HCTX_TYPE_READ; + return type; +} + +static inline struct blk_mq_hw_ctx *blk_mq_get_hctx_byrq(struct request *rq) +{ + return rq->mq_ctx->hctxs[blk_mq_get_hctx_type(rq->cmd_flags)]; +} + +static inline struct request *desc_state_data_to_req(struct virtqueue *vq, int head) +{ + void *data = (to_vvq(vq)->packed_ring ? to_vvq(vq)->packed.desc_state[head].data : + to_vvq(vq)->split.desc_state[head].data); + return data ? blk_mq_rq_from_pdu(data) : NULL; +} + +static inline int get_rq_internal_tag(struct request *rq) +{ + return rq ? 
rq->internal_tag : -1; +} + +static inline unsigned long get_issue_driver_ns(struct request *rq) +{ + if (!rq) + return 0; + if (rq->io_start_time_ns) + return rq->io_start_time_ns; + if (rq->deadline > rq->timeout) + return jiffies_to_usecs(rq->deadline - rq->timeout) * 1000; + return 0; +} + +/* + * LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0) + */ +static inline u64 get_check_hang_time_ns(void) +{ + return ktime_get_ns(); +} + +extern fn_queue_tag_busy_iter sym_blk_mq_queue_tag_busy_iter; +typedef void (*blk_mq_rq_iter)(struct request *, void *, bool); +static blk_mq_rq_iter fn_blk_mq_check_hang = NULL; +static bool blk_mq_check_rq_hang(struct blk_mq_hw_ctx *hctx, + struct request *rq, void *priv, bool reserved) +{ + if (fn_blk_mq_check_hang) + fn_blk_mq_check_hang(rq, priv, reserved); + return true; +} + +static inline int iter_all_rq(struct request_queue *q, blk_mq_rq_iter fn, void *data) +{ + fn_blk_mq_check_hang = fn; + + sym_blk_mq_queue_tag_busy_iter(q, blk_mq_check_rq_hang, data); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.134-13.al8.aarch64/nvme.h b/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.134-13.al8.aarch64/nvme.h new file mode 100644 index 0000000000000000000000000000000000000000..acda33af65e3da6b3a3a10b4ab9561a1720bd840 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.134-13.al8.aarch64/nvme.h @@ -0,0 +1,63 @@ +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct nvme_queue { + void *nvme_dev; //struct nvme_dev *dev; + spinlock_t sq_lock; + void *sq_cmds; + /* only used for poll queues: */ + spinlock_t cq_poll_lock ____cacheline_aligned_in_smp; + struct nvme_completion *cqes; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_tail; + u16 last_sq_tail; + u16 cq_head; + u16 qid; + u8 cq_phase; + u8 sqes; + unsigned long flags; +#define NVMEQ_ENABLED 0 +#define NVMEQ_SQ_CMB 1 +#define NVMEQ_DELETE_ERROR 2 +#define NVMEQ_POLLED 3 + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; + struct completion delete_done; +}; + +static int get_sq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int tail = nvmeq->sq_tail; + struct nvme_command cmd; + + do { + if (tail >= nvmeq->q_depth) + break; + memcpy(&cmd, nvmeq->sq_cmds + (tail << nvmeq->sqes), + sizeof(cmd)); + if (cmd.common.command_id == rq->tag) + return tail; + } while (--tail >= 0); + return -1; +} + +static unsigned long get_cmd_ctx(struct nvme_queue *nvmeq, struct request *rq) +{ + //struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.134-13.al8.aarch64/virtio_blk.h b/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.134-13.al8.aarch64/virtio_blk.h new file mode 100644 index 0000000000000000000000000000000000000000..ed80e59fddcf9f45323f0aafb79163521fb96e62 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.134-13.al8.aarch64/virtio_blk.h @@ -0,0 +1,310 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct vring_desc_state_split { + void *data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. 
*/ +}; + +struct vring_desc_state_packed { + void *data; /* Data for callback. */ + struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */ + u16 num; /* Descriptor list length. */ + u16 next; /* The next desc state in a list. */ + u16 last; /* The last desc state in a list. */ +}; + +struct vring_desc_extra_packed { + dma_addr_t addr; /* Buffer DMA addr. */ + u32 len; /* Buffer length. */ + u16 flags; /* Descriptor flags. */ +}; + +struct vring_virtqueue { + struct virtqueue vq; + + /* Is this a packed ring? */ + bool packed_ring; + + /* Is DMA API used? */ + bool use_dma_api; + + /* Can we use weak barriers? */ + bool weak_barriers; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Host publishes avail event idx */ + bool event; + + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. */ + unsigned int num_added; + + /* Last used index we've seen. */ + u16 last_used_idx; + + union { + /* Available for split ring */ + struct { + /* Actual memory layout for this queue. */ + struct vring vring; + + /* Last written value to avail->flags */ + u16 avail_flags_shadow; + + /* + * Last written value to avail->idx in + * guest byte order. + */ + u16 avail_idx_shadow; + + /* Per-descriptor state. */ + struct vring_desc_state_split *desc_state; + + /* DMA address and size information */ + dma_addr_t queue_dma_addr; + size_t queue_size_in_bytes; + } split; + + /* Available for packed ring */ + struct { + /* Actual memory layout for this queue. */ + struct { + unsigned int num; + struct vring_packed_desc *desc; + struct vring_packed_desc_event *driver; + struct vring_packed_desc_event *device; + } vring; + + /* Driver ring wrap counter. */ + bool avail_wrap_counter; + + /* Device ring wrap counter. */ + bool used_wrap_counter; + + /* Avail used flags. */ + u16 avail_used_flags; + + /* Index of the next avail descriptor. */ + u16 next_avail_idx; + + /* + * Last written value to driver->flags in + * guest byte order. + */ + u16 event_flags_shadow; + + /* Per-descriptor state. */ + struct vring_desc_state_packed *desc_state; + struct vring_desc_state_packed *desc_extra; + + /* DMA address and size information */ + dma_addr_t ring_dma_addr; + dma_addr_t driver_event_dma_addr; + dma_addr_t device_event_dma_addr; + size_t ring_size_in_bytes; + size_t event_size_in_bytes; + } packed; + }; + + /* How to notify other side. FIXME: commonalize hcalls! */ + bool (*notify)(struct virtqueue *vq); + + /* DMA, allocation, and size information */ + bool we_own_ring; + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; +#endif +}; + +#define VQ_NAME_LEN 16 +struct virtio_blk_vq { + struct virtqueue *vq; + spinlock_t lock; + char name[VQ_NAME_LEN]; +} ____cacheline_aligned_in_smp; + +struct virtio_blk { + /* + * This mutex must be held by anything that may run after + * virtblk_remove() sets vblk->vdev to NULL. + * + * blk-mq, virtqueue processing, and sysfs attribute code paths are + * shut down before vblk->vdev is set to NULL and therefore do not need + * to hold this mutex. + */ + struct mutex vdev_mutex; + struct virtio_device *vdev; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* Block layer tags. 
*/ + struct blk_mq_tag_set tag_set; + + /* Process context for config space updates */ + struct work_struct config_work; + + /* + * Tracks references from block_device_operations open/release and + * virtio_driver probe/remove so this object can be freed once no + * longer in use. + */ + refcount_t refs; + + /* What host tells us, plus 2 for header & tailer. */ + unsigned int sg_elems; + + /* Ida index - used to track minor number allocations. */ + int index; + + /* num of vqs */ + int num_vqs; + struct virtio_blk_vq *vqs; +}; + +struct virtblk_req { + struct virtio_blk_outhdr out_hdr; + u8 status; + struct scatterlist sg[]; +}; +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +struct blk_mq_ctxs { + struct kobject kobj; + struct blk_mq_ctx __percpu *queue_ctx; +}; + +struct blk_mq_ctx { + struct { + spinlock_t lock; + struct list_head rq_lists[HCTX_MAX_TYPES]; + } ____cacheline_aligned_in_smp; + + unsigned int cpu; + unsigned short index_hw[HCTX_MAX_TYPES]; + struct blk_mq_hw_ctx *hctxs[HCTX_MAX_TYPES]; + + /* incremented at dispatch time */ + unsigned long rq_dispatched[2]; + unsigned long rq_merged; + + /* incremented at completion time */ + unsigned long ____cacheline_aligned_in_smp rq_completed[2]; + + struct request_queue *queue; + struct blk_mq_ctxs *ctxs; + struct kobject kobj; +} ____cacheline_aligned_in_smp; + +struct blk_flush_queue { + unsigned int flush_pending_idx:1; + unsigned int flush_running_idx:1; + blk_status_t rq_status; + unsigned long flush_pending_since; + struct list_head flush_queue[2]; + struct list_head flush_data_in_flight; + struct request *flush_rq; + + struct lock_class_key key; + spinlock_t mq_flush_lock; +}; + +static inline int enable_detect_flush_rq(void) +{ + return 1; +} + +static inline enum hctx_type blk_mq_get_hctx_type(unsigned int flags) +{ + enum hctx_type type = HCTX_TYPE_DEFAULT; + + /* + * The caller ensure that if REQ_HIPRI, poll must be enabled. + */ + if (flags & REQ_HIPRI) + type = HCTX_TYPE_POLL; + else if ((flags & REQ_OP_MASK) == REQ_OP_READ) + type = HCTX_TYPE_READ; + return type; +} + +static inline struct blk_mq_hw_ctx *blk_mq_get_hctx_byrq(struct request *rq) +{ + return rq->mq_ctx->hctxs[blk_mq_get_hctx_type(rq->cmd_flags)]; +} + +static inline struct request *desc_state_data_to_req(struct virtqueue *vq, int head) +{ + void *data = (to_vvq(vq)->packed_ring ? to_vvq(vq)->packed.desc_state[head].data : + to_vvq(vq)->split.desc_state[head].data); + return data ? blk_mq_rq_from_pdu(data) : NULL; +} + +static inline int get_rq_internal_tag(struct request *rq) +{ + return rq ? 
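+	/*
+	 * Note added for clarity: the helpers that follow are the glue for
+	 * hang detection. iter_all_rq() stashes the caller's per-request
+	 * callback in fn_blk_mq_check_hang and drives it through the
+	 * resolved sym_blk_mq_queue_tag_busy_iter symbol, so one callback
+	 * visits every in-flight request of the queue. Rough caller sketch;
+	 * check_one_rq(), HANG_NS, account_hang() and result are
+	 * assumptions, not part of this header:
+	 *
+	 *   static void check_one_rq(struct request *rq, void *priv, bool rsv)
+	 *   {
+	 *           if (get_check_hang_time_ns() - get_issue_driver_ns(rq) > HANG_NS)
+	 *                   account_hang(rq, priv);
+	 *   }
+	 *
+	 *   iter_all_rq(q, check_one_rq, &result);
+	 */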
rq->internal_tag : -1; +} + +static inline unsigned long get_issue_driver_ns(struct request *rq) +{ + if (!rq) + return 0; + if (rq->io_start_time_ns) + return rq->io_start_time_ns; + if (rq->deadline > rq->timeout) + return jiffies_to_usecs(rq->deadline - rq->timeout) * 1000; + return 0; +} + +/* + * LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0) + */ +static inline u64 get_check_hang_time_ns(void) +{ + return ktime_get_ns(); +} + +extern fn_queue_tag_busy_iter sym_blk_mq_queue_tag_busy_iter; +typedef void (*blk_mq_rq_iter)(struct request *, void *, bool); +static blk_mq_rq_iter fn_blk_mq_check_hang = NULL; +static bool blk_mq_check_rq_hang(struct blk_mq_hw_ctx *hctx, + struct request *rq, void *priv, bool reserved) +{ + if (fn_blk_mq_check_hang) + fn_blk_mq_check_hang(rq, priv, reserved); + return true; +} + +static inline int iter_all_rq(struct request_queue *q, blk_mq_rq_iter fn, void *data) +{ + fn_blk_mq_check_hang = fn; + + sym_blk_mq_queue_tag_busy_iter(q, blk_mq_check_rq_hang, data); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.134-13.al8.x86_64/nvme.h b/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.134-13.al8.x86_64/nvme.h new file mode 100644 index 0000000000000000000000000000000000000000..acda33af65e3da6b3a3a10b4ab9561a1720bd840 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.134-13.al8.x86_64/nvme.h @@ -0,0 +1,63 @@ +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct nvme_queue { + void *nvme_dev; //struct nvme_dev *dev; + spinlock_t sq_lock; + void *sq_cmds; + /* only used for poll queues: */ + spinlock_t cq_poll_lock ____cacheline_aligned_in_smp; + struct nvme_completion *cqes; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_tail; + u16 last_sq_tail; + u16 cq_head; + u16 qid; + u8 cq_phase; + u8 sqes; + unsigned long flags; +#define NVMEQ_ENABLED 0 +#define NVMEQ_SQ_CMB 1 +#define NVMEQ_DELETE_ERROR 2 +#define NVMEQ_POLLED 3 + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; + struct completion delete_done; +}; + +static int get_sq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int tail = nvmeq->sq_tail; + struct nvme_command cmd; + + do { + if (tail >= nvmeq->q_depth) + break; + memcpy(&cmd, nvmeq->sq_cmds + (tail << nvmeq->sqes), + sizeof(cmd)); + if (cmd.common.command_id == rq->tag) + return tail; + } while (--tail >= 0); + return -1; +} + +static unsigned long get_cmd_ctx(struct nvme_queue *nvmeq, struct request *rq) +{ + //struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.134-13.al8.x86_64/virtio_blk.h b/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.134-13.al8.x86_64/virtio_blk.h new file mode 100644 index 0000000000000000000000000000000000000000..ed80e59fddcf9f45323f0aafb79163521fb96e62 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.134-13.al8.x86_64/virtio_blk.h @@ -0,0 +1,310 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct vring_desc_state_split { + void *data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. 
*/ +}; + +struct vring_desc_state_packed { + void *data; /* Data for callback. */ + struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */ + u16 num; /* Descriptor list length. */ + u16 next; /* The next desc state in a list. */ + u16 last; /* The last desc state in a list. */ +}; + +struct vring_desc_extra_packed { + dma_addr_t addr; /* Buffer DMA addr. */ + u32 len; /* Buffer length. */ + u16 flags; /* Descriptor flags. */ +}; + +struct vring_virtqueue { + struct virtqueue vq; + + /* Is this a packed ring? */ + bool packed_ring; + + /* Is DMA API used? */ + bool use_dma_api; + + /* Can we use weak barriers? */ + bool weak_barriers; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Host publishes avail event idx */ + bool event; + + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. */ + unsigned int num_added; + + /* Last used index we've seen. */ + u16 last_used_idx; + + union { + /* Available for split ring */ + struct { + /* Actual memory layout for this queue. */ + struct vring vring; + + /* Last written value to avail->flags */ + u16 avail_flags_shadow; + + /* + * Last written value to avail->idx in + * guest byte order. + */ + u16 avail_idx_shadow; + + /* Per-descriptor state. */ + struct vring_desc_state_split *desc_state; + + /* DMA address and size information */ + dma_addr_t queue_dma_addr; + size_t queue_size_in_bytes; + } split; + + /* Available for packed ring */ + struct { + /* Actual memory layout for this queue. */ + struct { + unsigned int num; + struct vring_packed_desc *desc; + struct vring_packed_desc_event *driver; + struct vring_packed_desc_event *device; + } vring; + + /* Driver ring wrap counter. */ + bool avail_wrap_counter; + + /* Device ring wrap counter. */ + bool used_wrap_counter; + + /* Avail used flags. */ + u16 avail_used_flags; + + /* Index of the next avail descriptor. */ + u16 next_avail_idx; + + /* + * Last written value to driver->flags in + * guest byte order. + */ + u16 event_flags_shadow; + + /* Per-descriptor state. */ + struct vring_desc_state_packed *desc_state; + struct vring_desc_state_packed *desc_extra; + + /* DMA address and size information */ + dma_addr_t ring_dma_addr; + dma_addr_t driver_event_dma_addr; + dma_addr_t device_event_dma_addr; + size_t ring_size_in_bytes; + size_t event_size_in_bytes; + } packed; + }; + + /* How to notify other side. FIXME: commonalize hcalls! */ + bool (*notify)(struct virtqueue *vq); + + /* DMA, allocation, and size information */ + bool we_own_ring; + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; +#endif +}; + +#define VQ_NAME_LEN 16 +struct virtio_blk_vq { + struct virtqueue *vq; + spinlock_t lock; + char name[VQ_NAME_LEN]; +} ____cacheline_aligned_in_smp; + +struct virtio_blk { + /* + * This mutex must be held by anything that may run after + * virtblk_remove() sets vblk->vdev to NULL. + * + * blk-mq, virtqueue processing, and sysfs attribute code paths are + * shut down before vblk->vdev is set to NULL and therefore do not need + * to hold this mutex. + */ + struct mutex vdev_mutex; + struct virtio_device *vdev; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* Block layer tags. 
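+	 *
+	 * Note added for clarity: struct virtio_blk here is a trimmed copy
+	 * of the private definition in drivers/block/virtio_blk.c for this
+	 * kernel version. The module never allocates one; it only casts the
+	 * driver's own object so that fields such as tag_set, num_vqs and
+	 * vqs can be read by offset. Rough sketch, where walk_one_vq() is an
+	 * assumed helper and vblk is taken from the gendisk (the virtio-blk
+	 * driver stores it in disk->private_data):
+	 *
+	 *   struct virtio_blk *vblk = disk->private_data;
+	 *   int i;
+	 *
+	 *   for (i = 0; i < vblk->num_vqs; i++)
+	 *           walk_one_vq(vblk->vqs[i].vq);
+	 *
+	 * Any field added or removed upstream shifts these offsets, which is
+	 * why this header is duplicated per kernel release.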
*/ + struct blk_mq_tag_set tag_set; + + /* Process context for config space updates */ + struct work_struct config_work; + + /* + * Tracks references from block_device_operations open/release and + * virtio_driver probe/remove so this object can be freed once no + * longer in use. + */ + refcount_t refs; + + /* What host tells us, plus 2 for header & tailer. */ + unsigned int sg_elems; + + /* Ida index - used to track minor number allocations. */ + int index; + + /* num of vqs */ + int num_vqs; + struct virtio_blk_vq *vqs; +}; + +struct virtblk_req { + struct virtio_blk_outhdr out_hdr; + u8 status; + struct scatterlist sg[]; +}; +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +struct blk_mq_ctxs { + struct kobject kobj; + struct blk_mq_ctx __percpu *queue_ctx; +}; + +struct blk_mq_ctx { + struct { + spinlock_t lock; + struct list_head rq_lists[HCTX_MAX_TYPES]; + } ____cacheline_aligned_in_smp; + + unsigned int cpu; + unsigned short index_hw[HCTX_MAX_TYPES]; + struct blk_mq_hw_ctx *hctxs[HCTX_MAX_TYPES]; + + /* incremented at dispatch time */ + unsigned long rq_dispatched[2]; + unsigned long rq_merged; + + /* incremented at completion time */ + unsigned long ____cacheline_aligned_in_smp rq_completed[2]; + + struct request_queue *queue; + struct blk_mq_ctxs *ctxs; + struct kobject kobj; +} ____cacheline_aligned_in_smp; + +struct blk_flush_queue { + unsigned int flush_pending_idx:1; + unsigned int flush_running_idx:1; + blk_status_t rq_status; + unsigned long flush_pending_since; + struct list_head flush_queue[2]; + struct list_head flush_data_in_flight; + struct request *flush_rq; + + struct lock_class_key key; + spinlock_t mq_flush_lock; +}; + +static inline int enable_detect_flush_rq(void) +{ + return 1; +} + +static inline enum hctx_type blk_mq_get_hctx_type(unsigned int flags) +{ + enum hctx_type type = HCTX_TYPE_DEFAULT; + + /* + * The caller ensure that if REQ_HIPRI, poll must be enabled. + */ + if (flags & REQ_HIPRI) + type = HCTX_TYPE_POLL; + else if ((flags & REQ_OP_MASK) == REQ_OP_READ) + type = HCTX_TYPE_READ; + return type; +} + +static inline struct blk_mq_hw_ctx *blk_mq_get_hctx_byrq(struct request *rq) +{ + return rq->mq_ctx->hctxs[blk_mq_get_hctx_type(rq->cmd_flags)]; +} + +static inline struct request *desc_state_data_to_req(struct virtqueue *vq, int head) +{ + void *data = (to_vvq(vq)->packed_ring ? to_vvq(vq)->packed.desc_state[head].data : + to_vvq(vq)->split.desc_state[head].data); + return data ? blk_mq_rq_from_pdu(data) : NULL; +} + +static inline int get_rq_internal_tag(struct request *rq) +{ + return rq ? 
rq->internal_tag : -1; +} + +static inline unsigned long get_issue_driver_ns(struct request *rq) +{ + if (!rq) + return 0; + if (rq->io_start_time_ns) + return rq->io_start_time_ns; + if (rq->deadline > rq->timeout) + return jiffies_to_usecs(rq->deadline - rq->timeout) * 1000; + return 0; +} + +/* + * LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0) + */ +static inline u64 get_check_hang_time_ns(void) +{ + return ktime_get_ns(); +} + +extern fn_queue_tag_busy_iter sym_blk_mq_queue_tag_busy_iter; +typedef void (*blk_mq_rq_iter)(struct request *, void *, bool); +static blk_mq_rq_iter fn_blk_mq_check_hang = NULL; +static bool blk_mq_check_rq_hang(struct blk_mq_hw_ctx *hctx, + struct request *rq, void *priv, bool reserved) +{ + if (fn_blk_mq_check_hang) + fn_blk_mq_check_hang(rq, priv, reserved); + return true; +} + +static inline int iter_all_rq(struct request_queue *q, blk_mq_rq_iter fn, void *data) +{ + fn_blk_mq_check_hang = fn; + + sym_blk_mq_queue_tag_busy_iter(q, blk_mq_check_rq_hang, data); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.84-10.1.al8.x86_64/nvme.h b/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.84-10.1.al8.x86_64/nvme.h new file mode 100644 index 0000000000000000000000000000000000000000..acda33af65e3da6b3a3a10b4ab9561a1720bd840 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.84-10.1.al8.x86_64/nvme.h @@ -0,0 +1,63 @@ +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct nvme_queue { + void *nvme_dev; //struct nvme_dev *dev; + spinlock_t sq_lock; + void *sq_cmds; + /* only used for poll queues: */ + spinlock_t cq_poll_lock ____cacheline_aligned_in_smp; + struct nvme_completion *cqes; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_tail; + u16 last_sq_tail; + u16 cq_head; + u16 qid; + u8 cq_phase; + u8 sqes; + unsigned long flags; +#define NVMEQ_ENABLED 0 +#define NVMEQ_SQ_CMB 1 +#define NVMEQ_DELETE_ERROR 2 +#define NVMEQ_POLLED 3 + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; + struct completion delete_done; +}; + +static int get_sq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int tail = nvmeq->sq_tail; + struct nvme_command cmd; + + do { + if (tail >= nvmeq->q_depth) + break; + memcpy(&cmd, nvmeq->sq_cmds + (tail << nvmeq->sqes), + sizeof(cmd)); + if (cmd.common.command_id == rq->tag) + return tail; + } while (--tail >= 0); + return -1; +} + +static unsigned long get_cmd_ctx(struct nvme_queue *nvmeq, struct request *rq) +{ + //struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.84-10.1.al8.x86_64/virtio_blk.h b/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.84-10.1.al8.x86_64/virtio_blk.h new file mode 100644 index 0000000000000000000000000000000000000000..ed80e59fddcf9f45323f0aafb79163521fb96e62 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.84-10.1.al8.x86_64/virtio_blk.h @@ -0,0 +1,310 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct vring_desc_state_split { + void *data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. 
*/ +}; + +struct vring_desc_state_packed { + void *data; /* Data for callback. */ + struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */ + u16 num; /* Descriptor list length. */ + u16 next; /* The next desc state in a list. */ + u16 last; /* The last desc state in a list. */ +}; + +struct vring_desc_extra_packed { + dma_addr_t addr; /* Buffer DMA addr. */ + u32 len; /* Buffer length. */ + u16 flags; /* Descriptor flags. */ +}; + +struct vring_virtqueue { + struct virtqueue vq; + + /* Is this a packed ring? */ + bool packed_ring; + + /* Is DMA API used? */ + bool use_dma_api; + + /* Can we use weak barriers? */ + bool weak_barriers; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Host publishes avail event idx */ + bool event; + + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. */ + unsigned int num_added; + + /* Last used index we've seen. */ + u16 last_used_idx; + + union { + /* Available for split ring */ + struct { + /* Actual memory layout for this queue. */ + struct vring vring; + + /* Last written value to avail->flags */ + u16 avail_flags_shadow; + + /* + * Last written value to avail->idx in + * guest byte order. + */ + u16 avail_idx_shadow; + + /* Per-descriptor state. */ + struct vring_desc_state_split *desc_state; + + /* DMA address and size information */ + dma_addr_t queue_dma_addr; + size_t queue_size_in_bytes; + } split; + + /* Available for packed ring */ + struct { + /* Actual memory layout for this queue. */ + struct { + unsigned int num; + struct vring_packed_desc *desc; + struct vring_packed_desc_event *driver; + struct vring_packed_desc_event *device; + } vring; + + /* Driver ring wrap counter. */ + bool avail_wrap_counter; + + /* Device ring wrap counter. */ + bool used_wrap_counter; + + /* Avail used flags. */ + u16 avail_used_flags; + + /* Index of the next avail descriptor. */ + u16 next_avail_idx; + + /* + * Last written value to driver->flags in + * guest byte order. + */ + u16 event_flags_shadow; + + /* Per-descriptor state. */ + struct vring_desc_state_packed *desc_state; + struct vring_desc_state_packed *desc_extra; + + /* DMA address and size information */ + dma_addr_t ring_dma_addr; + dma_addr_t driver_event_dma_addr; + dma_addr_t device_event_dma_addr; + size_t ring_size_in_bytes; + size_t event_size_in_bytes; + } packed; + }; + + /* How to notify other side. FIXME: commonalize hcalls! */ + bool (*notify)(struct virtqueue *vq); + + /* DMA, allocation, and size information */ + bool we_own_ring; + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; +#endif +}; + +#define VQ_NAME_LEN 16 +struct virtio_blk_vq { + struct virtqueue *vq; + spinlock_t lock; + char name[VQ_NAME_LEN]; +} ____cacheline_aligned_in_smp; + +struct virtio_blk { + /* + * This mutex must be held by anything that may run after + * virtblk_remove() sets vblk->vdev to NULL. + * + * blk-mq, virtqueue processing, and sysfs attribute code paths are + * shut down before vblk->vdev is set to NULL and therefore do not need + * to hold this mutex. + */ + struct mutex vdev_mutex; + struct virtio_device *vdev; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* Block layer tags. 
*/ + struct blk_mq_tag_set tag_set; + + /* Process context for config space updates */ + struct work_struct config_work; + + /* + * Tracks references from block_device_operations open/release and + * virtio_driver probe/remove so this object can be freed once no + * longer in use. + */ + refcount_t refs; + + /* What host tells us, plus 2 for header & tailer. */ + unsigned int sg_elems; + + /* Ida index - used to track minor number allocations. */ + int index; + + /* num of vqs */ + int num_vqs; + struct virtio_blk_vq *vqs; +}; + +struct virtblk_req { + struct virtio_blk_outhdr out_hdr; + u8 status; + struct scatterlist sg[]; +}; +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +struct blk_mq_ctxs { + struct kobject kobj; + struct blk_mq_ctx __percpu *queue_ctx; +}; + +struct blk_mq_ctx { + struct { + spinlock_t lock; + struct list_head rq_lists[HCTX_MAX_TYPES]; + } ____cacheline_aligned_in_smp; + + unsigned int cpu; + unsigned short index_hw[HCTX_MAX_TYPES]; + struct blk_mq_hw_ctx *hctxs[HCTX_MAX_TYPES]; + + /* incremented at dispatch time */ + unsigned long rq_dispatched[2]; + unsigned long rq_merged; + + /* incremented at completion time */ + unsigned long ____cacheline_aligned_in_smp rq_completed[2]; + + struct request_queue *queue; + struct blk_mq_ctxs *ctxs; + struct kobject kobj; +} ____cacheline_aligned_in_smp; + +struct blk_flush_queue { + unsigned int flush_pending_idx:1; + unsigned int flush_running_idx:1; + blk_status_t rq_status; + unsigned long flush_pending_since; + struct list_head flush_queue[2]; + struct list_head flush_data_in_flight; + struct request *flush_rq; + + struct lock_class_key key; + spinlock_t mq_flush_lock; +}; + +static inline int enable_detect_flush_rq(void) +{ + return 1; +} + +static inline enum hctx_type blk_mq_get_hctx_type(unsigned int flags) +{ + enum hctx_type type = HCTX_TYPE_DEFAULT; + + /* + * The caller ensure that if REQ_HIPRI, poll must be enabled. + */ + if (flags & REQ_HIPRI) + type = HCTX_TYPE_POLL; + else if ((flags & REQ_OP_MASK) == REQ_OP_READ) + type = HCTX_TYPE_READ; + return type; +} + +static inline struct blk_mq_hw_ctx *blk_mq_get_hctx_byrq(struct request *rq) +{ + return rq->mq_ctx->hctxs[blk_mq_get_hctx_type(rq->cmd_flags)]; +} + +static inline struct request *desc_state_data_to_req(struct virtqueue *vq, int head) +{ + void *data = (to_vvq(vq)->packed_ring ? to_vvq(vq)->packed.desc_state[head].data : + to_vvq(vq)->split.desc_state[head].data); + return data ? blk_mq_rq_from_pdu(data) : NULL; +} + +static inline int get_rq_internal_tag(struct request *rq) +{ + return rq ? 
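+	/*
+	 * Note added for clarity: blk_mq_get_hctx_type() above re-derives
+	 * the hardware-queue type from the request's cmd_flags the same way
+	 * this kernel's blk_mq_map_queue() does: REQ_HIPRI requests map to
+	 * the poll queues, plain reads to the read queues, everything else
+	 * to the default set. blk_mq_get_hctx_byrq() then picks the hctx
+	 * from the request's per-cpu blk_mq_ctx. Rough sketch:
+	 *
+	 *   struct blk_mq_hw_ctx *hctx = blk_mq_get_hctx_byrq(rq);
+	 *
+	 *   pr_debug("rq %p runs on hctx %u (type %d)\n",
+	 *            rq, hctx->queue_num, hctx->type);
+	 */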
rq->internal_tag : -1; +} + +static inline unsigned long get_issue_driver_ns(struct request *rq) +{ + if (!rq) + return 0; + if (rq->io_start_time_ns) + return rq->io_start_time_ns; + if (rq->deadline > rq->timeout) + return jiffies_to_usecs(rq->deadline - rq->timeout) * 1000; + return 0; +} + +/* + * LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0) + */ +static inline u64 get_check_hang_time_ns(void) +{ + return ktime_get_ns(); +} + +extern fn_queue_tag_busy_iter sym_blk_mq_queue_tag_busy_iter; +typedef void (*blk_mq_rq_iter)(struct request *, void *, bool); +static blk_mq_rq_iter fn_blk_mq_check_hang = NULL; +static bool blk_mq_check_rq_hang(struct blk_mq_hw_ctx *hctx, + struct request *rq, void *priv, bool reserved) +{ + if (fn_blk_mq_check_hang) + fn_blk_mq_check_hang(rq, priv, reserved); + return true; +} + +static inline int iter_all_rq(struct request_queue *q, blk_mq_rq_iter fn, void *data) +{ + fn_blk_mq_check_hang = fn; + + sym_blk_mq_queue_tag_busy_iter(q, blk_mq_check_rq_hang, data); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.84-10.2.al8.x86_64/nvme.h b/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.84-10.2.al8.x86_64/nvme.h new file mode 100644 index 0000000000000000000000000000000000000000..acda33af65e3da6b3a3a10b4ab9561a1720bd840 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.84-10.2.al8.x86_64/nvme.h @@ -0,0 +1,63 @@ +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct nvme_queue { + void *nvme_dev; //struct nvme_dev *dev; + spinlock_t sq_lock; + void *sq_cmds; + /* only used for poll queues: */ + spinlock_t cq_poll_lock ____cacheline_aligned_in_smp; + struct nvme_completion *cqes; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_tail; + u16 last_sq_tail; + u16 cq_head; + u16 qid; + u8 cq_phase; + u8 sqes; + unsigned long flags; +#define NVMEQ_ENABLED 0 +#define NVMEQ_SQ_CMB 1 +#define NVMEQ_DELETE_ERROR 2 +#define NVMEQ_POLLED 3 + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; + struct completion delete_done; +}; + +static int get_sq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int tail = nvmeq->sq_tail; + struct nvme_command cmd; + + do { + if (tail >= nvmeq->q_depth) + break; + memcpy(&cmd, nvmeq->sq_cmds + (tail << nvmeq->sqes), + sizeof(cmd)); + if (cmd.common.command_id == rq->tag) + return tail; + } while (--tail >= 0); + return -1; +} + +static unsigned long get_cmd_ctx(struct nvme_queue *nvmeq, struct request *rq) +{ + //struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.84-10.2.al8.x86_64/virtio_blk.h b/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.84-10.2.al8.x86_64/virtio_blk.h new file mode 100644 index 0000000000000000000000000000000000000000..ed80e59fddcf9f45323f0aafb79163521fb96e62 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.84-10.2.al8.x86_64/virtio_blk.h @@ -0,0 +1,310 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct vring_desc_state_split { + void *data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. 
*/ +}; + +struct vring_desc_state_packed { + void *data; /* Data for callback. */ + struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */ + u16 num; /* Descriptor list length. */ + u16 next; /* The next desc state in a list. */ + u16 last; /* The last desc state in a list. */ +}; + +struct vring_desc_extra_packed { + dma_addr_t addr; /* Buffer DMA addr. */ + u32 len; /* Buffer length. */ + u16 flags; /* Descriptor flags. */ +}; + +struct vring_virtqueue { + struct virtqueue vq; + + /* Is this a packed ring? */ + bool packed_ring; + + /* Is DMA API used? */ + bool use_dma_api; + + /* Can we use weak barriers? */ + bool weak_barriers; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Host publishes avail event idx */ + bool event; + + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. */ + unsigned int num_added; + + /* Last used index we've seen. */ + u16 last_used_idx; + + union { + /* Available for split ring */ + struct { + /* Actual memory layout for this queue. */ + struct vring vring; + + /* Last written value to avail->flags */ + u16 avail_flags_shadow; + + /* + * Last written value to avail->idx in + * guest byte order. + */ + u16 avail_idx_shadow; + + /* Per-descriptor state. */ + struct vring_desc_state_split *desc_state; + + /* DMA address and size information */ + dma_addr_t queue_dma_addr; + size_t queue_size_in_bytes; + } split; + + /* Available for packed ring */ + struct { + /* Actual memory layout for this queue. */ + struct { + unsigned int num; + struct vring_packed_desc *desc; + struct vring_packed_desc_event *driver; + struct vring_packed_desc_event *device; + } vring; + + /* Driver ring wrap counter. */ + bool avail_wrap_counter; + + /* Device ring wrap counter. */ + bool used_wrap_counter; + + /* Avail used flags. */ + u16 avail_used_flags; + + /* Index of the next avail descriptor. */ + u16 next_avail_idx; + + /* + * Last written value to driver->flags in + * guest byte order. + */ + u16 event_flags_shadow; + + /* Per-descriptor state. */ + struct vring_desc_state_packed *desc_state; + struct vring_desc_state_packed *desc_extra; + + /* DMA address and size information */ + dma_addr_t ring_dma_addr; + dma_addr_t driver_event_dma_addr; + dma_addr_t device_event_dma_addr; + size_t ring_size_in_bytes; + size_t event_size_in_bytes; + } packed; + }; + + /* How to notify other side. FIXME: commonalize hcalls! */ + bool (*notify)(struct virtqueue *vq); + + /* DMA, allocation, and size information */ + bool we_own_ring; + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; +#endif +}; + +#define VQ_NAME_LEN 16 +struct virtio_blk_vq { + struct virtqueue *vq; + spinlock_t lock; + char name[VQ_NAME_LEN]; +} ____cacheline_aligned_in_smp; + +struct virtio_blk { + /* + * This mutex must be held by anything that may run after + * virtblk_remove() sets vblk->vdev to NULL. + * + * blk-mq, virtqueue processing, and sysfs attribute code paths are + * shut down before vblk->vdev is set to NULL and therefore do not need + * to hold this mutex. + */ + struct mutex vdev_mutex; + struct virtio_device *vdev; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* Block layer tags. 
*/ + struct blk_mq_tag_set tag_set; + + /* Process context for config space updates */ + struct work_struct config_work; + + /* + * Tracks references from block_device_operations open/release and + * virtio_driver probe/remove so this object can be freed once no + * longer in use. + */ + refcount_t refs; + + /* What host tells us, plus 2 for header & tailer. */ + unsigned int sg_elems; + + /* Ida index - used to track minor number allocations. */ + int index; + + /* num of vqs */ + int num_vqs; + struct virtio_blk_vq *vqs; +}; + +struct virtblk_req { + struct virtio_blk_outhdr out_hdr; + u8 status; + struct scatterlist sg[]; +}; +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +struct blk_mq_ctxs { + struct kobject kobj; + struct blk_mq_ctx __percpu *queue_ctx; +}; + +struct blk_mq_ctx { + struct { + spinlock_t lock; + struct list_head rq_lists[HCTX_MAX_TYPES]; + } ____cacheline_aligned_in_smp; + + unsigned int cpu; + unsigned short index_hw[HCTX_MAX_TYPES]; + struct blk_mq_hw_ctx *hctxs[HCTX_MAX_TYPES]; + + /* incremented at dispatch time */ + unsigned long rq_dispatched[2]; + unsigned long rq_merged; + + /* incremented at completion time */ + unsigned long ____cacheline_aligned_in_smp rq_completed[2]; + + struct request_queue *queue; + struct blk_mq_ctxs *ctxs; + struct kobject kobj; +} ____cacheline_aligned_in_smp; + +struct blk_flush_queue { + unsigned int flush_pending_idx:1; + unsigned int flush_running_idx:1; + blk_status_t rq_status; + unsigned long flush_pending_since; + struct list_head flush_queue[2]; + struct list_head flush_data_in_flight; + struct request *flush_rq; + + struct lock_class_key key; + spinlock_t mq_flush_lock; +}; + +static inline int enable_detect_flush_rq(void) +{ + return 1; +} + +static inline enum hctx_type blk_mq_get_hctx_type(unsigned int flags) +{ + enum hctx_type type = HCTX_TYPE_DEFAULT; + + /* + * The caller ensure that if REQ_HIPRI, poll must be enabled. + */ + if (flags & REQ_HIPRI) + type = HCTX_TYPE_POLL; + else if ((flags & REQ_OP_MASK) == REQ_OP_READ) + type = HCTX_TYPE_READ; + return type; +} + +static inline struct blk_mq_hw_ctx *blk_mq_get_hctx_byrq(struct request *rq) +{ + return rq->mq_ctx->hctxs[blk_mq_get_hctx_type(rq->cmd_flags)]; +} + +static inline struct request *desc_state_data_to_req(struct virtqueue *vq, int head) +{ + void *data = (to_vvq(vq)->packed_ring ? to_vvq(vq)->packed.desc_state[head].data : + to_vvq(vq)->split.desc_state[head].data); + return data ? blk_mq_rq_from_pdu(data) : NULL; +} + +static inline int get_rq_internal_tag(struct request *rq) +{ + return rq ? 
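+	/*
+	 * Note added for clarity: get_issue_driver_ns() below prefers the
+	 * precise io_start_time_ns stamp; when that is zero (e.g. I/O stats
+	 * disabled for the queue) it falls back to reconstructing the issue
+	 * time from rq->deadline minus the request timeout, converted from
+	 * jiffies to nanoseconds. In the fallback case the value is only as
+	 * accurate as HZ, which is still sufficient for spotting requests
+	 * that have been stuck for seconds.
+	 */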
rq->internal_tag : -1; +} + +static inline unsigned long get_issue_driver_ns(struct request *rq) +{ + if (!rq) + return 0; + if (rq->io_start_time_ns) + return rq->io_start_time_ns; + if (rq->deadline > rq->timeout) + return jiffies_to_usecs(rq->deadline - rq->timeout) * 1000; + return 0; +} + +/* + * LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0) + */ +static inline u64 get_check_hang_time_ns(void) +{ + return ktime_get_ns(); +} + +extern fn_queue_tag_busy_iter sym_blk_mq_queue_tag_busy_iter; +typedef void (*blk_mq_rq_iter)(struct request *, void *, bool); +static blk_mq_rq_iter fn_blk_mq_check_hang = NULL; +static bool blk_mq_check_rq_hang(struct blk_mq_hw_ctx *hctx, + struct request *rq, void *priv, bool reserved) +{ + if (fn_blk_mq_check_hang) + fn_blk_mq_check_hang(rq, priv, reserved); + return true; +} + +static inline int iter_all_rq(struct request_queue *q, blk_mq_rq_iter fn, void *data) +{ + fn_blk_mq_check_hang = fn; + + sym_blk_mq_queue_tag_busy_iter(q, blk_mq_check_rq_hang, data); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.84-10.3.al8.x86_64/nvme.h b/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.84-10.3.al8.x86_64/nvme.h new file mode 100644 index 0000000000000000000000000000000000000000..acda33af65e3da6b3a3a10b4ab9561a1720bd840 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.84-10.3.al8.x86_64/nvme.h @@ -0,0 +1,63 @@ +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct nvme_queue { + void *nvme_dev; //struct nvme_dev *dev; + spinlock_t sq_lock; + void *sq_cmds; + /* only used for poll queues: */ + spinlock_t cq_poll_lock ____cacheline_aligned_in_smp; + struct nvme_completion *cqes; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_tail; + u16 last_sq_tail; + u16 cq_head; + u16 qid; + u8 cq_phase; + u8 sqes; + unsigned long flags; +#define NVMEQ_ENABLED 0 +#define NVMEQ_SQ_CMB 1 +#define NVMEQ_DELETE_ERROR 2 +#define NVMEQ_POLLED 3 + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; + struct completion delete_done; +}; + +static int get_sq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int tail = nvmeq->sq_tail; + struct nvme_command cmd; + + do { + if (tail >= nvmeq->q_depth) + break; + memcpy(&cmd, nvmeq->sq_cmds + (tail << nvmeq->sqes), + sizeof(cmd)); + if (cmd.common.command_id == rq->tag) + return tail; + } while (--tail >= 0); + return -1; +} + +static unsigned long get_cmd_ctx(struct nvme_queue *nvmeq, struct request *rq) +{ + //struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.84-10.3.al8.x86_64/virtio_blk.h b/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.84-10.3.al8.x86_64/virtio_blk.h new file mode 100644 index 0000000000000000000000000000000000000000..ed80e59fddcf9f45323f0aafb79163521fb96e62 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.84-10.3.al8.x86_64/virtio_blk.h @@ -0,0 +1,310 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct vring_desc_state_split { + void *data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. 
*/ +}; + +struct vring_desc_state_packed { + void *data; /* Data for callback. */ + struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */ + u16 num; /* Descriptor list length. */ + u16 next; /* The next desc state in a list. */ + u16 last; /* The last desc state in a list. */ +}; + +struct vring_desc_extra_packed { + dma_addr_t addr; /* Buffer DMA addr. */ + u32 len; /* Buffer length. */ + u16 flags; /* Descriptor flags. */ +}; + +struct vring_virtqueue { + struct virtqueue vq; + + /* Is this a packed ring? */ + bool packed_ring; + + /* Is DMA API used? */ + bool use_dma_api; + + /* Can we use weak barriers? */ + bool weak_barriers; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Host publishes avail event idx */ + bool event; + + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. */ + unsigned int num_added; + + /* Last used index we've seen. */ + u16 last_used_idx; + + union { + /* Available for split ring */ + struct { + /* Actual memory layout for this queue. */ + struct vring vring; + + /* Last written value to avail->flags */ + u16 avail_flags_shadow; + + /* + * Last written value to avail->idx in + * guest byte order. + */ + u16 avail_idx_shadow; + + /* Per-descriptor state. */ + struct vring_desc_state_split *desc_state; + + /* DMA address and size information */ + dma_addr_t queue_dma_addr; + size_t queue_size_in_bytes; + } split; + + /* Available for packed ring */ + struct { + /* Actual memory layout for this queue. */ + struct { + unsigned int num; + struct vring_packed_desc *desc; + struct vring_packed_desc_event *driver; + struct vring_packed_desc_event *device; + } vring; + + /* Driver ring wrap counter. */ + bool avail_wrap_counter; + + /* Device ring wrap counter. */ + bool used_wrap_counter; + + /* Avail used flags. */ + u16 avail_used_flags; + + /* Index of the next avail descriptor. */ + u16 next_avail_idx; + + /* + * Last written value to driver->flags in + * guest byte order. + */ + u16 event_flags_shadow; + + /* Per-descriptor state. */ + struct vring_desc_state_packed *desc_state; + struct vring_desc_state_packed *desc_extra; + + /* DMA address and size information */ + dma_addr_t ring_dma_addr; + dma_addr_t driver_event_dma_addr; + dma_addr_t device_event_dma_addr; + size_t ring_size_in_bytes; + size_t event_size_in_bytes; + } packed; + }; + + /* How to notify other side. FIXME: commonalize hcalls! */ + bool (*notify)(struct virtqueue *vq); + + /* DMA, allocation, and size information */ + bool we_own_ring; + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; +#endif +}; + +#define VQ_NAME_LEN 16 +struct virtio_blk_vq { + struct virtqueue *vq; + spinlock_t lock; + char name[VQ_NAME_LEN]; +} ____cacheline_aligned_in_smp; + +struct virtio_blk { + /* + * This mutex must be held by anything that may run after + * virtblk_remove() sets vblk->vdev to NULL. + * + * blk-mq, virtqueue processing, and sysfs attribute code paths are + * shut down before vblk->vdev is set to NULL and therefore do not need + * to hold this mutex. + */ + struct mutex vdev_mutex; + struct virtio_device *vdev; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* Block layer tags. 
*/ + struct blk_mq_tag_set tag_set; + + /* Process context for config space updates */ + struct work_struct config_work; + + /* + * Tracks references from block_device_operations open/release and + * virtio_driver probe/remove so this object can be freed once no + * longer in use. + */ + refcount_t refs; + + /* What host tells us, plus 2 for header & tailer. */ + unsigned int sg_elems; + + /* Ida index - used to track minor number allocations. */ + int index; + + /* num of vqs */ + int num_vqs; + struct virtio_blk_vq *vqs; +}; + +struct virtblk_req { + struct virtio_blk_outhdr out_hdr; + u8 status; + struct scatterlist sg[]; +}; +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +struct blk_mq_ctxs { + struct kobject kobj; + struct blk_mq_ctx __percpu *queue_ctx; +}; + +struct blk_mq_ctx { + struct { + spinlock_t lock; + struct list_head rq_lists[HCTX_MAX_TYPES]; + } ____cacheline_aligned_in_smp; + + unsigned int cpu; + unsigned short index_hw[HCTX_MAX_TYPES]; + struct blk_mq_hw_ctx *hctxs[HCTX_MAX_TYPES]; + + /* incremented at dispatch time */ + unsigned long rq_dispatched[2]; + unsigned long rq_merged; + + /* incremented at completion time */ + unsigned long ____cacheline_aligned_in_smp rq_completed[2]; + + struct request_queue *queue; + struct blk_mq_ctxs *ctxs; + struct kobject kobj; +} ____cacheline_aligned_in_smp; + +struct blk_flush_queue { + unsigned int flush_pending_idx:1; + unsigned int flush_running_idx:1; + blk_status_t rq_status; + unsigned long flush_pending_since; + struct list_head flush_queue[2]; + struct list_head flush_data_in_flight; + struct request *flush_rq; + + struct lock_class_key key; + spinlock_t mq_flush_lock; +}; + +static inline int enable_detect_flush_rq(void) +{ + return 1; +} + +static inline enum hctx_type blk_mq_get_hctx_type(unsigned int flags) +{ + enum hctx_type type = HCTX_TYPE_DEFAULT; + + /* + * The caller ensure that if REQ_HIPRI, poll must be enabled. + */ + if (flags & REQ_HIPRI) + type = HCTX_TYPE_POLL; + else if ((flags & REQ_OP_MASK) == REQ_OP_READ) + type = HCTX_TYPE_READ; + return type; +} + +static inline struct blk_mq_hw_ctx *blk_mq_get_hctx_byrq(struct request *rq) +{ + return rq->mq_ctx->hctxs[blk_mq_get_hctx_type(rq->cmd_flags)]; +} + +static inline struct request *desc_state_data_to_req(struct virtqueue *vq, int head) +{ + void *data = (to_vvq(vq)->packed_ring ? to_vvq(vq)->packed.desc_state[head].data : + to_vvq(vq)->split.desc_state[head].data); + return data ? blk_mq_rq_from_pdu(data) : NULL; +} + +static inline int get_rq_internal_tag(struct request *rq) +{ + return rq ? 
rq->internal_tag : -1; +} + +static inline unsigned long get_issue_driver_ns(struct request *rq) +{ + if (!rq) + return 0; + if (rq->io_start_time_ns) + return rq->io_start_time_ns; + if (rq->deadline > rq->timeout) + return jiffies_to_usecs(rq->deadline - rq->timeout) * 1000; + return 0; +} + +/* + * LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0) + */ +static inline u64 get_check_hang_time_ns(void) +{ + return ktime_get_ns(); +} + +extern fn_queue_tag_busy_iter sym_blk_mq_queue_tag_busy_iter; +typedef void (*blk_mq_rq_iter)(struct request *, void *, bool); +static blk_mq_rq_iter fn_blk_mq_check_hang = NULL; +static bool blk_mq_check_rq_hang(struct blk_mq_hw_ctx *hctx, + struct request *rq, void *priv, bool reserved) +{ + if (fn_blk_mq_check_hang) + fn_blk_mq_check_hang(rq, priv, reserved); + return true; +} + +static inline int iter_all_rq(struct request_queue *q, blk_mq_rq_iter fn, void *data) +{ + fn_blk_mq_check_hang = fn; + + sym_blk_mq_queue_tag_busy_iter(q, blk_mq_check_rq_hang, data); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.84-10.4.al8.x86_64/nvme.h b/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.84-10.4.al8.x86_64/nvme.h new file mode 100644 index 0000000000000000000000000000000000000000..acda33af65e3da6b3a3a10b4ab9561a1720bd840 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.84-10.4.al8.x86_64/nvme.h @@ -0,0 +1,63 @@ +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct nvme_queue { + void *nvme_dev; //struct nvme_dev *dev; + spinlock_t sq_lock; + void *sq_cmds; + /* only used for poll queues: */ + spinlock_t cq_poll_lock ____cacheline_aligned_in_smp; + struct nvme_completion *cqes; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_tail; + u16 last_sq_tail; + u16 cq_head; + u16 qid; + u8 cq_phase; + u8 sqes; + unsigned long flags; +#define NVMEQ_ENABLED 0 +#define NVMEQ_SQ_CMB 1 +#define NVMEQ_DELETE_ERROR 2 +#define NVMEQ_POLLED 3 + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; + struct completion delete_done; +}; + +static int get_sq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int tail = nvmeq->sq_tail; + struct nvme_command cmd; + + do { + if (tail >= nvmeq->q_depth) + break; + memcpy(&cmd, nvmeq->sq_cmds + (tail << nvmeq->sqes), + sizeof(cmd)); + if (cmd.common.command_id == rq->tag) + return tail; + } while (--tail >= 0); + return -1; +} + +static unsigned long get_cmd_ctx(struct nvme_queue *nvmeq, struct request *rq) +{ + //struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.84-10.4.al8.x86_64/virtio_blk.h b/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.84-10.4.al8.x86_64/virtio_blk.h new file mode 100644 index 0000000000000000000000000000000000000000..ed80e59fddcf9f45323f0aafb79163521fb96e62 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.84-10.4.al8.x86_64/virtio_blk.h @@ -0,0 +1,310 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct vring_desc_state_split { + void *data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. 
*/ +}; + +struct vring_desc_state_packed { + void *data; /* Data for callback. */ + struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */ + u16 num; /* Descriptor list length. */ + u16 next; /* The next desc state in a list. */ + u16 last; /* The last desc state in a list. */ +}; + +struct vring_desc_extra_packed { + dma_addr_t addr; /* Buffer DMA addr. */ + u32 len; /* Buffer length. */ + u16 flags; /* Descriptor flags. */ +}; + +struct vring_virtqueue { + struct virtqueue vq; + + /* Is this a packed ring? */ + bool packed_ring; + + /* Is DMA API used? */ + bool use_dma_api; + + /* Can we use weak barriers? */ + bool weak_barriers; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Host publishes avail event idx */ + bool event; + + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. */ + unsigned int num_added; + + /* Last used index we've seen. */ + u16 last_used_idx; + + union { + /* Available for split ring */ + struct { + /* Actual memory layout for this queue. */ + struct vring vring; + + /* Last written value to avail->flags */ + u16 avail_flags_shadow; + + /* + * Last written value to avail->idx in + * guest byte order. + */ + u16 avail_idx_shadow; + + /* Per-descriptor state. */ + struct vring_desc_state_split *desc_state; + + /* DMA address and size information */ + dma_addr_t queue_dma_addr; + size_t queue_size_in_bytes; + } split; + + /* Available for packed ring */ + struct { + /* Actual memory layout for this queue. */ + struct { + unsigned int num; + struct vring_packed_desc *desc; + struct vring_packed_desc_event *driver; + struct vring_packed_desc_event *device; + } vring; + + /* Driver ring wrap counter. */ + bool avail_wrap_counter; + + /* Device ring wrap counter. */ + bool used_wrap_counter; + + /* Avail used flags. */ + u16 avail_used_flags; + + /* Index of the next avail descriptor. */ + u16 next_avail_idx; + + /* + * Last written value to driver->flags in + * guest byte order. + */ + u16 event_flags_shadow; + + /* Per-descriptor state. */ + struct vring_desc_state_packed *desc_state; + struct vring_desc_state_packed *desc_extra; + + /* DMA address and size information */ + dma_addr_t ring_dma_addr; + dma_addr_t driver_event_dma_addr; + dma_addr_t device_event_dma_addr; + size_t ring_size_in_bytes; + size_t event_size_in_bytes; + } packed; + }; + + /* How to notify other side. FIXME: commonalize hcalls! */ + bool (*notify)(struct virtqueue *vq); + + /* DMA, allocation, and size information */ + bool we_own_ring; + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; +#endif +}; + +#define VQ_NAME_LEN 16 +struct virtio_blk_vq { + struct virtqueue *vq; + spinlock_t lock; + char name[VQ_NAME_LEN]; +} ____cacheline_aligned_in_smp; + +struct virtio_blk { + /* + * This mutex must be held by anything that may run after + * virtblk_remove() sets vblk->vdev to NULL. + * + * blk-mq, virtqueue processing, and sysfs attribute code paths are + * shut down before vblk->vdev is set to NULL and therefore do not need + * to hold this mutex. + */ + struct mutex vdev_mutex; + struct virtio_device *vdev; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* Block layer tags. 
*/ + struct blk_mq_tag_set tag_set; + + /* Process context for config space updates */ + struct work_struct config_work; + + /* + * Tracks references from block_device_operations open/release and + * virtio_driver probe/remove so this object can be freed once no + * longer in use. + */ + refcount_t refs; + + /* What host tells us, plus 2 for header & tailer. */ + unsigned int sg_elems; + + /* Ida index - used to track minor number allocations. */ + int index; + + /* num of vqs */ + int num_vqs; + struct virtio_blk_vq *vqs; +}; + +struct virtblk_req { + struct virtio_blk_outhdr out_hdr; + u8 status; + struct scatterlist sg[]; +}; +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +struct blk_mq_ctxs { + struct kobject kobj; + struct blk_mq_ctx __percpu *queue_ctx; +}; + +struct blk_mq_ctx { + struct { + spinlock_t lock; + struct list_head rq_lists[HCTX_MAX_TYPES]; + } ____cacheline_aligned_in_smp; + + unsigned int cpu; + unsigned short index_hw[HCTX_MAX_TYPES]; + struct blk_mq_hw_ctx *hctxs[HCTX_MAX_TYPES]; + + /* incremented at dispatch time */ + unsigned long rq_dispatched[2]; + unsigned long rq_merged; + + /* incremented at completion time */ + unsigned long ____cacheline_aligned_in_smp rq_completed[2]; + + struct request_queue *queue; + struct blk_mq_ctxs *ctxs; + struct kobject kobj; +} ____cacheline_aligned_in_smp; + +struct blk_flush_queue { + unsigned int flush_pending_idx:1; + unsigned int flush_running_idx:1; + blk_status_t rq_status; + unsigned long flush_pending_since; + struct list_head flush_queue[2]; + struct list_head flush_data_in_flight; + struct request *flush_rq; + + struct lock_class_key key; + spinlock_t mq_flush_lock; +}; + +static inline int enable_detect_flush_rq(void) +{ + return 1; +} + +static inline enum hctx_type blk_mq_get_hctx_type(unsigned int flags) +{ + enum hctx_type type = HCTX_TYPE_DEFAULT; + + /* + * The caller ensure that if REQ_HIPRI, poll must be enabled. + */ + if (flags & REQ_HIPRI) + type = HCTX_TYPE_POLL; + else if ((flags & REQ_OP_MASK) == REQ_OP_READ) + type = HCTX_TYPE_READ; + return type; +} + +static inline struct blk_mq_hw_ctx *blk_mq_get_hctx_byrq(struct request *rq) +{ + return rq->mq_ctx->hctxs[blk_mq_get_hctx_type(rq->cmd_flags)]; +} + +static inline struct request *desc_state_data_to_req(struct virtqueue *vq, int head) +{ + void *data = (to_vvq(vq)->packed_ring ? to_vvq(vq)->packed.desc_state[head].data : + to_vvq(vq)->split.desc_state[head].data); + return data ? blk_mq_rq_from_pdu(data) : NULL; +} + +static inline int get_rq_internal_tag(struct request *rq) +{ + return rq ? 
rq->internal_tag : -1; +} + +static inline unsigned long get_issue_driver_ns(struct request *rq) +{ + if (!rq) + return 0; + if (rq->io_start_time_ns) + return rq->io_start_time_ns; + if (rq->deadline > rq->timeout) + return jiffies_to_usecs(rq->deadline - rq->timeout) * 1000; + return 0; +} + +/* + * LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0) + */ +static inline u64 get_check_hang_time_ns(void) +{ + return ktime_get_ns(); +} + +extern fn_queue_tag_busy_iter sym_blk_mq_queue_tag_busy_iter; +typedef void (*blk_mq_rq_iter)(struct request *, void *, bool); +static blk_mq_rq_iter fn_blk_mq_check_hang = NULL; +static bool blk_mq_check_rq_hang(struct blk_mq_hw_ctx *hctx, + struct request *rq, void *priv, bool reserved) +{ + if (fn_blk_mq_check_hang) + fn_blk_mq_check_hang(rq, priv, reserved); + return true; +} + +static inline int iter_all_rq(struct request_queue *q, blk_mq_rq_iter fn, void *data) +{ + fn_blk_mq_check_hang = fn; + + sym_blk_mq_queue_tag_busy_iter(q, blk_mq_check_rq_hang, data); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.84-10.al8.x86_64/nvme.h b/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.84-10.al8.x86_64/nvme.h new file mode 100644 index 0000000000000000000000000000000000000000..acda33af65e3da6b3a3a10b4ab9561a1720bd840 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.84-10.al8.x86_64/nvme.h @@ -0,0 +1,63 @@ +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct nvme_queue { + void *nvme_dev; //struct nvme_dev *dev; + spinlock_t sq_lock; + void *sq_cmds; + /* only used for poll queues: */ + spinlock_t cq_poll_lock ____cacheline_aligned_in_smp; + struct nvme_completion *cqes; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_tail; + u16 last_sq_tail; + u16 cq_head; + u16 qid; + u8 cq_phase; + u8 sqes; + unsigned long flags; +#define NVMEQ_ENABLED 0 +#define NVMEQ_SQ_CMB 1 +#define NVMEQ_DELETE_ERROR 2 +#define NVMEQ_POLLED 3 + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; + struct completion delete_done; +}; + +static int get_sq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int tail = nvmeq->sq_tail; + struct nvme_command cmd; + + do { + if (tail >= nvmeq->q_depth) + break; + memcpy(&cmd, nvmeq->sq_cmds + (tail << nvmeq->sqes), + sizeof(cmd)); + if (cmd.common.command_id == rq->tag) + return tail; + } while (--tail >= 0); + return -1; +} + +static unsigned long get_cmd_ctx(struct nvme_queue *nvmeq, struct request *rq) +{ + //struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.84-10.al8.x86_64/virtio_blk.h b/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.84-10.al8.x86_64/virtio_blk.h new file mode 100644 index 0000000000000000000000000000000000000000..ed80e59fddcf9f45323f0aafb79163521fb96e62 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/5.10.84-10.al8.x86_64/virtio_blk.h @@ -0,0 +1,310 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct vring_desc_state_split { + void *data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. 
*/ +}; + +struct vring_desc_state_packed { + void *data; /* Data for callback. */ + struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */ + u16 num; /* Descriptor list length. */ + u16 next; /* The next desc state in a list. */ + u16 last; /* The last desc state in a list. */ +}; + +struct vring_desc_extra_packed { + dma_addr_t addr; /* Buffer DMA addr. */ + u32 len; /* Buffer length. */ + u16 flags; /* Descriptor flags. */ +}; + +struct vring_virtqueue { + struct virtqueue vq; + + /* Is this a packed ring? */ + bool packed_ring; + + /* Is DMA API used? */ + bool use_dma_api; + + /* Can we use weak barriers? */ + bool weak_barriers; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Host publishes avail event idx */ + bool event; + + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. */ + unsigned int num_added; + + /* Last used index we've seen. */ + u16 last_used_idx; + + union { + /* Available for split ring */ + struct { + /* Actual memory layout for this queue. */ + struct vring vring; + + /* Last written value to avail->flags */ + u16 avail_flags_shadow; + + /* + * Last written value to avail->idx in + * guest byte order. + */ + u16 avail_idx_shadow; + + /* Per-descriptor state. */ + struct vring_desc_state_split *desc_state; + + /* DMA address and size information */ + dma_addr_t queue_dma_addr; + size_t queue_size_in_bytes; + } split; + + /* Available for packed ring */ + struct { + /* Actual memory layout for this queue. */ + struct { + unsigned int num; + struct vring_packed_desc *desc; + struct vring_packed_desc_event *driver; + struct vring_packed_desc_event *device; + } vring; + + /* Driver ring wrap counter. */ + bool avail_wrap_counter; + + /* Device ring wrap counter. */ + bool used_wrap_counter; + + /* Avail used flags. */ + u16 avail_used_flags; + + /* Index of the next avail descriptor. */ + u16 next_avail_idx; + + /* + * Last written value to driver->flags in + * guest byte order. + */ + u16 event_flags_shadow; + + /* Per-descriptor state. */ + struct vring_desc_state_packed *desc_state; + struct vring_desc_state_packed *desc_extra; + + /* DMA address and size information */ + dma_addr_t ring_dma_addr; + dma_addr_t driver_event_dma_addr; + dma_addr_t device_event_dma_addr; + size_t ring_size_in_bytes; + size_t event_size_in_bytes; + } packed; + }; + + /* How to notify other side. FIXME: commonalize hcalls! */ + bool (*notify)(struct virtqueue *vq); + + /* DMA, allocation, and size information */ + bool we_own_ring; + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; +#endif +}; + +#define VQ_NAME_LEN 16 +struct virtio_blk_vq { + struct virtqueue *vq; + spinlock_t lock; + char name[VQ_NAME_LEN]; +} ____cacheline_aligned_in_smp; + +struct virtio_blk { + /* + * This mutex must be held by anything that may run after + * virtblk_remove() sets vblk->vdev to NULL. + * + * blk-mq, virtqueue processing, and sysfs attribute code paths are + * shut down before vblk->vdev is set to NULL and therefore do not need + * to hold this mutex. + */ + struct mutex vdev_mutex; + struct virtio_device *vdev; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* Block layer tags. 
*/ + struct blk_mq_tag_set tag_set; + + /* Process context for config space updates */ + struct work_struct config_work; + + /* + * Tracks references from block_device_operations open/release and + * virtio_driver probe/remove so this object can be freed once no + * longer in use. + */ + refcount_t refs; + + /* What host tells us, plus 2 for header & tailer. */ + unsigned int sg_elems; + + /* Ida index - used to track minor number allocations. */ + int index; + + /* num of vqs */ + int num_vqs; + struct virtio_blk_vq *vqs; +}; + +struct virtblk_req { + struct virtio_blk_outhdr out_hdr; + u8 status; + struct scatterlist sg[]; +}; +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +struct blk_mq_ctxs { + struct kobject kobj; + struct blk_mq_ctx __percpu *queue_ctx; +}; + +struct blk_mq_ctx { + struct { + spinlock_t lock; + struct list_head rq_lists[HCTX_MAX_TYPES]; + } ____cacheline_aligned_in_smp; + + unsigned int cpu; + unsigned short index_hw[HCTX_MAX_TYPES]; + struct blk_mq_hw_ctx *hctxs[HCTX_MAX_TYPES]; + + /* incremented at dispatch time */ + unsigned long rq_dispatched[2]; + unsigned long rq_merged; + + /* incremented at completion time */ + unsigned long ____cacheline_aligned_in_smp rq_completed[2]; + + struct request_queue *queue; + struct blk_mq_ctxs *ctxs; + struct kobject kobj; +} ____cacheline_aligned_in_smp; + +struct blk_flush_queue { + unsigned int flush_pending_idx:1; + unsigned int flush_running_idx:1; + blk_status_t rq_status; + unsigned long flush_pending_since; + struct list_head flush_queue[2]; + struct list_head flush_data_in_flight; + struct request *flush_rq; + + struct lock_class_key key; + spinlock_t mq_flush_lock; +}; + +static inline int enable_detect_flush_rq(void) +{ + return 1; +} + +static inline enum hctx_type blk_mq_get_hctx_type(unsigned int flags) +{ + enum hctx_type type = HCTX_TYPE_DEFAULT; + + /* + * The caller ensure that if REQ_HIPRI, poll must be enabled. + */ + if (flags & REQ_HIPRI) + type = HCTX_TYPE_POLL; + else if ((flags & REQ_OP_MASK) == REQ_OP_READ) + type = HCTX_TYPE_READ; + return type; +} + +static inline struct blk_mq_hw_ctx *blk_mq_get_hctx_byrq(struct request *rq) +{ + return rq->mq_ctx->hctxs[blk_mq_get_hctx_type(rq->cmd_flags)]; +} + +static inline struct request *desc_state_data_to_req(struct virtqueue *vq, int head) +{ + void *data = (to_vvq(vq)->packed_ring ? to_vvq(vq)->packed.desc_state[head].data : + to_vvq(vq)->split.desc_state[head].data); + return data ? blk_mq_rq_from_pdu(data) : NULL; +} + +static inline int get_rq_internal_tag(struct request *rq) +{ + return rq ? 
rq->internal_tag : -1; +} + +static inline unsigned long get_issue_driver_ns(struct request *rq) +{ + if (!rq) + return 0; + if (rq->io_start_time_ns) + return rq->io_start_time_ns; + if (rq->deadline > rq->timeout) + return jiffies_to_usecs(rq->deadline - rq->timeout) * 1000; + return 0; +} + +/* + * LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0) + */ +static inline u64 get_check_hang_time_ns(void) +{ + return ktime_get_ns(); +} + +extern fn_queue_tag_busy_iter sym_blk_mq_queue_tag_busy_iter; +typedef void (*blk_mq_rq_iter)(struct request *, void *, bool); +static blk_mq_rq_iter fn_blk_mq_check_hang = NULL; +static bool blk_mq_check_rq_hang(struct blk_mq_hw_ctx *hctx, + struct request *rq, void *priv, bool reserved) +{ + if (fn_blk_mq_check_hang) + fn_blk_mq_check_hang(rq, priv, reserved); + return true; +} + +static inline int iter_all_rq(struct request_queue *q, blk_mq_rq_iter fn, void *data) +{ + fn_blk_mq_check_hang = fn; + + sym_blk_mq_queue_tag_busy_iter(q, blk_mq_check_rq_hang, data); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/5.17.0-1.an23.x86_64/nvme.h b/source/lib/internal/kernel_module/modules/iosdiag/include/5.17.0-1.an23.x86_64/nvme.h new file mode 100644 index 0000000000000000000000000000000000000000..9f3286080f831945117862601f668da7376e10c4 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/5.17.0-1.an23.x86_64/nvme.h @@ -0,0 +1,61 @@ +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct nvme_queue { + struct device *q_dmadev; + void *nvme_dev; //struct nvme_dev *dev; + spinlock_t sq_lock; + struct nvme_command *sq_cmds; + struct nvme_command __iomem *sq_cmds_io; + spinlock_t cq_lock ____cacheline_aligned_in_smp; + volatile struct nvme_completion *cqes; + struct blk_mq_tags **tags; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_tail; + u16 last_sq_tail; + u16 cq_head; + u16 last_cq_head; + u16 qid; + u8 cq_phase; + u8 polled; + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; +}; + +static int get_sq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int tail = nvmeq->sq_tail; + struct nvme_command cmd; + + do { + if (nvmeq->sq_cmds_io) { + memcpy_toio(&cmd, &nvmeq->sq_cmds_io[tail], sizeof(struct nvme_command)); + if (cmd.common.command_id == rq->tag) + return tail; + } + else if (nvmeq->sq_cmds[tail].common.command_id == rq->tag) + return tail; + } while (--tail >= 0); + return -1; +} + +static unsigned long get_cmd_ctx(struct nvme_queue *nvmeq, struct request *rq) +{ + //struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/5.17.0-1.an23.x86_64/virtio_blk.h b/source/lib/internal/kernel_module/modules/iosdiag/include/5.17.0-1.an23.x86_64/virtio_blk.h new file mode 100644 index 0000000000000000000000000000000000000000..dd97cab2b5930bffbef2d7d7c57679a0fc595f3c --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/5.17.0-1.an23.x86_64/virtio_blk.h @@ -0,0 +1,304 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct vring_desc_state_split { + void *data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. 
*/ +}; + +struct vring_desc_state_packed { + void *data; /* Data for callback. */ + struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */ + u16 num; /* Descriptor list length. */ + u16 last; /* The last desc state in a list. */ +}; + +struct vring_desc_extra { + dma_addr_t addr; /* Descriptor DMA addr. */ + u32 len; /* Descriptor length. */ + u16 flags; /* Descriptor flags. */ + u16 next; /* The next desc state in a list. */ +}; + +struct vring_virtqueue { + struct virtqueue vq; + + /* Is this a packed ring? */ + bool packed_ring; + + /* Is DMA API used? */ + bool use_dma_api; + + /* Can we use weak barriers? */ + bool weak_barriers; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Host publishes avail event idx */ + bool event; + + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. */ + unsigned int num_added; + + /* Last used index we've seen. */ + u16 last_used_idx; + + /* Hint for event idx: already triggered no need to disable. */ + bool event_triggered; + + union { + /* Available for split ring */ + struct { + /* Actual memory layout for this queue. */ + struct vring vring; + + /* Last written value to avail->flags */ + u16 avail_flags_shadow; + + /* + * Last written value to avail->idx in + * guest byte order. + */ + u16 avail_idx_shadow; + + /* Per-descriptor state. */ + struct vring_desc_state_split *desc_state; + struct vring_desc_extra *desc_extra; + + /* DMA address and size information */ + dma_addr_t queue_dma_addr; + size_t queue_size_in_bytes; + } split; + + /* Available for packed ring */ + struct { + /* Actual memory layout for this queue. */ + struct { + unsigned int num; + struct vring_packed_desc *desc; + struct vring_packed_desc_event *driver; + struct vring_packed_desc_event *device; + } vring; + + /* Driver ring wrap counter. */ + bool avail_wrap_counter; + + /* Device ring wrap counter. */ + bool used_wrap_counter; + + /* Avail used flags. */ + u16 avail_used_flags; + + /* Index of the next avail descriptor. */ + u16 next_avail_idx; + + /* + * Last written value to driver->flags in + * guest byte order. + */ + u16 event_flags_shadow; + + /* Per-descriptor state. */ + struct vring_desc_state_packed *desc_state; + struct vring_desc_extra *desc_extra; + + /* DMA address and size information */ + dma_addr_t ring_dma_addr; + dma_addr_t driver_event_dma_addr; + dma_addr_t device_event_dma_addr; + size_t ring_size_in_bytes; + size_t event_size_in_bytes; + } packed; + }; + + /* How to notify other side. FIXME: commonalize hcalls! */ + bool (*notify)(struct virtqueue *vq); + + /* DMA, allocation, and size information */ + bool we_own_ring; + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; +#endif +}; + +#define VQ_NAME_LEN 16 +struct virtio_blk_vq { + struct virtqueue *vq; + spinlock_t lock; + char name[VQ_NAME_LEN]; +} ____cacheline_aligned_in_smp; + +struct virtio_blk { + /* + * This mutex must be held by anything that may run after + * virtblk_remove() sets vblk->vdev to NULL. + * + * blk-mq, virtqueue processing, and sysfs attribute code paths are + * shut down before vblk->vdev is set to NULL and therefore do not need + * to hold this mutex. + */ + struct mutex vdev_mutex; + struct virtio_device *vdev; + + /* The disk structure for the kernel. 
*/ + struct gendisk *disk; + + /* Block layer tags. */ + struct blk_mq_tag_set tag_set; + + /* Process context for config space updates */ + struct work_struct config_work; + + /* + * Tracks references from block_device_operations open/release and + * virtio_driver probe/remove so this object can be freed once no + * longer in use. + */ + refcount_t refs; + + /* Ida index - used to track minor number allocations. */ + int index; + + /* num of vqs */ + int num_vqs; + struct virtio_blk_vq *vqs; +}; + +struct virtblk_req { + struct virtio_blk_outhdr out_hdr; + u8 status; + struct sg_table sg_table; + struct scatterlist sg[]; +}; +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +struct blk_mq_ctxs { + struct kobject kobj; + struct blk_mq_ctx __percpu *queue_ctx; +}; + +struct blk_mq_ctx { + struct { + spinlock_t lock; + struct list_head rq_list[HCTX_MAX_TYPES]; + } ____cacheline_aligned_in_smp; + + unsigned int cpu; + unsigned short index_hw[HCTX_MAX_TYPES]; + struct blk_mq_hw_ctx *hctxs[HCTX_MAX_TYPES]; + + struct request_queue *queue; + struct blk_mq_ctxs *ctxs; + struct kobject kobj; +} ____cacheline_aligned_in_smp; + +struct blk_flush_queue { + unsigned int flush_pending_idx:1; + unsigned int flush_running_idx:1; + blk_status_t rq_status; + unsigned long flush_pending_since; + struct list_head flush_queue[2]; + struct list_head flush_data_in_flight; + struct request *flush_rq; + + spinlock_t mq_flush_lock; +}; + +static inline int enable_detect_flush_rq(void) +{ + return 1; +} + +static inline enum hctx_type blk_mq_get_hctx_type(unsigned int flags) +{ + enum hctx_type type = HCTX_TYPE_DEFAULT; + + /* + * The caller ensure that if REQ_POLLED, poll must be enabled. + */ + if (flags & REQ_POLLED) + type = HCTX_TYPE_POLL; + else if ((flags & REQ_OP_MASK) == REQ_OP_READ) + type = HCTX_TYPE_READ; + return type; +} + +static inline struct blk_mq_hw_ctx *blk_mq_get_hctx_byrq(struct request *rq) +{ + return rq->mq_ctx->hctxs[blk_mq_get_hctx_type(rq->cmd_flags)]; +} + +static inline struct request *desc_state_data_to_req(struct virtqueue *vq, int head) +{ + void *data = (to_vvq(vq)->packed_ring ? to_vvq(vq)->packed.desc_state[head].data : + to_vvq(vq)->split.desc_state[head].data); + return data ? blk_mq_rq_from_pdu(data) : NULL; +} + +static inline int get_rq_internal_tag(struct request *rq) +{ + return rq ? 
rq->internal_tag : -1; +} + +static inline unsigned long get_issue_driver_ns(struct request *rq) +{ + if (!rq) + return 0; + if (rq->io_start_time_ns) + return rq->io_start_time_ns; + if (rq->deadline > rq->timeout) + return jiffies_to_usecs(rq->deadline - rq->timeout) * 1000; + return 0; +} + +/* + * LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0) + */ +static inline u64 get_check_hang_time_ns(void) +{ + return ktime_get_ns(); +} + +extern fn_queue_tag_busy_iter sym_blk_mq_queue_tag_busy_iter; +typedef void (*blk_mq_rq_iter)(struct request *, void *, bool); +static blk_mq_rq_iter fn_blk_mq_check_hang = NULL; +static bool blk_mq_check_rq_hang(struct request *rq, + void *priv, bool reserved) +{ + if (fn_blk_mq_check_hang) + fn_blk_mq_check_hang(rq, priv, reserved); + return true; +} + +static inline int iter_all_rq(struct request_queue *q, blk_mq_rq_iter fn, void *data) +{ + fn_blk_mq_check_hang = fn; + + sym_blk_mq_queue_tag_busy_iter(q, blk_mq_check_rq_hang, data); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/5.19.0-1_rc1.an23.x86_64/nvme.h b/source/lib/internal/kernel_module/modules/iosdiag/include/5.19.0-1_rc1.an23.x86_64/nvme.h new file mode 100644 index 0000000000000000000000000000000000000000..9f3286080f831945117862601f668da7376e10c4 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/5.19.0-1_rc1.an23.x86_64/nvme.h @@ -0,0 +1,61 @@ +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +struct nvme_queue { + struct device *q_dmadev; + void *nvme_dev; //struct nvme_dev *dev; + spinlock_t sq_lock; + struct nvme_command *sq_cmds; + struct nvme_command __iomem *sq_cmds_io; + spinlock_t cq_lock ____cacheline_aligned_in_smp; + volatile struct nvme_completion *cqes; + struct blk_mq_tags **tags; + dma_addr_t sq_dma_addr; + dma_addr_t cq_dma_addr; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_tail; + u16 last_sq_tail; + u16 cq_head; + u16 last_cq_head; + u16 qid; + u8 cq_phase; + u8 polled; + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; +}; + +static int get_sq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int tail = nvmeq->sq_tail; + struct nvme_command cmd; + + do { + if (nvmeq->sq_cmds_io) { + memcpy_toio(&cmd, &nvmeq->sq_cmds_io[tail], sizeof(struct nvme_command)); + if (cmd.common.command_id == rq->tag) + return tail; + } + else if (nvmeq->sq_cmds[tail].common.command_id == rq->tag) + return tail; + } while (--tail >= 0); + return -1; +} + +static unsigned long get_cmd_ctx(struct nvme_queue *nvmeq, struct request *rq) +{ + //struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/include/5.19.0-1_rc1.an23.x86_64/virtio_blk.h b/source/lib/internal/kernel_module/modules/iosdiag/include/5.19.0-1_rc1.an23.x86_64/virtio_blk.h new file mode 100644 index 0000000000000000000000000000000000000000..da9179e2cc7c8370e296ef33f83d7cc5aa1d7496 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/include/5.19.0-1_rc1.an23.x86_64/virtio_blk.h @@ -0,0 +1,302 @@ +#ifndef _VIRTIO_BLK_H +#define _VIRTIO_BLK_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +#define VQ_NAME_LEN 16 + +struct virtio_blk_vq { + struct virtqueue *vq; + spinlock_t lock; + char name[VQ_NAME_LEN]; +} ____cacheline_aligned_in_smp; + +struct vring_desc_state_split { + 
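+	/*
+	 * This struct and the vring_virtqueue definition below mirror the
+	 * kernel's private vring bookkeeping in drivers/virtio/virtio_ring.c
+	 * for this kernel version, so that desc_state_data_to_req() further
+	 * down can map a descriptor head back to its struct request.
+	 */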
void *data; /* Data for callback. */ + struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ +}; + +struct vring_desc_state_packed { + void *data; /* Data for callback. */ + struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */ + u16 num; /* Descriptor list length. */ + u16 last; /* The last desc state in a list. */ +}; + +struct vring_desc_extra { + dma_addr_t addr; /* Descriptor DMA addr. */ + u32 len; /* Descriptor length. */ + u16 flags; /* Descriptor flags. */ + u16 next; /* The next desc state in a list. */ +}; + +struct vring_virtqueue { + struct virtqueue vq; + + /* Is this a packed ring? */ + bool packed_ring; + + /* Is DMA API used? */ + bool use_dma_api; + + /* Can we use weak barriers? */ + bool weak_barriers; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Host publishes avail event idx */ + bool event; + + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. */ + unsigned int num_added; + + /* Last used index we've seen. + * for split ring, it just contains last used index + * for packed ring: + * bits up to VRING_PACKED_EVENT_F_WRAP_CTR include the last used index. + * bits from VRING_PACKED_EVENT_F_WRAP_CTR include the used wrap counter. + */ + u16 last_used_idx; + + /* Hint for event idx: already triggered no need to disable. */ + bool event_triggered; + + union { + /* Available for split ring */ + struct { + /* Actual memory layout for this queue. */ + struct vring vring; + + /* Last written value to avail->flags */ + u16 avail_flags_shadow; + + /* + * Last written value to avail->idx in + * guest byte order. + */ + u16 avail_idx_shadow; + + /* Per-descriptor state. */ + struct vring_desc_state_split *desc_state; + struct vring_desc_extra *desc_extra; + + /* DMA address and size information */ + dma_addr_t queue_dma_addr; + size_t queue_size_in_bytes; + } split; + + /* Available for packed ring */ + struct { + /* Actual memory layout for this queue. */ + struct { + unsigned int num; + struct vring_packed_desc *desc; + struct vring_packed_desc_event *driver; + struct vring_packed_desc_event *device; + } vring; + + /* Driver ring wrap counter. */ + bool avail_wrap_counter; + + /* Avail used flags. */ + u16 avail_used_flags; + + /* Index of the next avail descriptor. */ + u16 next_avail_idx; + + /* + * Last written value to driver->flags in + * guest byte order. + */ + u16 event_flags_shadow; + + /* Per-descriptor state. */ + struct vring_desc_state_packed *desc_state; + struct vring_desc_extra *desc_extra; + + /* DMA address and size information */ + dma_addr_t ring_dma_addr; + dma_addr_t driver_event_dma_addr; + dma_addr_t device_event_dma_addr; + size_t ring_size_in_bytes; + size_t event_size_in_bytes; + } packed; + }; + + /* How to notify other side. FIXME: commonalize hcalls! */ + bool (*notify)(struct virtqueue *vq); + + /* DMA, allocation, and size information */ + bool we_own_ring; + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; + + /* Figure out if their kicks are too delayed. */ + bool last_add_time_valid; + ktime_t last_add_time; +#endif +}; + +struct virtio_blk { + /* + * This mutex must be held by anything that may run after + * virtblk_remove() sets vblk->vdev to NULL. + * + * blk-mq, virtqueue processing, and sysfs attribute code paths are + * shut down before vblk->vdev is set to NULL and therefore do not need + * to hold this mutex. 
+ */ + struct mutex vdev_mutex; + struct virtio_device *vdev; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* Block layer tags. */ + struct blk_mq_tag_set tag_set; + + /* Process context for config space updates */ + struct work_struct config_work; + + /* Ida index - used to track minor number allocations. */ + int index; + + /* num of vqs */ + int num_vqs; + int io_queues[HCTX_MAX_TYPES]; + struct virtio_blk_vq *vqs; +}; + +struct virtblk_req { + struct virtio_blk_outhdr out_hdr; + u8 status; + struct sg_table sg_table; + struct scatterlist sg[]; +}; +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +struct blk_mq_ctxs { + struct kobject kobj; + struct blk_mq_ctx __percpu *queue_ctx; +}; + +struct blk_mq_ctx { + struct { + spinlock_t lock; + struct list_head rq_list[HCTX_MAX_TYPES]; + } ____cacheline_aligned_in_smp; + + unsigned int cpu; + unsigned short index_hw[HCTX_MAX_TYPES]; + struct blk_mq_hw_ctx *hctxs[HCTX_MAX_TYPES]; + + struct request_queue *queue; + struct blk_mq_ctxs *ctxs; + struct kobject kobj; +} ____cacheline_aligned_in_smp; + +struct blk_flush_queue { + unsigned int flush_pending_idx:1; + unsigned int flush_running_idx:1; + blk_status_t rq_status; + unsigned long flush_pending_since; + struct list_head flush_queue[2]; + struct list_head flush_data_in_flight; + struct request *flush_rq; + + spinlock_t mq_flush_lock; +}; + +static inline int enable_detect_flush_rq(void) +{ + return 1; +} + +static inline enum hctx_type blk_mq_get_hctx_type(unsigned int flags) +{ + enum hctx_type type = HCTX_TYPE_DEFAULT; + + /* + * The caller ensure that if REQ_POLLED, poll must be enabled. + */ + if (flags & REQ_POLLED) + type = HCTX_TYPE_POLL; + else if ((flags & REQ_OP_MASK) == REQ_OP_READ) + type = HCTX_TYPE_READ; + return type; +} + +static inline struct blk_mq_hw_ctx *blk_mq_get_hctx_byrq(struct request *rq) +{ + return rq->mq_hctx; + //return rq->mq_ctx->hctxs[blk_mq_get_hctx_type(rq->cmd_flags)]; +} + +static inline struct request *desc_state_data_to_req(struct virtqueue *vq, int head) +{ + void *data = (to_vvq(vq)->packed_ring ? to_vvq(vq)->packed.desc_state[head].data : + to_vvq(vq)->split.desc_state[head].data); + return data ? blk_mq_rq_from_pdu(data) : NULL; +} + +static inline int get_rq_internal_tag(struct request *rq) +{ + return rq ? 
rq->internal_tag : -1; +} + +static inline unsigned long get_issue_driver_ns(struct request *rq) +{ + if (!rq) + return 0; + if (rq->io_start_time_ns) + return rq->io_start_time_ns; + if (rq->deadline > rq->timeout) + return jiffies_to_usecs(rq->deadline - rq->timeout) * 1000; + return 0; +} + +/* + * LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0) + */ +static inline u64 get_check_hang_time_ns(void) +{ + return ktime_get_ns(); +} + +extern fn_queue_tag_busy_iter sym_blk_mq_queue_tag_busy_iter; +typedef void (*blk_mq_rq_iter)(struct request *, void *, bool); +static blk_mq_rq_iter fn_blk_mq_check_hang = NULL; +static bool blk_mq_check_rq_hang(struct request *rq, + void *priv, bool reserved) +{ + if (fn_blk_mq_check_hang) + fn_blk_mq_check_hang(rq, priv, reserved); + return true; +} + +static inline int iter_all_rq(struct request_queue *q, blk_mq_rq_iter fn, void *data) +{ + fn_blk_mq_check_hang = fn; + + sym_blk_mq_queue_tag_busy_iter(q, blk_mq_check_rq_hang, data); + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/iosdiag/iosdiag.c b/source/lib/internal/kernel_module/modules/iosdiag/iosdiag.c new file mode 100644 index 0000000000000000000000000000000000000000..5b42a5bcaab49e868add68e7ad1147be7c5c66c4 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/iosdiag.c @@ -0,0 +1,420 @@ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" +#include + +#define DISKHANG_DIR_NAME "disk_hang" + +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 16, 0) +#define get_ino_data(x) PDE_DATA(x) +#else +#define get_ino_data(x) pde_data(x) +#endif +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 5, 0) +#define DEFINE_PROC_OPS(name, _write, _mmap) \ + static const struct file_operations name##_fops = { \ + .owner = THIS_MODULE, \ + .open = name##_open, \ + .read = seq_read, \ + .write = _write, \ + .mmap = _mmap, \ + .llseek = seq_lseek, \ + .release = single_release, \ + } +#else +#define DEFINE_PROC_OPS(name, write, mmap) \ + static const struct proc_ops name##_fops = { \ + .proc_open = name##_open, \ + .proc_read = seq_read, \ + .proc_write = write, \ + .proc_mmap = mmap, \ + } +#endif + +#define DEFINE_PROC_ATTRIBUTE(name, __write, __mmap) \ + static int name##_open(struct inode *inode, struct file *file) \ + { \ + return single_open(file, name##_show, get_ino_data(inode)); \ + } \ + DEFINE_PROC_OPS(name, __write, __mmap) + +#define DEFINE_PROC_ATTRIBUTE_RW(name) \ + static ssize_t name##_write(struct file *file, \ + const char __user *buf, \ + size_t count, loff_t *ppos) \ + { \ + return name##_store(get_ino_data(file_inode(file)), buf, \ + count); \ + } \ + DEFINE_PROC_ATTRIBUTE(name, name##_write, name##_mmap) + +static DEFINE_MUTEX(rq_hang_buffer_mutex); + +struct rq_store { + struct list_head list; + struct request *rq; +}; +static struct rq_hang_info *g_rq_hang_info; +static int g_rq_hang_idx; +static unsigned long long g_rq_hang_total; +static int g_disk_type = -1; +static int g_bio_file_info; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0) +fn_queue_tag_busy_iter sym_blk_mq_queue_tag_busy_iter = NULL; +#endif +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 18, 0) +typedef void (*fn_mq_free_request)(struct request *rq); +static fn_mq_free_request sym_blk_mq_free_request; +#endif +fn_get_files_struct sym_get_files_struct = NULL; +fn_put_files_struct sym_put_files_struct = NULL; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 10, 0) +typedef struct block_device 
*(*fn_blkdev_get_no_open)(dev_t dev); +typedef void (*fn_blkdev_put_no_open)(struct block_device *bdev); +static fn_blkdev_get_no_open sym_blkdev_get_no_open; +static fn_blkdev_put_no_open sym_blkdev_put_no_open; +fn_fget_task sym_fget_task = NULL; +#endif + +static void set_disk_type(char *buf) +{ + if (buf[0] == 'v' && buf[1] == 'd' && (buf[2] >= 'a' && buf[2] <= 'z')) + g_disk_type = DISK_VIRTIO_BLK; + else if (buf[0] == 's' && buf[1] == 'd' && (buf[2] >= 'a' && buf[2] <= 'z')) + g_disk_type = DISK_SCSI; + else if (!strncmp(buf, "nvme", 4)) + g_disk_type = DISK_NVME; + else + g_disk_type = -1; +} + +static int get_disk_type(void) +{ + return g_disk_type; +} + +int get_bio_file_info(void) +{ + return g_bio_file_info; +} + +static void store_hang_rq(struct request *rq, unsigned long long now) +{ + int index; + + if (g_rq_hang_idx >= MAX_STORE_RQ_CNT) + return; + + g_rq_hang_total++; + index = g_rq_hang_idx; + if (fill_hang_info_from_rq(&g_rq_hang_info[index], rq, + get_disk_type())) + return; + g_rq_hang_info[index].check_hang_ns = now; + g_rq_hang_info[index].req_addr = (unsigned long)rq; + g_rq_hang_idx++; +} + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 18, 0) +static int is_flush_rq(struct request *rq) +{ + struct blk_mq_hw_ctx *hctx = blk_mq_get_hctx_byrq(rq); + + if (hctx && hctx->fq) + return hctx->fq->flush_rq == rq; + return 0; +} +#endif + +static void mq_check_rq_hang(struct request *rq, void *priv, bool reserved) +{ + int rq_hang_threshold = *((int *)priv); + u64 now = get_check_hang_time_ns(); + u64 duration; + + if (!rq) + return; + + if (g_rq_hang_idx >= MAX_STORE_RQ_CNT) + return; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 18, 0) + //if (is_flush_rq(rq) && !enable_detect_flush_rq()) + if (is_flush_rq(rq)) + return; +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 16, 0) + if (!refcount_inc_not_zero(&rq->ref)) + return; +#endif +#else + if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) + return; +#endif + duration = div_u64(now - rq->start_time_ns, NSEC_PER_MSEC); + if (duration >= rq_hang_threshold) + store_hang_rq(rq, now); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 18, 0) + //if (is_flush_rq(rq) && rq->end_io) + // rq->end_io(rq, 0); + //else if (refcount_dec_and_test(&rq->ref)) +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 16, 0) + if (refcount_dec_and_test(&rq->ref)) + sym_blk_mq_free_request(rq); +#endif +#endif +} + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 20, 0) +static struct rq_store g_rq_store[MAX_STORE_RQ_CNT]; +static int sq_check_rq_hang(struct request_queue *q, int rq_hang_threshold) +{ + u64 now = get_check_hang_time_ns(); + u64 duration; + unsigned long flags; + struct request *rq, *tmp; + LIST_HEAD(rq_list); + int rq_store_idx = 0; + spinlock_t *queue_lock = q->queue_lock; + + spin_lock_irqsave(queue_lock, flags); + list_for_each_entry_safe(rq, tmp, &q->queue_head, queuelist) { + duration = div_u64(now - rq->start_time_ns, NSEC_PER_MSEC); + if (duration >= rq_hang_threshold && rq_store_idx < MAX_STORE_RQ_CNT) { + g_rq_store[rq_store_idx].rq = rq; + INIT_LIST_HEAD(&g_rq_store[rq_store_idx].list); + list_add(&g_rq_store[rq_store_idx].list, &rq_list); + rq_store_idx++; + } else + continue; + } + spin_unlock_irqrestore(queue_lock, flags); + + spin_lock_irqsave(queue_lock, flags); + list_for_each_entry_safe(rq, tmp, &q->timeout_list, timeout_list) { + duration = div_u64(now - rq->start_time_ns, NSEC_PER_MSEC); + if (duration >= rq_hang_threshold && rq_store_idx < MAX_STORE_RQ_CNT) { + g_rq_store[rq_store_idx].rq = rq; + 
INIT_LIST_HEAD(&g_rq_store[rq_store_idx].list); + list_add(&g_rq_store[rq_store_idx].list, &rq_list); + rq_store_idx++; + } else + continue; + } + spin_unlock_irqrestore(queue_lock, flags); + while(!list_empty(&rq_list)) { + struct rq_store *rqs; + rqs = list_first_entry(&rq_list, struct rq_store, list); + if (rqs->rq) + store_hang_rq(rqs->rq, now); + list_del_init(&rqs->list); + } + return 0; +} +#else +static int sq_check_rq_hang(struct request_queue *q, int rq_hang_threshold) {return 0;} +#endif + +static int rq_hang_detect(dev_t devnum, int rq_hang_threshold) +{ + int ret = 0; + struct request_queue *q; + struct block_device *bdev; + + if (!devnum || rq_hang_threshold <= 0) + return -EINVAL; +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 10, 0) + bdev = bdget(devnum); +#else + bdev = sym_blkdev_get_no_open(devnum); +#endif + if (!bdev) { + printk("error: invalid devnum(%d:%d)\n", MAJOR(devnum), MINOR(devnum)); + return -EFAULT; + } +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 8, 0) || LINUX_VERSION_CODE >= KERNEL_VERSION(5, 17, 0) + if (!bdev->bd_queue) { +#endif + if (!bdev->bd_disk || !(q = bdev_get_queue(bdev))) { + printk("error: can't get request queue for devnum(%d:%d)\n", + MAJOR(devnum), MINOR(devnum)); + ret = -EFAULT; + goto out; + } +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 8, 0) || LINUX_VERSION_CODE >= KERNEL_VERSION(5, 17, 0) + } else + q = bdev->bd_queue; +#endif + + if (q->mq_ops) + ret = iter_all_rq(q, mq_check_rq_hang, &rq_hang_threshold); + else + ret = sq_check_rq_hang(q, rq_hang_threshold); +out: +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 10, 0) + bdput(bdev); +#else + sym_blkdev_put_no_open(bdev); +#endif + return ret; +} + +static int rq_hang_show(struct seq_file *m, void *ptr) +{ + seq_printf(m, "total_rq_hang:%llu\n", g_rq_hang_total); + return 0; +} + +static ssize_t rq_hang_store(struct file *file, + const char __user *buf, size_t count) +{ + int ret; + char *p; + char chr[256]; + char diskname[BDEVNAME_SIZE] = {0}; + int major, minor; + int threshold = 0; + + if (count < 1) + return -EINVAL; + + if (copy_from_user(chr, buf, 256)) + return -EFAULT; + + /* echo "vdb:253:16 1000" > /proc/xxxxx */ + if ((p = strstr(chr, ":"))) { + memcpy(diskname, chr, (p - chr)); + ret = sscanf(p+1, "%d:%d %d %d", &major, &minor, &threshold, &g_bio_file_info); + if (ret < 3 || threshold <= 0 || major < 1 || minor < 0) { + printk("invalid argument \'%s\'\n", chr); + return -EINVAL; + } + } else { + printk("invalid argument \'%s\'\n", chr); + return -EINVAL; + } + mutex_lock(&rq_hang_buffer_mutex); + set_disk_type(diskname); + g_rq_hang_idx = 0; + memset(g_rq_hang_info, 0x0, sizeof(struct rq_hang_info) * MAX_STORE_RQ_CNT); + ret = rq_hang_detect(MKDEV(major, minor), threshold); + mutex_unlock(&rq_hang_buffer_mutex); + return ret ? 
ret : count; +} + +static int rq_hang_mmap(struct file *file, struct vm_area_struct *vma) +{ + return remap_vmalloc_range(vma, (void *)g_rq_hang_info, vma->vm_pgoff); +} +DEFINE_PROC_ATTRIBUTE_RW(rq_hang); + +static int fill_ksymbols(void) +{ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 10, 0) + char *blkdev_get_func_name = "bdget"; + char *blkdev_put_func_name = "bdput"; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 17, 0) + blkdev_get_func_name = "blkdev_get_no_open"; + blkdev_put_func_name = "blkdev_put_no_open"; +#endif + sym_blkdev_get_no_open = + (fn_blkdev_get_no_open)get_func_syms_by_name(blkdev_get_func_name); + if (!sym_blkdev_get_no_open) { + pr_err("not found symbol \"%s\"\n", blkdev_get_func_name); + return -EFAULT; + } + sym_blkdev_put_no_open = + (fn_blkdev_put_no_open)get_func_syms_by_name(blkdev_put_func_name); + if (!sym_blkdev_put_no_open) { + pr_err("not found symbol \"%s\"\n", blkdev_put_func_name); + return -EFAULT; + } + sym_fget_task = + (fn_fget_task)get_func_syms_by_name("fget_task"); + if (!sym_fget_task) + pr_warn("not found symbol \"fget_task\"\n"); +#else + sym_get_files_struct = + (fn_get_files_struct)get_func_syms_by_name("get_files_struct"); + if (!sym_get_files_struct) + pr_warn("not found symbol \"get_files_struct\"\n"); + + sym_put_files_struct = + (fn_put_files_struct)get_func_syms_by_name("put_files_struct"); + if (!sym_put_files_struct) + pr_warn("not found symbol \"put_files_struct\"\n"); +#endif +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0) + sym_blk_mq_queue_tag_busy_iter = + (fn_queue_tag_busy_iter)get_func_syms_by_name("blk_mq_queue_tag_busy_iter"); + if (!sym_blk_mq_queue_tag_busy_iter) { + pr_err("not found symbol \"blk_mq_queue_tag_busy_iter\"\n"); + return -EFAULT; + } +#endif +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 18, 0) + sym_blk_mq_free_request = + (fn_mq_free_request)get_func_syms_by_name("__blk_mq_free_request"); + if (!sym_blk_mq_free_request) { + pr_err("not found symbol \"__blk_mq_free_request\"\n"); + return -EFAULT; + } +#endif + return 0; +} + +int disk_hang_init(void) +{ + int ret; + struct proc_dir_entry *disk_hang_dir = NULL; + + if (fill_ksymbols()) { + pr_err("init ksymbols fail!\n"); + return -EPERM; + } + + disk_hang_dir = proc_mkdir(DISKHANG_DIR_NAME, NULL); + if (!disk_hang_dir) { + pr_err("create \"/proc/%s\" fail\n", DISKHANG_DIR_NAME); + return -ENOMEM; + } + if (!proc_create_data("rq_hang_detect", 0600, disk_hang_dir, + &rq_hang_fops, NULL)) { + pr_err("create \"/proc/%s/rq_hang_detect\" fail\n", + DISKHANG_DIR_NAME); + ret = -ENOMEM; + goto remove_proc; + } + g_rq_hang_info = vmalloc_user(sizeof(struct rq_hang_info) * MAX_STORE_RQ_CNT); + if (!g_rq_hang_info) { + pr_err("alloc memory \"rq hang info buffer\" fail\n"); + ret = -ENOMEM; + goto remove_proc; + } + memset(g_rq_hang_info, 0x0, sizeof(struct rq_hang_info) * MAX_STORE_RQ_CNT); + pr_info("iosdiag load success\n"); + return 0; +remove_proc: + remove_proc_subtree(DISKHANG_DIR_NAME, NULL); + return ret; +} + +int disk_hang_exit(void) +{ + if (g_rq_hang_info) { + vfree(g_rq_hang_info); + g_rq_hang_info = NULL; + } + remove_proc_subtree(DISKHANG_DIR_NAME, NULL); + return 0; +} + diff --git a/source/lib/internal/kernel_module/modules/iosdiag/iosdiag.h b/source/lib/internal/kernel_module/modules/iosdiag/iosdiag.h new file mode 100644 index 0000000000000000000000000000000000000000..87f735ca76defd7afccee0e83b65bfab7b8910cd --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/iosdiag.h @@ -0,0 +1,109 @@ +#ifndef __IOSDIAG_H +#define __IOSDIAG_H 
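+
+/*
+ * Record layout exported by the iosdiag disk-hang detector. iosdiag.c fills
+ * the vmalloc'ed g_rq_hang_info array and exposes it through
+ * /proc/disk_hang/rq_hang_detect: writing
+ * "<diskname>:<major>:<minor> <threshold_ms> [<bio_file_info>]" runs one scan,
+ * and mmap()ing the same file gives read access to the result buffer.
+ * A rough userspace sketch (illustrative only; variable names are
+ * hypothetical and no particular collector tool is implied):
+ *
+ *   char cmd[256] = "vdb:253:16 1000";          // scan vdb, threshold 1000 ms
+ *   int fd = open("/proc/disk_hang/rq_hang_detect", O_RDWR);
+ *   write(fd, cmd, sizeof(cmd));                // kernel side copies 256 bytes
+ *   struct rq_hang_info *ri = mmap(NULL,
+ *           sizeof(*ri) * MAX_STORE_RQ_CNT,
+ *           PROT_READ, MAP_SHARED, fd, 0);      // entries with req_addr != 0 are valid
+ */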
+#include +#include +#include "ksymbol.h" + +#define MAX_STORE_RQ_CNT 128 +#define MAX_FILE_NAME_LEN 255 +#define BIO_INFO_MAX_PAGES 32 +#define MAX_REQ_BIOS 32 + +enum disk_type { + DISK_VIRTIO_BLK, + DISK_NVME, + DISK_SCSI, +}; + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 16, 0) +enum rq_atomic_flags { + REQ_ATOM_COMPLETE = 0, + REQ_ATOM_STARTED, +}; +#endif +/* +struct rq_buffer { + struct request rq; + unsigned long long check_time_ns; + void *rq_addr; +}; +*/ +struct vq_info { + int qid; + int vring_num; + int last_used_idx; + int used_idx; + int used_ring_flags; + int last_avail_idx; + int avail_idx; + int avail_ring_flags; + int event; + int rq_avail_idx; + int last_kick_avail_idx; + int rq_used_idx; +}; + +struct nvme_info { + int qid; + int q_depth; //sq/cq depth + int cq_head; //nvmeq->cqes[cq_head]~nvmeq->cqes[cq_end], including req->tag? + int cq_end; + int cq_rq_idx; //rq idx in cq + //int last_cq_head; //nvmeq->sq_head or nvmeq->last_cq_head + int sq_tail; //0~nvmeq->sq_cmds[idx].command_id, including req->tag? + int sq_rq_idx; //rq idx in sq + int sq_last_db; //last sq idx host kick nvme, nvmeq->q_db + unsigned long cmd_ctx; +}; + +struct scsi_info { + int done_hander_defined; + int is_mq; +}; + +struct bio_info { + unsigned long bio_addr; + unsigned long sector; + unsigned int size; + unsigned int pid; + char comm[TASK_COMM_LEN]; + char filename[MAX_FILE_NAME_LEN]; +}; + +struct rq_hang_info { + unsigned int data_len; + unsigned long sector; + unsigned long req_addr; + unsigned long long io_start_ns; + unsigned long long io_issue_driver_ns; + unsigned long long check_hang_ns; + char op[64]; + char state[16]; + struct vq_info vq; + struct nvme_info nvme; + struct scsi_info scsi; + int tag; + int internal_tag; + int cpu; + char diskname[BDEVNAME_SIZE]; + //int errors; + //unsigned long cmd_flags; + struct bio_info first_bio; +}; +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 17, 0) +typedef void (*fn_queue_tag_busy_iter)(struct request_queue *q, busy_iter_fn *fn, void *priv); +#else +typedef void (*fn_queue_tag_busy_iter)(struct request_queue *q, busy_tag_iter_fn *fn, void *priv); +#endif +#if LINUX_VERSION_CODE > KERNEL_VERSION(5, 10, 0) +typedef struct file *(*fn_fget_task)(struct task_struct *task, unsigned int fd); +#endif +typedef struct files_struct *(*fn_get_files_struct)(struct task_struct *); +typedef void (*fn_put_files_struct)(struct files_struct *fs); + + +int fill_hang_info_from_rq(struct rq_hang_info *rq_hang_info, + struct request *rq, + int disk_type); +#endif + diff --git a/source/lib/internal/kernel_module/modules/iosdiag/nvme.c b/source/lib/internal/kernel_module/modules/iosdiag/nvme.c new file mode 100644 index 0000000000000000000000000000000000000000..b890579bfb69a3bcd7748a1a57cabc3fd7edfe6b --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/nvme.c @@ -0,0 +1,65 @@ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static struct nvme_queue *get_nvme_queue_by_rq(struct request *rq) +{ + struct blk_mq_hw_ctx *hctx; + + if (!rq) + return NULL; + + hctx = blk_mq_get_hctx_byrq(rq); + if (!hctx) + return NULL; + return hctx->driver_data ? 
hctx->driver_data : NULL; +} + +static int get_cq_end(struct nvme_queue *nvmeq, struct request *rq) +{ + int head = nvmeq->cq_head; + + do { + if (nvmeq->cqes[head].command_id == -1) + return head; + } while (++head < nvmeq->q_depth); + return -1; +} + +static int get_cq_rq_idx(struct nvme_queue *nvmeq, struct request *rq) +{ + int head = 0; + + do { + if (nvmeq->cqes[head].command_id == rq->tag) + return head; + } while (++head < nvmeq->q_depth); + return -1; +} + +void get_nvme_info(struct nvme_info *nvme_i, struct request *rq) +{ + struct nvme_queue *nvmeq; + + if (!(nvmeq = get_nvme_queue_by_rq(rq))) + return; + + nvme_i->qid = nvmeq->qid; + nvme_i->q_depth = nvmeq->q_depth; + nvme_i->cq_head = nvmeq->cq_head; + nvme_i->cq_end = get_cq_end(nvmeq, rq); + nvme_i->cq_rq_idx = get_cq_rq_idx(nvmeq, rq); + nvme_i->sq_tail = nvmeq->sq_tail; + nvme_i->sq_rq_idx = get_sq_rq_idx(nvmeq, rq); + nvme_i->sq_last_db = readl(nvmeq->q_db); + nvme_i->cmd_ctx = get_cmd_ctx(nvmeq, rq); +} + diff --git a/source/lib/internal/kernel_module/modules/iosdiag/rq_hang.c b/source/lib/internal/kernel_module/modules/iosdiag/rq_hang.c new file mode 100644 index 0000000000000000000000000000000000000000..12a22cfb89dcfb717735a7fd7b04400f58c910e4 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/rq_hang.c @@ -0,0 +1,350 @@ + +#include +#include +#include +#include +#include +#include +#include +#include +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) +#include +#endif +#include +#include +#include +#include +#include +#include "iosdiag.h" +#include + +struct req_op_name{ + int op; + char *op_str; +}; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0) +#define REQ_OP_NAME(name) {REQ_OP_##name, #name} +#else +#define REQ_READ 0 +#define REQ_OP_NAME(name) {REQ_##name, #name} +#endif +static struct req_op_name g_op_name[] = { + REQ_OP_NAME(READ), + REQ_OP_NAME(WRITE), + REQ_OP_NAME(FLUSH), + REQ_OP_NAME(DISCARD), + REQ_OP_NAME(WRITE_SAME), +}; +#define SINGLE_OP_NAME_SIZE 16 +#define MAX_OP_NAME_SIZE ((SINGLE_OP_NAME_SIZE + 1) * 5) + +static const char *const blk_mq_rq_state_name_array[] = { +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 16, 0) + [REQ_ATOM_COMPLETE] = "complete", + [REQ_ATOM_STARTED] = "in_flight", +#else + [MQ_RQ_IDLE] = "idle", + [MQ_RQ_IN_FLIGHT] = "in_flight", + [MQ_RQ_COMPLETE] = "complete", +#endif +}; + +extern fn_get_files_struct sym_get_files_struct; +extern fn_put_files_struct sym_put_files_struct; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 10, 0) +extern fn_fget_task sym_fget_task; +#endif +extern int get_bio_file_info(void); + +extern void get_vq_info(struct vq_info *vq_i, struct request *rq); +extern void get_scsi_info(struct scsi_info *scsi_i, struct request *rq); +extern void get_nvme_info(struct nvme_info *nvme_i, struct request *rq); + +static char *get_disk_name(struct gendisk *hd, int partno, char *buf) +{ + if (!partno) + snprintf(buf, BDEVNAME_SIZE, "%s", hd->disk_name); + else if (isdigit(hd->disk_name[strlen(hd->disk_name)-1])) + snprintf(buf, BDEVNAME_SIZE, "%sp%d", hd->disk_name, partno); + else + snprintf(buf, BDEVNAME_SIZE, "%s%d", hd->disk_name, partno); + return buf; +} + +static void blk_rq_op_name(int op_flags, char *op_buf, int buf_len) +{ + int i = 0; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0) + for (; i < (sizeof(g_op_name) / sizeof(g_op_name[0])); i++) { + if (op_flags == g_op_name[i].op) { + strcat(op_buf, g_op_name[i].op_str); + return; + } + } +#else + int len; + for (; i < (sizeof(g_op_name) / sizeof(g_op_name[0])); i++) { + if (op_flags 
& g_op_name[i].op) { + if ((len = strlen(op_buf)) >= buf_len) + return; + if (len) { + strncat(op_buf, "|", min((strlen("|") + 1),(buf_len - len))); + op_buf[buf_len - 1] = '\0'; + if ((len = strlen(op_buf)) >= buf_len) + return; + } + strncat(op_buf, g_op_name[i].op_str, + min((strlen(g_op_name[i].op_str) + 1), + (buf_len - len))); + op_buf[buf_len - 1] = '\0'; + } + } +#endif +} + +static const char *blk_mq_rq_state_name(unsigned int rq_state) +{ + if (WARN_ON_ONCE(rq_state >= + ARRAY_SIZE(blk_mq_rq_state_name_array))) + return "(?)"; + return blk_mq_rq_state_name_array[rq_state]; +} + +static char *__dentry_name(struct dentry *dentry, char *name) +{ + char *p = dentry_path_raw(dentry, name, PATH_MAX); + + if (IS_ERR(p)) { + __putname(name); + return NULL; + } + + if (p + strlen(p) + 1 != name + PATH_MAX) { + __putname(name); + return NULL; + } + + if (p > name) + strcpy(name, p); + + return name; +} + +static char *dentry_name(struct dentry *dentry) +{ + char *name = __getname(); + if (!name) + return NULL; + + return __dentry_name(dentry, name); +} + +static char *inode_name(struct inode *ino) +{ + struct dentry *dentry; + char *name; + + dentry = d_find_alias(ino); + if (!dentry) + return NULL; + + name = dentry_name(dentry); + dput(dentry); + return name; +} + +static int is_task_open_file(struct task_struct *p, struct inode *ino) +{ + struct files_struct *files; + struct file *file; + struct fdtable *fdt; + unsigned int fd; + int found = 0; + +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 10, 0) + files = p->files; +#else + if (!sym_get_files_struct || !sym_put_files_struct) + return found; + files = sym_get_files_struct(p); +#endif + if (files) { + rcu_read_lock(); + fdt = files_fdtable(files); + fd = find_first_bit(fdt->open_fds, fdt->max_fds); + while (fd < fdt->max_fds) { +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 10, 0) + file = fcheck_files(files, fd); +#else + if (!sym_fget_task) + break; + file = sym_fget_task(p, fd); +#endif + if (file && (file_inode(file) == ino)) + found = 1; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 10, 0) + fput(file); +#endif + if (found) + break; + fd = find_next_bit(fdt->open_fds, fdt->max_fds, fd + 1); + } + rcu_read_unlock(); + sym_put_files_struct(files); + } + return found; +} + +static void get_task_info_lsof(struct inode *ino, unsigned int *pid, + char *comm) +{ + struct task_struct *p; + + rcu_read_lock(); + for_each_process(p) { + if (p->flags & PF_KTHREAD) + continue; + rcu_read_unlock(); + get_task_struct(p); + if (is_task_open_file(p, ino)) { + *pid = p->pid; + memcpy(comm, p->comm, sizeof(p->comm)); + put_task_struct(p); + return; + } + put_task_struct(p); + + cond_resched(); + rcu_read_lock(); + } + rcu_read_unlock(); +} + +static int get_inode_filename(struct inode *ino, char *name_buf, + int len) +{ + char *name; + + if (!ino->i_ino) + return -1; + + name = inode_name(ino); + if (name) { + if (strlen(name) + 1 <= len) + strlcpy(name_buf, name, strlen(name) + 1); + else { + strlcpy(name_buf, "...", 4); + strlcpy(name_buf + 3, + name + (strlen(name) + 1 - (len - 3)), + (len - 3)); + } + __putname(name); + return 0; + } + return -1; +} + +static void get_bio_info(struct bio_info *bio_i, struct bio *bio) +{ + struct bio_vec *bvec; + + if (!bio) + return; + + bio_i->bio_addr = (unsigned long)bio; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 14, 0) + bio_i->sector = bio->bi_iter.bi_sector; + bio_i->size = bio->bi_iter.bi_size; +#else + bio_i->sector = bio->bi_sector; + bio_i->size = bio->bi_size; +#endif + if (get_bio_file_info()) { +#if 
LINUX_VERSION_CODE < KERNEL_VERSION(5, 1, 0) + int i; + bio_for_each_segment_all(bvec, bio, i) { +#else + struct bvec_iter_all iter_all; + bio_for_each_segment_all(bvec, bio, iter_all) { +#endif + struct page *page = bvec->bv_page; + + if (!page) + continue; + if (page->mapping && page->mapping->host) { + if (get_inode_filename(page->mapping->host, bio_i->filename, + sizeof(bio_i->filename))) { + continue; + } + + if (sym_get_files_struct && sym_put_files_struct) + get_task_info_lsof(page->mapping->host, &bio_i->pid, + bio_i->comm); + break; + } + } + } +} + +static void get_rq_info(struct rq_hang_info *rq_hi, struct request *rq) +{ + char op_buf[MAX_OP_NAME_SIZE]; + + rq_hi->data_len = rq->__data_len; + rq_hi->sector = rq->__sector; + strcpy(op_buf, ""); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0) + blk_rq_op_name(req_op(rq), op_buf, sizeof(op_buf)); +#else + blk_rq_op_name(rq->cmd_flags, op_buf, sizeof(op_buf)); +#endif + strncpy(rq_hi->op, op_buf, min(strlen(op_buf), sizeof(rq_hi->op) - 1)); +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 16, 0) + strcpy(rq_hi->state, (blk_mq_rq_state_name((test_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags) ? + REQ_ATOM_COMPLETE : REQ_ATOM_STARTED)))); +#else + strcpy(rq_hi->state, blk_mq_rq_state_name(READ_ONCE(rq->state))); +#endif + rq_hi->tag = rq->tag; + rq_hi->internal_tag = get_rq_internal_tag(rq); + if (rq->mq_ctx) + rq_hi->cpu = rq->mq_ctx->cpu; +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 20, 0) + else + rq_hi->cpu = rq->cpu; +#endif + rq_hi->io_start_ns = rq->start_time_ns; + rq_hi->io_issue_driver_ns = get_issue_driver_ns(rq); +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 16, 0) + if (rq->rq_disk) + get_disk_name(rq->rq_disk, rq->part ? rq->part->partno : 0, + rq_hi->diskname); +#else + if (rq->q && rq->q->disk) + get_disk_name(rq->q->disk, rq->part ? rq->part->bd_partno : 0, + rq_hi->diskname); +#endif + get_bio_info(&rq_hi->first_bio, rq->bio); +} + +int fill_hang_info_from_rq(struct rq_hang_info *rq_hi, + struct request *rq, int disk_type) +{ +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 18, 0) + if (!rq || !test_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) + return -1; +#elif LINUX_VERSION_CODE < KERNEL_VERSION(5, 16, 0) + if (!rq || !refcount_read(&rq->ref)) + return -1; +#endif + get_rq_info(rq_hi, rq); + if (disk_type == DISK_VIRTIO_BLK) + get_vq_info(&rq_hi->vq, rq); + else if (disk_type == DISK_NVME) + get_nvme_info(&rq_hi->nvme, rq); + else if (disk_type == DISK_SCSI) + get_scsi_info(&rq_hi->scsi, rq); + return 0; +} + diff --git a/source/lib/internal/kernel_module/modules/iosdiag/scsi.c b/source/lib/internal/kernel_module/modules/iosdiag/scsi.c new file mode 100644 index 0000000000000000000000000000000000000000..d9288b85a51dd3859c2e7320c7a9008b800fefdf --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/scsi.c @@ -0,0 +1,35 @@ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "iosdiag.h" + +void get_scsi_info(struct scsi_info *scsi_i, struct request *rq) +{ + struct scsi_cmnd *cmd; + + if (rq->q->mq_ops) { + scsi_i->is_mq = 1; + cmd = blk_mq_rq_to_pdu(rq); + } +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 0, 0) + else + cmd = rq->special; +#endif + + if (!cmd) + return; +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 15, 0) + scsi_i->done_hander_defined = cmd->scsi_done ? 
1 : 0; +#else + scsi_i->done_hander_defined = 1; +#endif +} diff --git a/source/lib/internal/kernel_module/modules/iosdiag/virtio_blk.c b/source/lib/internal/kernel_module/modules/iosdiag/virtio_blk.c new file mode 100644 index 0000000000000000000000000000000000000000..9b856613a08f20458a19a45ae915b3b9fce6d1ae --- /dev/null +++ b/source/lib/internal/kernel_module/modules/iosdiag/virtio_blk.c @@ -0,0 +1,148 @@ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static struct vring *get_vring_by_vq(struct virtqueue *vq) +{ +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 20, 0) + return &to_vvq(vq)->vring; +#else + if (to_vvq(vq)->packed_ring) + return NULL;//(struct vring *)&to_vvq(vq)->packed.vring; + + return (struct vring *)&to_vvq(vq)->split.vring; +#endif +} + +static struct virtqueue *get_virtqueue_by_rq(struct request *rq) +{ + struct virtio_blk *vblk; + int qid; + struct blk_mq_hw_ctx *hctx; + struct virtqueue *vq; + + if (!rq) + return NULL; + + hctx = blk_mq_get_hctx_byrq(rq); + if (!hctx) + return NULL; + qid = hctx->queue_num; + vblk = hctx->queue->queuedata; + if (qid >= vblk->num_vqs) + return NULL; + vq = vblk->vqs[qid].vq; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 20, 0) + if (!vq || !get_vring_by_vq(vq)) + return NULL; +#endif + return vq; +} + + +static int get_vq_id(struct request *rq) +{ + struct blk_mq_hw_ctx *hctx; + + if (!rq) + return -1; + + hctx = blk_mq_get_hctx_byrq(rq); + if (!hctx) + return -1; + return hctx->queue_num; +} + +static int get_rq_avail_idx(struct request *rq) +{ + int i; + unsigned int loop = 0; + struct vring *vring; + struct virtqueue *vq; + u16 last_used_idx; + u16 current_avail_idx; + int head; + + if (!(vq = get_virtqueue_by_rq(rq))) + return -1; + vring = get_vring_by_vq(vq); + current_avail_idx = vring->avail->idx; + last_used_idx = to_vvq(vq)->last_used_idx; + while (last_used_idx <= current_avail_idx && (loop++) < vring->num) { + i = last_used_idx & (vring->num - 1); + head = virtio16_to_cpu(vq->vdev, vring->avail->ring[i]); + if (head < vring->num) { + if (desc_state_data_to_req(vq, head) == rq) + return last_used_idx; + } else { + return -1; + } + last_used_idx++; + } + return -1; +} + +static int get_rq_used_idx(struct request *rq) +{ + int i; + unsigned int loop = 0; + struct vring *vring; + struct virtqueue *vq; + u16 last_used_idx; + u16 used_idx; + int head; + + if (!(vq = get_virtqueue_by_rq(rq))) + return -1; + vring = get_vring_by_vq(vq); + used_idx = virtio16_to_cpu(vq->vdev, vring->used->idx); + last_used_idx = to_vvq(vq)->last_used_idx; + while (last_used_idx < used_idx && (loop++) < vring->num) { + i = last_used_idx & (vring->num - 1); + head = virtio32_to_cpu(vq->vdev, vring->used->ring[i].id); + if (head < vring->num) { + if (desc_state_data_to_req(vq, head) == rq) + return last_used_idx; + } else { + return -1; + } + last_used_idx++; + } + return -1; +} + +void get_vq_info(struct vq_info *vq_i, struct request *rq) +{ + struct vring *vring; + struct virtqueue *vq; + + if (!(vq = get_virtqueue_by_rq(rq))) + return; + + vring = get_vring_by_vq(vq); + vq_i->qid = get_vq_id(rq); + vq_i->vring_num = vring->num; + vq_i->event = to_vvq(vq)->event ? 
1 : 0; + vq_i->last_used_idx = to_vvq(vq)->last_used_idx; + vq_i->used_idx = vring->used->idx; + vq_i->used_ring_flags = vring->used->flags; + if (vq_i->event == 1) + vq_i->last_avail_idx = + *(__virtio16 *)&vring->used->ring[vring->num]; + else + vq_i->last_avail_idx = -1; + vq_i->avail_idx = vring->avail->idx; + vq_i->avail_ring_flags = vring->avail->flags; + vq_i->last_kick_avail_idx = vq_i->avail_idx - to_vvq(vq)->num_added; + vq_i->rq_avail_idx = get_rq_avail_idx(rq); + vq_i->rq_used_idx = get_rq_used_idx(rq); +} + diff --git a/source/lib/internal/kernel_module/modules/memhunter/common.c b/source/lib/internal/kernel_module/modules/memhunter/common.c new file mode 100644 index 0000000000000000000000000000000000000000..f69daaac33f98fea2fcb2b4641f4ce8b14451ce1 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/memhunter/common.c @@ -0,0 +1,227 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "common.h" + +int prepend(char **buffer, int *buflen, const char *str, int namelen) +{ + *buflen -= namelen; + if (*buflen < 0) + return -1; + *buffer -= namelen; + memcpy(*buffer, str, namelen); + return 0; +} + +static struct mount *inode2mount(struct inode *inode) +{ + struct list_head *pos; + struct mount *mount = NULL; + + if(inode && !_IS_ERR(inode)){ + pos = inode->i_sb->s_mounts.next; + if(pos && !_IS_ERR(pos)){ + mount = container_of(pos, struct mount, mnt_instance); + } + } + return mount; +} + + +static int mnt_has_parent(struct mount *mnt) +{ + return !!(mnt != mnt->mnt_parent); +} + +static struct dentry *__lock_parent(struct dentry *dentry) +{ + struct dentry *parent; + rcu_read_lock(); + spin_unlock(&dentry->d_lock); +again: + parent = READ_ONCE(dentry->d_parent); + spin_lock(&parent->d_lock); + if (unlikely(parent != dentry->d_parent)) { + spin_unlock(&parent->d_lock); + goto again; + } + rcu_read_unlock(); + if (parent != dentry) + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); + else + parent = NULL; + return parent; +} + +static inline struct dentry *lock_parent(struct dentry *dentry) +{ + struct dentry *parent = dentry->d_parent; + if (IS_ROOT(dentry)) + return NULL; + if (likely(spin_trylock(&parent->d_lock))) + return parent; + return __lock_parent(dentry); +} + +static inline void __dget_dlock(struct dentry *dentry) +{ + dentry->d_lockref.count++; +} + +static struct dentry *__d_find_alias(struct inode *inode) +{ + struct dentry *alias; + + if (S_ISDIR(inode->i_mode)) + return NULL; + +#ifndef LINUX_310 + hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) +#else + hlist_for_each_entry(alias, &inode->i_dentry, d_alias) +#endif + { + spin_lock(&alias->d_lock); + __dget_dlock(alias); + spin_unlock(&alias->d_lock); + return alias; + } + return NULL; +} + +static struct dentry *_d_find_alias(struct inode *inode) +{ + struct dentry *de = NULL; + + if (!hlist_empty(&inode->i_dentry)) { + spin_lock(&inode->i_lock); + de = __d_find_alias(inode); + spin_unlock(&inode->i_lock); + } + return de; +} + + + +static char *dentry_name(struct inode *inode, struct dentry *dentry, char *name, int len) +{ + struct mount *mnt; + struct mount *prev = NULL; + char *p; + char *tmp; + char *end; + int ret = 0; + + tmp = kmalloc(PATH_MAX, GFP_ATOMIC); + if (!tmp) + return NULL; + + end = name; + end[len - 1] = 0; + end = name + len - 1; + + mnt = inode2mount(inode); + do { + memset(tmp, 0, 
PATH_MAX); + p = dentry_path_raw(dentry, tmp, PATH_MAX); + //pr_err("%s-%d:inode %px mountpoint %px dentry:%px\n", p, strlen(p), inode, mnt->mnt_mountpoint, dentry); + ret = prepend(&end, &len, p, strlen(p)); + if (ret) + pr_err("prepend error\n"); + //pr_err("mnt:%px parent:%px end:%px len %d tmp:%px\n", mnt,mnt->mnt_parent, end, len, tmp); + prev = mnt; + dentry = mnt->mnt_mountpoint; + mnt = mnt->mnt_parent; + } while (mnt_has_parent(prev) && (dentry != mnt->mnt_mountpoint)); + + kfree(tmp); + memmove(name, end, strlen(end) + 1); + return name; +} + +int scan_inode_name(struct inode *inode, char *buf, int len, unsigned long *cached, int *deleted) +{ + struct dentry *dt; + struct dentry *parent; + + *cached = inode->i_data.nrpages; + dt = _d_find_alias(inode); + if (!dt) { + *deleted = 1; + return 0; + } + + spin_lock(&inode->i_lock); + spin_lock(&dt->d_lock); + parent = lock_parent(dt); + + dentry_name(inode, dt, buf, len); + *deleted = d_unlinked(dt); + + if (parent) + spin_unlock(&parent->d_lock); + spin_unlock(&dt->d_lock); + spin_unlock(&inode->i_lock); + dput(dt); + + return 0; +} + +void radix_init(struct radix_tree_root *root) +{ + INIT_RADIX_TREE(root, GFP_ATOMIC); +} + +int radix_insert(struct radix_tree_root *root, unsigned long key, void *ptr) +{ + return radix_tree_insert(root, key, ptr); +} + +void *radix_lookup(struct radix_tree_root *root, unsigned long key) +{ + return (void *)radix_tree_lookup(root, key); +} + +int radix_delete(struct radix_tree_root *root, unsigned long key) +{ + return radix_tree_delete(root, key); +} +#undef NR +#define NR (10) +int radix_delete_all(struct radix_tree_root *root, node_free_t free) +{ + int found, i; + unsigned long pos = 0; + struct radix_item *res[NR]; + + do { + found = radix_tree_gang_lookup(root, (void **)res, pos, NR); + for (i = 0; i < found; i++) { + radix_delete(root, res[i]->key); + if (free) + free(res[i]); + } + } while (found > 0); + + return 0; +} diff --git a/source/lib/internal/kernel_module/modules/memhunter/common.h b/source/lib/internal/kernel_module/modules/memhunter/common.h new file mode 100644 index 0000000000000000000000000000000000000000..f111575e91dfaa227091075dfcdd779da5e7c45d --- /dev/null +++ b/source/lib/internal/kernel_module/modules/memhunter/common.h @@ -0,0 +1,133 @@ +#ifndef __RADIX_TREE__ +#define __RADIX_TREE__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 1, 9) +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#else +#define LINUX_310 +#include "memcontrol_7.h" +#endif + +#define NAME_LEN (1024) +struct radix_item { + unsigned long key; +}; +struct mount { + struct hlist_node mnt_hash; + struct mount *mnt_parent; + struct dentry *mnt_mountpoint; + struct vfsmount mnt; +#if LINUX_VERSION_CODE > KERNEL_VERSION(3, 14, 0) + union { + struct rcu_head mnt_rcu; + struct llist_node mnt_llist; + }; +#endif +#ifdef CONFIG_SMP + struct mnt_pcp __percpu *mnt_pcp; +#else + int mnt_count; + int mnt_writers; +#endif + struct list_head mnt_mounts; /* list of children, anchored here */ + struct list_head mnt_child; /* and going through their mnt_child */ + struct list_head mnt_instance; /* mount instance on sb->s_mounts */ +}; + +struct inode_item { + struct radix_item node; + struct list_head inode; + unsigned long i_ino; + int nr_pages; + int deleted:4; + int shmem:4; + unsigned long cached; + char *filename; +}; + +struct file_item_list { + struct list_head items_list; + 
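+	/* path of the cached file, as resolved by scan_inode_name() */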
char filename[NAME_LEN]; + unsigned long size; + unsigned long cached; + int deleted; +}; + +struct filecache_result_list { + int num; + char fsname[NAME_LEN]; + struct list_head file_items_list; +}; + +struct file_item { + char filename[NAME_LEN]; + unsigned long size; + unsigned long cached; + int deleted; +}; + +struct filecache_result { + int num; + char fsname[NAME_LEN]; + struct file_item *filecache_items; +}; + +typedef enum _memhunter_type { + MEMHUNTER_CACHE_TYPE_FILE = 1, + MEMHUNTER_CACHE_TYPE_MEMCG_DYING, + MEMHUNTER_CACHE_TYPE_MEMCG_ONE, +} memhunter_type; + +typedef void (*node_free_t)(void *args); + +static inline bool _IS_ERR(const void *ptr) +{ + if((unsigned long)ptr < 0xffff000000000000){ + return 1; + } + return IS_ERR_VALUE((unsigned long)ptr); +} +static inline int _page_is_file_cache(struct page *page) +{ + return !PageSwapBacked(page); +} + +static inline int page_is_shmem(struct page *page) +{ + return !!(!_page_is_file_cache(page) && !PageAnon(page)); +} +int prepend(char **buffer, int *buflen, const char *str, int namelen); +int scan_inode_name(struct inode *inode, char *buf, int len, unsigned long *cached, int *deleted); +void radix_init(struct radix_tree_root *root); +int radix_insert(struct radix_tree_root *root, unsigned long key, void *ptr); +void *radix_lookup(struct radix_tree_root *root, unsigned long key); +int radix_delete(struct radix_tree_root *root, unsigned long key); +int radix_delete_all(struct radix_tree_root *root, node_free_t free); +int filecache_scan(void); +int memcg_dying_scan(void); +int memcg_scan_one(void); +int filecache_main(unsigned long arg); +#ifdef LINUX_310 +enum { + CSS_DYING = 0, /* this CSS is dying*/ +}; +#endif +#endif diff --git a/source/lib/internal/kernel_module/modules/memhunter/filecache.c b/source/lib/internal/kernel_module/modules/memhunter/filecache.c new file mode 100644 index 0000000000000000000000000000000000000000..29639fdb856804d1c22cf4fbc5b27bc5a38d341f --- /dev/null +++ b/source/lib/internal/kernel_module/modules/memhunter/filecache.c @@ -0,0 +1,190 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "common.h" + +static unsigned long totalCache = 0; +static char * fileName = NULL; +struct filecache_result_list result_list; + +static int dump_filecache_result(struct filecache_result __user *result) +{ + struct file_item *tmp, *fnext; + struct filecache_result res; + int count = 0; + int i = 0; + struct file_item_list *items, *tmp2; + int ret = 0; + + if(copy_from_user(&res, result, sizeof(res))) + { + pr_err("can not copy from user %d:%d\n",count,__LINE__); + ret = copy_to_user(result, &count, sizeof(count)); + return 0; + } + if (!res.num || !res.filecache_items) + { + pr_err("num %d ,items %p \n", res.num, res.filecache_items); + ret = copy_to_user(result, &count, sizeof(count)); + return 0; + } + + i = res.num > result_list.num ? 
result_list.num : res.num; + if(i <= 0) + { + i = copy_to_user(result, &count, sizeof(count)); + return i; + } + if((tmp = vmalloc(sizeof(struct file_item) * i)) == NULL) + { + pr_err("vmalloc error %d:%d\n",count,__LINE__); + ret = copy_to_user(result, &count, sizeof(count)); + return 0; + } + + fnext = tmp; + + list_for_each_entry_safe(items, tmp2, &(result_list.file_items_list), items_list) + { + pr_err("filename:%s size:%lu cached:%lu deleted:%d\n", items->filename, items->size, items->cached, items->deleted); + strcpy(fnext->filename, items->filename); + fnext->size = items->size; + fnext->cached = items->cached; + fnext->deleted = items->deleted; + count += 1; + fnext ++; + if(count >= i) + break; + } + res.num = count; + + i = copy_to_user(result, &count, sizeof(count)); + i = copy_to_user(res.filecache_items, tmp, sizeof(struct file_item) * count); + vfree(tmp); + return i; +} + +static void scan_super_block(struct super_block *sb, void * args) +{ + struct inode *inode, *next; + unsigned long cached; + int deleted; + struct file_item_list *tmp, *tmp2 = NULL; + +#ifdef LINUX_310 + spinlock_t *sb_inode_lock = kallsyms_lookup_name("inode_sb_list_lock"); + spin_lock(sb_inode_lock); +#else + spin_lock(&sb->s_inode_list_lock); +#endif + list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { + if (S_ISREG(inode->i_mode)) { + scan_inode_name(inode, fileName, PATH_MAX, &cached, &deleted); + if (cached*4 > 1024*50) + { + pr_err("filename:%s size:%lu cached:%lu deleted:%d\n", fileName, (unsigned long)inode->i_size, cached, deleted); + tmp2 = vzalloc(sizeof(struct file_item_list)); + if(!tmp2) + { + pr_err("vzalloc error: %d",__LINE__); + break; + } + strncpy(tmp2->filename, fileName,strlen(fileName)); + tmp2->size = (unsigned long)inode->i_size; + tmp2->cached = cached; + tmp2->deleted = deleted; + result_list.num += 1; + list_add_tail(&tmp2->items_list, &(result_list.file_items_list)); + } + } + } +#ifdef LINUX_310 + spin_unlock(sb_inode_lock); +#else + spin_unlock(&sb->s_inode_list_lock); +#endif +} + +static int scan_filesystem_type(char * fs) +{ + struct file_system_type * file_system; + + file_system = get_fs_type(fs); + if (!file_system) + return 0; + + iterate_supers_type(file_system, scan_super_block, NULL); + module_put(file_system->owner); + + return 0; +} + +int filecache_init(void) +{ + fileName = __getname(); + if(!fileName) + return 0; + result_list.num = 0; + INIT_LIST_HEAD(&(result_list.file_items_list)); + return 0; +} + +void filecache_exit(void) +{ + struct file_item_list *files_all, *tmp; + if (fileName) + __putname(fileName); + if(result_list.num) + { + list_for_each_entry_safe(files_all, tmp, &(result_list.file_items_list), items_list) + vfree(files_all); + } +} + +int filecache_scan(void) +{ + filecache_init(); + scan_filesystem_type("ext4"); + pr_err("total file cached %lu\n",totalCache); + totalCache = 0; + scan_filesystem_type("tmpfs"); + pr_err("total tmpfs %lu\n",totalCache); + filecache_exit(); + return 0; +} +int filecache_main(unsigned long arg) +{ + struct filecache_result *res = (struct filecache_result*)arg; + int ret = 0; + char fsname[NAME_LEN]; + memset(fsname, 0, sizeof(fsname)); + ret = copy_from_user(fsname, res->fsname, NAME_LEN); + pr_err("fsname:%s\n",fsname); + filecache_init(); + scan_filesystem_type(fsname); + pr_err("total file cached %lu\n",totalCache); + dump_filecache_result(res); + return 0; +} diff --git a/source/lib/internal/kernel_module/modules/memhunter/memcg.c b/source/lib/internal/kernel_module/modules/memhunter/memcg.c new 
file mode 100644 index 0000000000000000000000000000000000000000..b90043d7acc84cc3e71e031e8689bf7e5463c68b --- /dev/null +++ b/source/lib/internal/kernel_module/modules/memhunter/memcg.c @@ -0,0 +1,513 @@ +#include +#include +#include +#include +#include +#include "memcg.h" + +static struct mem_cgroup *(* _mem_cgroup_iter)(struct mem_cgroup *,struct mem_cgroup *,struct mem_cgroup_reclaim_cookie *); +static void (*_mem_cgroup_iter_break)(struct mem_cgroup*, struct mem_cgroup*); +static struct zone *(*_next_zone)(struct zone *zone); +static struct address_space *(*_page_mapping)(struct page *page); +struct pglist_data *(*_first_online_pgdat)(void); + +#define for_each_mem_cgroup(iter, start) \ + for (iter = _mem_cgroup_iter(start, NULL, NULL); \ + iter != NULL; \ + iter = _mem_cgroup_iter(start, iter, NULL)) +#define _for_each_zone(zone) \ + for (zone = (_first_online_pgdat())->node_zones; \ + zone; \ + zone = _next_zone(zone)) + +static void free_item(void *args) +{ + struct inode_item *item = (struct inode_item *)args; + kfree(item->filename); + kfree(item); +} + +static void memcg_free_item(struct memcg_item *cgitem) +{ + radix_delete_all(&cgitem->inode_root, free_item); +} + + +static int get_page_inode(struct memcg_item *cgitem, struct page *page) +{ + struct address_space *mapping; + struct inode *inode; + struct inode_item *item; + unsigned long cached; + int deleted; + int ret = 0; + char *fileName; + + fileName = kzalloc(PATH_MAX, GFP_ATOMIC); + if (!fileName) + return 0; + get_page(page); + if (PageAnon(page)) { + cgitem->anon++; + goto _skip; + } + if (!_page_is_file_cache(page)) + cgitem->shmem++; + else + cgitem->file++; +#ifdef LINUX_310 + mapping = _page_mapping(page); +#else + mapping = page_mapping(page); +#endif + if (!mapping) + goto _skip; + inode = mapping->host; + if (!inode) + goto _skip; + + item = radix_lookup(&cgitem->inode_root, (unsigned long)inode); + if (item) { + item->nr_pages++; + goto _skip; + } + scan_inode_name(inode, fileName, PATH_MAX, &cached, &deleted); + if (!cached) + goto _skip; + + item = kzalloc(sizeof(*item), GFP_ATOMIC); + if (!item) + goto _skip; + item->i_ino = inode->i_ino; + item->nr_pages = 1; + item->filename = kmemdup(fileName, strlen(fileName) + 1, GFP_ATOMIC); + item->cached = cached; + item->deleted = deleted; + item->shmem = page_is_shmem(page); + item->node.key = (unsigned long)inode; + list_add_tail(&item->inode, &cgitem->head); + ret = radix_insert(&cgitem->inode_root, (unsigned long)inode, (void*)item); + if(ret) + pr_info("insert file:%s error\n", item->filename); + cgitem->num_file++; +_skip: + kfree(fileName); + put_page(page); + return 0; +} + + +static void get_lru_page(struct memcg_item *item, struct lruvec *vec) +{ + struct page *page, *tmp; + enum lru_list lru; + unsigned long flags; + +#ifdef LINUX_310 + struct zone *lruzone; + lruzone = vec->zone; + if (!lruzone) { + pr_err("lru zone error for memcg:%px cg:%s\n", item->memcg, item->cgname); + return; + } + if(lruzone) + spin_lock_irqsave(&lruzone->lru_lock, flags); +#else + struct pglist_data *pgdat; + pgdat = vec->pgdat; + if (!pgdat) { + pr_err("lru pgdata error for memcg:%px cg:%s\n", item->memcg, item->cgname); + return; + } + if(pgdat) + spin_lock_irqsave(&pgdat->lru_lock, flags); +#endif + for_each_lru(lru) { + struct list_head *list = &vec->lists[lru]; + list_for_each_entry_safe(page, tmp, list, lru) { + get_page_inode(item, page); + } + } +#ifdef LINUX_310 + if (lruzone) + spin_unlock_irqrestore(&lruzone->lru_lock, flags); +#else + if (pgdat) + 
spin_unlock_irqrestore(&pgdat->lru_lock, flags); +#endif +} + +static void get_memcg_page(struct memcg_item *item) +{ +#ifdef LINUX_310 + struct mem_cgroup_per_zone *mz; + struct zone *z; + _for_each_zone(z) { + if((unsigned)zone_to_nid(z) >= nr_node_ids) + continue; + mz = &item->memcg->info.nodeinfo[zone_to_nid(z)]->zoneinfo[zone_idx(z)]; + get_lru_page(item, &mz->lruvec); + } +#else + struct mem_cgroup_per_node *mz; + int nid; + for_each_node(nid) { + mz = mem_cgroup_nodeinfo(item->memcg, nid); + get_lru_page(item, &mz->lruvec); + } +#endif +} + +static void memcg_get_name(struct mem_cgroup *memcg, char *name, unsigned int len) +{ + char *end; + int pos; + struct cgroup *cg = memcg->css.cgroup; +#ifdef LINUX_310 + if (!cg) + return; + rcu_read_lock(); + cgroup_path(cg, name, PATH_MAX); + rcu_read_unlock(); + end = name+strlen("/sys/fs/cgroup/memory"); + memmove(end, name, strlen(name)+1); + prepend(&end, &len, "/sys/fs/cgroup/memory", strlen("/sys/fs/cgroup/memory")); + pr_err("cg:name: %s, len:%d\n",name,strlen(name)); +#else + struct kernfs_node *kn; + struct kernfs_node *pkn; + if (!cg|| !cg->kn) + return; + kn = cg->kn; + + kernfs_get(kn); + end = name + len - 1; + prepend(&end, &len, "\0", 1); + pkn = kn; + while (pkn) { + pos = prepend(&end, &len, pkn->name, strlen(pkn->name)); + if (pos) + break; + if ((pkn == pkn->parent) || !pkn->parent) + break; + pos = prepend(&end, &len, "/", 1); + if (pos) + break; + pkn = pkn->parent; + } + + prepend(&end, &len, "/sys/fs/cgroup/memory", strlen("/sys/fs/cgroup/memory")); + + kernfs_put(kn); + memmove(name, end, strlen(end) + 1); +#endif +} + +static struct memcg_item *memcg_init_item(struct mem_cgroup *cg) +{ + struct memcg_item *item = NULL; + char *fileName; + + fileName = kzalloc(PATH_MAX, GFP_ATOMIC); + if (!fileName) + return NULL; + item = kzalloc(sizeof(*item), GFP_ATOMIC); + if (!item) { + goto _out; + } + memcg_get_name(cg, fileName, PATH_MAX); + item->memcg = cg; +#ifdef LINUX_310 + item->size = cg->res.usage; + item->cgname = kmemdup(fileName, strlen(fileName) + 2, GFP_ATOMIC); + if(item->cgname) + item->cgname[strlen(fileName) + 1] = '\0'; +#else + item->size = page_counter_read(&cg->memory); + item->cgname = kmemdup_nul(fileName, strlen(fileName) + 1, GFP_ATOMIC); +#endif + INIT_LIST_HEAD(&item->head); + INIT_LIST_HEAD(&item->offline); + radix_init(&item->inode_root); +_out: + kfree(fileName); + return item; +} + +int memcg_dump_to_user(struct memcg_info *info, struct memcg_info_user __user *result) +{ + struct memcg_item *cgitem; + struct inode_item *item; + struct memcg_info_user res; + struct memcg_item_user *cgitem_u, *tmp, *user_head; + struct memcg_item_user tmp_memcg; + struct inode_item_user *tmp2, *free_tmp; + int count_m = 0, i = 0, j = 0, count_i; + int ret = 0; + + if(copy_from_user(&res, result, sizeof(res))) + { + pr_err("can not copy from user %d:%d\n",count_m,__LINE__); + ret = copy_to_user(result, &count_m, sizeof(count_m)); + return 0; + } + + if (!res.nr || !res.items) + { + pr_err("num %d ,items %p \n", res.nr, res.items); + ret = copy_to_user(result, &count_m, sizeof(count_m)); + return 0; + } + + i = res.nr > info->nr ? 
info->nr : res.nr; + if(i == 0) + { + ret = copy_to_user(result, &count_m, sizeof(count_m)); + return 0; + } + if((tmp = vzalloc(sizeof(struct memcg_item_user) * i)) == NULL) + { + pr_err("vmalloc error %d:%d\n",count_m,__LINE__); + ret = copy_to_user(result, &count_m, sizeof(count_m)); + return 0; + } + + cgitem_u = res.items; + user_head = tmp; + + list_for_each_entry(cgitem, &info->head, offline) { + if(count_m >= i) + break; + pr_err("cg:%s memory:%lu file:%lu anon:%lu shmem:%lu num_file:%d\n", cgitem->cgname, cgitem->size, cgitem->file, cgitem->anon, cgitem->shmem, cgitem->num_file); + strncpy(tmp->cgname, cgitem->cgname,strlen(cgitem->cgname)); + tmp->size = cgitem->size; + tmp->file = cgitem->file; + tmp->anon = cgitem->anon; + tmp->shmem = cgitem->shmem; + tmp->num_file = cgitem->num_file; + + j = INODE_LIMIT > tmp->num_file ? tmp->num_file : INODE_LIMIT; + if(j <= 0) + { + if(copy_from_user(&tmp_memcg, cgitem_u, sizeof(tmp_memcg))) + { + pr_err("can not copy from user %d:%d\n",count_m,__LINE__); + ret = copy_to_user(result, &count_m, sizeof(count_m)); + vfree(tmp->inode_items); + vfree(user_head); + return 0; + } + tmp->inode_items = tmp_memcg.inode_items; + copy_to_user(cgitem_u, tmp, sizeof(struct memcg_item_user)); + tmp++; + cgitem_u++; + count_m++; + continue; + } + if((tmp2 = vzalloc(sizeof(struct inode_item_user) * j)) == NULL) + { + pr_err("vmalloc error %d:%d\n",count_m,__LINE__); + ret = copy_to_user(result, &count_m, sizeof(count_m)); + vfree(user_head); + return 0; + } + free_tmp = tmp2; + count_i = 0; + + list_for_each_entry(item, &cgitem->head,inode) { + if(count_i >= j) + break; + pr_err("ino:%lu, filename:%s cached:%lu nr_pages:%d deleted:%d shmem:%d\n", item->i_ino, item->filename, item->cached, item->nr_pages, item->deleted, item->shmem); + tmp2->i_ino = item->i_ino; + strcpy(tmp2->filename, item->filename); + tmp2->cached = item->cached; + tmp2->nr_pages = item->nr_pages; + tmp2->deleted = item->deleted; + tmp2->shmem = item->shmem; + tmp2++; + count_i++; + } + if(copy_from_user(&tmp_memcg, cgitem_u, sizeof(tmp_memcg))) + { + pr_err("can not copy from user %d:%d\n",count_m,__LINE__); + ret = copy_to_user(result, &count_m, sizeof(count_m)); + vfree(tmp->inode_items); + vfree(user_head); + return 0; + } + copy_to_user(tmp_memcg.inode_items, free_tmp, sizeof(struct inode_item_user) * j); + tmp->inode_items = tmp_memcg.inode_items; + copy_to_user(cgitem_u, tmp, sizeof(struct memcg_item_user)); + vfree(free_tmp); + tmp++; + cgitem_u++; + count_m++; + } + i = copy_to_user(result, &count_m, sizeof(count_m)); + vfree(user_head); + return i; + +} + +void memcg_dump(struct memcg_info *info) +{ + struct memcg_item *cgitem; + struct inode_item *item; + int a = 0; + list_for_each_entry(cgitem, &info->head, offline) { + pr_err("cg:%s memory:%lu file:%lu anon:%lu shmem:%lu num_file:%d\n", cgitem->cgname, cgitem->size, cgitem->file, cgitem->anon, cgitem->shmem, cgitem->num_file); + list_for_each_entry(item, &cgitem->head,inode) { + pr_err("ino:%lu, filename:%s cached:%lu nr_pages:%d deleted:%d shmem:%d\n", item->i_ino, item->filename, item->cached, item->nr_pages, item->deleted, item->shmem); + a = 1; + } + } +} + +void memcg_free_all(struct memcg_info *info) +{ + struct memcg_item *cgitem; + struct memcg_item *tmp; + + list_for_each_entry_safe(cgitem, tmp, &info->head, offline) { + memcg_free_item(cgitem); + kfree(cgitem->cgname); + kfree(cgitem); + } +} + +static int memcg_init(void) +{ + + if (_mem_cgroup_iter && _mem_cgroup_iter_break) + return 0; + + _mem_cgroup_iter = 
kallsyms_lookup_name("mem_cgroup_iter"); + if (!_mem_cgroup_iter) { + pr_err("lookup mem cgroup iter error\n"); + return -1; + } + + _mem_cgroup_iter_break = kallsyms_lookup_name("mem_cgroup_iter_break"); + if (!_mem_cgroup_iter_break) { + pr_err("lookup iter break error\n"); + return -1; + } + _next_zone = kallsyms_lookup_name("next_zone"); + if (!_next_zone) { + pr_err("next_zone error\n"); + return -1; + } + + _first_online_pgdat = kallsyms_lookup_name("first_online_pgdat"); + if (!_first_online_pgdat) { + pr_err("first_online_pgdat error\n"); + return -1; + } + _page_mapping = kallsyms_lookup_name("page_mapping"); + if (!_page_mapping) { + pr_err("page_mapping error\n"); + return -1; + } + + return 0; +} + +int memcg_scan(struct memcg_info *info, struct mem_cgroup *start, int offline) +{ + struct mem_cgroup *iter = NULL; + struct memcg_item *item; + + if (memcg_init()) + return -1; + + for_each_mem_cgroup(iter, start) { +#ifdef LINUX_310 + if (offline && !(iter->css.flags&CSS_DYING)) +#else + if (offline && (iter->css.flags&CSS_ONLINE)) +#endif + continue; + item = memcg_init_item(iter); + if (!item) + continue; + get_memcg_page(item); + info->nr++; + list_add_tail(&item->offline, &info->head); + } + return 0; +} + +struct mem_cgroup *memcg_get_by_name(char *cgname) +{ + struct mem_cgroup *iter = NULL; + char *fileName; + + if (memcg_init()) + return 0; + fileName = kzalloc(PATH_MAX, GFP_ATOMIC); + if (!fileName) { + pr_err("alloc memory failed:%pF\n", __FUNCTION__); + return 0; + } + + for_each_mem_cgroup(iter, NULL) { + memcg_get_name(iter, fileName, PATH_MAX); + if (!strncmp(fileName, cgname, strlen(cgname))) { + pr_err("filename:%s, cgname:%s\n",fileName, cgname); + _mem_cgroup_iter_break(NULL, iter); + break; + } + } + + kfree(fileName); + return iter; +} +int memcg_dying_main(unsigned long arg) +{ + pr_err("try_to dump to user\n"); + struct memcg_info info; + struct mem_cgroup *memcg = NULL; + info.nr = 0; + char cgname_u[NAME_LEN]; + struct memcg_info_user *res = (struct memcg_info_user*)arg; + + memset(cgname_u, 0, sizeof(cgname_u)); + copy_from_user(cgname_u, res->cgname, NAME_LEN); + if(strlen(cgname_u) > 0) + { + if(cgname_u[strlen(cgname_u)-1] == '/') + cgname_u[strlen(cgname_u)-1] = '\0'; + pr_err("cgname:%s\n",cgname_u); + memcg = memcg_get_by_name(cgname_u); + } + + INIT_LIST_HEAD(&info.head); + memcg_scan(&info, memcg, 1); + memcg_dump_to_user(&info, (struct memcg_info_user*)arg); + memcg_free_all(&info); + + return 0; +} +int memcg_one_main(unsigned long arg) +{ + char *cgname = "/sys/fs/cgroup/memory/agent"; + struct mem_cgroup *memcg = NULL; + struct memcg_info info; + char cgname_u[NAME_LEN]; + struct memcg_info_user *res = (struct memcg_info_user*)arg; + info.nr = 0; + + memset(cgname_u, 0, sizeof(cgname_u)); + copy_from_user(cgname_u, res->cgname, NAME_LEN); + pr_err("cgname:%s\n",cgname_u); + if(strlen(cgname_u) > 0) + if(cgname_u[strlen(cgname_u)-1] == '/') + cgname_u[strlen(cgname_u)-1] = '\0'; + INIT_LIST_HEAD(&info.head); + memcg = memcg_get_by_name(cgname_u); + if(memcg != NULL) + memcg_scan(&info, memcg, 0); + memcg_dump_to_user(&info, (struct memcg_info_user*)arg); + memcg_free_all(&info); + return 0; +} diff --git a/source/lib/internal/kernel_module/modules/memhunter/memcg.h b/source/lib/internal/kernel_module/modules/memhunter/memcg.h new file mode 100644 index 0000000000000000000000000000000000000000..88d85a9f34a6f04114ccc4c0b4252a306ed63f04 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/memhunter/memcg.h @@ -0,0 +1,54 @@ +#ifndef 
__MEMCG_FILE__ +#define __MEMCG_FILE__ +#include +#include "common.h" + +#define INODE_LIMIT (50) +struct memcg_info { + int nr ;/* number of memcg for dying*/ + struct list_head head; /* memcg offline*/ +}; + +struct memcg_item { + struct radix_tree_root inode_root; + struct list_head head; + struct list_head offline; + unsigned long anon; + unsigned long shmem; + unsigned long file; + unsigned long size; + int num_file; + struct mem_cgroup *memcg; + char *cgname; +}; + +struct inode_item_user { + unsigned long i_ino; + int nr_pages; + int deleted:4; + int shmem:4; + unsigned long cached; + char filename[NAME_LEN]; +}; +struct memcg_item_user { + struct inode_item_user *inode_items; + unsigned long anon; + unsigned long shmem; + unsigned long file; + unsigned long size; + int num_file; + char cgname[NAME_LEN]; +}; + +struct memcg_info_user { + int nr ;/* number of memcg for dying*/ + struct memcg_item_user* items; + char cgname[NAME_LEN]; +}; +void memcg_dump(struct memcg_info *info); +void memcg_free_all(struct memcg_info *info); +int memcg_scan(struct memcg_info *info, struct mem_cgroup *start, int offline); +int memcg_dying_main(unsigned long arg); +int memcg_one_main(unsigned long arg); +struct mem_cgroup *memcg_get_by_name(char *cgname); +#endif diff --git a/source/lib/internal/kernel_module/modules/memhunter/memcg_dia.c b/source/lib/internal/kernel_module/modules/memhunter/memcg_dia.c new file mode 100644 index 0000000000000000000000000000000000000000..24a5cc86e9640877d77cb8027db2927fe16c1a55 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/memhunter/memcg_dia.c @@ -0,0 +1,30 @@ +#include +#include "common.h" +#include "memcg.h" + +int memcg_dying_scan(void) +{ + int offline = 1; + struct memcg_info info; + + INIT_LIST_HEAD(&info.head); + memcg_scan(&info, NULL, offline); + memcg_dump(&info); + memcg_free_all(&info); + + return 0; +} + +int memcg_scan_one(void) +{ + char *cgname = "/sys/fs/cgroup/memory/agent"; + struct mem_cgroup *memcg; + struct memcg_info info; + + INIT_LIST_HEAD(&info.head); + memcg = memcg_get_by_name(cgname); + memcg_scan(&info, memcg, 0); + memcg_dump(&info); + memcg_free_all(&info); + return 0; +} diff --git a/source/lib/internal/kernel_module/modules/memhunter/memcontrol_7.h b/source/lib/internal/kernel_module/modules/memhunter/memcontrol_7.h new file mode 100644 index 0000000000000000000000000000000000000000..1ac50bd636c01d190a9864e04f0a8511c62f5dd3 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/memhunter/memcontrol_7.h @@ -0,0 +1,284 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct mem_cgroup_lru_info; +enum drt_count_t { + DRT_0_50, + DRT_50_100, + DRT_100_200, + DRT_200_500, + DRT_500_1k, + DRT_1k_5k, + DRT_5k_10k, + DRT_10k_100k, + DRT_100k_INF, + DRT_COUNT, +}; +struct eventfd_ctx { + struct kref kref; + wait_queue_head_t wqh; + /* + * Every time that a write(2) is performed on an eventfd, the + * value of the __u64 being written is added to "count" and a + * wakeup is performed on "wqh". A read(2) will return the "count" + * value to userspace, and will reset "count" to zero. The kernel + * side eventfd_signal() also, adds to the "count" counter and + * issue a wakeup. 
+ */ + __u64 count; + unsigned int flags; +}; +struct mem_cgroup_threshold { + struct eventfd_ctx *eventfd; + u64 threshold; +}; + +/* For threshold */ +struct mem_cgroup_threshold_ary { + /* An array index points to threshold just below or equal to usage. */ + int current_threshold; + /* Size of entries[] */ + unsigned int size; + /* Array of thresholds */ + struct mem_cgroup_threshold entries[0]; +}; + +struct mem_cgroup_thresholds { + /* Primary thresholds array */ + struct mem_cgroup_threshold_ary *primary; + /* + * * Spare threshold array. + * * This is needed to make mem_cgroup_unregister_event() "never fail". + * * It must be able to store at least primary->size - 1 entries. + * */ + struct mem_cgroup_threshold_ary *spare; +}; + +struct mem_cgroup_reclaim_iter { + /* + * last scanned hierarchy member. Valid only if last_dead_count + * matches memcg->dead_count of the hierarchy root group. + */ + struct mem_cgroup *last_visited; + unsigned long last_dead_count; + + /* scan generation, increased every round-trip */ + unsigned int generation; +}; +struct mem_cgroup_per_zone { + struct lruvec lruvec; + unsigned long lru_size[NR_LRU_LISTS]; + + struct mem_cgroup_reclaim_iter reclaim_iter[DEF_PRIORITY + 1]; + + struct rb_node tree_node; /* RB tree node */ + unsigned long long usage_in_excess;/* Set to the value by which */ + /* the soft limit is exceeded*/ + bool on_tree; + bool writeback; /* memcg kswapd reclaim writeback */ + bool dirty; /* memcg kswapd reclaim dirty */ + bool congested; /* memcg has many dirty pages */ + /* backed by a congested BDI */ + struct mem_cgroup *memcg; /* Back pointer, we cannot */ + /* use container_of */ + + unsigned long pages_scanned; /* since last reclaim */ + bool all_unreclaimable; /* All pages pinned */ +}; + +struct mem_cgroup_per_node { + struct mem_cgroup_per_zone zoneinfo[MAX_NR_ZONES]; +}; +struct mem_cgroup_lru_info { + struct mem_cgroup_per_node *nodeinfo[0]; +}; +struct mem_cgroup { + struct cgroup_subsys_state css; + /* + ┆* the counter to account for memory usage + ┆*/ + struct res_counter res; + +#ifdef CONFIG_MEM_DELAY + /* Memory delay measurement domain */ + struct memdelay_domain *memdelay_domain; +#endif + + /* vmpressure notifications */ + struct vmpressure vmpressure; + + union { + /* + ┆* the counter to account for mem+swap usage. + ┆*/ + struct res_counter memsw; + + /* + ┆* rcu_freeing is used only when freeing struct mem_cgroup, + ┆* so put it into a union to avoid wasting more memory. + ┆* It must be disjoint from the css field. It could be + ┆* in a union with the res field, but res plays a much + ┆* larger part in mem_cgroup life than memsw, and might + ┆* be of interest, even at time of free, when debugging. + ┆* So share rcu_head with the less interesting memsw. + ┆*/ + struct rcu_head rcu_freeing; + /* + ┆* We also need some space for a worker in deferred freeing. + ┆* By the time we call it, rcu_freeing is no longer in use. + ┆*/ + struct work_struct work_freeing; + }; + + /* + ┆* the counter to account for kernel memory usage. + ┆*/ + struct res_counter kmem; + /* + ┆* Should the accounting and control be hierarchical, per subtree? 
+ ┆*/ + bool use_hierarchy; + unsigned long kmem_account_flags; /* See KMEM_ACCOUNTED_*, below */ + + int oom_kill; + bool oom_lock; + atomic_t under_oom; + atomic_t oom_wakeups; + + atomic_t refcnt; + + int swappiness; + + int priority; + + bool oom_kill_all; + bool use_priority_oom; + /* OOM-Killer disable */ + int oom_kill_disable; + + /* set when res.limit == memsw.limit */ + bool memsw_is_minimum; + + /* protect arrays of thresholds */ + struct mutex thresholds_lock; + + /* thresholds for memory usage. RCU-protected */ + struct mem_cgroup_thresholds thresholds; + + /* thresholds for mem+swap usage. RCU-protected */ + struct mem_cgroup_thresholds memsw_thresholds; + + /* For oom notifier event fd */ + struct list_head oom_notify; + +#ifdef CONFIG_CGROUP_WRITEBACK + struct list_head cgwb_list; + struct wb_domain cgwb_domain; +#endif + + /* + ┆* Should we move charges of a task when a task is moved into this + ┆* mem_cgroup ? And what type of charges should we move ? + ┆*/ + unsigned long move_charge_at_immigrate; + /* + ┆* set > 0 if pages under this cgroup are moving to other cgroup. + ┆*/ + atomic_t moving_account; + /* taken only while moving_account > 0 */ + spinlock_t move_lock; + struct task_struct *move_lock_task; + unsigned long move_lock_flags; + /* + ┆* percpu counter. + ┆*/ + struct mem_cgroup_stat_cpu __percpu *stat; + spinlock_t pcp_counter_lock; + +#ifdef CONFIG_CGROUP_WRITEBACK + int dirty_ratio; + int dirty_bg_ratio; +#endif + atomic_t wmark_ratio; + atomic64_t wmark_extra; + atomic_t force_empty_ctl; + + bool kswapd_stop; /* Protected by kswapds_spinlock */ + struct mutex kswapd_mutex; + wait_queue_head_t *kswapd_wait; + + atomic_t dead_count; +#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_INET) + struct tcp_memcontrol tcp_mem; +#endif +#if defined(CONFIG_MEMCG_KMEM) + /* analogous to slab_common's slab_caches list. per-memcg */ + struct list_head memcg_slab_caches; + /* Not a spinlock, we can take a lot of time walking the list */ + struct mutex slab_caches_mutex; + /* Index in the kmem_cache->memcg_params->memcg_caches array */ + int kmemcg_id; +#endif + + int last_scanned_node; +#if MAX_NUMNODES > 1 + nodemask_t scan_nodes; + atomic_t numainfo_events; + atomic_t numainfo_updating; +#endif + u64 direct_reclaim_time_count[DRT_COUNT]; + spinlock_t direct_reclaim_time_count_lock; + + u64 direct_reclaim_sched_time_histogram + [DRSTH_COUNT][DRSTH_TYPE_COUNT]; + spinlock_t direct_reclaim_sched_time_histogram_lock; + +#ifdef CONFIG_KIDLED + struct rw_semaphore idle_stats_rwsem; + unsigned long idle_scans; + struct kidled_scan_period scan_period; + int idle_stable_idx; + struct idle_page_stats idle_stats[KIDLED_STATS_NR_TYPE]; +#endif + + /* + * Per cgroup active and inactive list, similar to the + * per zone LRU lists. + * + * WARNING: This has to be the last element of the struct. Don't + * add new fields after this point. 
+ */ + struct mem_cgroup_lru_info info; +}; diff --git a/source/lib/internal/kernel_module/modules/memhunter/memhunter.c b/source/lib/internal/kernel_module/modules/memhunter/memhunter.c new file mode 100644 index 0000000000000000000000000000000000000000..04b057b7a783a69d09d605e56a57d0616a9b9b67 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/memhunter/memhunter.c @@ -0,0 +1,58 @@ +#include +#include +#include +#include +#include +#include +#include +#include "common.h" +#include "memcg.h" + +#define CHR_NAME "memhunter" +static DEFINE_MUTEX(dev_mutex); +static int memhunter_dev_major = -1; +static struct class *memhunter_dev_class = NULL; +static struct device *memhunter_dev = NULL; + +int memhunter_handler_cmd(unsigned int cmd, unsigned long arg) +{ + int ret = -EINVAL; + int type, nr; + printk("debug -- 1\n"); + + if (!mutex_trylock(&dev_mutex)) + return -EBUSY; + + type = _IOC_TYPE(cmd); + nr = _IOC_NR(cmd); + printk("type: %d\n", nr); + switch (nr) { + case MEMHUNTER_CACHE_TYPE_FILE: + ret = filecache_main(arg); + break; + case MEMHUNTER_CACHE_TYPE_MEMCG_DYING: + printk("dying\n"); + ret = memcg_dying_main(arg); + break; + case MEMHUNTER_CACHE_TYPE_MEMCG_ONE: + ret = memcg_one_main(arg); + break; + default: + printk("defualt ioctl cmd =%d, nr = %d\n", type, nr); + break; + } + + mutex_unlock(&dev_mutex); + return ret; +} + +static __init int memhunter_init(void) +{ + return 0; +} + +static __exit void memhunter_exit(void) +{ + return ; +} + diff --git a/source/lib/internal/kernel_module/modules/memleak/hashlist.c b/source/lib/internal/kernel_module/modules/memleak/hashlist.c new file mode 100755 index 0000000000000000000000000000000000000000..ff7b587edf05d7e4480838710913d2e6bc2a1903 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/memleak/hashlist.c @@ -0,0 +1,397 @@ +#include +#include "mem.h" +#include + +static DEFINE_PER_CPU(int, nest_count); + +static inline u32 ptr_hash(const void *ptr) +{ + return jhash((void *)&ptr, sizeof(ptr), 0); +} + +static inline struct bucket *__select_bucket(struct memleak_htab *htab, u32 hash) +{ + return &htab->buckets[hash & (htab->n_buckets - 1)]; +} + +static inline struct list_head *select_bucket(struct memleak_htab *htab, u32 hash) +{ + return &__select_bucket(htab, hash)->head; +} + +static inline int _get_cpu(void) +{ + int cpu = 0; + + preempt_disable(); + cpu = smp_processor_id(); + preempt_enable(); + + return cpu; +} + + void *internal_alloc(size_t size, gfp_t flags) +{ + void *ret; + int cpu = 0; + cpu = _get_cpu(); + per_cpu(nest_count, cpu) += 1; + ret = kmalloc(size, flags); + per_cpu(nest_count, cpu) -= 1; + + return ret; +} + + void internal_kfree(void *addr) +{ + int cpu = _get_cpu(); + + per_cpu(nest_count, cpu) += 1; + kfree(addr); + per_cpu(nest_count, cpu) -= 1; +} + +int memleak_entry_reentrant(void) +{ + int cpu = _get_cpu(); + per_cpu(nest_count, cpu) += 1; + return per_cpu(nest_count, cpu) > 1; +} + +void memleak_exit_reentrant(void) +{ + int cpu = _get_cpu(); + + per_cpu(nest_count, cpu) -= 1; +} + +int memleak_hashlist_init(struct memleak_htab *htab) +{ + int i = 0; + int size; + struct alloc_desc *desc; + + htab->buckets = internal_alloc(htab->n_buckets * sizeof(struct bucket), GFP_KERNEL); + if (!htab->buckets) { + return -ENOMEM; + } + + memset(htab->buckets, 0, htab->n_buckets * sizeof(struct bucket)); + + INIT_LIST_HEAD(&htab->freelist); + + for (i = 0; i < htab->n_buckets; i++) { + INIT_LIST_HEAD(&htab->buckets[i].head); + spin_lock_init(&htab->buckets[i].lock); + + } + + htab->free = 0; + 
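+	/* Each descriptor reserves room for up to stack_deep stack-trace entries, and the
+	 * freelist is pre-filled with htab->total descriptors so the tracepoint path can
+	 * usually avoid GFP_ATOMIC allocations. */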
+ size = sizeof(struct alloc_desc) + sizeof(u64) * htab->stack_deep; + /*prealloc one by one */ + for (i = 0; i < htab->total; i++) { + desc = internal_alloc(size, GFP_KERNEL | __GFP_ZERO); + if (desc) { + desc->num = htab->stack_deep; + list_add(&desc->node, &htab->freelist); + htab->free++; + } + } + + return 0; +} + +struct alloc_desc * memleak_alloc_desc(struct memleak_htab *htab) +{ + struct alloc_desc *desc; + unsigned long flags; + int size = sizeof(struct alloc_desc) + sizeof(u64) * htab->stack_deep; + + if (!htab->set.ext) + htab->stack_deep = 0; + + if (!htab->free) { + desc = internal_alloc(size, GFP_ATOMIC | __GFP_ZERO); + if (desc) + desc->num = htab->stack_deep; + return desc; + } + spin_lock_irqsave(&htab->lock, flags); + + desc = list_first_entry_or_null(&htab->freelist, struct alloc_desc, node); + if (desc) { + htab->free--; + desc->num = htab->stack_deep; + list_del_init(&desc->node); + } + + spin_unlock_irqrestore(&htab->lock, flags); + + return desc; +} + +int memleak_free_desc(struct memleak_htab *htab, struct alloc_desc *desc) +{ + unsigned long flags; + + if (!desc) + return 0; + + if (htab->free >= htab->total) { + + internal_kfree(desc); + return 0; + } + + spin_lock_irqsave(&htab->lock, flags); + + memset(desc, 0, sizeof(*desc)); + list_add(&desc->node, &htab->freelist); + htab->free++; + + spin_unlock_irqrestore(&htab->lock, flags); + + return 0; +} + +int memleak_insert_desc(struct memleak_htab *htab, struct alloc_desc *desc) +{ + unsigned long flags; + struct bucket *bucket; + + if (!desc || !desc->ptr) + return 0; + + desc->hash = ptr_hash(desc->ptr); + + bucket = __select_bucket(htab, desc->hash); + + spin_lock_irqsave(&bucket->lock, flags); + + list_add(&desc->node, &bucket->head); + bucket->nr++; + atomic_add(1, &htab->count); + spin_unlock_irqrestore(&bucket->lock,flags); + + return 0; +} + +struct alloc_desc * memleak_del_desc(struct memleak_htab *htab, const void *ptr) +{ + unsigned long flags; + struct bucket *bucket; + struct alloc_desc *tmp1, *tmp2; + struct alloc_desc *desc = NULL; + u32 hash; + + if (!ptr) + return NULL; + + hash = ptr_hash(ptr); + bucket = __select_bucket(htab, hash); + + spin_lock_irqsave(&bucket->lock, flags); + + list_for_each_entry_safe(tmp1, tmp2, &bucket->head, node) { + if (tmp1->ptr == ptr && (tmp1->hash == hash)) { + list_del_init(&tmp1->node); + desc = tmp1; + bucket->nr--; + atomic_sub(1, &htab->count); + break; + } + } + + spin_unlock_irqrestore(&bucket->lock, flags); + + + return desc; +} + +int memleak_hashlist_uninit(struct memleak_htab *htab) +{ + struct bucket *bucket; + struct alloc_desc *tmp1, *tmp2; + int i; + + htab->free = 0; + + for (i = 0; i < htab->n_buckets; i++) { + bucket = &htab->buckets[i]; + + list_for_each_entry_safe(tmp1, tmp2, &bucket->head, node) { + list_del_init(&tmp1->node); + internal_kfree(tmp1); + htab->free++; + } + } + + list_for_each_entry_safe(tmp1, tmp2, &htab->freelist, node) { + list_del_init(&tmp1->node); + internal_kfree(tmp1); + htab->free++; + } + + if (htab->free != htab->total) + pr_info("memleak free %u ,total %u\n", htab->free, htab->total); + + if (htab->buckets) + internal_kfree(htab->buckets); + + htab->buckets = NULL; + + return 0; +} + +static void memleak_dump_object(struct memleak_htab *htab, struct max_object *object) +{ + struct kmem_cache *cache = htab->check.cache; + + if (!cache || !object) + return ; + + strncpy(object->slabname, cache->name, NAME_LEN); + object->object_size = cache->size; + object->object_num = htab->check.object_num; + + if (!htab->info.object) 
+ return ; + + object->similar_object = htab->info.object->valid_object; + object->ptr = htab->info.object->ptr; +} + + +int memleak_dump_leak(struct memleak_htab *htab, struct user_result __user *result) +{ + struct bucket *bucket; + struct alloc_desc *tmp1, *tmp2; + struct user_alloc_desc *desc; + struct user_result res; + struct max_object object; + void __user *tmp; + + int i = 0; + int j = 0; + int num = 0; + int count = atomic_read(&htab->count); + int ret = 0; + unsigned long long curr_ts = sched_clock(); + + if ((count <= 0) || copy_from_user(&res, result, sizeof(res))) { + pr_err("count zero %d:%d\n",count,__LINE__); + ret = copy_to_user(result, &i, sizeof(i)); + return 0; + } + + if (!res.num || !res.desc) { + pr_err("num %d ,desc %p \n", res.num, res.desc); + ret = copy_to_user(result, &i, sizeof(i)); + return 0; + } + + pr_info("total memleak number %d user %d ts=%llu\n", count, res.num, sched_clock()); + + res.num = (res.num > count) ? count : res.num; + num = res.num; + + desc = vmalloc(sizeof(*desc) * num); + if (!desc) { + pr_err("vmalloc error %d:%d\n",count,__LINE__); + ret = copy_to_user(result, &i, sizeof(i)); + return 0; + } + + tmp = res.desc; + res.desc = desc; + j = 0; + + /*copy object info */ + if (res.objects) { + memset(&object, 0, sizeof(object)); + memleak_dump_object(htab, &object); + ret = copy_to_user(res.objects, &object, sizeof(object)); + } + + for (i = 0; i < htab->n_buckets; i++) { + int z = 0; + bucket = &htab->buckets[i]; + if (bucket->nr <= 0) { + continue; + } + + list_for_each_entry_safe(tmp1, tmp2, &bucket->head, node) { + list_del_init(&tmp1->node); + if ((htab->set.type == MEMLEAK_TYPE_PAGE) && PageSlab((struct page*)tmp1->ptr)) { + goto _skip; + } + + desc->ts = (curr_ts - tmp1->ts)>>30; + desc->ptr = tmp1->ptr; + desc->pid = tmp1->pid; + desc->mark = memleak_mark_leak(htab, tmp1); + desc->order = tmp1->order; + desc->call_site = tmp1->call_site; + strcpy(desc->comm,tmp1->comm); + snprintf(desc->function, NAME_LEN, "%pS", (void *)tmp1->call_site); + desc->num = tmp1->num; + for (z = 0; z < desc->num; z++) { + snprintf(desc->backtrace[z], 128, "%pS", tmp1->backtrace[z]); + } + desc++; + j++; +_skip: + memleak_free_desc(htab, tmp1); + atomic_sub(1, &htab->count); + bucket->nr--; + if (!--num) + goto _out; + } + } + +_out: + + i = copy_to_user(result, &j, sizeof(j)); + i = copy_to_user(tmp, res.desc, sizeof(*desc) * j); + + vfree(res.desc); + pr_info("get num %d htab %d, %d\n", j, atomic_read(&htab->count), num); + return i; +} + +int memleak_clear_leak(struct memleak_htab *htab) +{ + struct bucket *bucket; + struct alloc_desc *tmp1, *tmp2; + int i; + + + if (!atomic_read(&htab->count)) { + return 0; + } + + pr_info(" clear leak %d \n", atomic_read(&htab->count)); + + + for (i = 0; i < htab->n_buckets; i++) { + + bucket = &htab->buckets[i]; + cond_resched(); + + if (bucket->nr) { + + list_for_each_entry_safe(tmp1, tmp2, &bucket->head, node) { + + list_del_init(&tmp1->node); + memleak_free_desc(htab, tmp1); + } + } + + bucket->nr = 0; + } + + atomic_set(&htab->count, 0); + + return 0; +} diff --git a/source/lib/internal/kernel_module/modules/memleak/mem.h b/source/lib/internal/kernel_module/modules/memleak/mem.h new file mode 100755 index 0000000000000000000000000000000000000000..9ee4ccee2ea0b9ef3f343e1a926deded80ffc603 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/memleak/mem.h @@ -0,0 +1,188 @@ +#ifndef __MEMLEAK__ +#define __MEMLEAK__ +#include +#include +#include +#include +#include +#include +#include +#include +#include 
+#include +#include +#include +#include +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) +#include +#else +#include +#endif +#include +#include + +#include "common.h" +#include "memleak.h" +#include "user.h" + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 17, 0) +/* Reuses the bits in struct page */ +struct slab { + unsigned long __page_flags; + +#if defined(CONFIG_SLAB) + + union { + struct list_head slab_list; + struct rcu_head rcu_head; + }; + struct kmem_cache *slab_cache; + void *freelist; /* array of free object indexes */ + void *s_mem; /* first object */ + unsigned int active; + +#elif defined(CONFIG_SLUB) + + union { + struct list_head slab_list; + struct rcu_head rcu_head; +#ifdef CONFIG_SLUB_CPU_PARTIAL + struct { + struct slab *next; + int slabs; /* Nr of slabs left */ + }; +#endif + }; +struct kmem_cache *slab_cache; + /* Double-word boundary */ + void *freelist; /* first free object */ + union { + unsigned long counters; + struct { + unsigned inuse:16; + unsigned objects:15; + unsigned frozen:1; + }; + }; + unsigned int __unused; + +#elif defined(CONFIG_SLOB) + + struct list_head slab_list; + void *__unused_1; + void *freelist; /* first free block */ + long units; + unsigned int __unused_2; + +#else +#error "Unexpected slab allocator configured" +#endif + + atomic_t __page_refcount; +#ifdef CONFIG_MEMCG + unsigned long memcg_data; +#endif +}; + +#define slab_folio(s) (_Generic((s), \ + const struct slab *: (const struct folio *)s, \ + struct slab *: (struct folio *)s)) + + +#define folio_slab(folio) (_Generic((folio), \ + const struct folio *: (const struct slab *)(folio), \ + struct folio *: (struct slab *)(folio))) + + +static inline void *slab_address(const struct slab *slab) +{ + return folio_address(slab_folio(slab)); +} +#endif +#include +#include +#include +#include +struct bucket { + struct list_head head; + u32 nr; + spinlock_t lock; +}; + +struct slab_info { + struct mutex *slab_mutex; + struct list_head *slab_caches; + struct kmem_cache *cache; + unsigned long object_num; +}; + +struct object { + struct list_head node; + void *ptr; + int valid_byte; + int valid_object; + void *page; +}; + +struct object_info { + struct list_head head; + struct object *object; + int object_size; + int size; + int num; +}; + + +struct memleak_htab { + struct bucket *buckets; + struct list_head freelist; + spinlock_t lock; + u32 n_buckets; + u32 free; + u32 total; + u32 stack_deep; + atomic_t count; + int state; + int rate; + struct slab_info check; + struct object_info info; + struct delayed_work work; + struct memleak_settings set; +}; + +struct alloc_desc { + struct list_head node; + unsigned long ts; + const void *ptr; + unsigned long long call_site; + int pid; + int order; + char comm[TASK_COMM_LEN]; + u32 hash; + u32 num; + u64 backtrace[]; +}; + +int memleak_hashlist_init(struct memleak_htab *htab); +struct alloc_desc * memleak_alloc_desc(struct memleak_htab *htab); +int memleak_free_desc(struct memleak_htab *htab, struct alloc_desc *desc); +int memleak_insert_desc(struct memleak_htab *htab, struct alloc_desc *desc); +struct alloc_desc * memleak_del_desc(struct memleak_htab *htab, const void *ptr); +int memleak_hashlist_uninit(struct memleak_htab *htab); +int memleak_entry_reentrant(void); +void memleak_exit_reentrant(void); +int memleak_dump_leak(struct memleak_htab *htab, struct user_result *result); + +void * internal_alloc(size_t size, gfp_t flags); +void internal_kfree(void *addr); + + +int memleak_clear_leak(struct memleak_htab *htab); +int memleak_trace_off(struct 
memleak_htab *htab); +int memleak_trace_on(struct memleak_htab *htab); + +int memleak_handler_cmd(int cmd, unsigned long arg); +int memleak_mark_leak(struct memleak_htab *htab, struct alloc_desc *desc); +int memleak_free_object(struct memleak_htab *htab); +int memleak_max_object(struct memleak_htab *htab); +#endif diff --git a/source/lib/internal/kernel_module/modules/memleak/memleak.c b/source/lib/internal/kernel_module/modules/memleak/memleak.c new file mode 100755 index 0000000000000000000000000000000000000000..660da77dcb387655f953b5e9ab2b79d5f411d26a --- /dev/null +++ b/source/lib/internal/kernel_module/modules/memleak/memleak.c @@ -0,0 +1,717 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "mem.h" +#include +#include +#include "sysak_mods.h" +#include "hook.h" + +#define HASH_SIZE (1024) +#define PRE_ALLOC (2048) + +static int inited = 0; + +static struct memleak_htab *tab; +static int memleak_ref; +static ssize_t (*show_slab_objects)(struct kmem_cache *s, char *buf); +int __memleak_init(void); +int __memleak_uninit(void); + + +static unsigned long (*__kallsyms_lookup_name)(const char *name); + +#if LINUX_VERSION_CODE < KERNEL_VERSION(5,17,0) +void * virt_to_slab_cache(const void *x) +{ + struct page *page; + + if (unlikely(ZERO_OR_NULL_PTR(x))) + return NULL; + + page = virt_to_head_page(x); + if (!page || unlikely(!PageSlab(page))) { + return NULL; + } + return page->slab_cache; +} +#else +void * virt_to_slab_cache(const void *x) +{ + struct folio *folio; + struct slab *slab; + if (unlikely(ZERO_OR_NULL_PTR(x))) + return NULL; + folio = virt_to_folio(x); + if (unlikely(!folio_test_slab(folio))) { + return NULL; + } + slab = folio_slab(folio); + return slab->slab_cache; +} +#endif + +static int memleak_is_target(struct memleak_htab *htab, const void *x) +{ + void *cache; + + + if (!htab->check.cache) + return 1; + + if (unlikely(ZERO_OR_NULL_PTR(x))) + return 0; + + cache = virt_to_slab_cache(x); + return (cache == (void *)htab->check.cache); +} + +#if LINUX_VERSION_CODE < KERNEL_VERSION(5,2,0) +static unsigned long get_stack_rip(unsigned long *arr, int max_entries) +{ + struct stack_trace stack_trace; + unsigned long trace[16] = {0}; + + stack_trace.nr_entries = 0; + stack_trace.skip = 3; + if (arr && max_entries) { + stack_trace.max_entries = max_entries; + stack_trace.entries = arr; + } else { + stack_trace.max_entries = 16; + stack_trace.entries = trace; + } + + save_stack_trace(&stack_trace); + + return stack_trace.nr_entries; +} +#else +static unsigned long get_stack_rip(unsigned long *arr, int max_entries) +{ + return stack_trace_save(arr,max_entries, 3); +} +#endif + +static void memleak_alloc_desc_push(struct memleak_htab *htab, unsigned long call_site, const void *ptr, int order) +{ + unsigned long flags; + struct alloc_desc *desc; + + if (!ptr || !memleak_is_target(htab, ptr)) + return; + + local_irq_save(flags); + if (memleak_entry_reentrant()) + goto _out; + + desc = memleak_alloc_desc(htab); + if (!desc) + goto _out; + + desc->call_site = call_site; + desc->ptr = ptr; + desc->order = order; + desc->ts = sched_clock(); + desc->pid = current->pid; + strcpy(desc->comm, current->comm); + + if (desc->num) { + desc->num = get_stack_rip((unsigned long *)desc->backtrace, desc->num); + } + if (!call_site && desc->num) + desc->call_site = desc->backtrace[2]; + + memleak_insert_desc(htab, desc); + +_out: + memleak_exit_reentrant(); + local_irq_restore(flags); +} + +static void memleak_alloc_desc_pop(struct memleak_htab 
*htab,unsigned long call_site, const void *ptr,int order) +{ + unsigned long flags; + struct alloc_desc *desc; + + if (!ptr || !memleak_is_target(htab, ptr)) + return; + + local_irq_save(flags); + + if (memleak_entry_reentrant()) + goto _out; + + desc = memleak_del_desc(htab, ptr); + memleak_free_desc(htab, desc); + +_out: + memleak_exit_reentrant(); + local_irq_restore(flags); +} + +#if KERNEL_VERSION(3, 10, 0) <= LINUX_VERSION_CODE +static void trace_slab_alloc(void *__data, unsigned long call_site, const void *ptr, + size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags) +#else +static void trace_slab_alloc(unsigned long call_site, const void *ptr, + size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags) +#endif +{ + memleak_alloc_desc_push(tab, call_site, ptr, 0); +} + +#if KERNEL_VERSION(3, 10, 0) <= LINUX_VERSION_CODE +static void trace_slab_free(void *ignore, unsigned long call_site, const void *ptr) +#else +static void trace_slab_free(unsigned long call_site, const void *ptr) +#endif +{ + + memleak_alloc_desc_pop(tab, call_site, ptr, 0); +} + +#ifdef CONFIG_NUMA +#if KERNEL_VERSION(3, 10, 0) <= LINUX_VERSION_CODE +static void trace_slab_alloc_node(void *__data, unsigned long call_site, const void *ptr, + size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags, int node) +#else +static void trace_slab_alloc_node(unsigned long call_site, const void *ptr, + size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags, int node) +#endif +{ + memleak_alloc_desc_push(tab, call_site, ptr, 0); +} + +#endif + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 10, 0) +static void trace_page_alloc(void *ignore, struct page *page, + unsigned int order, gfp_t gfp_flags, int migratetype) +#else +static void trace_page_alloc(struct page *page, + unsigned int order, gfp_t gfp_flags, int migratetype) +#endif +{ + + if ((migratetype == 1) || (migratetype == 2)) { + return; + } + + memleak_alloc_desc_push(tab, 0, page, order); + +} + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 10, 0) +static void trace_page_free(void *ignore, struct page *page, + unsigned int order) +#else +static void trace_page_free(struct page *page, + unsigned int order) +#endif +{ + if (((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) != 0) + return; + + memleak_alloc_desc_pop(tab, 0, page, order); +} + + + +static int slab_tracepoint_init(void) +{ + int ret = 0; + + ret = hook_tracepoint("kmem_cache_alloc", trace_slab_alloc, NULL); + if (ret) { + pr_err("memleak register kmem cache alloc tracepoint error %d\n", ret); + } + + ret = hook_tracepoint("kmem_cache_free", trace_slab_free, NULL); + if (ret) { + pr_err("memleak register kmem cache free tracepoint error %d\n", ret); + } + + ret = hook_tracepoint("kmalloc", trace_slab_alloc, NULL); + if (ret) { + pr_err("memleak register kmalloc tracepoint error %d\n", ret); + } + + ret = hook_tracepoint("kfree", trace_slab_free, NULL); + if (ret) { + pr_err("memleak register kfree tracepoint error %d\n", ret); + } + +#ifdef CONFIG_NUMA + ret = hook_tracepoint("kmalloc_node", trace_slab_alloc_node, NULL); + if (ret) { + pr_err("memleak register kmalloc node tracepoint error %d\n", ret); + } +#ifdef CONFIG_NUMA + ret = hook_tracepoint("kmem_cache_alloc_node", trace_slab_alloc_node, NULL); + if (ret) { + pr_err("memleak register kmem_cache_alloc node tracepoint error %d\n", ret); + } +#endif +#endif + return 0; +} + +static void slab_tracepoint_alloc_uninit(void) +{ + unhook_tracepoint("kmem_cache_alloc", trace_slab_alloc, NULL); + unhook_tracepoint("kmalloc", trace_slab_alloc, NULL); + +#ifdef 
CONFIG_NUMA + unhook_tracepoint("kmalloc_node", trace_slab_alloc_node, NULL); +#ifdef CONFIG_TRACING + unhook_tracepoint("kmem_cache_alloc_node", trace_slab_alloc_node, NULL); +#endif +#endif +} + +static void slab_tracepoint_free_uninit(void) +{ + unhook_tracepoint("kfree", trace_slab_free, NULL); + unhook_tracepoint("kmem_cache_free", trace_slab_free, NULL); +} + +static void page_tracepoint_init(void) +{ + int ret = 0; + + ret = hook_tracepoint("mm_page_free", trace_page_free, NULL); + if(ret) + pr_err("register mm page free error\n"); + + + ret = hook_tracepoint("mm_page_alloc", trace_page_alloc, NULL); + if(ret) + pr_err("register mm page alloc error\n"); +} + +static void page_tracepoint_alloc_uninit(void) +{ + + unhook_tracepoint("mm_page_alloc", trace_page_alloc, NULL); +} + +static void page_tracepoint_free_uninit(void) +{ + + unhook_tracepoint("mm_page_free", trace_page_free, NULL); +} + +static void memleak_tracepoint_init(struct memleak_htab *htab) +{ + if (htab->set.type == MEMLEAK_TYPE_SLAB) { + slab_tracepoint_init(); + }else if (htab->set.type == MEMLEAK_TYPE_PAGE) { + page_tracepoint_init(); + } else + pr_err("trace type error %d\n", htab->set.type); +} + +static void memleak_tracepoint_alloc_uninit(struct memleak_htab *htab) +{ + if (htab->set.type == MEMLEAK_TYPE_SLAB) { + slab_tracepoint_alloc_uninit(); + } else if (htab->set.type == MEMLEAK_TYPE_PAGE) { + page_tracepoint_alloc_uninit(); + } else + pr_err("trace alloc uninit type %d\n", htab->set.type); +} + +static void memleak_tracepoint_free_uninit(struct memleak_htab *htab) +{ + if (htab->set.type == MEMLEAK_TYPE_SLAB) { + slab_tracepoint_free_uninit(); + } else if (htab->set.type == MEMLEAK_TYPE_PAGE) { + page_tracepoint_free_uninit(); + } else + pr_err("trace free uninit type %d\n", htab->set.type); + +} + +static unsigned long str2num(char *buf) +{ + unsigned long objects = 0; + int ret; + char * tmp = buf; + + while (*buf && *++buf != ' '); + + if (!*buf) + return 0; + + *buf = 0; + ret = kstrtoul(tmp, 10, &objects); + return objects; +} + +static int memleak_get_maxslab(struct memleak_htab *htab) +{ + unsigned long size = 0; + unsigned long max = 0; + struct kmem_cache *tmp; + char *object_buffer; + void **show_slab = (void **)&show_slab_objects; + +#ifndef CONFIG_SLUB_DEBUG + return 0; +#endif + + *show_slab = (void *)__kallsyms_lookup_name("objects_show"); + if (!*show_slab) { + pr_err("Get show_slab objects error\n"); + return 0; + } + pr_err("get slab size is :%px\n",show_slab_objects); + object_buffer = (char *)__get_free_page(GFP_KERNEL); + if (!object_buffer) + return 0; + mutex_lock(htab->check.slab_mutex); + + list_for_each_entry(tmp, htab->check.slab_caches, list) { + if (tmp->flags & SLAB_RECLAIM_ACCOUNT) + continue; + + size = show_slab_objects(tmp, object_buffer); + if (size < 0) + continue; + + size = str2num(object_buffer); + if (size <= 0) + continue; + + if (size > max) { + max = size; + htab->check.cache = tmp; + htab->check.object_num = max; + } + } + + if (htab->check.cache) + pr_info("max cache %s size = %lu \n", htab->check.cache->name, max); + + mutex_unlock(htab->check.slab_mutex); + free_page(object_buffer); + return 0; +} + + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5,7,0) +#include +static struct kprobe kprobe_kallsyms_lookup_name = { + .symbol_name = "kallsyms_lookup_name" +}; +int init_symbol(void) +{ + register_kprobe(&kprobe_kallsyms_lookup_name); + __kallsyms_lookup_name = (void *)kprobe_kallsyms_lookup_name.addr; + unregister_kprobe(&kprobe_kallsyms_lookup_name); + + 
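+	/* kallsyms_lookup_name() is not exported on kernels >= 5.7, so its address is
+	 * recovered from the kprobe's resolved .addr field registered on that symbol above. */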
pr_err("kallsyms_lookup_name is %px\n", __kallsyms_lookup_name); + + if (!__kallsyms_lookup_name) { + return -EINVAL; + } + + return 0; +} +#else +int init_symbol(void) +{ + __kallsyms_lookup_name = kallsyms_lookup_name; + return 0; +} +#endif +static int memleak_slab_init(struct memleak_htab *htab) +{ + struct mutex *slab_mutex; + struct kmem_cache *s; + struct list_head *slab_caches; + + slab_mutex = (struct mutex *)__kallsyms_lookup_name("slab_mutex"); + slab_caches = (struct list_head *)__kallsyms_lookup_name("slab_caches"); + + if (!slab_mutex || !slab_caches) { + pr_err("memleak:can't get slab mutex/caches %p:%p\n", slab_mutex, slab_caches); + return -EIO; + } + + htab->check.slab_mutex = slab_mutex; + htab->check.slab_caches = slab_caches; + htab->check.object_num = 0; + + if (!htab->set.name[0]) { + memleak_get_maxslab(htab); + goto _out; + } + + if (!strcmp(htab->set.name, "all")) + return 0; + + mutex_lock(slab_mutex); + + list_for_each_entry(s, slab_caches, list) { + if (!strcmp(s->name, htab->set.name)) { + htab->check.cache = s; + pr_info("get slab %s,%p\n",s->name, htab->check.cache); + break; + } + } + + mutex_unlock(slab_mutex); + +_out: + return !htab->check.cache; +} + + +static int memleak_mem_init(struct memleak_htab *htab) +{ + + htab->n_buckets = HASH_SIZE; + htab->total = PRE_ALLOC; + htab->stack_deep = 16; + + return memleak_hashlist_init(tab); +} + +static void memleak_mem_uninit(struct memleak_htab *htab) +{ + memleak_hashlist_uninit(htab); +} + +static void memleak_delay_work(struct work_struct *work) +{ + struct memleak_htab *htab; + int delay = 0; + + htab = (struct memleak_htab *)container_of(work, struct memleak_htab, work.work); + + if (htab->state == MEMLEAK_STATE_INIT) { + pr_err("memleak delay work state on\n"); + memleak_tracepoint_alloc_uninit(htab); + + htab->state = MEMLEAK_STATE_ON; + delay = (htab->set.monitor_time * htab->set.rate)/100; + schedule_delayed_work(&htab->work, HZ * delay); + + } else if (htab->state == MEMLEAK_STATE_ON) { + + pr_err("memleak delay work state off\n"); + + memleak_tracepoint_free_uninit(htab); + + htab->state = MEMLEAK_STATE_OFF; + } +} + +static int memleak_trace_slab(struct memleak_htab *htab) +{ + int ret; + + htab->check.cache = NULL; + htab->check.object_num = 0; + atomic_set(&htab->count, 0); + + init_symbol(); + ret = memleak_slab_init(htab); + + memleak_max_object(htab); + + return ret; +} + +static int memleak_trace_slab_uninit(struct memleak_htab *htab) +{ + if (htab->set.type != MEMLEAK_TYPE_SLAB) + return 0; + + memleak_free_object(htab); + + htab->check.cache = NULL; + htab->check.object_num = 0; + + return 0; +} + +int memleak_trace_off(struct memleak_htab *htab) +{ + cancel_delayed_work_sync(&htab->work); + + if (htab->state == MEMLEAK_STATE_INIT) { + + memleak_tracepoint_alloc_uninit(htab); + memleak_tracepoint_free_uninit(htab); + + } else if (htab->state == MEMLEAK_STATE_ON) { + memleak_tracepoint_free_uninit(htab); + } + + htab->state = MEMLEAK_STATE_OFF; + + memleak_trace_slab_uninit(htab); + + return 0; +} + + int memleak_trace_on(struct memleak_htab *htab) +{ + int ret = 0; + int delay = 0; + + if (!htab) + return ret; + + if (!htab->set.monitor_time) + htab->set.monitor_time = MONITOR_TIME; + + if (!htab->set.rate) + htab->set.rate = MONITOR_RATE; + + if (!htab->set.type) + htab->set.type = MEMLEAK_TYPE_SLAB; + + switch (htab->set.type) { + + case MEMLEAK_TYPE_VMALLOC: + pr_info("trace vmalloc\n"); + htab->check.cache = NULL; + break; + case MEMLEAK_TYPE_PAGE: + htab->check.cache = NULL; + 
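+		/* page-granularity tracing is not bound to a kmem_cache, so
+		 * memleak_is_target() will accept every traced page */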
pr_info("trace alloc page\n"); + break; + default: + ret = memleak_trace_slab(htab); + } + + htab->state = MEMLEAK_STATE_INIT; + atomic_set(&htab->count, 0); + + memleak_tracepoint_init(htab); + + atomic_set(&htab->count, 0); + delay = htab->set.monitor_time; + delay = delay - (delay * htab->set.rate)/100; + + pr_info("delay = %d\n",delay); + schedule_delayed_work(&htab->work, HZ * delay); + + return ret; +} + +int memleak_release(void) +{ + printk("memleak release\n"); + memleak_trace_off(tab); + memleak_clear_leak(tab); + + return 0; +} + +int memleak_handler_cmd(int cmd, unsigned long arg) +{ + int ret = -EINVAL; + struct memleak_settings set; + struct memleak_htab * htab=NULL; + + if (!inited && (cmd != MEMLEAK_CMD_DISABLE)) { + inited = 1; + __memleak_init(); + } + + if (!tab) + return -EBUSY; + + htab = tab; + + if (htab->state != MEMLEAK_STATE_OFF && + (cmd == MEMLEAK_CMD_RESULT || cmd == MEMLEAK_CMD_ENALBE)) { + pr_info("htab busy wait\n"); + return -EAGAIN; + } + + switch (cmd) { + + case MEMLEAK_CMD_ENALBE: + ret = copy_from_user(&set, (void *)arg, sizeof(set)); + if (ret) + return ret; + pr_info("type = %d time = %d,slabname %s ext %d,rate=%d\n",set.type, set.monitor_time, set.name, set.ext,set.rate); + htab->set = set; + ret = memleak_trace_on(htab); + if (!ret) + sysak_module_get(&memleak_ref); + break; + + case MEMLEAK_CMD_RESULT: + pr_info("get result\n"); + ret = memleak_dump_leak(htab, (struct user_result __user*)arg); + break; + + case MEMLEAK_CMD_DISABLE: + __memleak_uninit(); + inited = 0; + sysak_module_put(&memleak_ref); + }; + + return ret; +} + + int memleak_init(void) +{ + return 0; +} + int __memleak_init(void) +{ + int ret = 0; + + tab = kzalloc(sizeof(struct memleak_htab), GFP_KERNEL); + if (!tab) { + pr_err("alloc memleak hash table failed\n"); + return -ENOMEM; + } + + spin_lock_init(&tab->lock); + INIT_DELAYED_WORK(&tab->work, memleak_delay_work); + tab->state = MEMLEAK_STATE_OFF; + + ret = memleak_mem_init(tab); + if (ret) { + kfree(tab); + ret = -ENOMEM; + tab = NULL; + } + + return ret; +} + +int memleak_uninit(void) +{ + return 0; +} + +int __memleak_uninit(void) +{ + if (!tab) + return 0; + + memleak_release(); + + memleak_mem_uninit(tab); + + kfree(tab); + tab = NULL; + + return 0; +} diff --git a/source/lib/internal/kernel_module/modules/memleak/objects.c b/source/lib/internal/kernel_module/modules/memleak/objects.c new file mode 100644 index 0000000000000000000000000000000000000000..8285f5757cceefeeeeb333fce8be9234aeed469a --- /dev/null +++ b/source/lib/internal/kernel_module/modules/memleak/objects.c @@ -0,0 +1,322 @@ +#include +#include +#include +#include +#include +#include +#include "mem.h" +#include +#include +#include + +#define MAX_OBJECT (1000) + +/* 400G*/ +#define MAX_MEM (400*1024*1024>>(PAGE_SHIFT-10)) + +/* 100G */ +#define MED_MEM (100*1024*1024>>(PAGE_SHIFT-10)) + +static int rate = 65; + +static inline int is_invalid_byte(unsigned char byte) +{ + return (byte == 0x00 || byte == 0xff + || byte == 0xbb || byte == 0xcc + || byte == 0x5a || byte == 0x6a); +} +static int compute_valid_num(unsigned char *src, int size) +{ + int i ; + int valid = 0; + + for (i = 0; i < size; i++) { + if (is_invalid_byte(src[i])) + continue; + valid++; + } + return valid; +} + +static int compare_one_object(struct object *object, unsigned char *dst, int size) +{ + int i ; + int valid_num = 0; + unsigned char *src = (unsigned char *)object->ptr; + if (dst == NULL || src == NULL) + return 0; + + for (i = 0; i < size; i++) { + + if (is_invalid_byte(src[i])) + 
continue; + if (src[i] == dst[i]) + valid_num++; + } + + return ((valid_num * 100) >= (object->valid_byte * rate)); +} + +static int find_similar_object(struct object_info *info, struct object *object, unsigned long long *arr, int num) +{ + int i, j; + int valid = 0; + int ret = 0; + int max = 0; + struct object tmp; + + + for (i = 0; i < num; i++) { + + valid = 0; + memset(&tmp, 0, sizeof(tmp)); + tmp.valid_byte = compute_valid_num((unsigned char *)arr[i], info->object_size); + + if (tmp.valid_byte < 4) + continue; + + tmp.ptr = (void *)arr[i]; + + for (j = 0; j < num; j++) { + + if (i == j) + continue; + + ret = compare_one_object(&tmp, (unsigned char *)(arr[j]), info->object_size); + if (ret) + valid++; + } + + if (valid >= max) { + max = valid; + *object = tmp; + object->valid_object = max; + } + + if ((object->valid_object * 2) >= num) + break; + } + + return 0; +} + +static int merge_similar_object(struct object_info *info, struct object *object, int i) +{ + int merge = 0; + struct object *tmp; + unsigned char *ptr = (unsigned char *)object->ptr; + + if (object->valid_object < i / 2) { + return 1; + } + + list_for_each_entry(tmp, &info->head, node) { + + merge = compare_one_object(tmp, ptr, info->size); + if (merge) + break; + } + + if (!info->object) + info->object = object; + + if (merge) { + tmp->valid_object += object->valid_object; + + if (tmp->valid_object > info->object->valid_object) + info->object = tmp; + + } else { + if (info->num > MAX_OBJECT) { + return 1; + } + info->num++; + list_add(&object->node, &info->head); + } + + return merge; +} + +static int scan_one_page(struct page *page, struct object_info *info) +{ + int n; + int num = PAGE_SIZE / info->size; + char unuse[num]; + int i = num; + struct object *object; + void *meta; + unsigned long long *tmp; + + void *start = page_address(page); + + memset(unuse, 0, sizeof(unuse)); + +#if 0 + for (p = page->freelist; p && p < end; p = (*(void **)p)) { + n = (p - start) / info->size ; + if (n < num) { + unuse[n] = 1; + i--; + } + } +#endif + if ( i <= (num >> 1)) + return 0; + + object = internal_alloc(sizeof(*object), GFP_ATOMIC); + if (!object) { + pr_err(" alloc object info error\n"); + return 0; + } + + memset(object, 0, sizeof(*object)); + + meta = internal_alloc(sizeof(void *) * i, GFP_ATOMIC); + if (!meta) { + internal_kfree(object); + return 0; + } + + memset(meta, 0, sizeof(void *) * i); + + tmp = (unsigned long long *)meta; + + for (n = 0; n < num; n++) { + if (unuse[n]) + continue; + *tmp = (unsigned long long )(start + n * info->size); + tmp++; + } + + + find_similar_object(info, object, (unsigned long long *)meta, i); + + object->page = (void *)start; + + n = merge_similar_object(info, object, i); + if (n) { + internal_kfree(object); + } + + internal_kfree(meta); + + return 0; +} + +int memleak_free_object(struct memleak_htab *htab) +{ + struct object *tmp1, *tmp2; + struct object_info *info = &htab->info; + + if (!htab->check.cache) + return 0; + + list_for_each_entry_safe(tmp1, tmp2, &info->head, node) { + list_del_init(&tmp1->node); + internal_kfree(tmp1); + } + + memset(info, 0, sizeof(*info)); + INIT_LIST_HEAD(&info->head); + + return 0; +} + +extern void * virt_to_slab_cache(const void *x); + +int memleak_max_object(struct memleak_htab *htab) +{ + int i = 0; + struct object_info *info = &htab->info; + struct kmem_cache *cache = htab->check.cache; + struct object *object; + struct sysinfo meminfo; + int skip = 0; + + memset(info, 0, sizeof(*info)); + INIT_LIST_HEAD(&info->head); + + if (!cache) { + 
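+		/* Neither a user-named slab nor an auto-detected "max" cache was resolved; nothing to scan. */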
pr_info("slab cache is null\n"); + return 0; + } + + if (htab->rate) + rate = htab->rate; + + info->object_size = cache->object_size; + info->size = cache->size; + + si_meminfo(&meminfo); + if (meminfo.totalram > MAX_MEM) + skip = 3; + else if (meminfo.totalram > MED_MEM) + skip = 1; + else + skip = 0; + + for_each_online_node(i) { + unsigned long start_pfn = node_start_pfn(i); + unsigned long end_pfn = node_end_pfn(i); + unsigned long pfn; + unsigned long order; + for (pfn = start_pfn; pfn < end_pfn;) { + struct page *page = NULL; + + cond_resched(); + + if (!pfn_valid(pfn)) { + pfn++; + continue; + } + + page = pfn_to_page(pfn); + if (!page) { + pfn++; + continue; + } + + if (PageCompound(page)) + order = compound_order(page); + else if (PageBuddy(page)) + order = page->private; + else + order = 0; + pfn += (1 << (order >= MAX_ORDER ? 0 : order)); + + /* only scan pages belonging to this node */ + if (page_to_nid(page) != i) + continue; + /* only scan if page is in use */ + if (page_count(page) == 0) + continue; + /*only scan slab page */ + if (!PageSlab(page)) + continue; + /*only scan target slab */ + if (virt_to_slab_cache(pfn_to_kaddr(pfn)) != cache) + continue; + + scan_one_page(page, info); + pfn += skip; + } + } + + pr_info("find object %d\n", info->num); + object = info->object; + if (object) + pr_info("start %px ptr %px byte %d object %d \n", object->page, object->ptr, object->valid_byte, object->valid_object); + + return 0; +} + +int memleak_mark_leak(struct memleak_htab *htab, struct alloc_desc *desc) +{ + struct object_info *info = &htab->info; + + if (!htab->check.cache || !info->object || !desc) + return 0; + + return !!compare_one_object(info->object, (unsigned char *)desc->ptr, info->object_size); +} + diff --git a/source/lib/internal/kernel_module/modules/memleak/user.h b/source/lib/internal/kernel_module/modules/memleak/user.h new file mode 100644 index 0000000000000000000000000000000000000000..aa623484107c3ace120ffbf9904e2d7d9ee4583c --- /dev/null +++ b/source/lib/internal/kernel_module/modules/memleak/user.h @@ -0,0 +1,54 @@ +#ifndef __USER__ +#define __USER__ + +#define MONITOR_TIME (300) +#define MONITOR_RATE (20) /* 20% */ + +typedef enum _memleak_type { + MEMLEAK_TYPE_SLAB = 1, + MEMLEAK_TYPE_PAGE, + MEMLEAK_TYPE_VMALLOC, +} memleak_type; + +struct memleak_settings { + memleak_type type; + int monitor_time;/*default 300 seconds */ + int rate; + char name[NAME_LEN]; + int ext;/*extension function */ +}; + +struct max_object { + char slabname[NAME_LEN]; + void *ptr; + int object_size; + unsigned long object_num; + unsigned long similar_object; +}; +struct user_result { + int num; + struct max_object *objects; + struct user_alloc_desc *desc; +}; + +struct user_alloc_desc { + int pid; + int mark; + int order; + const void *ptr; + char comm[TASK_COMM_LEN]; + char function[NAME_LEN]; + unsigned long long call_site; + unsigned long long ts; + int num; + char backtrace[32][128]; +}; + +struct user_call_site { + unsigned long long call_site; + int nr; + int mark_nr; + char function[NAME_LEN]; +}; + +#endif diff --git a/source/lib/internal/kernel_module/modules/mmaptrace/mmaptrace.c b/source/lib/internal/kernel_module/modules/mmaptrace/mmaptrace.c new file mode 100644 index 0000000000000000000000000000000000000000..da9aefbbaa12d723a2eeaadafe6328bd17f5f415 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/mmaptrace/mmaptrace.c @@ -0,0 +1,596 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include 
+#include +#include +#include +#include +#include +#include +#include +#include +#include +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 19, 81) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 19, 91) +#include +#endif +#include "proc.h" + +#ifdef CONFIG_X86 +#define MAX_SYMBOL_LEN 64 +#define PATH_LEN 256 +#define STACK_DEPTH 100 +#define STACK_DETAIL_DEPTH 20 +#define PERTASK_STACK 10 +#define LIST_LEN 10 +#define PROC_NUMBUF 128 +#define REGISTER_FAILED 1 + +extern struct mm_struct *get_task_mm(struct task_struct *task); + +static bool enable_mmaptrace = false; +static unsigned long mmap_len = 246 << 10; +static pid_t mmap_pid; +static int brk; + +LIST_HEAD(threads_list); +LIST_HEAD(threadvma_list); + +DECLARE_RWSEM(threadslist_sem); +DECLARE_RWSEM(vmalist_sem); + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 19, 0) +static struct kprobe kp_mmap = { + .symbol_name = "ksys_mmap_pgoff", +}; + +static struct kprobe kp_brk = { + .symbol_name = "do_brk_flags", +}; +#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0) +static struct kprobe kp_mmap = { + .symbol_name = "vm_mmap_pgoff", +}; + +static struct kprobe kp_brk = { + .symbol_name = "do_brk_flags", +}; +#else +static struct kprobe kp_mmap = { + .symbol_name = "vm_mmap_pgoff", +}; + +static struct kprobe kp_brk = { + .symbol_name = "do_brk", +}; +#endif + +struct stack_info { + unsigned long bp; + char path[PATH_LEN]; +}; + +struct user_stack_detail { + struct list_head list; + int is_brk; +#if defined(DIAG_ARM64) + //struct user_pt_regs regs; +#else + //struct pt_regs regs; +#endif + //unsigned long ip; + //unsigned long bp; + //unsigned long sp; + struct stack_info stack[STACK_DETAIL_DEPTH]; +}; + +struct task_info{ + pid_t pid; + pid_t tgid; + struct list_head task_list; + unsigned long mmap_count; + struct list_head vma_list; + unsigned long userstack_list_len; + struct list_head userstack_list; + char comm[TASK_COMM_LEN]; +}; + +struct vma_info{ + struct list_head list; + pid_t pid; + unsigned long start; + unsigned long end; + int exectue; + char path[PATH_LEN]; +}; + +struct stack_frame_user { + const void __user *next_fp; + unsigned long ret_addr; +}; + + +static void save_mmapstack_trace_user(struct task_struct *task, struct task_info *tsk) +{ + struct list_head *vma_entry; + const struct pt_regs *regs = task_pt_regs(current); + const void __user *fp = (const void __user *)regs->sp; + int stack_len = 0 ; + int i; + + struct user_stack_detail *new_stack = kzalloc(sizeof(struct user_stack_detail),GFP_KERNEL); + if (!new_stack) + return; + new_stack->is_brk = brk; + for (i = 0; i < STACK_DEPTH; i++){ + if (stack_len > STACK_DETAIL_DEPTH) + break; + list_for_each(vma_entry, &threadvma_list){ + //struct vma_info *vma = (struct vma_info *)vma_entry; + struct vma_info *vma = container_of(vma_entry, struct vma_info, list); + unsigned long tmp; + + if (!copy_from_user(&tmp, fp+i*__SIZEOF_LONG__, __SIZEOF_LONG__)) { + if ((tmp >= vma->start) && (tmp <= vma->end)) { + new_stack->stack[stack_len].bp = tmp; + strcpy(new_stack->stack[stack_len].path,vma->path); + stack_len++; + } + } + } + } + list_add_tail(&new_stack->list, &tsk->userstack_list); +} + +static int save_calltrace(struct pt_regs *regs) +{ + struct list_head *tsk_entry; + struct task_info *new_tsk; + pid_t tgid = 0; + + //down_write(&threadslist_sem); + list_for_each(tsk_entry, &threads_list){ + struct task_info *tsk = container_of(tsk_entry, struct task_info, task_list); + tgid = tsk->tgid; + if (tsk->pid == current->pid){ + if (tsk->userstack_list_len > LIST_LEN){ + return 0; + } + 
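+			/* Thread is already tracked: append another user stack (capped at LIST_LEN above). */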
save_mmapstack_trace_user(current,tsk); + return 0; + } + //save stack + } + if (tgid == current->tgid){ + new_tsk = kzalloc(sizeof(struct task_info),GFP_KERNEL); + if (!new_tsk) + return 0; + new_tsk->pid = current->pid; + new_tsk->tgid = tgid; + memcpy(new_tsk->comm,current->comm,sizeof(new_tsk->comm)); + new_tsk->mmap_count++; + INIT_LIST_HEAD(&new_tsk->userstack_list); + save_mmapstack_trace_user(current,new_tsk); + list_add_tail(&new_tsk->task_list,&threads_list); + } + //up_write(&threadslist_sem); + return 0; +} + +static int before_mmap_pgoff(struct kprobe *p, struct pt_regs *regs) +{ + int ret; + + brk = 0; + if (regs->si < mmap_len){ + return 0; + } + if (!current || !current->mm) + return 0; + + ret = save_calltrace(regs); + return 0; +} + +static void after_mmap_pgoff(struct kprobe *p, struct pt_regs *regs, + unsigned long flags) +{ + return; +} + +static void get_filename(char *buf, const struct path *path, size_t size) +{ + //int res = -1; + //char *end; + if (size) { + char *p = d_path(path, buf, size); + if (!IS_ERR(p)) { + strcpy(buf,p); + //end = mangle_path(buf, p, "\n"); + //if (end) + //res = end - buf; + } + } + return; +} + +static int mmaptrace_print_show(struct seq_file *m, void *v) +{ + struct list_head *tsk_entry; + struct list_head *stack_entry; + int loop_count = 0; + char *syscall_name; + int i; + + //down_read(&threadslist_sem); + if (list_empty(&threads_list)){ + //up_read(&threadslist_sem); + seq_printf(m, "task list is empty\n"); + return 0; + } + + list_for_each(tsk_entry, &threads_list){ + struct task_info *tsk = container_of(tsk_entry, struct task_info, task_list); + seq_printf(m, "pid[%d],name[%s],tgid[%d]\n", + tsk->pid, tsk->comm, tsk->tgid); + list_for_each(stack_entry, &tsk->userstack_list){ + struct user_stack_detail *user_stack = (struct user_stack_detail *)stack_entry; + loop_count++; + syscall_name = user_stack->is_brk ? 
"brk" : "mmap"; + seq_printf(m, "%s,用户态堆栈%d:\n", syscall_name,loop_count); + for (i = 0; i < STACK_DETAIL_DEPTH; i++) { + if (user_stack->stack[i].bp == 0) { + continue; + } + seq_printf(m,"#~ 0x%lx", user_stack->stack[i].bp); + seq_printf(m," %s\n",user_stack->stack[i].path); + } + } + } + //up_read(&threadslist_sem); + return 0; +} + +DEFINE_PROC_ATTRIBUTE_RO(mmaptrace_print); + +static int mmaptrace_pid_show(struct seq_file *m, void *v) +{ + seq_printf(m, "pid:%d, len:%ld\n", mmap_pid, mmap_len); + return 0; + +} + +static ssize_t mmaptrace_pid_store(void *priv, const char __user *buf, size_t count) +{ + struct task_struct *tsk; + struct task_info *new_tsk; + struct mm_struct *mm; + struct file *vma_file; + struct vm_area_struct *vma; + struct vma_info *new_vma; + struct pid *pid; + char buffer[PROC_NUMBUF]; + char buff[PATH_LEN]; + pid_t pid_i; + int err = -1; + + if (!enable_mmaptrace){ + pr_warn("mmaptrace disabled!"); + return count; + } + + + memset(buffer, 0, sizeof(buffer)); + if (count > sizeof(buffer) - 1) + count = sizeof(buffer) - 1; + if (copy_from_user(buffer, buf, count)) { + return -EFAULT; + } + + err = kstrtoint(strstrip(buffer), 0, &pid_i); + if (err) + return -EINVAL; + + if (!list_empty(&threads_list)){ + struct list_head *entry; + list_for_each(entry, &threads_list){ + struct task_info *pos = (struct task_info *)entry; + if (pos->pid == pid_i) + return count; + } + } + + rcu_read_lock(); + + pid= find_get_pid(pid_i); + tsk = pid_task(pid, PIDTYPE_PID); + if (!tsk || !(tsk->mm)){ + rcu_read_unlock(); + return -EINVAL; + } + mmap_pid = pid_i; + + if (mmap_pid != 0 ){ + new_tsk = kzalloc(sizeof(struct task_info),GFP_KERNEL); + if (!new_tsk) + goto failed_tsk; + new_tsk->pid = mmap_pid; + new_tsk->tgid = tsk->tgid; + memcpy(new_tsk->comm,tsk->comm,sizeof(tsk->comm)); + new_tsk->mmap_count++; + //INIT_LIST_HEAD(&new_tsk->vma_list); + INIT_LIST_HEAD(&new_tsk->userstack_list); + + mm = get_task_mm(tsk); + + if (IS_ERR_OR_NULL(mm)){ + rcu_read_unlock(); + return -EINVAL; + } +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 10, 0) + if (!mmap_read_trylock(mm)){ +#else + if (!down_read_trylock(&mm->mmap_sem)){ +#endif + rcu_read_unlock(); + return -EINTR; + } + for (vma = mm->mmap; vma; vma = vma->vm_next){ + //if (vma->vm_file && vma->vm_flags & VM_EXEC && !inode_open_for_write(file_inode(vma->vm_file))){ + if (vma->vm_file && vma->vm_flags & VM_EXEC){ + new_vma = kzalloc(sizeof(struct vma_info),GFP_KERNEL); + if (!new_vma){ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 10, 0) + mmap_read_unlock(mm); +#else + up_read(&mm->mmap_sem); +#endif + goto failed_vma; + } + new_vma->start = vma->vm_start; + new_vma->pid = current->pid; + new_vma->end = vma->vm_end; + vma_file = vma->vm_file; + + if (vma_file){ + get_filename(buff, &vma_file->f_path, PATH_LEN); + } + strcpy(new_vma->path, buff); + //(&vmalist_sem); + list_add_tail(&new_vma->list,&threadvma_list); + //up_write(&vmalist_sem); + } + } +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 10, 0) + mmap_read_unlock(mm); +#else + up_read(&mm->mmap_sem); +#endif + //down_write(&threadslist_sem); + list_add_tail(&new_tsk->task_list, &threads_list); + //up_write(&threadslist_sem); + } + rcu_read_unlock(); + return count; +failed_vma: + kfree(new_tsk); +failed_tsk: + rcu_read_unlock(); + return -ENOMEM; +} + +DEFINE_PROC_ATTRIBUTE_RW(mmaptrace_pid); + +static ssize_t mmaptrace_len_store(void *priv, const char __user *buf, size_t count) +{ + char buffer[PROC_NUMBUF]; + unsigned long length; + int err = -1; + + memset(buffer, 0, 
sizeof(buffer)); + if (count > sizeof(buffer) - 1) + count = sizeof(buffer) - 1; + if (copy_from_user(buffer, buf, count)) { + return -EFAULT; + } + + err = _kstrtoul(strstrip(buffer), 0, &length); + if (err) + return -EINVAL; + mmap_len = length; + return count; +} + +static int mmaptrace_len_show(struct seq_file *m, void *v) +{ + seq_printf(m, "monitor len: %ld\n", mmap_len); + return 0; + +} + +DEFINE_PROC_ATTRIBUTE_RW(mmaptrace_len); + +static int before_do_brk(struct kprobe *p, struct pt_regs *regs) +{ + int ret; + + brk = 1; + if (regs->si < mmap_len){ + return 0; + } + + if (!current || !current->mm) + return 0; + ret = save_calltrace(regs); + return 0; +} + +static void after_do_brk(struct kprobe *p, struct pt_regs *regs, + unsigned long flags) +{ + return; +} + +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 13, 0) +static int handler_fault(struct kprobe *p, struct pt_regs *regs, int trapnr) +{ + pr_info("fault_handler: p->addr = 0x%p, trap #%dn", p->addr, trapnr); + return 0; +} +#endif + +static int mmaptrace_enable(void) +{ + int ret_mmap, ret_brk; + + kp_mmap.pre_handler = before_mmap_pgoff; + kp_mmap.post_handler = after_mmap_pgoff; +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 13, 0) + kp_mmap.fault_handler = handler_fault; +#endif + + kp_brk.pre_handler = before_do_brk; + kp_brk.post_handler = after_do_brk; +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 13, 0) + kp_brk.fault_handler = handler_fault; +#endif + + ret_mmap = register_kprobe(&kp_mmap); + if (ret_mmap < 0) { + pr_err("register_kprobe mmap failed, returned %d\n", ret_mmap); + return -REGISTER_FAILED; + } + + ret_brk = register_kprobe(&kp_brk); + if (ret_brk < 0) { + unregister_kprobe(&kp_mmap); + pr_err("register_kprobe brk failed, returned %d\n", ret_brk); + return -REGISTER_FAILED; + } + + pr_info("Planted kprobe at %p\n", kp_mmap.addr); + pr_info("Planted kprobe at %p\n", kp_brk.addr); + return 0; +} + +void mmaptrace_disable(void) +{ + unregister_kprobe(&kp_mmap); + unregister_kprobe(&kp_brk); + pr_info("kprobe at %p unregistered\n", kp_mmap.addr); + pr_info("kprobe at %p unregistered\n", kp_brk.addr); +} + +static int mmaptrace_enable_show(struct seq_file *m, void *v) +{ + seq_printf(m, "%d\n", (int)enable_mmaptrace); + return 0; +} + +static ssize_t mmaptrace_enable_store(void *priv, const char __user *buf, size_t count) +{ + char buffer[PROC_NUMBUF]; + int val; + int err = -1; + + memset(buffer, 0, sizeof(buffer)); + if (count > sizeof(buffer) - 1) + count = sizeof(buffer) - 1; + if (copy_from_user(buffer, buf, count)) { + return -EFAULT; + } + err = kstrtoint(strstrip(buffer), 0, &val); + + if (val == 1){ + if (!mmaptrace_enable()) + enable_mmaptrace = true; + }else if (val == 0){ + if (enable_mmaptrace){ + mmaptrace_disable(); + enable_mmaptrace = false; + } + } + return count; +} + +DEFINE_PROC_ATTRIBUTE_RW(mmaptrace_enable); + +int mmaptrace_init(void) +{ + struct proc_dir_entry *parent_dir; + struct proc_dir_entry *entry_print; + struct proc_dir_entry *entry_pid; + struct proc_dir_entry *entry_len; + struct proc_dir_entry *entry_enable; + + parent_dir = sysak_proc_mkdir("mmaptrace"); + if (!parent_dir) { + goto failed_root; + } + + entry_print = proc_create("mmaptrace_print", 0444, parent_dir, &mmaptrace_print_fops); + if(!entry_print) { + goto failed; + } + + entry_pid = proc_create("mmaptrace_pid", 0664, parent_dir, &mmaptrace_pid_fops); + if(!entry_pid) { + goto failed; + } + + entry_len = proc_create("mmaptrace_len", 0444, parent_dir, &mmaptrace_len_fops); + if(!entry_len) { + goto failed; + } + + 
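+	/* "mmaptrace_enable" arms or disarms the mmap/brk kprobes via mmaptrace_enable_store(). */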
entry_enable = proc_create("mmaptrace_enable", 0664, parent_dir, &mmaptrace_enable_fops); + if(!entry_enable) { + goto failed; + } + return 0; + +failed: + sysak_remove_proc_entry("mmaptrace"); +failed_root: + return -1; +} + +int mmaptrace_exit(void) +{ + struct list_head *tsk_entry; + struct list_head *vma_entry; + struct list_head *tsk_prev; + struct list_head *vma_prev; + + if (enable_mmaptrace){ + mmaptrace_disable(); + } + + list_for_each(tsk_entry, &threads_list){ + struct task_info *tsk = container_of(tsk_entry, struct task_info, task_list); + tsk_prev = tsk_entry->prev; + + list_del(tsk_entry); + kfree(tsk); + tsk_entry = tsk_prev; + } + + list_for_each(vma_entry, &threadvma_list){ + struct vma_info *vma = container_of(vma_entry, struct vma_info, list); + vma_prev = vma_entry->prev; + + list_del(vma_entry); + kfree(vma); + vma_entry = vma_prev; + } + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/modules/sched/noschedule.c b/source/lib/internal/kernel_module/modules/sched/noschedule.c new file mode 100644 index 0000000000000000000000000000000000000000..4014b8bacd76aa5bf552c8d4cc79a9646d6310e2 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/sched/noschedule.c @@ -0,0 +1,696 @@ +#define pr_fmt(fmt) "trace-nosched: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "sysak_mods.h" +#include "proc.h" + +static unsigned long (*__kallsyms_lookup_name)(const char *name); + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5,7,0) +#include +static struct kprobe kprobe_kallsyms_lookup_name = { + .symbol_name = "kallsyms_lookup_name" +}; + +static int init_symbol(void) +{ + int ret = -ENODEV; + + ret = register_kprobe(&kprobe_kallsyms_lookup_name); + if (!ret) { + __kallsyms_lookup_name = (void *)kprobe_kallsyms_lookup_name.addr; + unregister_kprobe(&kprobe_kallsyms_lookup_name); + + pr_info("kallsyms_lookup_name is %px\n", __kallsyms_lookup_name); + if (!__kallsyms_lookup_name) + return -ENODEV; + } + + return ret; +} +#else +static int init_symbol(void) +{ + __kallsyms_lookup_name = kallsyms_lookup_name; + return 0; +} +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) +#include +#else +#include +#include +#include +#endif + +//#define CONFIG_DEBUG_TRACE_NOSCHED +#define NUMBER_CHARACTER 40 +#define PROC_DIR_NAME "nosch" +#define NUM_TRACEPOINTS 1 +#define MAX_TRACE_ENTRIES (SZ_1K / sizeof(void *)) +#define PER_TRACE_ENTRIES_AVERAGE 8 + +#define MAX_STACE_TRACE_ENTRIES \ + (MAX_TRACE_ENTRIES / PER_TRACE_ENTRIES_AVERAGE) + + +/** + * If we call register_trace_sched_{wakeup,wakeup_new,switch,migrate_task}() + * directly in a kernel module, the compiler will complain about undefined + * symbol of __tracepoint_sched_{wakeup, wakeup_new, switch, migrate_task} + * because the kernel do not export the tracepoint symbol. Here is a workaround + * via for_each_kernel_tracepoint() to lookup the tracepoint and save. 
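+ * On kernels older than v4.4 even for_each_kernel_tracepoint() is not
+ * available to modules, so the section bounds __start___tracepoints_ptrs and
+ * __stop___tracepoints_ptrs are resolved through kallsyms instead (see
+ * nosch_for_each_kernel_tracepoint() below).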
+ */ +struct tracepoint_entry { + void *probe; + const char *name; + struct tracepoint *tp; +}; + +struct stack_entry { + unsigned int nr_entries; + unsigned long *entries; +}; + +struct per_cpu_stack_trace { + u64 last_timestamp; + struct hrtimer hrtimer; + struct task_struct *skip; + + unsigned int nr_stack_entries; + unsigned int nr_entries; + struct stack_entry stack_entries[MAX_STACE_TRACE_ENTRIES]; + unsigned long entries[MAX_TRACE_ENTRIES]; + + char comms[MAX_STACE_TRACE_ENTRIES][TASK_COMM_LEN]; + pid_t pids[MAX_STACE_TRACE_ENTRIES]; + u64 duration[MAX_STACE_TRACE_ENTRIES]; + u64 stamp[MAX_STACE_TRACE_ENTRIES]; +}; + +struct noschedule_info { + struct tracepoint_entry tp_entries[NUM_TRACEPOINTS]; + unsigned int tp_initalized; + + struct per_cpu_stack_trace __percpu *stack_trace; +}; + +static int nosched_ref; + +/* Whether to enable the tracker. */ +static bool trace_enable; + +/* Default sampling period is 4 000 000ns. The minimum value is 1000000ns. */ +static u64 sampling_period = 4 * 1000 * 1000UL; + +/** + * How many nanoseconds should we record the stack trace. + * Default is 10 000 000ns. + */ +static u64 duration_threshold = 10 * 1000 * 1000UL; + +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 1, 0) +static void (*save_stack_trace_effective)(struct pt_regs *regs, + struct stack_trace *trace); + +static inline void stack_trace_skip_hardirq_init(void) +{ + save_stack_trace_effective = + (void *)__kallsyms_lookup_name("save_stack_trace_regs"); +} + +static inline void store_stack_trace(struct pt_regs *regs, + struct stack_entry *stack_entry, + unsigned long *entries, + unsigned int max_entries, int skip) +{ + struct stack_trace stack_trace; + + stack_trace.nr_entries = 0; + stack_trace.max_entries = max_entries; + stack_trace.entries = entries; + stack_trace.skip = skip; + + if (likely(regs && save_stack_trace_effective)) + save_stack_trace_effective(regs, &stack_trace); + else + save_stack_trace(&stack_trace); + + stack_entry->entries = entries; + stack_entry->nr_entries = stack_trace.nr_entries; + + /* + * Some daft arches put -1 at the end to indicate its a full trace. + * + * this is buggy anyway, since it takes a whole extra entry so a + * complete trace that maxes out the entries provided will be reported + * as incomplete, friggin useless . 
+ */ + if (stack_entry->nr_entries != 0 && + stack_entry->entries[stack_entry->nr_entries - 1] == ULONG_MAX) + stack_entry->nr_entries--; +} +#else +static unsigned int (*stack_trace_save_skip_hardirq)(struct pt_regs *regs, + unsigned long *store, + unsigned int size, + unsigned int skipnr); + +static inline void stack_trace_skip_hardirq_init(void) +{ + stack_trace_save_skip_hardirq = + (void *)__kallsyms_lookup_name("stack_trace_save_regs"); +} + +static inline void store_stack_trace(struct pt_regs *regs, + struct stack_entry *stack_entry, + unsigned long *entries, + unsigned int max_entries, int skip) +{ + stack_entry->entries = entries; + if (regs && stack_trace_save_skip_hardirq) + stack_entry->nr_entries = stack_trace_save_skip_hardirq(regs, + entries, max_entries, skip); + else + stack_entry->nr_entries = stack_trace_save(entries, max_entries, + skip); +} +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) +static struct tracepoint **nosch__start___tracepoints_ptrs; +static struct tracepoint **nosch__stop___tracepoints_ptrs; + +static int nosch_init_local_tracepoints(void) +{ + nosch__start___tracepoints_ptrs = (void *)__kallsyms_lookup_name("__start___tracepoints_ptrs"); + nosch__stop___tracepoints_ptrs = (void *)__kallsyms_lookup_name("__stop___tracepoints_ptrs"); + if (nosch__start___tracepoints_ptrs == NULL || nosch__stop___tracepoints_ptrs == NULL) { + return -1; + } + return 0; +} + +static void nosch_for_each_tracepoint_range(struct tracepoint * const *begin, + struct tracepoint * const *end, + void (*fct)(struct tracepoint *tp, void *priv), + void *priv) +{ + struct tracepoint * const *iter; + + if (!begin) + return; + for (iter = begin; iter < end; iter++) + fct(*iter, priv); +} + +/** + * nosch_for_each_kernel_tracepoint - iteration on all kernel tracepoints + * @fct: callback + * @priv: private data + */ +void nosch_for_each_kernel_tracepoint(void (*fct)(struct tracepoint *tp, void *priv), + void *priv) +{ + nosch_for_each_tracepoint_range(nosch__start___tracepoints_ptrs, + nosch__stop___tracepoints_ptrs, fct, priv); +} +#endif + +static bool __stack_trace_record(struct per_cpu_stack_trace *stack_trace, + struct pt_regs *regs, u64 duration) +{ + unsigned int nr_entries, nr_stack_entries; + struct stack_entry *stack_entry; + + nr_stack_entries = stack_trace->nr_stack_entries; + if (nr_stack_entries >= ARRAY_SIZE(stack_trace->stack_entries)) + return false; + + nr_entries = stack_trace->nr_entries; + if (nr_entries >= ARRAY_SIZE(stack_trace->entries)) + return false; + + /* Save the thread command, pid and duration. */ + strlcpy(stack_trace->comms[nr_stack_entries], current->comm, + TASK_COMM_LEN); + stack_trace->pids[nr_stack_entries] = current->pid; + stack_trace->duration[nr_stack_entries] = duration; + stack_trace->stamp[nr_stack_entries] = stack_trace->last_timestamp/1000; + + stack_entry = stack_trace->stack_entries + nr_stack_entries; + store_stack_trace(regs, stack_entry, stack_trace->entries + nr_entries, + ARRAY_SIZE(stack_trace->entries) - nr_entries, 0); + stack_trace->nr_entries += stack_entry->nr_entries; + + /** + * Ensure that the initialisation of @stack_entry is complete before we + * update the @nr_stack_entries. + */ + smp_store_release(&stack_trace->nr_stack_entries, nr_stack_entries + 1); + + if (unlikely(stack_trace->nr_entries >= + ARRAY_SIZE(stack_trace->entries))) { + pr_info("BUG: MAX_TRACE_ENTRIES too low on cpu: %d!\n", + smp_processor_id()); + + return false; + } + + return true; +} + +/* Note: Must be called with irq disabled. 
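+ * stack_trace_record() is only invoked from the hrtimer handler, which runs
+ * in hardirq context, so the per-cpu buffers are always touched with
+ * interrupts off.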
*/ +static inline bool stack_trace_record(struct per_cpu_stack_trace *stack_trace, + u64 delta) +{ + if (unlikely(delta >= duration_threshold)) + return __stack_trace_record(stack_trace, get_irq_regs(), delta); + + return false; +} + +static enum hrtimer_restart trace_nosched_hrtimer_handler(struct hrtimer *hrtimer) +{ + struct pt_regs *regs = get_irq_regs(); + struct per_cpu_stack_trace *stack_trace; + u64 now = local_clock(); + + stack_trace = container_of(hrtimer, struct per_cpu_stack_trace, + hrtimer); + /** + * Skip the idle task and make sure we are not only the + * running task on the CPU. If we are interrupted from + * user mode, it indicate that we are not executing in + * the kernel space, so we should also skip it. + */ + if (!is_idle_task(current) && regs && !user_mode(regs) && + !single_task_running()) { + u64 delta; + + delta = now - stack_trace->last_timestamp; + if (!stack_trace->skip && stack_trace_record(stack_trace, delta)) + stack_trace->skip = current; + } else { + stack_trace->last_timestamp = now; + } + + hrtimer_forward_now(hrtimer, ns_to_ktime(sampling_period)); + + return HRTIMER_RESTART; +} + +/* interrupts should be disabled from __schedule() */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) +static void probe_sched_switch(void *priv, + struct task_struct *prev, + struct task_struct *next) +#else +static void probe_sched_switch(void *priv, bool preempt, + struct task_struct *prev, + struct task_struct *next) +#endif +{ + u64 now = local_clock(); + struct per_cpu_stack_trace __percpu *stack_trace = priv; + struct per_cpu_stack_trace *cpu_stack_trace = this_cpu_ptr(stack_trace); + u64 last = cpu_stack_trace->last_timestamp; + + if (unlikely(!trace_enable)) + return; + + cpu_stack_trace->last_timestamp = now; + if (unlikely(cpu_stack_trace->skip)) { + unsigned int index = cpu_stack_trace->nr_stack_entries - 1; + + cpu_stack_trace->skip = NULL; + cpu_stack_trace->duration[index] = now - last; + } + +} + +static struct noschedule_info nosched_info = { + .tp_entries = { + [0] = { + .name = "sched_switch", + .probe = probe_sched_switch, + }, + }, + .tp_initalized = 0, +}; + +static inline bool is_tracepoint_lookup_success(struct noschedule_info *info) +{ + return info->tp_initalized == ARRAY_SIZE(info->tp_entries); +} + +static void tracepoint_lookup(struct tracepoint *tp, void *priv) +{ + int i; + struct noschedule_info *info = priv; + + if (is_tracepoint_lookup_success(info)) + return; + + for (i = 0; i < ARRAY_SIZE(info->tp_entries); i++) { + if (info->tp_entries[i].tp || !info->tp_entries[i].name || + strcmp(tp->name, info->tp_entries[i].name)) + continue; + info->tp_entries[i].tp = tp; + info->tp_initalized++; + } +} + +static int threshold_show(struct seq_file *m, void *ptr) +{ + seq_printf(m, "%llu ms\n", duration_threshold/(1000*1000)); + + return 0; +} + +static ssize_t threshold_store(void *priv, const char __user *buf, size_t count) +{ + u64 val; + + if (kstrtou64_from_user(buf, count, 0, &val)) + return -EINVAL; + + duration_threshold = val; + + return count; +} +DEFINE_PROC_ATTRIBUTE_RW(threshold); + +static int enable_show(struct seq_file *m, void *ptr) +{ + seq_printf(m, "%s\n", trace_enable ? 
"enabled" : "disabled"); + + return 0; +} + +static void each_hrtimer_start(void *priv) +{ + u64 now = local_clock(); + struct per_cpu_stack_trace __percpu *stack_trace = priv; + struct hrtimer *hrtimer = this_cpu_ptr(&stack_trace->hrtimer); + + hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_PINNED); + hrtimer->function = trace_nosched_hrtimer_handler; + + __this_cpu_write(stack_trace->last_timestamp, now); + + hrtimer_start_range_ns(hrtimer, ns_to_ktime(sampling_period), 0, + HRTIMER_MODE_REL_PINNED); +} + +static inline void trace_nosched_hrtimer_start(void) +{ + on_each_cpu(each_hrtimer_start, nosched_info.stack_trace, true); +} + +static inline void trace_nosched_hrtimer_cancel(void) +{ + int cpu; + + for_each_online_cpu(cpu) + hrtimer_cancel(per_cpu_ptr(&nosched_info.stack_trace->hrtimer, + cpu)); +} + +static int trace_nosched_register_tp(void) +{ + int i; + struct noschedule_info *info = &nosched_info; + + for (i = 0; i < ARRAY_SIZE(info->tp_entries); i++) { + int ret; + struct tracepoint_entry *entry = info->tp_entries + i; + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) + ret = tracepoint_probe_register(entry->tp->name, entry->probe, + info->stack_trace); +#else + ret = tracepoint_probe_register(entry->tp, entry->probe, + info->stack_trace); +#endif + if (ret && ret != -EEXIST) { + pr_err("sched trace: can not activate tracepoint " + "probe to %s with error code: %d\n", + entry->name, ret); + while (i--) { + entry = info->tp_entries + i; +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) + tracepoint_probe_unregister(entry->tp->name, + entry->probe, + info->stack_trace); +#else + tracepoint_probe_unregister(entry->tp, + entry->probe, + info->stack_trace); +#endif + } + return ret; + } + } + + return 0; +} + +static int trace_nosched_unregister_tp(void) +{ + int i; + struct noschedule_info *info = &nosched_info; + + for (i = 0; i < ARRAY_SIZE(info->tp_entries); i++) { + int ret; +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) + ret = tracepoint_probe_unregister(info->tp_entries[i].tp->name, + info->tp_entries[i].probe, + info->stack_trace); +#else + ret = tracepoint_probe_unregister(info->tp_entries[i].tp, + info->tp_entries[i].probe, + info->stack_trace); +#endif + if (ret && ret != -ENOENT) { + pr_err("sched trace: can not inactivate tracepoint " + "probe to %s with error code: %d\n", + info->tp_entries[i].name, ret); + return ret; + } + } + + return 0; +} + +static ssize_t enable_store(void *priv, const char __user *buf, size_t count) +{ + int enable; + + if (kstrtoint_from_user(buf, count, 16, &enable)) + return -EINVAL; + + if (!!enable == !!trace_enable) + return count; + + if (enable) { + if (!trace_nosched_register_tp()) { + trace_nosched_hrtimer_start(); + sysak_module_get(&nosched_ref); + } + else + return -EAGAIN; + } else { + trace_nosched_hrtimer_cancel(); + if (trace_nosched_unregister_tp()) + return -EAGAIN; + sysak_module_put(&nosched_ref); + } + + trace_enable = enable; + return count; +} +DEFINE_PROC_ATTRIBUTE_RW(enable); + +static void each_stack_trace_clear(void *priv) +{ + struct per_cpu_stack_trace __percpu *stack_trace = priv; + struct per_cpu_stack_trace *cpu_stack_trace = this_cpu_ptr(stack_trace); + + cpu_stack_trace->nr_entries = 0; + cpu_stack_trace->nr_stack_entries = 0; +} + +static inline void seq_print_stack_trace(struct seq_file *m, + struct stack_entry *entry) +{ + int i; + + if (WARN_ON(!entry->entries)) + return; + + for (i = 0; i < entry->nr_entries; i++) + seq_printf(m, "%*c%pS\n", 5, ' ', (void *)entry->entries[i]); +} + +static int 
stack_trace_show(struct seq_file *m, void *ptr) +{ + int cpu; + struct per_cpu_stack_trace __percpu *stack_trace = m->private; + + for_each_online_cpu(cpu) { + int i; + unsigned int nr; + struct per_cpu_stack_trace *cpu_stack_trace; + + cpu_stack_trace = per_cpu_ptr(stack_trace, cpu); + + /** + * Paired with smp_store_release() in the + * __stack_trace_record(). + */ + nr = smp_load_acquire(&cpu_stack_trace->nr_stack_entries); + if (!nr) + continue; + +// seq_printf(m, " cpu: %d\n", cpu); + + for (i = 0; i < nr; i++) { + struct stack_entry *entry; + + entry = cpu_stack_trace->stack_entries + i; + seq_printf(m, "%*ccpu:%d\tcommand:%s\tpid:%d\tlatency:%lluus\tSTAMP:%llu\n", + 5, ' ', cpu, cpu_stack_trace->comms[i], + cpu_stack_trace->pids[i], + cpu_stack_trace->duration[i] / (1000UL), + cpu_stack_trace->stamp[i]); + seq_print_stack_trace(m, entry); + seq_putc(m, '\n'); + + cond_resched(); + } + } + + return 0; +} + +static ssize_t stack_trace_store(void *priv, const char __user *buf, + size_t count) +{ + int clear; + + if (kstrtoint_from_user(buf, count, 10, &clear) || clear != 0) + return -EINVAL; + + on_each_cpu(each_stack_trace_clear, priv, true); + + return count; +} +DEFINE_PROC_ATTRIBUTE_RW(stack_trace); + +#ifdef CONFIG_DEBUG_TRACE_NOSCHED +#include + +static int nosched_test_show(struct seq_file *m, void *ptr) +{ + return 0; +} + +static ssize_t nosched_test_store(void *priv, const char __user *buf, + size_t count) +{ + int delay; + + if (kstrtoint_from_user(buf, count, 0, &delay) || delay == 0) + return -EINVAL; + + mdelay(delay); + + return count; +} +DEFINE_PROC_ATTRIBUTE_RW(nosched_test); +#endif + +int trace_noschedule_init(struct proc_dir_entry *root_dir) +{ + int ret = 0; + struct proc_dir_entry *parent_dir; + struct noschedule_info *info = &nosched_info; + + if((ret=init_symbol())) + return ret; + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) + if (nosch_init_local_tracepoints()) + return -ENODEV; +#endif + + stack_trace_skip_hardirq_init(); + + /* Lookup for the tracepoint that we needed */ + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) + nosch_for_each_kernel_tracepoint(tracepoint_lookup, info); +#else + for_each_kernel_tracepoint(tracepoint_lookup, info); +#endif + + if (!is_tracepoint_lookup_success(info)) + return -ENODEV; + + info->stack_trace = alloc_percpu(struct per_cpu_stack_trace); + if (!info->stack_trace) + return -ENOMEM; + + parent_dir = proc_mkdir(PROC_DIR_NAME, root_dir); + if (!parent_dir) + goto free_buf; + if (!proc_create_data("threshold", 0644, parent_dir, &threshold_fops, + info->stack_trace)) + goto remove_proc; + if (!proc_create_data("enable", 0644, parent_dir, &enable_fops, + info->stack_trace)) + goto remove_proc; + if (!proc_create_data("stack_trace", 0, parent_dir, &stack_trace_fops, + info->stack_trace)) + goto remove_proc; +#ifdef CONFIG_DEBUG_TRACE_NOSCHED + if (!proc_create_data("nosched_test", 0644, parent_dir, + &nosched_test_fops, info->stack_trace)) + goto remove_proc; +#endif + + return 0; +remove_proc: + remove_proc_subtree(PROC_DIR_NAME, root_dir); +free_buf: + free_percpu(info->stack_trace); + + return -ENOMEM; +} + +void trace_noschedule_exit(void) +{ + if (trace_enable) { + trace_nosched_hrtimer_cancel(); + trace_nosched_unregister_tp(); + tracepoint_synchronize_unregister(); + } + free_percpu(nosched_info.stack_trace); +} diff --git a/source/lib/internal/kernel_module/modules/sched/trace_irqoff.c b/source/lib/internal/kernel_module/modules/sched/trace_irqoff.c new file mode 100644 index 
0000000000000000000000000000000000000000..7a7b5707314f99c28018442401a12bc9d9b148c5 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/sched/trace_irqoff.c @@ -0,0 +1,634 @@ +#define pr_fmt(fmt) "trace-irqoff: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "sysak_mods.h" +#include "proc.h" + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) +#include +#else +#include +#endif + +#define MAX_TRACE_ENTRIES (SZ_1K / sizeof(unsigned long)) +#define PER_TRACE_ENTRIES_AVERAGE (8 + 8) + +#define MAX_STACE_TRACE_ENTRIES \ + (MAX_TRACE_ENTRIES / PER_TRACE_ENTRIES_AVERAGE) + +#define MAX_LATENCY_RECORD 10 + +static int irqoff_ref; +static bool trace_enable; + +/** + * Default sampling period is 4,000,000ns. The minimum value is 1,000,000ns. + */ +static u64 sampling_period = 4 * 1000 * 1000UL; + +/** + * How many times should we record the stack trace. + * Default is 10,000,000ns. + */ +static u64 trace_irqoff_latency = 10 * 1000 * 1000UL; + +struct irqoff_trace { + unsigned int nr_entries; + unsigned long *entries; +}; + +struct stack_trace_metadata { + u64 last_timestamp; + unsigned long nr_irqoff_trace; + struct irqoff_trace trace[MAX_STACE_TRACE_ENTRIES]; + unsigned long nr_entries; + unsigned long entries[MAX_TRACE_ENTRIES]; + unsigned long latency_count[MAX_LATENCY_RECORD]; + + /* Task command names*/ + char comms[MAX_STACE_TRACE_ENTRIES][TASK_COMM_LEN]; + + /* Task pids*/ + pid_t pids[MAX_STACE_TRACE_ENTRIES]; + + struct { + u64 nsecs:63; + u64 more:1; + } latency[MAX_STACE_TRACE_ENTRIES]; + u64 stamp[MAX_STACE_TRACE_ENTRIES]; +}; + +struct per_cpu_stack_trace { + struct timer_list timer; + struct hrtimer hrtimer; + struct stack_trace_metadata hardirq_trace; + struct stack_trace_metadata softirq_trace; + + bool softirq_delayed; +}; + +static struct per_cpu_stack_trace __percpu *cpu_stack_trace; + +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 1, 0) +static void (*save_stack_trace_skip_hardirq)(struct pt_regs *regs, + struct stack_trace *trace); + +static inline void stack_trace_skip_hardirq_init(void) +{ + save_stack_trace_skip_hardirq = + (void *)kallsyms_lookup_name("save_stack_trace_regs"); +} + +static inline void store_stack_trace(struct pt_regs *regs, + struct irqoff_trace *trace, + unsigned long *entries, + unsigned int max_entries, int skip) +{ + struct stack_trace stack_trace; + + stack_trace.nr_entries = 0; + stack_trace.max_entries = max_entries; + stack_trace.entries = entries; + stack_trace.skip = skip; + + if (regs && save_stack_trace_skip_hardirq) + save_stack_trace_skip_hardirq(regs, &stack_trace); + else + save_stack_trace(&stack_trace); + + trace->entries = entries; + trace->nr_entries = stack_trace.nr_entries; + + /* + * Some daft arches put -1 at the end to indicate its a full trace. + * + * this is buggy anyway, since it takes a whole extra entry so a + * complete trace that maxes out the entries provided will be reported + * as incomplete, friggin useless . 
+ */ + if (trace->nr_entries != 0 && + trace->entries[trace->nr_entries - 1] == ULONG_MAX) + trace->nr_entries--; +} +#else +static unsigned int (*stack_trace_save_skip_hardirq)(struct pt_regs *regs, + unsigned long *store, + unsigned int size, + unsigned int skipnr); + +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 6, 0) +static inline void stack_trace_skip_hardirq_init(void) +{ + stack_trace_save_skip_hardirq = + (void *)kallsyms_lookup_name("stack_trace_save_regs"); +} +#else /* LINUX_VERSION_CODE */ + +static int noop_pre_handler(struct kprobe *p, struct pt_regs *regs){ + return 0; +} + +/** + * + * We can only find the kallsyms_lookup_name's addr by using kprobes, then use + * the unexported kallsyms_lookup_name to find symbols. + */ +static void stack_trace_skip_hardirq_init(void) +{ + int ret; + struct kprobe kp; + unsigned long (*kallsyms_lookup_name_fun)(const char *name); + + + ret = -1; + kp.symbol_name = "kallsyms_lookup_name"; + kp.pre_handler = noop_pre_handler; + stack_trace_save_skip_hardirq = NULL; + + ret = register_kprobe(&kp); + if (ret < 0) { + return; + } + + kallsyms_lookup_name_fun = (void*)kp.addr; + unregister_kprobe(&kp); + + stack_trace_save_skip_hardirq = + (void *)kallsyms_lookup_name_fun("stack_trace_save_regs"); +} +#endif /* LINUX_VERSION_CODE */ + +static inline void store_stack_trace(struct pt_regs *regs, + struct irqoff_trace *trace, + unsigned long *entries, + unsigned int max_entries, int skip) +{ + trace->entries = entries; + if (regs && stack_trace_save_skip_hardirq) + trace->nr_entries = stack_trace_save_skip_hardirq(regs, entries, + max_entries, + skip); + else + trace->nr_entries = stack_trace_save(entries, max_entries, + skip); +} +#endif + +/** + * Note: Must be called with irq disabled. + */ +static bool save_trace(struct pt_regs *regs, bool hardirq, u64 latency, u64 stamp) +{ + unsigned long nr_entries, nr_irqoff_trace; + struct irqoff_trace *trace; + struct stack_trace_metadata *stack_trace; + + stack_trace = hardirq ? this_cpu_ptr(&cpu_stack_trace->hardirq_trace) : + this_cpu_ptr(&cpu_stack_trace->softirq_trace); + + nr_irqoff_trace = stack_trace->nr_irqoff_trace; + if (unlikely(nr_irqoff_trace >= MAX_STACE_TRACE_ENTRIES)) + return false; + + nr_entries = stack_trace->nr_entries; + if (unlikely(nr_entries >= MAX_TRACE_ENTRIES - 1)) + return false; + + strlcpy(stack_trace->comms[nr_irqoff_trace], current->comm, + TASK_COMM_LEN); + stack_trace->pids[nr_irqoff_trace] = current->pid; + stack_trace->latency[nr_irqoff_trace].nsecs = latency; + stack_trace->latency[nr_irqoff_trace].more = !hardirq && regs; + stack_trace->stamp[nr_irqoff_trace] = stamp; + + trace = stack_trace->trace + nr_irqoff_trace; + store_stack_trace(regs, trace, stack_trace->entries + nr_entries, + MAX_TRACE_ENTRIES - nr_entries, 0); + stack_trace->nr_entries += trace->nr_entries; + + /** + * Ensure that the initialisation of @trace is complete before we + * update the @nr_irqoff_trace. 
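+	 * The reader side in trace_latency_show_one() pairs with this via
+	 * smp_load_acquire() on @nr_irqoff_trace.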
+ */ + smp_store_release(&stack_trace->nr_irqoff_trace, nr_irqoff_trace + 1); + + if (unlikely(stack_trace->nr_entries >= MAX_TRACE_ENTRIES - 1)) { + pr_info("BUG: MAX_TRACE_ENTRIES too low!"); + + return false; + } + + return true; +} + +static bool trace_irqoff_record(u64 delta, bool hardirq, bool skip, u64 stamp) +{ + int index = 0; + u64 throttle = sampling_period << 1; + u64 delta_old = delta; + + if (delta < throttle) + return false; + + delta >>= 1; + while (delta > throttle) { + index++; + delta >>= 1; + } + + if (unlikely(index >= MAX_LATENCY_RECORD)) + index = MAX_LATENCY_RECORD - 1; + + if (hardirq) + __this_cpu_inc(cpu_stack_trace->hardirq_trace.latency_count[index]); + else if (!skip) + __this_cpu_inc(cpu_stack_trace->softirq_trace.latency_count[index]); + + if (unlikely(delta_old >= trace_irqoff_latency)) + save_trace(skip ? get_irq_regs() : NULL, hardirq, delta_old, stamp); + + return true; +} + +static enum hrtimer_restart trace_irqoff_hrtimer_handler(struct hrtimer *hrtimer) +{ + u64 now = local_clock(), delta, stamp; + + stamp = __this_cpu_read(cpu_stack_trace->hardirq_trace.last_timestamp); + delta = now - stamp; + __this_cpu_write(cpu_stack_trace->hardirq_trace.last_timestamp, now); + + if (trace_irqoff_record(delta, true, true, stamp)) { + __this_cpu_write(cpu_stack_trace->softirq_trace.last_timestamp, + now); + } else if (!__this_cpu_read(cpu_stack_trace->softirq_delayed)) { + u64 delta_soft; + + stamp = __this_cpu_read(cpu_stack_trace->softirq_trace.last_timestamp); + delta_soft = now - stamp; + + if (unlikely(delta_soft >= trace_irqoff_latency)) { + __this_cpu_write(cpu_stack_trace->softirq_delayed, true); + trace_irqoff_record(delta_soft, false, true, stamp); + } + } + + hrtimer_forward_now(hrtimer, ns_to_ktime(sampling_period)); + + return HRTIMER_RESTART; +} + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 0) +static void trace_irqoff_timer_handler(unsigned long data) +#else +static void trace_irqoff_timer_handler(struct timer_list *timer) +#endif +{ + u64 now = local_clock(), delta, stamp; +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 0) + struct timer_list *timer = (struct timer_list *)data; +#endif + + stamp = __this_cpu_read(cpu_stack_trace->softirq_trace.last_timestamp); + delta = now - stamp; + __this_cpu_write(cpu_stack_trace->softirq_trace.last_timestamp, now); + + __this_cpu_write(cpu_stack_trace->softirq_delayed, false); + + trace_irqoff_record(delta, false, false, stamp); + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 7, 0) + mod_timer_pinned(timer, + jiffies + msecs_to_jiffies(sampling_period / 1000000UL)); +#else + mod_timer(timer, + jiffies + msecs_to_jiffies(sampling_period / 1000000UL)); +#endif +} + +static void smp_clear_stack_trace(void *info) +{ + int i; + struct per_cpu_stack_trace *stack_trace = info; + + stack_trace->hardirq_trace.nr_entries = 0; + stack_trace->hardirq_trace.nr_irqoff_trace = 0; + stack_trace->softirq_trace.nr_entries = 0; + stack_trace->softirq_trace.nr_irqoff_trace = 0; + + for (i = 0; i < MAX_LATENCY_RECORD; i++) { + stack_trace->hardirq_trace.latency_count[i] = 0; + stack_trace->softirq_trace.latency_count[i] = 0; + } +} + +static void smp_timers_start(void *info) +{ + u64 now = local_clock(); + struct per_cpu_stack_trace *stack_trace = info; + struct hrtimer *hrtimer = &stack_trace->hrtimer; + struct timer_list *timer = &stack_trace->timer; + + stack_trace->hardirq_trace.last_timestamp = now; + stack_trace->softirq_trace.last_timestamp = now; + + hrtimer_start_range_ns(hrtimer, ns_to_ktime(sampling_period), + 0, 
HRTIMER_MODE_REL_PINNED); + + timer->expires = jiffies + msecs_to_jiffies(sampling_period / 1000000UL); + add_timer_on(timer, smp_processor_id()); +} + + +static void seq_print_stack_trace(struct seq_file *m, struct irqoff_trace *trace) +{ + int i; + + if (WARN_ON(!trace->entries)) + return; + + for (i = 0; i < trace->nr_entries; i++) + seq_printf(m, "%*c%pS\n", 5, ' ', (void *)trace->entries[i]); +} + +static void trace_latency_show_one(struct seq_file *m, void *v, bool hardirq) +{ + int cpu; + + for_each_online_cpu(cpu) { + int i; + unsigned long nr_irqoff_trace; + struct stack_trace_metadata *stack_trace; + + stack_trace = hardirq ? + per_cpu_ptr(&cpu_stack_trace->hardirq_trace, cpu) : + per_cpu_ptr(&cpu_stack_trace->softirq_trace, cpu); + + /** + * Paired with smp_store_release() in the save_trace(). + */ + nr_irqoff_trace = smp_load_acquire(&stack_trace->nr_irqoff_trace); + + if (!nr_irqoff_trace) + continue; + + for (i = 0; i < nr_irqoff_trace; i++) { + struct irqoff_trace *trace = stack_trace->trace + i; + + seq_printf(m, "%*ccpu:%d\tcommand:%s\tpid:%d\tlatency:%lu%s\tSTAMP:%llu\n", + 5, ' ', cpu, stack_trace->comms[i], + stack_trace->pids[i], + stack_trace->latency[i].nsecs / (1000 * 1000UL), + stack_trace->latency[i].more ? "+ms" : "ms", + stack_trace->stamp[i]); + seq_print_stack_trace(m, trace); + seq_putc(m, '\n'); + + cond_resched(); + } + } +} + +static int trace_latency_show(struct seq_file *m, void *v) +{ + int cpu; + seq_printf(m, "trace_irqoff_latency: %llums\n\n", + trace_irqoff_latency / (1000 * 1000UL)); + + seq_puts(m, " hardirq:\n"); + trace_latency_show_one(m, v, true); + + seq_puts(m, " softirq:\n"); + trace_latency_show_one(m, v, false); + + for_each_online_cpu(cpu) + smp_call_function_single(cpu, smp_clear_stack_trace, + per_cpu_ptr(cpu_stack_trace, cpu), + true); + return 0; +} + + +static ssize_t trace_latency_store(void *priv, const char __user *buf, size_t count) +{ + u64 latency; + + if (kstrtou64_from_user(buf, count, 0, &latency)) + return -EINVAL; + + if (latency == 0) { + int cpu; + + for_each_online_cpu(cpu) + smp_call_function_single(cpu, smp_clear_stack_trace, + per_cpu_ptr(cpu_stack_trace, cpu), + true); + return count; + } else if (latency < (sampling_period << 1) / (1000 * 1000UL)) + return -EINVAL; + + trace_irqoff_latency = latency; + + return count; +} + +DEFINE_PROC_ATTRIBUTE_RW(trace_latency); + +static void trace_irqoff_start_timers(void) +{ + int cpu; + + for_each_online_cpu(cpu) { + struct hrtimer *hrtimer; + struct timer_list *timer; + + hrtimer = per_cpu_ptr(&cpu_stack_trace->hrtimer, cpu); + hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_PINNED); + hrtimer->function = trace_irqoff_hrtimer_handler; + + timer = per_cpu_ptr(&cpu_stack_trace->timer, cpu); +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 7, 0) + __setup_timer(timer, trace_irqoff_timer_handler, + (unsigned long)timer, TIMER_IRQSAFE); +#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 0) + timer->flags = TIMER_PINNED | TIMER_IRQSAFE; + setup_timer(timer, trace_irqoff_timer_handler, + (unsigned long)timer); +#else + timer_setup(timer, trace_irqoff_timer_handler, + TIMER_PINNED | TIMER_IRQSAFE); +#endif + + smp_call_function_single(cpu, smp_timers_start, + per_cpu_ptr(cpu_stack_trace, cpu), + true); + } +} + +static void trace_irqoff_cancel_timers(void) +{ + int cpu; + + for_each_online_cpu(cpu) { + struct hrtimer *hrtimer; + struct timer_list *timer; + + hrtimer = per_cpu_ptr(&cpu_stack_trace->hrtimer, cpu); + hrtimer_cancel(hrtimer); + + timer = 
per_cpu_ptr(&cpu_stack_trace->timer, cpu); + del_timer_sync(timer); + } +} + +static int enable_show(struct seq_file *m, void *ptr) +{ + seq_printf(m, "%s\n", trace_enable ? "enabled" : "disabled"); + + return 0; +} + +static ssize_t enable_store(void *priv, const char __user *buf, size_t count) +{ + bool enable; + + if (kstrtobool_from_user(buf, count, &enable)) + return -EINVAL; + + if (!!enable == !!trace_enable) + return count; + + if (enable) { + trace_irqoff_start_timers(); + sysak_module_get(&irqoff_ref); + } + else { + trace_irqoff_cancel_timers(); + sysak_module_put(&irqoff_ref); + } + + trace_enable = enable; + + return count; +} +DEFINE_PROC_ATTRIBUTE_RW(enable); + +static int sampling_period_show(struct seq_file *m, void *ptr) +{ + seq_printf(m, "%llums\n", sampling_period / (1000 * 1000UL)); + + return 0; +} + +static ssize_t sampling_period_store(void *priv, const char __user *buf, size_t count) +{ + unsigned long period; + + if (trace_enable) + return -EINVAL; + + if (kstrtoul_from_user(buf, count, 0, &period)) + return -EINVAL; + + period *= 1000 * 1000UL; + if (period > (trace_irqoff_latency >> 1)) + trace_irqoff_latency = period << 1; + + sampling_period = period; + + return count; +} +DEFINE_PROC_ATTRIBUTE_RW(sampling_period); + + +extern int trace_noschedule_init(struct proc_dir_entry *root_dir); +extern void trace_noschedule_exit(void); +extern int trace_runqlat_init(struct proc_dir_entry *root_dir); +extern void trace_runqlat_exit(void); + +int trace_irqoff_init(void) +{ + int ret; + struct proc_dir_entry *root_dir = NULL; + struct proc_dir_entry *parent_dir; + + cpu_stack_trace = alloc_percpu(struct per_cpu_stack_trace); + if (!cpu_stack_trace) + return -ENOMEM; + + stack_trace_skip_hardirq_init(); + + root_dir = sysak_proc_mkdir("runlatency"); + if (!root_dir) { + ret = -ENOMEM; + goto free_percpu; + } + + parent_dir = proc_mkdir("irqoff", root_dir); + if (!parent_dir) { + ret = -ENOMEM; + goto remove_root; + } + + if (!proc_create("latency", S_IRUSR | S_IWUSR, parent_dir, + &trace_latency_fops)){ + ret = -ENOMEM; + goto remove_proc; + } + + if (!proc_create("enable", S_IRUSR | S_IWUSR, parent_dir, &enable_fops)){ + ret = -ENOMEM; + goto remove_proc; + } + + + if (!proc_create("period", S_IRUSR | S_IWUSR, parent_dir, + &sampling_period_fops)){ + ret = -ENOMEM; + goto remove_proc; + } + + ret = trace_noschedule_init(root_dir); + if (ret){ + goto remove_proc; + } + + ret = trace_runqlat_init(root_dir); + if (ret){ + trace_noschedule_exit(); + goto remove_proc; + } + + return 0; + +remove_proc: + remove_proc_subtree("irqoff", root_dir); +remove_root: + sysak_remove_proc_entry("runlatency"); +free_percpu: + free_percpu(cpu_stack_trace); + + return -ENOMEM; +} + +void trace_irqoff_exit(void) +{ + if (trace_enable) + trace_irqoff_cancel_timers(); + trace_noschedule_exit(); + trace_runqlat_exit(); + free_percpu(cpu_stack_trace); +} + diff --git a/source/lib/internal/kernel_module/modules/sched/trace_runqlat.c b/source/lib/internal/kernel_module/modules/sched/trace_runqlat.c new file mode 100644 index 0000000000000000000000000000000000000000..7879f77a5d03548842ca4a52b9152196c0a854b2 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/sched/trace_runqlat.c @@ -0,0 +1,675 @@ +#define pr_fmt(fmt) "runqlat: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "sysak_mods.h" +#include "proc.h" + +static unsigned long (*__kallsyms_lookup_name)(const char *name); + 
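+/*
+ * Same kallsyms bootstrap as in noschedule.c: on v5.7+ kernels resolve
+ * kallsyms_lookup_name() through a temporary kprobe, on older kernels use the
+ * exported symbol directly.
+ */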
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5,7,0) +#include +static struct kprobe kprobe_kallsyms_lookup_name = { + .symbol_name = "kallsyms_lookup_name" +}; + +static int init_symbol(void) +{ + int ret = -ENODEV; + + ret = register_kprobe(&kprobe_kallsyms_lookup_name); + if (!ret) { + __kallsyms_lookup_name = (void *)kprobe_kallsyms_lookup_name.addr; + unregister_kprobe(&kprobe_kallsyms_lookup_name); + + pr_info("kallsyms_lookup_name is %px\n", __kallsyms_lookup_name); + if (!__kallsyms_lookup_name) + return -ENODEV; + } + + return ret; +} +#else +static int init_symbol(void) +{ + __kallsyms_lookup_name = kallsyms_lookup_name; + return 0; +} +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) +#include +#else +#include +#include +#endif + +#define MAX_TRACE_ENTRIES 128 +#define PER_TRACE_ENTRY_TASKS 16 +#define MAX_TRACE_ENTRY_TASKS \ + (MAX_TRACE_ENTRIES * PER_TRACE_ENTRY_TASKS) + +/* 20ms */ +#define THRESHOLD_DEFAULT (20*1000*1000UL) + +#define INVALID_PID -1 +#define INVALID_CPU -1 +#define PROBE_TRACEPOINTS 4 + +/** + * If we call register_trace_sched_{wakeup,wakeup_new,switch,migrate_task}() + * directly in a kernel module, the compiler will complain about undefined + * symbol of __tracepoint_sched_{wakeup, wakeup_new, switch, migrate_task} + * because the kernel do not export the tracepoint symbol. Here is a workaround + * via for_each_kernel_tracepoint() to lookup the tracepoint and save. + */ +struct tracepoints_probe { + struct tracepoint *tps[PROBE_TRACEPOINTS]; + const char *tp_names[PROBE_TRACEPOINTS]; + void *tp_probes[PROBE_TRACEPOINTS]; + void *priv; + int num_initalized; +}; + +struct task_entry { + u64 runtime; + pid_t pid; + char comm[TASK_COMM_LEN]; +}; + +struct trace_entry { + int cpu; + pid_t pid; + char comm[TASK_COMM_LEN]; + u64 latency; + u64 rq_start; + unsigned int nr_tasks; + struct task_entry *entries; +}; + +struct runqlat_info { + int cpu; /* The target CPU */ + pid_t pid; /* Trace this pid only */ + char comm[TASK_COMM_LEN]; /* target task's comm */ + u64 rq_start; + u64 run_start; + u64 threshold; + struct task_struct *curr; + + unsigned int nr_trace; + struct trace_entry *trace_entries; + + unsigned int nr_task; + struct task_entry *task_entries; + + arch_spinlock_t lock; +}; + +static struct runqlat_info runqlat_info = { + .pid = INVALID_PID, + .cpu = INVALID_CPU, + .threshold = THRESHOLD_DEFAULT, + .lock = __ARCH_SPIN_LOCK_UNLOCKED, +}; + +static int runqlat_ref; + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) +static struct tracepoint **runq__start___tracepoints_ptrs; +static struct tracepoint **runq__stop___tracepoints_ptrs; + +static int runq_init_local_tracepoints(void) +{ + runq__start___tracepoints_ptrs = (void *)kallsyms_lookup_name("__start___tracepoints_ptrs"); + runq__stop___tracepoints_ptrs = (void *)kallsyms_lookup_name("__stop___tracepoints_ptrs"); + if (runq__start___tracepoints_ptrs == NULL || runq__stop___tracepoints_ptrs == NULL) { + return -1; + } + return 0; +} + +static void runq_for_each_tracepoint_range(struct tracepoint * const *begin, + struct tracepoint * const *end, + void (*fct)(struct tracepoint *tp, void *priv), + void *priv) +{ + struct tracepoint * const *iter; + + if (!begin) + return; + for (iter = begin; iter < end; iter++) + fct(*iter, priv); +} + +/** + * for_each_kernel_tracepoint - iteration on all kernel tracepoints + * @fct: callback + * @priv: private data + */ +void runq_for_each_kernel_tracepoint(void (*fct)(struct tracepoint *tp, void *priv), + void *priv) +{ + 
runq_for_each_tracepoint_range(runq__start___tracepoints_ptrs, + runq__stop___tracepoints_ptrs, fct, priv); +} +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) +static void probe_sched_wakeup(void *priv, struct task_struct *p, int success) +#else +static void probe_sched_wakeup(void *priv, struct task_struct *p) +#endif +{ + struct runqlat_info *info = priv; + + if (p->pid != info->pid) + return; + + /* interrupts should be off from try_to_wake_up() */ + arch_spin_lock(&info->lock); + if (unlikely(p->pid != info->pid)) { + arch_spin_unlock(&info->lock); + return; + } + + info->rq_start = local_clock(); + info->run_start = info->rq_start; + info->cpu = task_cpu(p); + arch_spin_unlock(&info->lock); +} + +static inline void runqlat_info_reset(struct runqlat_info *info) +{ + info->rq_start = 0; + info->run_start = 0; + info->cpu = INVALID_CPU; + info->curr = NULL; +} + +/* Must be called with @info->lock held */ +static void record_task(struct runqlat_info *info, struct task_struct *p, + u64 runtime) + __must_hold(&info->lock) +{ + struct task_entry *task; + struct trace_entry *trace; + + task = info->task_entries + info->nr_task; + trace = info->trace_entries + info->nr_trace; + + if (trace->nr_tasks == 0) + trace->entries = task; + WARN_ON_ONCE(trace->entries != task - trace->nr_tasks); + trace->nr_tasks++; + + task->pid = p->pid; + task->runtime = runtime; + strncpy(task->comm, p->comm, TASK_COMM_LEN); + + info->nr_task++; + if (unlikely(info->nr_task >= MAX_TRACE_ENTRY_TASKS)) { + pr_info("BUG: MAX_TRACE_ENTRY_TASKS too low!"); + runqlat_info_reset(info); + /* Force disable trace */ + info->pid = INVALID_PID; + } +} + +/* Must be called with @info->lock held */ +static bool record_task_commit(struct runqlat_info *info, u64 latency) + __must_hold(&info->lock) +{ + struct trace_entry *trace; + + trace = info->trace_entries + info->nr_trace; + if (trace->nr_tasks == 0) + return false; + + if (latency >= info->threshold) { + trace->latency = latency; + trace->rq_start = info->rq_start; + trace->cpu = info->cpu; + trace->pid = info->pid; + strncpy(trace->comm, info->comm, TASK_COMM_LEN); + info->nr_trace++; + if (unlikely(info->nr_trace >= MAX_TRACE_ENTRIES)) { + pr_info("BUG: MAX_TRACE_ENTRIES too low!"); + runqlat_info_reset(info); + /* Force disable trace */ + info->pid = INVALID_PID; + } + } else { + info->nr_task -= trace->nr_tasks; + trace->nr_tasks = 0; + trace->entries = NULL; + } + + return true; +} + +/* interrupts should be off from __schedule() */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) +static void probe_sched_switch(void *priv, + struct task_struct *prev, + struct task_struct *next) +#else +static void probe_sched_switch(void *priv, bool preempt, + struct task_struct *prev, + struct task_struct *next) +#endif +{ + struct runqlat_info *info = priv; + int cpu = smp_processor_id(); + arch_spinlock_t *lock = &info->lock; + + if (info->pid == INVALID_PID) + return; + + if (info->cpu != INVALID_CPU && info->cpu != cpu) + return; + + if (READ_ONCE(info->cpu) == INVALID_CPU) { + if (READ_ONCE(info->pid) != prev->pid || +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 17, 0) + prev->__state != TASK_RUNNING) +#elif LINUX_VERSION_CODE == KERNEL_VERSION(4, 18, 0) + prev->__state != TASK_RUNNING) +#else + prev->state != TASK_RUNNING) + return; +#endif + + arch_spin_lock(lock); + /* We could race with grabbing lock */ + if (unlikely(info->cpu != INVALID_CPU || + info->pid != prev->pid)) { + arch_spin_unlock(lock); + return; + } + info->rq_start = cpu_clock(cpu); + 
info->run_start = info->rq_start; + info->cpu = task_cpu(prev); + + /* update curr for migrate task probe using*/ + if (!is_idle_task(next)) + info->curr = next; + arch_spin_unlock(lock); + } else { + u64 now; + + if (unlikely(READ_ONCE(info->cpu) != cpu || + READ_ONCE(info->pid) == INVALID_PID)) + return; + + arch_spin_lock(lock); + /* We could race with grabbing lock */ + if (unlikely(info->cpu != cpu || info->pid == INVALID_PID)) { + arch_spin_unlock(lock); + return; + } + + /* update curr for migrate task probe using*/ + if (!is_idle_task(next)) + info->curr = next; + + now = cpu_clock(cpu); + if (info->pid == next->pid) { + if (info->run_start) + record_task(info, prev, now - info->run_start); + record_task_commit(info, now - info->rq_start); + } else if (info->pid == prev->pid) { +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 17, 0) + if (prev->__state == TASK_RUNNING) { +#elif LINUX_VERSION_CODE == KERNEL_VERSION(4, 18, 0) + if (prev->__state == TASK_RUNNING) { +#else + if (prev->state == TASK_RUNNING) { +#endif + info->rq_start = now; + info->run_start = now; + } else { + runqlat_info_reset(info); + } + } else { + if (info->run_start) + record_task(info, prev, now - info->run_start); + info->run_start = now; + } + arch_spin_unlock(lock); + } +} + +static void probe_sched_migrate_task(void *priv, struct task_struct *p, int cpu) +{ + u64 now; + struct runqlat_info *info = priv; + struct task_struct *curr; + + if (p->pid != info->pid || info->cpu == INVALID_CPU) + return; + + /* interrupts should be off from set_task_cpu() */ + arch_spin_lock(&info->lock); + if (unlikely(p->pid != info->pid || info->cpu == INVALID_CPU)) + goto unlock; + + now = local_clock(); + curr = info->curr; + if (curr) { + get_task_struct(curr); + if (info->run_start) + record_task(info, curr, now - info->run_start); + put_task_struct(curr); + } + + info->cpu = cpu; + info->run_start = now; +unlock: + arch_spin_unlock(&info->lock); +} + +static struct tracepoints_probe tps_probe = { + .tp_names = { + "sched_wakeup", + "sched_wakeup_new", + "sched_switch", + "sched_migrate_task", + }, + .tp_probes = { + probe_sched_wakeup, + probe_sched_wakeup, + probe_sched_switch, + probe_sched_migrate_task, + }, + .priv = &runqlat_info, +}; + +static inline bool is_tracepoint_lookup_success(struct tracepoints_probe *tps) +{ + return tps->num_initalized == PROBE_TRACEPOINTS; +} + +static void tracepoint_lookup(struct tracepoint *tp, void *priv) +{ + int i; + struct tracepoints_probe *tps = priv; + + if (is_tracepoint_lookup_success(tps)) + return; + + for (i = 0; i < ARRAY_SIZE(tps->tp_names); i++) { + if (tps->tps[i] || strcmp(tp->name, tps->tp_names[i])) + continue; + tps->tps[i] = tp; + tps->num_initalized++; + } +} + +static int trace_pid_show(struct seq_file *m, void *ptr) +{ + struct runqlat_info *info = m->private; + + seq_printf(m, "%d\n", info->pid); + + return 0; +} + +static struct task_struct *loc_find_get_task_by_vpid(int nr) +{ + struct pid * pid_obj; + struct task_struct *task; + + rcu_read_lock(); + pid_obj = find_vpid(nr); + if (!pid_obj) + goto fail; + + task = pid_task(pid_obj, PIDTYPE_PID); + if (!task) + goto fail; + + get_task_struct(task); + rcu_read_unlock(); + return task; +fail: + rcu_read_unlock(); + return NULL; +} +static ssize_t trace_pid_store(void *priv, const char __user *buf, size_t count) +{ + int pid; + struct task_struct *task = NULL; + struct runqlat_info *info = priv; + + if (kstrtoint_from_user(buf, count, 0, &pid)) + return -EINVAL; + + if (info->pid != INVALID_PID && pid != INVALID_PID) + 
return -EPERM; + + local_irq_disable(); + arch_spin_lock(&info->lock); + if (info->pid == pid) { + if (pid == INVALID_PID) + sysak_module_put(&runqlat_ref); + goto unlock; + } + + if (pid != INVALID_PID) { + + info->nr_trace = 0; + info->nr_task = 0; + memset(info->trace_entries, 0, + MAX_TRACE_ENTRIES * sizeof(struct trace_entry) + + MAX_TRACE_ENTRY_TASKS * sizeof(struct task_entry)); + sysak_module_get(&runqlat_ref); + } else { + sysak_module_put(&runqlat_ref); + } + runqlat_info_reset(info); + smp_wmb(); + info->pid = pid; + task = loc_find_get_task_by_vpid(pid); + if (task) { + strncpy(info->comm, task->comm, TASK_COMM_LEN); + put_task_struct(task); + } else { + strncpy(info->comm, "NULL", 5); + } +unlock: + arch_spin_unlock(&info->lock); + local_irq_enable(); + + return count; +} + +DEFINE_PROC_ATTRIBUTE_RW(trace_pid); + +static int threshold_show(struct seq_file *m, void *ptr) +{ + struct runqlat_info *info = m->private; + + seq_printf(m, "%llu ms\n", info->threshold/(1000*1000)); + + return 0; +} + +static ssize_t threshold_store(void *priv, const char __user *buf, size_t count) +{ + unsigned long threshold; + struct runqlat_info *info = priv; + + if (kstrtoul_from_user(buf, count, 0, &threshold)) + return -EINVAL; + + info->threshold = threshold; + + return count; +} + +DEFINE_PROC_ATTRIBUTE_RW(threshold); + +static int runqlat_show(struct seq_file *m, void *ptr) +{ + int i, j; + struct runqlat_info *info = m->private; + + if (info->pid != INVALID_CPU) + return -EPERM; + + local_irq_disable(); + arch_spin_lock(&info->lock); + for (i = 0; i < info->nr_trace; i++) { + struct trace_entry *entry = info->trace_entries + i; + + seq_printf(m, "%*ccpu:%d\tcommand:%s\tpid:%d\tlatency:%llums\tSTAMP:%llu\trunqlen:%d\n", + 5, ' ', entry->cpu, + entry->comm, entry->pid, + entry->latency/(1000*1000), + entry->rq_start, + entry->nr_tasks); + } + arch_spin_unlock(&info->lock); + local_irq_enable(); + + return 0; +} + +static ssize_t runqlat_store(void *priv, const char __user *buf, size_t count) +{ + int clear; + struct runqlat_info *info = priv; + + if (kstrtoint_from_user(buf, count, 10, &clear) || clear != 0) + return -EINVAL; + + local_irq_disable(); + arch_spin_lock(&info->lock); + info->nr_trace = 0; + info->nr_task = 0; + memset(info->trace_entries, 0, + MAX_TRACE_ENTRIES * sizeof(struct trace_entry) + + MAX_TRACE_ENTRY_TASKS * sizeof(struct task_entry)); + + runqlat_info_reset(info); + smp_wmb(); + arch_spin_unlock(&info->lock); + local_irq_enable(); + + return count; +} + +DEFINE_PROC_ATTRIBUTE_RW(runqlat); + +int trace_runqlat_init(struct proc_dir_entry *root_dir) +{ + int i; + void *buf; + int ret = -ENOMEM; + struct tracepoints_probe *tps = &tps_probe; + struct proc_dir_entry *parent_dir; + struct runqlat_info *info = &runqlat_info; + + if((ret=init_symbol())) + return ret; + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) + if (runq_init_local_tracepoints()) + return -ENODEV; +#endif + + buf = vzalloc(MAX_TRACE_ENTRIES * sizeof(struct trace_entry) + + MAX_TRACE_ENTRY_TASKS * sizeof(struct task_entry)); + if (!buf) + return -ENOMEM; + info->trace_entries = buf; + info->task_entries = (void *)(info->trace_entries + MAX_TRACE_ENTRIES); + + parent_dir = proc_mkdir("runqlat", root_dir); + if (!parent_dir) + goto free_buf; + + if (!proc_create_data("pid", 0644, parent_dir, &trace_pid_fops, info)) + goto remove_proc; + + if (!proc_create_data("threshold", 0644, parent_dir, &threshold_fops, + info)) + goto remove_proc; + + if (!proc_create_data("runqlat", 0, parent_dir, &runqlat_fops, 
info)) + goto remove_proc; + + /* Lookup for the tracepoint that we needed */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) + runq_for_each_kernel_tracepoint(tracepoint_lookup, tps); +#else + for_each_kernel_tracepoint(tracepoint_lookup, tps); +#endif + + if (!is_tracepoint_lookup_success(tps)) + goto remove_proc; + + for (i = 0; i < PROBE_TRACEPOINTS; i++) { +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) + ret = tracepoint_probe_register(tps->tps[i]->name, tps->tp_probes[i], + tps->priv); +#else + ret = tracepoint_probe_register(tps->tps[i], tps->tp_probes[i], + tps->priv); +#endif + if (ret) { + pr_err("sched trace: can not activate tracepoint " + "probe to %s\n", tps->tp_names[i]); + while (i--) +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) + tracepoint_probe_unregister(tps->tps[i]->name, + tps->tp_probes[i], + tps->priv); +#else + tracepoint_probe_unregister(tps->tps[i], + tps->tp_probes[i], + tps->priv); +#endif + goto remove_proc; + } + } + + return 0; +remove_proc: + remove_proc_subtree("runqlat", root_dir); +free_buf: + vfree(buf); + + return ret; +} + +void trace_runqlat_exit(void) +{ + int i; + struct tracepoints_probe *tps = &tps_probe; + struct runqlat_info *info = &runqlat_info; + + for (i = 0; i < PROBE_TRACEPOINTS; i++) +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) + tracepoint_probe_unregister(tps->tps[i]->name, tps->tp_probes[i], + tps->priv); +#else + tracepoint_probe_unregister(tps->tps[i], tps->tp_probes[i], + tps->priv); +#endif + + tracepoint_synchronize_unregister(); + vfree(info->trace_entries); +} diff --git a/source/lib/internal/kernel_module/modules/schedtrace/schedtrace.c b/source/lib/internal/kernel_module/modules/schedtrace/schedtrace.c new file mode 100644 index 0000000000000000000000000000000000000000..88accb64bad5f12738ba9ca0ed93c21f20a9d7f8 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/schedtrace/schedtrace.c @@ -0,0 +1,259 @@ +#include +#include +#include /* regs_get_kernel_argument */ +#include /* PID_MAX_LIMIT */ +#include +#include +#include +#include +#include +#include "sysak_mods.h" +#include "proc.h" + +/* ARRAY_LEN is to define a trace buffer */ +#define ARRAY_LEN 1 +#define BUF_LEN 1024 +#define MAX_STACK_TRACE_DEPTH 8 + +struct tracepoints_probe { + struct tracepoint *tp; + char *name; +}; + +struct traceinfo { + int idx; + struct stack_trace trace[ARRAY_LEN]; + unsigned long entries[ARRAY_LEN][MAX_STACK_TRACE_DEPTH]; +}; + +static int trace_in_fly; +static int target_pid; +char buff[BUF_LEN] = {0}; +struct traceinfo traceinfos; + +struct tracepoints_probe mytp = { + .tp = NULL, + .name = "sched_switch", +}; + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) +static struct tracepoint **swtc__start___tracepoints_ptrs; +static struct tracepoint **swtc__stop___tracepoints_ptrs; + +static int swtc_init_local_tracepoints(void) +{ + swtc__start___tracepoints_ptrs = (void *)kallsyms_lookup_name("__start___tracepoints_ptrs"); + swtc__stop___tracepoints_ptrs = (void *)kallsyms_lookup_name("__stop___tracepoints_ptrs"); + if (swtc__start___tracepoints_ptrs == NULL || swtc__stop___tracepoints_ptrs == NULL) { + return -1; + } + return 0; +} + +static void swtc_for_each_tracepoint_range(struct tracepoint * const *begin, + struct tracepoint * const *end, + void (*fct)(struct tracepoint *tp, void *priv), + void *priv) +{ + struct tracepoint * const *iter; + + if (!begin) + return; + for (iter = begin; iter < end; iter++) + fct(*iter, priv); +} + +/** + * for_each_kernel_tracepoint - iteration on all kernel tracepoints + * @fct: 
callback + * @priv: private data + */ +void swtc_for_each_kernel_tracepoint(void (*fct)(struct tracepoint *tp, void *priv), + void *priv) +{ + swtc_for_each_tracepoint_range(swtc__start___tracepoints_ptrs, + swtc__stop___tracepoints_ptrs, fct, priv); +} +#endif +static void tracepoint_lookup(struct tracepoint *tp, void *priv) +{ + struct tracepoints_probe *tps = priv; + + if (!strcmp(tp->name, tps->name)) + tps->tp = tp; +} + +static void +(*stack_save_regs)(struct pt_regs *regs, struct stack_trace *trace); +static void +(*stack_save_tsk)(struct task_struct *tsk, struct stack_trace *trace); + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) +static void trace_sched_switch(void *priv, + struct task_struct *prev, + struct task_struct *next) + +#else +static void trace_sched_switch(void *priv, bool preempt, + struct task_struct *prev, + struct task_struct *next) +#endif +{ + struct task_struct *p; + int i, size = 0; + + p = prev; + if (((pid_t)target_pid == p->pid) && (p->state)) { + struct traceinfo *tf = &traceinfos; + struct stack_trace *trace = tf->trace; + int idx = tf->idx; + + tf->idx = (idx + 1)%ARRAY_LEN; + trace->nr_entries = 0; + trace->entries = tf->entries[idx]; + trace->max_entries = MAX_STACK_TRACE_DEPTH; + trace->skip = 1; + stack_save_tsk(prev, trace); + + idx = 0; + for (i = 0; i < trace->nr_entries - 1; i++) { + if ((void *)trace->entries[i]) { + size = sprintf(&buff[idx], "<%px>", (void *)(trace->entries[i])); + idx += size; + if (idx > BUF_LEN) + break; + size = sprint_symbol(&buff[idx], trace->entries[i]); + idx += size; + if (idx > BUF_LEN) + break; + size = sprintf(&buff[idx], ","); + idx += size; + if (idx > BUF_LEN) + break; + } + } + trace_printk("%s\n", buff); + memset(trace, 0, sizeof(struct stack_trace)); + } +} + +static int pid_show(struct seq_file *m, void *v) +{ + seq_printf(m, "pid=%d\n", target_pid); + return 0; +} + +static int pid_open(struct inode *inode, struct file *file) +{ + return single_open(file, pid_show, inode->i_private); +} + +static ssize_t pid_write(struct file *f, const char __user *buf, + size_t count, loff_t *ppos) +{ + if (count <= 0 || count > PID_MAX_LIMIT) + return -EINVAL; + + if (kstrtoint_from_user(buf, count, 0, &target_pid)) { + pr_warn("copy_from_user fail\n"); + return -EFAULT; + } + + if (target_pid < 0 && target_pid != -1) + return -EINVAL; + + if (target_pid == -1 && trace_in_fly) { +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) + tracepoint_probe_unregister(mytp.name, trace_sched_switch, NULL); +#else + tracepoint_probe_unregister(mytp.tp, trace_sched_switch, NULL); +#endif + trace_in_fly = 0; + } else if (target_pid > 0 && !trace_in_fly) { + int ret; +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) + ret = tracepoint_probe_register(mytp.name, trace_sched_switch, NULL); +#else + ret = tracepoint_probe_register(mytp.tp, trace_sched_switch, NULL); +#endif + if (ret) + trace_in_fly = 1; + else + return ret; + } + return count; +} + +static struct file_operations pid_fops = { + .owner = THIS_MODULE, + .read = seq_read, + .open = pid_open, + .write = pid_write, + .release = seq_release, +}; + +static int proc_init(void) +{ + struct proc_dir_entry *parent; + + parent = sysak_proc_mkdir("schedtrace"); + if (!parent) + return -ENOMEM; + + if(!proc_create("pid", + S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP, + parent, + &pid_fops)) + goto proc_fail; + pr_info("proc_init schedtrace success\n"); + return 0; + +proc_fail: + sysak_remove_proc_entry("schedtrace"); + return -ENOMEM; +} + +int schedtrace_init(void) +{ + int ret = 0; + + mytp.tp = 
NULL; + trace_in_fly = 0; + target_pid = -1; + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) + if (swtc_init_local_tracepoints()) + return -ENODEV; +#endif +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) + swtc_for_each_kernel_tracepoint(tracepoint_lookup, &mytp); +#else + for_each_kernel_tracepoint(tracepoint_lookup, &mytp); +#endif + stack_save_tsk = (void *)kallsyms_lookup_name("save_stack_trace_tsk"); + stack_save_regs = (void *)kallsyms_lookup_name("save_stack_trace_regs"); + + if (!stack_save_tsk || !stack_save_regs) { + ret = -EINVAL; + pr_warn("stack_save not found\n"); + goto fail; + } + + ret = proc_init(); + if (ret < 0) { + pr_warn("proc_init fail\n"); + } + +fail: + return ret; +} + +void schedtrace_exit(void) +{ + if (trace_in_fly) +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) + tracepoint_probe_unregister(mytp.name, trace_sched_switch, NULL); +#else + tracepoint_probe_unregister(mytp.tp, trace_sched_switch, NULL); +#endif +} diff --git a/source/lib/internal/kernel_module/modules/signal/trace_sig.c b/source/lib/internal/kernel_module/modules/signal/trace_sig.c new file mode 100755 index 0000000000000000000000000000000000000000..b2153a5dc34c0416e2f7200e5676a1b3f6c56408 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/signal/trace_sig.c @@ -0,0 +1,220 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "sysak_mods.h" +#include "hook.h" +#include "proc.h" +#include "blackbox.h" + +struct trace_sig_info { + char comm[TASK_COMM_LEN]; + int pid; + int sig; +}trace_info; + +static int sig_ref; +#define BUFFER_LEN 256 +static char process_info_buf[BUFFER_LEN]; +static int tracesig_bid = -1; + +static void save_process_info(struct task_struct *task, void *buf, int size) +{ + int ret; + + ret = snprintf(buf, size, "%s(%d)", task->comm, task->pid); + if (ret <= 0) + return; + + while (ret > 0 && task->parent && task->parent->pid > 1) { + size = size - ret; + if (size <= 1) + break; + task = task->parent; + buf += ret; + ret = snprintf(buf, size, "< %s(%d)", task->comm, task->pid); + } +} + +static void print_signal_info(struct task_struct *task, int sig) +{ + struct bbox_data_info data_info; + int ret, len; + + memset(process_info_buf, 0, BUFFER_LEN); + ret = snprintf(process_info_buf, BUFFER_LEN,"send sig %d to task %s[%d], generated by:", + sig, task->comm, task->pid); + if (ret <= 0 || ret >= (BUFFER_LEN - 1)) { + printk("ret %d\n", ret); + return; + } + + save_process_info(current, process_info_buf + ret, BUFFER_LEN - ret); + len = strlen(process_info_buf); + process_info_buf[len] = '\n'; + data_info.data = process_info_buf; + data_info.size = len + 1; + bbox_write(tracesig_bid, &data_info); +} + +#if KERNEL_VERSION(4, 19, 0) <= LINUX_VERSION_CODE +static void signal_generate_trace(void *ignore, int sig, + struct siginfo *info, struct task_struct *task, + int type, int result) +#elif KERNEL_VERSION(3, 10, 0) <= LINUX_VERSION_CODE +static void signal_generate_trace(void *ignore, int sig, + struct siginfo *info, struct task_struct *task, + int group, int result) +#else +static void signal_generate_trace(int sig, + struct siginfo *info, struct task_struct *task, + int group) +#endif +{ + if (trace_info.sig && trace_info.sig != sig) + return; + + if (trace_info.pid && trace_info.pid != task->pid) + return; + + if (strlen(trace_info.comm) && strcmp(trace_info.comm, task->comm)) + return; + print_signal_info(task, sig); +} + +static bool trace_enabled; +static void 
trace_sig_enable(void) +{ + if (trace_enabled) + return; + + tracesig_bid = bbox_alloc("tracesig", BBOX_TYPE_RING); + if (tracesig_bid < 0) { + printk("bbox alloc failed,cannot enable\n"); + return; + } + + hook_tracepoint("signal_generate", signal_generate_trace, NULL); + trace_enabled = true; + sysak_module_get(&sig_ref); +} + +static void trace_sig_disable(void) +{ + if (!trace_enabled) + return; + + unhook_tracepoint("signal_generate", signal_generate_trace, NULL); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 10, 0) + synchronize_rcu(); +#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 19, 0) || LINUX_VERSION_CODE <= KERNEL_VERSION(4, 17, 0) + synchronize_sched(); +#endif + bbox_free(tracesig_bid); + trace_enabled = false; + sysak_module_put(&sig_ref); +} + +static ssize_t signal_trace_write(struct file *file, + const char __user *buf, size_t count, loff_t *offs) +{ + int ret; + char cmd[256]; + char chr[256]; + int pid, sig; + + if (count < 1 || *offs) + return -EINVAL; + + if (copy_from_user(chr, buf, 256)) + return -EFAULT; + + ret = sscanf(chr, "%255s", cmd); + if (ret <= 0) + return -EINVAL; + + if (strcmp(cmd, "comm") == 0) { + ret = sscanf(chr, "comm %s", cmd); + if (ret <= 0) + return -EINVAL; + strncpy(trace_info.comm, cmd, TASK_COMM_LEN); + trace_info.comm[TASK_COMM_LEN - 1] = '\0'; + } else if (strcmp(cmd, "pid") == 0) { + ret = sscanf(chr, "pid %d", &pid); + if (ret <= 0) + return -EINVAL; + trace_info.pid = pid; + } else if (strcmp(cmd, "sig") == 0) { + ret = sscanf(chr, "sig %d", &sig); + if (ret <= 0) + return -EINVAL; + trace_info.sig = sig; + } else if (strcmp(cmd, "enable") == 0) { + trace_sig_enable(); + } else if (strcmp(cmd, "disable") == 0) { + trace_sig_disable(); + } else { + return -EINVAL; + } + + return count; +} + +static int signal_trace_show(struct seq_file *m, void *v) +{ + seq_printf(m, "comm: %s\n", trace_info.comm); + seq_printf(m, "pid: %d\n", trace_info.pid); + seq_printf(m, "sig: %d\n", trace_info.sig); + if (trace_enabled) + bbox_ring_show(m, tracesig_bid); + return 0; +} + +static int signal_trace_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, signal_trace_show, NULL); +} + +static struct proc_dir_entry *signal_trace_proc; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 10, 0) +static const struct proc_ops signal_trace_fops = { + .proc_open = signal_trace_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_write = signal_trace_write, + .proc_release = single_release, +}; +#else +const struct file_operations signal_trace_fops = { + .open = signal_trace_open, + .read = seq_read, + .llseek = seq_lseek, + .write = signal_trace_write, + .release = single_release, +}; +#endif + +int trace_sig_init(void) +{ + signal_trace_proc = sysak_proc_create("sig_trace", &signal_trace_fops); + + return 0; +} + +int trace_sig_exit(void) +{ + trace_sig_disable(); + return 0; +} + diff --git a/source/lib/internal/kernel_module/modules/task_ctl/task_ctrl.c b/source/lib/internal/kernel_module/modules/task_ctl/task_ctrl.c new file mode 100755 index 0000000000000000000000000000000000000000..7be376391a1865230c5d5830b5204f8c4070e8c9 --- /dev/null +++ b/source/lib/internal/kernel_module/modules/task_ctl/task_ctrl.c @@ -0,0 +1,169 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "sysak_mods.h" +#include "hook.h" +#include "proc.h" + +enum TASK_CTL_TYPE{ + TASK_LOOP, + TASK_SLEEP, + MAX_CTL_TYPE +}; + +#define TASK_CTL_VALID(x) ((unsigned)(x) < 
MAX_CTL_TYPE) + +struct task_ctl_info { + int pid; + enum TASK_CTL_TYPE type; +}ctl_info; + +static int taskctl_ref; +static bool ctl_enabled; + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 33) +static void syscall_enter_trace(struct pt_regs *regs, long id) +#else +static void syscall_enter_trace(void *__data, struct pt_regs *regs, long id) +#endif +{ + while(ctl_enabled && ctl_info.pid == current->pid) { + if (!TASK_CTL_VALID(ctl_info.type)) + break; + else if (ctl_info.type == TASK_SLEEP) + msleep_interruptible(100); + else + cond_resched(); + rmb(); + } +} + +static void task_ctl_enable(void) +{ + if (ctl_enabled) + return; + hook_tracepoint("sys_enter", syscall_enter_trace, NULL); + ctl_enabled = true; + sysak_module_get(&taskctl_ref); +} + +static void task_ctl_disable(void) +{ + if (!ctl_enabled) + return; + + unhook_tracepoint("sys_enter", syscall_enter_trace, NULL); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 10, 0) + synchronize_rcu(); +#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 19, 0) || LINUX_VERSION_CODE <= KERNEL_VERSION(4, 17, 0) + synchronize_sched(); +#endif + ctl_enabled = false; + sysak_module_put(&taskctl_ref); +} + +static ssize_t task_ctl_write(struct file *file, + const char __user *buf, size_t count, loff_t *offs) +{ + int ret; + char cmd[256]; + char chr[256]; + int pid; + + if (count < 1 || *offs) + return -EINVAL; + + if (copy_from_user(chr, buf, 256)) + return -EFAULT; + + ret = sscanf(chr, "%255s", cmd); + if (ret <= 0) + return -EINVAL; + + if (strcmp(cmd, "pid") == 0) { + ret = sscanf(chr, "pid %d", &pid); + if (ret <= 0) + return -EINVAL; + ctl_info.pid = pid; + } else if (strcmp(cmd, "type") == 0) { + ret = sscanf(chr, "type %s", cmd); + if (ret <= 0) + return -EINVAL; + if (strcmp(cmd, "loop") == 0) + ctl_info.type = TASK_LOOP; + else if (strcmp(cmd, "sleep") == 0) + ctl_info.type = TASK_SLEEP; + else + ctl_info.type = MAX_CTL_TYPE; + } else if (strcmp(cmd, "enable") == 0) { + task_ctl_enable(); + } else if (strcmp(cmd, "disable") == 0) { + task_ctl_disable(); + } else { + return -EINVAL; + } + + return count; +} + +static int task_ctl_show(struct seq_file *m, void *v) +{ + seq_printf(m, "pid: %d\n", ctl_info.pid); + if (ctl_info.type == TASK_LOOP) + seq_printf(m, "type: loop"); + else if (ctl_info.type == TASK_SLEEP) + seq_printf(m, "type: sleep"); + else + seq_printf(m, "type: invalid"); + + return 0; +} + +static int task_ctl_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, task_ctl_show, NULL); +} + +static struct proc_dir_entry *task_ctl_proc; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 10, 0) +static const struct proc_ops task_ctl_fops = { + .proc_open = task_ctl_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_write = task_ctl_write, + .proc_release = single_release, +}; +#else +const struct file_operations task_ctl_fops = { + .open = task_ctl_open, + .read = seq_read, + .llseek = seq_lseek, + .write = task_ctl_write, + .release = single_release, +}; +#endif + +int task_ctl_init(void) +{ + task_ctl_proc = sysak_proc_create("task_ctl", &task_ctl_fops); + + return 0; +} + +int task_ctl_exit(void) +{ + task_ctl_disable(); + return 0; +} + diff --git a/source/lib/internal/kernel_module/modules/test_module/test.c b/source/lib/internal/kernel_module/modules/test_module/test.c new file mode 100755 index 0000000000000000000000000000000000000000..5c04c24bb8bba776ff6d90f9f1571382643f49ab --- /dev/null +++ b/source/lib/internal/kernel_module/modules/test_module/test.c @@ -0,0 +1,26 @@ +#include +#include 
+#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +int test_init(void) +{ + printk("test_module enter.\n"); + return 0; +} + +int test_exit(void) +{ + printk("test_module exit.\n"); + return 0; +} + diff --git a/source/lib/internal/kernel_module/modules/ulockcheck/ulockcheck.c b/source/lib/internal/kernel_module/modules/ulockcheck/ulockcheck.c new file mode 100644 index 0000000000000000000000000000000000000000..29ff4cacc8aded572e9afd7af80887f810c50a3f --- /dev/null +++ b/source/lib/internal/kernel_module/modules/ulockcheck/ulockcheck.c @@ -0,0 +1,696 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "include/blackbox.h" +#include "proc.h" + +#ifdef CONFIG_X86 +#define MAX_SYMBOL_LEN 64 +#define PATH_LEN 256 +#define STACK_DEPTH 100 +#define STACK_DETAIL_DEPTH 20 +#define PROC_NUMBUF 256 +#define SHOW_BUF_LEN 64 + +#define WAIT_TIMEOUT HZ +#define LOCK_TIMEOUT HZ + +#define REGISTER_FAILED 1 + +LIST_HEAD(monitor_list); +LIST_HEAD(vma_list); +rwlock_t thdlist_lock; +extern struct mm_struct *get_task_mm(struct task_struct *task); + +static struct kprobe kp_wake = { + .symbol_name = "futex_wake", +}; + +static struct kretprobe krp_wait = { + .kp.symbol_name = "futex_wait", + .maxactive = 10000, +}; + +pid_t monitor_pid = 0; +pid_t lock_owner= 0; +bool enable_print_ustack = false; +bool enbale_ulockcheck = false; +unsigned long max_wait_time; +unsigned long max_lock_time; +int wait_delay_thresold = WAIT_TIMEOUT; +int lock_delay_thresold = LOCK_TIMEOUT; + +static int ulock_bid = -1; + +struct stack_info { + unsigned long bp; + char path[PATH_LEN]; +}; + +struct vma_info{ + struct list_head list; + unsigned long start; + unsigned long end; + int exectue; + char path[PATH_LEN]; +}; + +struct task_info{ + pid_t pid; + pid_t tgid; + struct list_head task_list; + char comm[TASK_COMM_LEN]; + + unsigned long fwait_count; + unsigned long fwait_delay; + + unsigned long fwake_count; + unsigned long fwake_time; + + unsigned long wait_time; + unsigned long outtime_count; + + unsigned long sch_total; + unsigned long total_delay; + + unsigned long uaddr; + + unsigned long lock_time; + unsigned long lock_delay; + bool lock; + unsigned long lock_count; + + //struct list_head vma_list; + struct stack_info stack[STACK_DETAIL_DEPTH]; +}; + +void save_mmapstack_trace_user(struct task_struct *task, struct task_info *tsk) +{ + struct list_head *vma_entry; + const struct pt_regs *regs = task_pt_regs(current); + const void __user *fp = (const void __user *)regs->sp; + int stack_len = 0 ; + int i; + + for (i = 0; i < STACK_DEPTH; i++){ + if (stack_len > STACK_DETAIL_DEPTH) + break; + list_for_each(vma_entry, &vma_list){ + //struct vma_info *vma = (struct vma_info *)vma_entry; + struct vma_info *vma = container_of(vma_entry, struct vma_info, list); + unsigned long tmp; + + if (!copy_from_user(&tmp, fp+i*__SIZEOF_LONG__, __SIZEOF_LONG__)) { + if ((tmp >= vma->start) && (tmp <= vma->end)) { + tsk->stack[stack_len].bp = tmp; + strcpy(tsk->stack[stack_len].path,vma->path); + stack_len++; + } + } + } + } +} + +static int save_calltrace(struct pt_regs *regs) +{ + struct list_head *tsk_entry; + struct task_info *new_tsk; + pid_t tgid = 0; + + list_for_each(tsk_entry, &monitor_list){ + struct task_info *tsk = container_of(tsk_entry, struct task_info, task_list); + tgid = tsk->tgid; + if (tsk->pid == current->pid){ + 
tsk->fwait_count++; + tsk->wait_time = jiffies; + tsk->uaddr = regs->di; + save_mmapstack_trace_user(current,tsk); + return 0; + } + } + if (tgid == current->tgid){ + new_tsk = kzalloc(sizeof(struct task_info),GFP_KERNEL); + if (!new_tsk) + return 0; + new_tsk->pid = current->pid; + new_tsk->tgid = tgid; + memcpy(new_tsk->comm,current->comm,sizeof(new_tsk->comm)); + new_tsk->fwait_count++; + new_tsk->wait_time = jiffies; + new_tsk->uaddr = regs->di; + + save_mmapstack_trace_user(current,new_tsk); + list_add_tail(&new_tsk->task_list,&monitor_list); + } + return 0; +} + +static void get_filename(char *buf, const struct path *path, size_t size) +{ + if (size) { + char *p = d_path(path, buf, size); + if (!IS_ERR(p)) { + strcpy(buf,p); + } + } +} + +/*static int before_futex_wait(struct kprobe *p, struct pt_regs *regs) +{ + int ret; + + if (!monitor_pid || (monitor_pid != current->pid && monitor_pid != current->tgid)) + return 0; + + write_lock(&thdlist_lock); + ret = save_calltrace(regs); + write_unlock(&thdlist_lock); + return 0; +} +*/ + +static int after_futex_wait(struct kretprobe_instance *ri, struct pt_regs *regs) +{ + struct list_head *pos; + unsigned long wait_time; + int i, len; + char task_show_buf[SHOW_BUF_LEN]; + struct bbox_data_info data_info; + + if (!monitor_pid || (monitor_pid != current->pid && monitor_pid != current->tgid)) + return 0 ; + + data_info.data = task_show_buf; + + read_lock(&thdlist_lock); + list_for_each(pos, &monitor_list){ + struct task_info *tsk_info = container_of(pos, struct task_info, task_list); + if (tsk_info->pid == current->pid){ + tsk_info->fwait_delay += jiffies - tsk_info->wait_time; + wait_time = jiffies - tsk_info->wait_time; + max_wait_time = wait_time > max_wait_time ? wait_time : max_wait_time; + if (wait_time > wait_delay_thresold){ + tsk_info->outtime_count++; + if (enable_print_ustack){ + len = snprintf(task_show_buf, SHOW_BUF_LEN, "task %d[%s], wait delay %ld ticks,", + tsk_info->pid, tsk_info->comm, wait_time); + data_info.size = len; + bbox_write(ulock_bid, &data_info); + len = sprintf(task_show_buf,"user stack:\n"); + data_info.size = len; + bbox_write(ulock_bid, &data_info); + for (i = 0; i < STACK_DETAIL_DEPTH; i++){ + if (tsk_info->stack[i].bp == 0) { + continue; + } + len = sprintf(task_show_buf, "#~ 0x%lx %s\n", + tsk_info->stack[i].bp, tsk_info->stack[i].path); + data_info.size = len; + bbox_write(ulock_bid, &data_info); + } + } + } + tsk_info->lock_time = jiffies; + lock_owner = tsk_info->pid; + //tsk_info->lock = TRUE; + tsk_info->lock_count++; + break; + } + + } + read_unlock(&thdlist_lock); + return 0; +} + +static int before_futex_wake(struct kprobe *p, struct pt_regs *regs) +{ + + struct list_head *pos; + char task_show_buf[SHOW_BUF_LEN]; + struct bbox_data_info data_info; + int len, i; + + if (!monitor_pid || (monitor_pid != current->pid && monitor_pid != current->tgid)) + return 0; + data_info.data = task_show_buf; + read_lock(&thdlist_lock); + list_for_each(pos, &monitor_list){ + struct task_info *tsk_info = container_of(pos, struct task_info, task_list); + if (tsk_info->pid == current->pid){ + //pos->fw_cout++; + tsk_info->lock_delay = jiffies - tsk_info->lock_time; + max_lock_time = tsk_info->lock_delay > max_lock_time ? 
tsk_info->lock_delay : max_lock_time; + if (enable_print_ustack && tsk_info->lock && (tsk_info->lock_delay > lock_delay_thresold)){ + len = snprintf(task_show_buf, SHOW_BUF_LEN, "task %d[%s], lock over %ld ticks,", + current->pid,current->comm, tsk_info->lock_delay); + data_info.size = len; + bbox_write(ulock_bid, &data_info); + len = sprintf(task_show_buf,"user stack:\n"); + data_info.size = len; + bbox_write(ulock_bid, &data_info); + for (i = 0; i < STACK_DETAIL_DEPTH; i++){ + if (tsk_info->stack[i].bp == 0) { + continue; + } + len = sprintf(task_show_buf, "#~ 0x%lx %s\n", + tsk_info->stack[i].bp, tsk_info->stack[i].path); + data_info.size = len; + bbox_write(ulock_bid, &data_info); + } + } + + //tsk_info->lock = FALSE; + tsk_info->fwake_time = jiffies; + tsk_info->fwake_count++; + break; + } + + } + read_unlock(&thdlist_lock); + return 0; +} + +static int entry_handler(struct kretprobe_instance *ri, struct pt_regs *regs) +{ + int ret; + + if (!monitor_pid || (monitor_pid != current->pid && monitor_pid != current->tgid)) + return 0; + + write_lock(&thdlist_lock); + ret = save_calltrace(regs); + write_unlock(&thdlist_lock); + return 0; +} + + +/*static int handler_fault(struct kprobe *p, struct pt_regs *regs, int trapnr) +{ + pr_info("fault_handler: p->addr = 0x%p, trap #%dn", p->addr, trapnr); + return 0; +} +*/ + + +static int futexpid_show(struct seq_file *m, void *v) +{ + struct list_head *pos; + + if (!monitor_pid) { + seq_printf(m, "futex monitor list is empty\n"); + return 0; + } + + seq_printf(m, "max_wait_time %ld ticks, max_lock_time %ld ticks\n", + max_wait_time, max_lock_time); + read_lock(&thdlist_lock); + list_for_each(pos, &monitor_list){ + struct task_info *tsk = container_of(pos, struct task_info, task_list); + if (lock_owner && (tsk->pid == lock_owner)) + seq_puts(m,"current owner:\n"); + seq_printf(m, "pid[%d],name[%s],futex wait count[%lu],total futex_delay[%lu],", + tsk->pid, tsk->comm, tsk->fwait_count, tsk->fwait_delay); + seq_printf(m, "futex lock count[%lu],lock delay[%lu],wait over thresold count[%lu]\n", + tsk->lock_count, tsk->lock_delay, tsk->outtime_count); + //seq_printf(m,"schdule delay[none], ratio :futex[none]/schdule[none]\n", + //tsk->fwait_delay, tsk->fwait_delay, tsk->fwait_delay); + + } + read_unlock(&thdlist_lock); + bbox_ring_show(m, ulock_bid); + return 0; +} + +static ssize_t futexpid_store(void *priv, const char __user *buf, size_t count) +{ + char buffer[PROC_NUMBUF]; + struct task_struct *tsk; + struct task_info *new_tsk; + struct mm_struct *mm; + struct file *vma_file; + struct vm_area_struct *vma; + struct vma_info *new_vma; + struct pid *pid; + pid_t pid_i; + int err = -1; + + if (!enbale_ulockcheck){ + pr_warn("ulockcheck disabled!"); + return count; + } + + memset(buffer, 0, sizeof(buffer)); + if (count > sizeof(buffer) - 1) + count = sizeof(buffer) - 1; + if (copy_from_user(buffer, buf, count)) { + return -EFAULT;; + } + err = kstrtoint(strstrip(buffer), 0, &pid_i); + if (err) + return -EINVAL; + read_lock(&thdlist_lock); + + if (!list_empty(&monitor_list)){ + read_unlock(&thdlist_lock); + return count; + } + read_unlock(&thdlist_lock); + + rcu_read_lock(); + + pid= find_get_pid(pid_i); + tsk = pid_task(pid, PIDTYPE_PID); + if (!tsk || !(tsk->mm)){ + rcu_read_unlock(); + return -EINVAL; + } + + monitor_pid = pid_i; + + if (monitor_pid != 0 ){ + + new_tsk = kzalloc(sizeof(struct task_info),GFP_KERNEL); + if (!new_tsk) + goto failed_tsk; + new_tsk->pid = monitor_pid; + new_tsk->tgid = tsk->tgid; + 
memcpy(new_tsk->comm,tsk->comm,sizeof(tsk->comm)); + + mm = get_task_mm(tsk); + + if (IS_ERR_OR_NULL(mm)){ + rcu_read_unlock(); + goto failed; + } +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 10, 0) + if (!mmap_read_trylock(mm)){ +#else + if (!down_read_trylock(&mm->mmap_sem)){ +#endif + rcu_read_unlock(); + goto failed; + } + + for (vma = mm->mmap; vma; vma = vma->vm_next){ + if (vma->vm_file && vma->vm_flags & VM_EXEC){ + char buff[PATH_LEN]; + + new_vma = kzalloc(sizeof(struct vma_info),GFP_KERNEL); + if (!new_vma){ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 10, 0) + mmap_read_unlock(mm); +#else + up_read(&mm->mmap_sem); +#endif + goto failed; + } + new_vma->start = vma->vm_start; + new_vma->end = vma->vm_end; + vma_file = vma->vm_file; + + if (vma_file){ + get_filename(buff, &vma_file->f_path, PATH_LEN); + strcpy(new_vma->path, buff); + list_add_tail(&new_vma->list,&vma_list); + } + } + } +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 10, 0) + mmap_read_unlock(mm); +#else + up_read(&mm->mmap_sem); +#endif + write_lock(&thdlist_lock); + list_add_tail(&new_tsk->task_list, &monitor_list); + write_unlock(&thdlist_lock); + } + rcu_read_unlock(); + return count; + +failed: + kfree(new_tsk); +failed_tsk: + rcu_read_unlock(); + monitor_pid = 0; + return -ENOMEM; +} + +DEFINE_PROC_ATTRIBUTE_RW(futexpid); + +static int futexprint_show(struct seq_file *m, void *v) +{ + seq_printf(m, "%d\n", (int)enable_print_ustack); + return 0; +} + +static ssize_t futexprint_store(void *priv, const char __user *buf, size_t count) +{ + char buffer[PROC_NUMBUF]; + int val; + int err = -1; + + memset(buffer, 0, sizeof(buffer)); + if (count > sizeof(buffer) - 1) + count = sizeof(buffer) - 1; + if (copy_from_user(buffer, buf, count)) { + return -EFAULT; + } + err = kstrtoint(strstrip(buffer), 0, &val); + + if (val == 1) + enable_print_ustack = true; + else if (val == 0) + enable_print_ustack = false; + return count; +} + +DEFINE_PROC_ATTRIBUTE_RW(futexprint); + +static int ulockcheck_enable(void) +{ + int ret_wake, ret_wait; + + kp_wake.pre_handler = before_futex_wake; + + krp_wait.handler = after_futex_wait; + krp_wait.entry_handler = entry_handler; + + ret_wake = register_kprobe(&kp_wake); + if (ret_wake < 0) { + pr_err("register_kprobe failed, returned %d\n", ret_wake); + return -REGISTER_FAILED; + } + + ret_wait = register_kretprobe(&krp_wait); + if (ret_wait < 0) { + pr_err("register_kretprobe failed, returned %d\n", ret_wait); + unregister_kprobe(&kp_wake); + return -REGISTER_FAILED; + } + pr_info("Planted return probe at %s: %p\n", + krp_wait.kp.symbol_name, krp_wait.kp.addr); + pr_info("Planted kprobe futex_wake at %p\n", kp_wake.addr); + + ulock_bid = bbox_alloc("ulockcheck", BBOX_TYPE_RING); + if (ulock_bid < 0) { + printk("bbox alloc failed,cannot enable\n"); + unregister_kprobe(&kp_wake); + unregister_kretprobe(&krp_wait); + return -ENOMEM; + } + + return 0; +} + +void ulockcheck_disable(void) +{ + unregister_kprobe(&kp_wake); + unregister_kretprobe(&krp_wait); + + pr_info("kprobe futex_wake at %p unregistered\n", kp_wake.addr); + pr_info("kretprobe futex_wait at %p unregistered\n", krp_wait.kp.addr); + /* nmissed > 0 suggests that maxactive was set too low. 
*/ + pr_info("Missed probing %d instances of %s\n", + krp_wait.nmissed, krp_wait.kp.symbol_name); + + bbox_free(ulock_bid); +} + +static int futexenable_show(struct seq_file *m, void *v) +{ + seq_printf(m, "%d\n", (int)enbale_ulockcheck); + return 0; +} + +static ssize_t futexenable_store(void *priv, const char __user *buf, size_t count) +{ + char buffer[PROC_NUMBUF]; + int val; + int err = -1; + + memset(buffer, 0, sizeof(buffer)); + if (count > sizeof(buffer) - 1) + count = sizeof(buffer) - 1; + if (copy_from_user(buffer, buf, count)) { + return -EFAULT; + } + err = kstrtoint(strstrip(buffer), 0, &val); + + if (val == 1){ + if (!ulockcheck_enable()) + enbale_ulockcheck = true; + }else if (val == 0){ + ulockcheck_disable(); + enbale_ulockcheck = false; + } + return count; +} + +DEFINE_PROC_ATTRIBUTE_RW(futexenable); + +static int wait_delaythresold_show(struct seq_file *m, void *v) +{ + seq_printf(m, "%d\n", wait_delay_thresold); + return 0; +} + +static ssize_t wait_delaythresold_store(void *priv, const char __user *buf, size_t count) +{ + char buffer[PROC_NUMBUF]; + int val; + int err = -1; + + memset(buffer, 0, sizeof(buffer)); + if (count > sizeof(buffer) - 1) + count = sizeof(buffer) - 1; + if (copy_from_user(buffer, buf, count)) { + return -EFAULT; + } + err = kstrtoint(strstrip(buffer), 0, &val); + if (err) + return -EINVAL; + + wait_delay_thresold = val; + return count; +} + +DEFINE_PROC_ATTRIBUTE_RW(wait_delaythresold); + +static int lock_delaythresold_show(struct seq_file *m, void *v) +{ + seq_printf(m, "%d\n", lock_delay_thresold); + return 0; +} + +static ssize_t lock_delaythresold_store(void *priv, const char __user *buf, size_t count) +{ + char buffer[PROC_NUMBUF]; + int val; + int err = -1; + + memset(buffer, 0, sizeof(buffer)); + if (count > sizeof(buffer) - 1) + count = sizeof(buffer) - 1; + if (copy_from_user(buffer, buf, count)) { + return -EFAULT; + } + err = kstrtoint(strstrip(buffer), 0, &val); + if (err) + return -EINVAL; + + lock_delay_thresold = val; + return count; +} + +DEFINE_PROC_ATTRIBUTE_RW(lock_delaythresold); + + +int ulockcheck_init(void) +{ + struct proc_dir_entry *parent_dir; + struct proc_dir_entry *entry_enable; + struct proc_dir_entry *entry_print; + struct proc_dir_entry *entry_pid; + + parent_dir = sysak_proc_mkdir("ulockcheck"); + if (!parent_dir) { + goto failed_root; + } + + entry_enable = proc_create("enable", 0644, parent_dir, &futexenable_fops); + if(!entry_enable) { + goto failed; + } + + entry_print = proc_create("enable_print_ustack", 0644, parent_dir, &futexprint_fops); + if(!entry_print) { + goto failed; + } + + entry_pid = proc_create("ulockcheck_pid", 0644, parent_dir, &futexpid_fops); + if(!entry_pid) { + goto failed; + } + + if(!proc_create("wait_delaythresold", 0644, parent_dir, &wait_delaythresold_fops)) + goto failed; + + if(!proc_create("lock_delaythresold", 0644, parent_dir, &lock_delaythresold_fops)) + goto failed; + + return 0; + +failed: + sysak_remove_proc_entry("ulockcheck"); +failed_root: + return -1; +} + +int ulockcheck_exit(void) +{ + struct list_head *tsk_entry; + struct list_head *vma_entry; + struct list_head *tsk_prev; + struct list_head *vma_prev; + + if (!monitor_pid) + return 0; + + if (enbale_ulockcheck) + ulockcheck_disable(); + + list_for_each(tsk_entry, &monitor_list){ + struct task_info *tsk = container_of(tsk_entry, struct task_info, task_list); + tsk_prev = tsk_entry->prev; + + list_del(tsk_entry); + kfree(tsk); + tsk_entry = tsk_prev; + } + + list_for_each(vma_entry, &vma_list){ + struct vma_info *vma = 
container_of(vma_entry, struct vma_info, list); + vma_prev = vma_entry->prev; + + list_del(vma_entry); + kfree(vma); + vma_entry = vma_prev; + } + return 0; +} +#endif diff --git a/source/lib/internal/kernel_module/sysak_mods.c b/source/lib/internal/kernel_module/sysak_mods.c new file mode 100644 index 0000000000000000000000000000000000000000..5804e9613434fce4fde351c60df7d518704ae404 --- /dev/null +++ b/source/lib/internal/kernel_module/sysak_mods.c @@ -0,0 +1,100 @@ +#include "sysak_mods.h" + +int __attribute__((weak)) trace_sig_init(void) +{ + return 0; +} + +int __attribute__((weak)) trace_sig_exit(void) +{ + return 0; +} + +int __attribute__((weak)) memleak_init(void) +{ + return 0; +} + +int __attribute__((weak)) memleak_uninit(void) +{ + return 0; +} + +int __attribute__((weak)) memhunter_init(void) +{ + return 0; +} + +int __attribute__((weak)) memhunter_uninit(void) +{ + return 0; +} + +int __attribute__((weak)) trace_irqoff_init(void) +{ + return 0; +} + +int __attribute__((weak)) trace_irqoff_exit(void) +{ + return 0; +} + +int __attribute__((weak)) task_ctl_init(void) +{ + return 0; +} + +int __attribute__((weak)) task_ctl_exit(void) +{ + return 0; +} + +int __attribute__((weak)) schedtrace_init(void) +{ + return 0; +} + +int __attribute__((weak)) schedtrace_exit(void) +{ + return 0; +} +int __attribute__((weak)) mmaptrace_init(void) +{ + return 0; +} +int __attribute__((weak)) mmaptrace_exit(void) +{ + return 0; +} +int __attribute__((weak)) disk_hang_init(void) +{ + return 0; +} +int __attribute__((weak)) disk_hang_exit(void) +{ + return 0; +} +int __attribute__((weak)) ulockcheck_init(void) +{ + return 0; +} + +int __attribute__((weak)) ulockcheck_exit(void) +{ + return 0; +} + +struct sysak_module sysak_modules[] = { + { "trace_sig", trace_sig_init, trace_sig_exit}, + { "memleak", memleak_init, memleak_uninit}, + { "trace_irqoff", trace_irqoff_init, trace_irqoff_exit}, + { "task_ctl", task_ctl_init, task_ctl_exit}, + { "schedtrace", schedtrace_init, schedtrace_exit}, + { "mmap_trace", mmaptrace_init, mmaptrace_exit}, + { "iosdiag", disk_hang_init, disk_hang_exit}, + {"ulockcheck", ulockcheck_init, ulockcheck_exit}, + {"memhunter", memhunter_init, memhunter_uninit}, +}; + +const int sysk_module_num = sizeof(sysak_modules) / sizeof(struct sysak_module); diff --git a/source/lib/internal/kernel_module/sysak_mods.h b/source/lib/internal/kernel_module/sysak_mods.h new file mode 100644 index 0000000000000000000000000000000000000000..bf82d82aff6f294a90cc335b3a941a6dfca10c2a --- /dev/null +++ b/source/lib/internal/kernel_module/sysak_mods.h @@ -0,0 +1,21 @@ +#ifndef SYSAK_MOD_H +#define SYSAK_MOD_H + + +typedef int(*sysak_module_func)(void); + +struct sysak_module { + char name[16]; + sysak_module_func init; + sysak_module_func exit; +}; + +extern struct sysak_module sysak_modules[]; +extern const int sysk_module_num; +extern void sysak_module_get(int *mod_ref); +extern void sysak_module_put(int *mod_ref); +extern int sysak_dev_init(void); +extern void sysak_dev_uninit(void); +extern int sysak_bbox_init(void); +extern void sysak_bbox_exit(void); +#endif diff --git a/source/sysak b/source/sysak new file mode 100755 index 0000000000000000000000000000000000000000..ae205066530d74b3d549ff91b370e945cc8b4219 Binary files /dev/null and b/source/sysak differ diff --git a/source/tools/detect/mem/memleak/main.c b/source/tools/detect/mem/memleak/main.c index 989f3d0507d9c4b2dbd70fa1b71b533637c0083f..c90938e5c3d5e8b810dead50452da5099f4e34a3 100644 --- a/source/tools/detect/mem/memleak/main.c +++ 
b/source/tools/detect/mem/memleak/main.c @@ -14,6 +14,10 @@ #include "memleak.h" #include "user_api.h" +#define SLABINFO_FILE "/proc/slabinfo" +#define SLABINFO_MAX_LINES 100 +#define SLABINFO_MAX_LINE_LENGTH 256 + extern int read_meminfo(struct meminfo *mem); extern int slab_main(struct memleak_settings *set, int fd); extern int vmalloc_main(int argc, char **argv); @@ -66,6 +70,76 @@ static int memleak_check_only(struct meminfo *mi) return 0; } +int validate_monitor_time ( char * optarg) { + int monitor_time = 300; + int rc = 1; + + if (optarg == NULL) { + printf("Arguments needed in \"-i\".\n"); + rc = 0; + } else { + if (strchr(optarg, '.') == NULL && + sscanf(optarg, "%d", &monitor_time) && + monitor_time > 0) { + } else { + printf("Only the integer bigger than 0 is valid.\n"); + rc = 0; + } + } + return rc; +} + +int check_sys_kmalloc_list ( char * results[], char * kmalloc_name ) { + int rc = 0; + int count = 0; + char slabinfo_line[SLABINFO_MAX_LINE_LENGTH]; + + FILE* file = fopen(SLABINFO_FILE, "r"); + if (file == NULL) { + printf("Fail to open \"/proc/slabinfo\".\n"); + return rc; + } + while (fgets(slabinfo_line, sizeof(slabinfo_line), file) != NULL) { + char * token = strtok(slabinfo_line, " "); + if (token != NULL && strncmp(token, "kmalloc", 7) == 0) { + if (strcmp(token, kmalloc_name) == 0) { + rc = 1; + break; + } + results[count++] = strdup(token); + } + } + fclose(file); + + if (!rc) { + printf("You've probably entered the wrong name of kmalloc.\n"); + printf("The list of system-supported kmallocs: \n"); + for (int i = 0; i < count; i++) { + printf("%s\n", results[i]); + free(results[i]); + } + } + + return rc; +} + +int validate_slab_name ( char * optarg ) { + int rc; + char * kmalloc_list[SLABINFO_MAX_LINES]; + char * kmalloc_name = ""; + + if (optarg == NULL) { + printf("Arguments needed in \"-n\".\n"); + } else { + kmalloc_name = optarg; + } + rc = check_sys_kmalloc_list(kmalloc_list, kmalloc_name); + + return rc; +} + + + int get_arg(struct memleak_settings *set, int argc, char * argv[]) { int ch; @@ -83,7 +157,10 @@ int get_arg(struct memleak_settings *set, int argc, char * argv[]) set->type = MEMLEAK_TYPE_VMALLOC; break; case 'i': - set->monitor_time = atoi(optarg); + if (validate_monitor_time(optarg)) + set->monitor_time = atoi(optarg); + else + error = 1; break; case 'r': set->rate = atoi(optarg); @@ -93,7 +170,10 @@ int get_arg(struct memleak_settings *set, int argc, char * argv[]) error = 1; break; case 'n': - strncpy(set->name, optarg, NAME_LEN - 1); + if (validate_slab_name(optarg)) + strncpy(set->name, optarg, NAME_LEN - 1); + else + error = 1; break; case 'h': show_usage(); diff --git a/sysak-module.zip b/sysak-module.zip new file mode 100644 index 0000000000000000000000000000000000000000..ddc5fa139de3ad3f7a9729b4d9920d3f64c09faa Binary files /dev/null and b/sysak-module.zip differ