diff --git a/sysmonitor-1.3.2/CMakeLists.txt b/sysmonitor-1.3.2/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..8da7b76afefc79cbac6e0d1179e29530e92634ba
--- /dev/null
+++ b/sysmonitor-1.3.2/CMakeLists.txt
@@ -0,0 +1,32 @@
+# Copyright (c) Huawei Technologies Co., Ltd. 2018-2019. All rights reserved.
+# Description: cmake file of sysmonitor
+# Author: xuchunmei
+# Create: 2018-12-15
+
+cmake_minimum_required(VERSION 3.12.1)
+project(sysmonitor)
+
+SET(CMAKE_VERBOSE_MAKEFILE OFF)
+
+# COVERAGE_ENABLE appends the gcov instrumentation flags to COVERAGE_OPTION.
+IF(COVERAGE_ENABLE)
+    MESSAGE(STATUS "Enable coverage compile option")
+    SET(COVERAGE_OPTION "${COVERAGE_OPTION} -fprofile-arcs -ftest-coverage")
+ENDIF(COVERAGE_ENABLE)
+
+# ASAN_ENABLE appends the AddressSanitizer flags to ASAN_OPTIONS.
+# Both ASAN_OPTIONS and the older ASAN_OPTION spelling are read, so flags preset
+# under either name are kept instead of being silently dropped.
+IF(ASAN_ENABLE)
+    MESSAGE(STATUS "Enable asan compile option")
+    SET(ASAN_OPTIONS "${ASAN_OPTIONS} ${ASAN_OPTION} -fsanitize=address -fsanitize-recover=address")
+ENDIF(ASAN_ENABLE)
+
+# In this file the extra flags are only appended to the Debug flag sets.
+IF(CMAKE_BUILD_TYPE STREQUAL Debug)
+    SET(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} ${COVERAGE_OPTION} ${ASAN_OPTIONS}")
+    SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} ${COVERAGE_OPTION} ${ASAN_OPTIONS}")
+ENDIF()
+
+add_subdirectory(src)
+add_subdirectory(test)
diff --git a/sysmonitor-1.3.2/LICENSE b/sysmonitor-1.3.2/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..d159169d1050894d3ea3b98e1c965c4058208fe1 --- /dev/null +++ b/sysmonitor-1.3.2/LICENSE @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users.
This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. 
We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. 
+ +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. 
But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) 
+ +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. 
Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. 
Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. 
Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. 
It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. 
Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. diff --git a/sysmonitor-1.3.2/conf/clock_transition b/sysmonitor-1.3.2/conf/clock_transition new file mode 100644 index 0000000000000000000000000000000000000000..79f155014958abc78bccd24eabb8638c8874a152 --- /dev/null +++ b/sysmonitor-1.3.2/conf/clock_transition @@ -0,0 +1,4 @@ +MONITOR_SWITCH="on" +TYPE="periodic" +EXECSTART="/usr/bin/python3 /usr/libexec/sysmonitor/clocktransition.py" +PERIOD="600" diff --git a/sysmonitor-1.3.2/conf/collect_fd_list b/sysmonitor-1.3.2/conf/collect_fd_list new file mode 100644 index 0000000000000000000000000000000000000000..2b7a44466b6685cfd69aa23a8452a7e0165244af --- /dev/null +++ b/sysmonitor-1.3.2/conf/collect_fd_list @@ -0,0 +1,9 @@ +#占句柄数最多的进程个数 +ALL_FD_NUM_MONITOR_TOP_N="15" + +#占socket句柄数最多的进程个数 +SOCKET_FD_NUM_MONITOR_TOP_N="15" + +#收集的进程执行路径及参数,与/proc/pid/cmdline中一致,不支持模糊匹配 +CMD_LINE="/usr/bin/sysmonitor" +CMD_LINE="/usr/sbin/rsyslogd -n" diff --git a/sysmonitor-1.3.2/conf/cpu b/sysmonitor-1.3.2/conf/cpu new file mode 100644 index 0000000000000000000000000000000000000000..3edc577bd8e4235fd6ffe900e78901724af9883d --- /dev/null +++ b/sysmonitor-1.3.2/conf/cpu @@ -0,0 +1,20 @@ +# cpu usage alarm percent +ALARM="90" + +# cpu usage alarm resume percent +RESUME="80" + +# monitor period (second) +MONITOR_PERIOD="60" + +# stat period (second) +STAT_PERIOD="300" + +# cpu range in a domain, format can be "X,Y" or "X-Y" or mixed. 
Also can +# be followed by ALARM/RESUME to set usage alarm/resume percent in this domain. +# DOMAIN="0,1" +# DOMAIN="2-3" ALARM="50" RESUME="40" +DOMAIN="" + +# command executed when cpu usage exceeds alarm percent +REPORT_COMMAND="" diff --git a/sysmonitor-1.3.2/conf/disk b/sysmonitor-1.3.2/conf/disk new file mode 100644 index 0000000000000000000000000000000000000000..899df05a5e5bf031bdcc281282a5250c1a30d927 --- /dev/null +++ b/sysmonitor-1.3.2/conf/disk @@ -0,0 +1,5 @@ +# DISK: disk mount point; ALARM: alarm percent; RESUME: alarm resume percent +# DISK="/var" ALARM="80" RESUME="70" +# DISK="/root" ALARM="90" RESUME="75" +DISK="/" +DISK="/var/log" diff --git a/sysmonitor-1.3.2/conf/file b/sysmonitor-1.3.2/conf/file new file mode 100644 index 0000000000000000000000000000000000000000..e7baa0518ad938e5989df53fbc65be3bf2c63e9b --- /dev/null +++ b/sysmonitor-1.3.2/conf/file @@ -0,0 +1,2 @@ +# for now, 0x200 is del, 0x100 is add, and 0x300 is both del and add +/usr/sbin/logrotate 0x200 diff --git a/sysmonitor-1.3.2/conf/inode b/sysmonitor-1.3.2/conf/inode new file mode 100644 index 0000000000000000000000000000000000000000..899df05a5e5bf031bdcc281282a5250c1a30d927 --- /dev/null +++ b/sysmonitor-1.3.2/conf/inode @@ -0,0 +1,5 @@ +# DISK: disk mount point; ALARM: alarm percent; RESUME: alarm resume percent +# DISK="/var" ALARM="80" RESUME="70" +# DISK="/root" ALARM="90" RESUME="75" +DISK="/" +DISK="/var/log" diff --git a/sysmonitor-1.3.2/conf/io_monitor b/sysmonitor-1.3.2/conf/io_monitor new file mode 100644 index 0000000000000000000000000000000000000000..b727399c9319c87cb0ec78979ac904e0b6af45ba --- /dev/null +++ b/sysmonitor-1.3.2/conf/io_monitor @@ -0,0 +1,4 @@ +MONITOR_SWITCH="on" +TYPE="periodic" +EXECSTART="/usr/sbin/iomonitor_daemon" +PERIOD="1800" diff --git a/sysmonitor-1.3.2/conf/iodelay b/sysmonitor-1.3.2/conf/iodelay new file mode 100644 index 0000000000000000000000000000000000000000..e37404bd1b27154df150f466a1bec68ced1366da --- /dev/null +++ 
b/sysmonitor-1.3.2/conf/iodelay @@ -0,0 +1,2 @@ +# local disk io delay (milliseconds) +DELAY_VALUE="500" diff --git a/sysmonitor-1.3.2/conf/logind_monitor b/sysmonitor-1.3.2/conf/logind_monitor new file mode 100644 index 0000000000000000000000000000000000000000..40d7b3611208fd1ce03790c30961652f3a97a86b --- /dev/null +++ b/sysmonitor-1.3.2/conf/logind_monitor @@ -0,0 +1,4 @@ +MONITOR_SWITCH="on" +TYPE="periodic" +EXECSTART="/usr/libexec/sysmonitor/logind_clear.sh" +PERIOD="28800" diff --git a/sysmonitor-1.3.2/conf/logrotate.d/sysmonitor-logrotate b/sysmonitor-1.3.2/conf/logrotate.d/sysmonitor-logrotate new file mode 100644 index 0000000000000000000000000000000000000000..7eddc9cf68f0a1b0bdd8e17a018704e0554f3fbf --- /dev/null +++ b/sysmonitor-1.3.2/conf/logrotate.d/sysmonitor-logrotate @@ -0,0 +1,15 @@ +/var/log/sysmonitor.log +{ + create + compress + rotate 30 + missingok + notifempty + size +2048k + createolddir 700 root root + olddir /var/log/logdump/sysmonitor + sharedscripts + postrotate + /bin/kill -HUP `cat /var/run/rsyslogd.pid 2> /dev/null` 2> /dev/null || true + endscript +} diff --git a/sysmonitor-1.3.2/conf/memory b/sysmonitor-1.3.2/conf/memory new file mode 100644 index 0000000000000000000000000000000000000000..8900afb46ef52535f5f4f331bb672ea616991c7c --- /dev/null +++ b/sysmonitor-1.3.2/conf/memory @@ -0,0 +1,8 @@ +# memory usage alarm percent +ALARM="90" + +# memory usage alarm resume percent +RESUME="80" + +# monitor period(second) +PERIOD="60" diff --git a/sysmonitor-1.3.2/conf/network b/sysmonitor-1.3.2/conf/network new file mode 100644 index 0000000000000000000000000000000000000000..b9abcdddf39479963eea4b56649046f0e32b9d77 --- /dev/null +++ b/sysmonitor-1.3.2/conf/network @@ -0,0 +1 @@ +# network event monitor, format: eth0 UP/DOWN/NEWADDR/DELADDR diff --git a/sysmonitor-1.3.2/conf/process/acpid b/sysmonitor-1.3.2/conf/process/acpid new file mode 100644 index 0000000000000000000000000000000000000000..f38a2769998b794fecfaa443f61bba0961925824 --- 
/dev/null +++ b/sysmonitor-1.3.2/conf/process/acpid @@ -0,0 +1,5 @@ +USER=root +NAME=acpid +RECOVER_COMMAND=systemctl restart acpid +MONITOR_COMMAND=systemctl status acpid +STOP_COMMAND=systemctl stop acpid diff --git a/sysmonitor-1.3.2/conf/process/cron b/sysmonitor-1.3.2/conf/process/cron new file mode 100644 index 0000000000000000000000000000000000000000..c0ee91825f3c35a917acefed94748567cf9da2d7 --- /dev/null +++ b/sysmonitor-1.3.2/conf/process/cron @@ -0,0 +1,6 @@ +#MONITOR_COMMAND return value = 0, The status of crond is normal +#MONITOR_COMMAND return value = 1, The status of crond is abnormal +#MONITOR_COMMAND return value = 2, The number of crond process is abnormal +USER=root +NAME=crond +MONITOR_COMMAND=/usr/libexec/sysmonitor/check_cron.sh diff --git a/sysmonitor-1.3.2/conf/process/dbus b/sysmonitor-1.3.2/conf/process/dbus new file mode 100644 index 0000000000000000000000000000000000000000..b4ede5e2cf2ba649774c7c75b36e20deef8cb763 --- /dev/null +++ b/sysmonitor-1.3.2/conf/process/dbus @@ -0,0 +1,5 @@ +USER=root +NAME=dbus-daemon +MONITOR_MODE=parallel +MONITOR_PERIOD=30 +MONITOR_COMMAND=/usr/libexec/sysmonitor/check_dbus.sh diff --git a/sysmonitor-1.3.2/conf/process/irqbalance b/sysmonitor-1.3.2/conf/process/irqbalance new file mode 100644 index 0000000000000000000000000000000000000000..431b17f004588dec1fa4756f4cb1e316e2a982c6 --- /dev/null +++ b/sysmonitor-1.3.2/conf/process/irqbalance @@ -0,0 +1,5 @@ +USER=root +NAME=irqbalance +RECOVER_COMMAND=systemctl restart irqbalance +MONITOR_COMMAND=systemctl status irqbalance +STOP_COMMAND=systemctl stop irqbalance \ No newline at end of file diff --git a/sysmonitor-1.3.2/conf/process/libvirtd-monitor b/sysmonitor-1.3.2/conf/process/libvirtd-monitor new file mode 100644 index 0000000000000000000000000000000000000000..1032cd1cad297a38d43a4ae27a0e3cc0bff0501f --- /dev/null +++ b/sysmonitor-1.3.2/conf/process/libvirtd-monitor @@ -0,0 +1,5 @@ +USER=root +NAME=libvirtd +RECOVER_COMMAND=systemctl restart libvirtd 
+MONITOR_COMMAND=systemctl status libvirtd +STOP_COMMAND=systemctl stop libvirtd diff --git a/sysmonitor-1.3.2/conf/process/sshd b/sysmonitor-1.3.2/conf/process/sshd new file mode 100644 index 0000000000000000000000000000000000000000..19983b12937de9913e3caebc216dc4d838a01f78 --- /dev/null +++ b/sysmonitor-1.3.2/conf/process/sshd @@ -0,0 +1,5 @@ +USER=root +NAME=sshd +RECOVER_COMMAND=systemctl restart sshd +MONITOR_COMMAND=/usr/libexec/sysmonitor/check_sshd.sh +STOP_COMMAND=systemctl stop sshd diff --git a/sysmonitor-1.3.2/conf/process/syslog b/sysmonitor-1.3.2/conf/process/syslog new file mode 100644 index 0000000000000000000000000000000000000000..ebd7add3a20b6b26effdc663d980e9e06b350652 --- /dev/null +++ b/sysmonitor-1.3.2/conf/process/syslog @@ -0,0 +1,5 @@ +USER=root +NAME=rsyslogd +RECOVER_COMMAND=systemctl restart rsyslog +MONITOR_COMMAND=/usr/libexec/sysmonitor/check_syslog.sh +STOP_COMMAND=systemctl stop rsyslog diff --git a/sysmonitor-1.3.2/conf/process_fd_conf b/sysmonitor-1.3.2/conf/process_fd_conf new file mode 100644 index 0000000000000000000000000000000000000000..4846039a5796c5422975887eb0807fdbada89b83 --- /dev/null +++ b/sysmonitor-1.3.2/conf/process_fd_conf @@ -0,0 +1,2 @@ +#fd usage for one process(percent) +PR_FD_ALARM="80" diff --git a/sysmonitor-1.3.2/conf/pscnt b/sysmonitor-1.3.2/conf/pscnt new file mode 100644 index 0000000000000000000000000000000000000000..c9ce030b17e85a8b0531c6e36e519179d597b341 --- /dev/null +++ b/sysmonitor-1.3.2/conf/pscnt @@ -0,0 +1,18 @@ +# number of processes(including threads) when alarm occurs +ALARM="1600" + +# number of processes(including threads) when alarm resumes +RESUME="1500" + +# monitor period(second) +PERIOD="60" + +# process count usage alarm percent +ALARM_RATIO="90" + +# process count usage resume percent +RESUME_RATIO="80" + +# print top process info with largest num of threads when threads alarm +# (range: 0-1024, default: 10, monitor for thread off:0) +SHOW_TOP_PROC_NUM="10" diff --git
a/sysmonitor-1.3.2/conf/rsyslog.d/sysmonitor.conf b/sysmonitor-1.3.2/conf/rsyslog.d/sysmonitor.conf new file mode 100644 index 0000000000000000000000000000000000000000..676c4c7a61460a620a687155bb065a5911ecc0e6 --- /dev/null +++ b/sysmonitor-1.3.2/conf/rsyslog.d/sysmonitor.conf @@ -0,0 +1,17 @@ +$template sysmonitorformat,"%TIMESTAMP:::date-rfc3339%|%syslogseverity-text%|%msg%\n" + +$outchannel sysmonitor, /var/log/sysmonitor.log, 2097152, /usr/libexec/sysmonitor/sysmonitor_log_dump.sh +if ($programname == 'sysmonitor' and $syslogseverity <= 6) then { +:omfile:$sysmonitor;sysmonitorformat +stop +} + +if ($msg contains 'Time has been changed') then { +:omfile:$sysmonitor;sysmonitorformat +stop +} + +if ($programname == 'sysmonitor' and $syslogseverity > 6) then { +/dev/null +stop +} diff --git a/sysmonitor-1.3.2/conf/signal b/sysmonitor-1.3.2/conf/signal new file mode 100644 index 0000000000000000000000000000000000000000..7c00bb4c8774f89c1e86b7dd479bd7edf24b52cd --- /dev/null +++ b/sysmonitor-1.3.2/conf/signal @@ -0,0 +1,3 @@ +SIGKILL="on" + +SIGTERM="on" diff --git a/sysmonitor-1.3.2/conf/sys_fd_conf b/sysmonitor-1.3.2/conf/sys_fd_conf new file mode 100644 index 0000000000000000000000000000000000000000..d9274e4552004a299dad2f4a1ce6b57a51e2a5d5 --- /dev/null +++ b/sysmonitor-1.3.2/conf/sys_fd_conf @@ -0,0 +1,6 @@ +# system fd usage alarm percent +SYS_FD_ALARM="80" +# system fd usage alarm resume percent +SYS_FD_RESUME="70" +# monitor period (second) +SYS_FD_PERIOD="600" diff --git a/sysmonitor-1.3.2/conf/sysmonitor b/sysmonitor-1.3.2/conf/sysmonitor new file mode 100644 index 0000000000000000000000000000000000000000..5cf5124b9c8a1b9524570a759e672657feafa7fc --- /dev/null +++ b/sysmonitor-1.3.2/conf/sysmonitor @@ -0,0 +1,81 @@ +# process monitor +PROCESS_MONITOR="on" + +# process monitor period(second) +PROCESS_MONITOR_PERIOD="3" + +# process recover failed, recall recover period(minute) +PROCESS_RECALL_PERIOD="1" + +# process recover timeout (second) 
+PROCESS_RESTART_TIMEOUT="90" + +#alarm restrict num +PROCESS_ALARM_SUPRESS_NUM="5" + +# file system monitor +FILESYSTEM_MONITOR="on" + +# signal monitor +SIGNAL_MONITOR="on" + +# disk monitor +DISK_MONITOR="on" + +# disk monitor period(second) +DISK_MONITOR_PERIOD="60" + +# disk inode monitor +INODE_MONITOR="on" + +# disk inode monitor period(second) +INODE_MONITOR_PERIOD="60" + +# netcard monitor +NETCARD_MONITOR="on" + +# file monitor +FILE_MONITOR="on" + +# cpu monitor +CPU_MONITOR="on" + +# memory usage monitor +MEM_MONITOR="on" + +# process(thread) count monitor +PSCNT_MONITOR="on" + +# system fd usage monitor +FDCNT_MONITOR="on" + +# process fd usage monitor +PROCESS_FD_NUM_MONITOR="on" + +# custom daemon monitor +CUSTOM_DAEMON_MONITOR="on" + +# custom periodic monitor +CUSTOM_PERIODIC_MONITOR="on" + +# local disk io delay monitor +IO_DELAY_MONITOR="off" + +# zombie process count monitor +ZOMBIE_MONITOR="off" + +# Sysmonitor waits for all services to finish when 'PROCESS_MONITOR_DELAY' is set to 'on' or the 'PROCESS_MONITOR_DELAY' item is empty. +# It does not wait for all services to finish when 'PROCESS_MONITOR_DELAY' is set to 'off'.
+PROCESS_MONITOR_DELAY="on" + +# network fib info print rate limit, default is 5 in one second, range: 0-100 +NET_RATE_LIMIT_BURST="5" + +# fd monitor log path +FD_MONITOR_LOG_PATH="/var/log/fd_monitor.log" + +# check thread status monitor +CHECK_THREAD_MONITOR="on" + +# check thread failure num, range: 2-10 +CHECK_THREAD_FAILURE_NUM="3" diff --git a/sysmonitor-1.3.2/conf/w_log_conf b/sysmonitor-1.3.2/conf/w_log_conf new file mode 100644 index 0000000000000000000000000000000000000000..aeb423e67be5fc2137cb4ff63aba6b7b0ecb6958 --- /dev/null +++ b/sysmonitor-1.3.2/conf/w_log_conf @@ -0,0 +1,3 @@ +#log path config for normal mode +WRITE_LOG_PATH="/var/log/sysmonitor.log" +UTC_TIME="on" diff --git a/sysmonitor-1.3.2/conf/zombie b/sysmonitor-1.3.2/conf/zombie new file mode 100644 index 0000000000000000000000000000000000000000..ce43a7040804aa5845417c3efc93aa199aa75864 --- /dev/null +++ b/sysmonitor-1.3.2/conf/zombie @@ -0,0 +1,8 @@ +# Ceiling zombie process counts of alarm +ALARM="500" + +# Floor zombie process counts of resume +RESUME="400" + +# Period (second) +PERIOD="600" diff --git a/sysmonitor-1.3.2/module/Makefile b/sysmonitor-1.3.2/module/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..8030152a0b6ca5a047252bf42c4380d921a5ca10 --- /dev/null +++ b/sysmonitor-1.3.2/module/Makefile @@ -0,0 +1,21 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2018-2019. All rights reserved.
+# Description: make file of sysmonitor +# Author: xuchunmei +# Create: 2018-12-15 + +obj-m += sysmonitor.o +sysmonitor-objs := sysmonitor_main.o signo_catch.o fdstat.o monitor_netdev.o +KERNELDIR ?= /lib/modules/$(shell uname -r)/build +PWD := $(shell pwd) +EXTRA_CFLAGS += -Wall -Werror + +modules: + $(MAKE) -C $(KERNELDIR) M=$(PWD) modules + +modules_install: + $(MAKE) -C $(KERNELDIR) M=$(PWD) modules_install + +clean: + $(MAKE) -C $(KERNELDIR) M=$(PWD) clean + +.PHONY: modules modules_install clean diff --git a/sysmonitor-1.3.2/module/fdstat.c b/sysmonitor-1.3.2/module/fdstat.c new file mode 100644 index 0000000000000000000000000000000000000000..5b1aa9a7f08b56a9c4b198cded2942359fbb8517 --- /dev/null +++ b/sysmonitor-1.3.2/module/fdstat.c @@ -0,0 +1,50 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2016-2019. All rights reserved. + * Description: file handle statistic + * Author: xuchunmei + * Create: 2016-1-1 + */ +#include "fdstat.h" + +#include +#include +#include +#include +#include + +#include "sysmonitor_main.h" + +#ifdef CONFIG_EULEROS_SYSMONITOR_FD +/* Notifier callback: forward the reported pid, comm and total_fd_num + 1 to save_msg() as an FDSTAT event. */ +static int do_fdstat(struct notifier_block *self, unsigned long val, void *data) +{ + struct fdstat *notifier_call_data = (struct fdstat *)data; + struct fdstat msg; + + (void)memset(&msg, 0, sizeof(struct fdstat)); + msg.pid = notifier_call_data->pid; + msg.total_fd_num = notifier_call_data->total_fd_num + 1; + (void)memcpy(msg.comm, notifier_call_data->comm, TASK_COMM_LEN); + (void)save_msg(FDSTAT, &msg, sizeof(struct fdstat)); + return NOTIFY_DONE; +} + +static struct notifier_block g_fdstat_nb = { + .notifier_call = do_fdstat, + .priority = NOTIFY_CALL_PRIORITY, +}; +#endif + +void fdstat_init(void) +{ +#ifdef CONFIG_EULEROS_SYSMONITOR_FD + (void)register_fdstat_notifier(&g_fdstat_nb); +#endif +} + +void fdstat_exit(void) +{ +#ifdef CONFIG_EULEROS_SYSMONITOR_FD + (void)unregister_fdstat_notifier(&g_fdstat_nb); +#endif +} diff --git a/sysmonitor-1.3.2/module/fdstat.h 
b/sysmonitor-1.3.2/module/fdstat.h new file mode 100644 index 0000000000000000000000000000000000000000..ae9ccc39062f99eae13b5c8a5892fd9f949d68e0 --- /dev/null +++ b/sysmonitor-1.3.2/module/fdstat.h @@ -0,0 +1,23 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2016-2019. All rights reserved. + * Description: file handle statistic + * Author: xuchunmei + * Create: 2016-1-1 + */ +#ifndef SYSMONITOR_FDSTAT_H +#define SYSMONITOR_FDSTAT_H + +#include +#include + +#ifndef CONFIG_EULEROS_SYSMONITOR_FD +struct fdstat { + pid_t pid; + unsigned int total_fd_num; + char comm[TASK_COMM_LEN]; +}; +#endif + +void fdstat_init(void); +void fdstat_exit(void); +#endif diff --git a/sysmonitor-1.3.2/module/monitor_netdev.c b/sysmonitor-1.3.2/module/monitor_netdev.c new file mode 100644 index 0000000000000000000000000000000000000000..50d7a6c5a0b014f7feed51382cce068727d308ad --- /dev/null +++ b/sysmonitor-1.3.2/module/monitor_netdev.c @@ -0,0 +1,362 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2016-2019. All rights reserved. 
+ * Description: network device event monitor + * Author: xuchunmei + * Create: 2016-1-1 + */ +#include "monitor_netdev.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include "sysmonitor_main.h" + +#define LOOKUP_REG_NETDEV_FUNC_BIT 0x00000001 +#define REG_NETDEV_NOTIFIER_BIT 0x00000002 +#define REG_INETADDR_NOTIFIER_BIT 0x00000004 +#define REG_INET6ADDR_NOTIFIER_BIT 0x00000008 +#define REG_FIB_TBL_NOTIFIER_BIT 0x00000010 +#define ALL_FUNC_AVAILABLE_MASK \ + (REG_NETDEV_NOTIFIER_BIT | \ + REG_INETADDR_NOTIFIER_BIT | \ + REG_INET6ADDR_NOTIFIER_BIT | \ + REG_FIB_TBL_NOTIFIER_BIT) +static unsigned int g_func_available_bit_mask; +static int g_is_exiting; + +/* Not more than 5 messages every 1s */ +static DEFINE_RATELIMIT_STATE(monitor_netdev_ratelimit, (1 * HZ), (5)); + +static int save_msg_process_info(struct netmonitor_info *msg) +{ + msg->pid = current->pid; + msg->parent_pid = current->real_parent->pid; + (void)memcpy(msg->comm, current->comm, TASK_COMM_LEN); + (void)memcpy(msg->parent_comm, current->real_parent->comm, TASK_COMM_LEN); + + return 0; +} + +static void print_netdev_status(const char *name, unsigned long event) +{ + struct netmonitor_info msg; + int ret; + + if (unlikely(g_is_exiting != 0)) { + return; + } + + (void)memset(&msg, 0, sizeof(struct netmonitor_info)); + + if (event == NETDEV_PRE_UP) { + msg.event = UP; + } else if (event == NETDEV_GOING_DOWN) { + msg.event = DOWN; + } else { + return; + } + + ret = save_msg_process_info(&msg); + if (ret != 0) { + return; + } + + (void)memcpy(msg.dev, name, IFNAMSIZ); + + (void)save_msg(NETWORK, &msg, sizeof(struct netmonitor_info)); +} + +static void print_address_status(const struct in_ifaddr *in_dev, unsigned long event) +{ + struct netmonitor_info msg; + int ret; + + (void)memset(&msg, 0, sizeof(struct netmonitor_info)); + + if (event == NETDEV_UP) { + msg.event = NEWADDR; + } else if (event == NETDEV_DOWN) { + msg.event = DELADDR; + } else { + return; + } + + ret 
= save_msg_process_info(&msg); + if (ret != 0) { + return; + } + + msg.addr.in.s_addr = in_dev->ifa_address; + msg.plen = in_dev->ifa_prefixlen; + (void)memcpy(msg.dev, in_dev->ifa_label, IFNAMSIZ); + (void)save_msg(NETWORK, &msg, sizeof(struct netmonitor_info)); +} + +static int monitor_netdevice_event(struct notifier_block *unused, unsigned long event, void *ptr) +{ + struct netdev_notifier_info *info = (struct netdev_notifier_info *)ptr; + + if (info != NULL && info->dev != NULL) { + print_netdev_status(info->dev->name, event); + } + + return NOTIFY_DONE; +} + +static void print_address6_status(const struct inet6_ifaddr *if6, unsigned long event) +{ + struct netmonitor_info msg; + struct net_device *dev = (struct net_device *)if6->idev->dev; + int ret; + + (void)memset(&msg, 0, sizeof(struct netmonitor_info)); + + if (event == NETDEV_UP) { + msg.event = NEWADDR6; + } else if (event == NETDEV_DOWN) { + msg.event = DELADDR6; + } else { + return; + } + + ret = save_msg_process_info(&msg); + if (ret != 0) { + return; + } + + msg.addr.in6 = if6->addr; + msg.plen = (int)if6->prefix_len; + (void)memcpy(msg.dev, dev->name, IFNAMSIZ); + (void)save_msg(NETWORK, &msg, sizeof(struct netmonitor_info)); +} + +static int monitor_address_notifier(struct notifier_block *this, unsigned long event, void *ifa) +{ + struct in_ifaddr *in_dev = (struct in_ifaddr *)ifa; + + if (in_dev != NULL) { + print_address_status(in_dev, event); + } + + return NOTIFY_DONE; +} + +static int monitor_address6_notifier(struct notifier_block *this, unsigned long event, void *ptr) +{ + struct inet6_ifaddr *if6 = (struct inet6_ifaddr *)ptr; + if (if6 != NULL) + print_address6_status(if6, event); + + return NOTIFY_DONE; +} + +static struct notifier_block g_test_inet_notifier = { + .notifier_call = monitor_address_notifier, +}; + +static struct notifier_block g_test_inet6_notifier = { + .notifier_call = monitor_address6_notifier, +}; + +static struct notifier_block g_test_dev_notifier = { + .notifier_call 
= monitor_netdevice_event, +}; + +static void print_fib4_table_status(const struct fib_entry_notifier_info *fib_entry_info, unsigned long event) +{ + struct netmonitor_info msg; + int ret; + + if (fib_entry_info == NULL) { + printk("[monitor_netdev]print_fib4_table_status: fib4_entry_info is null\n"); + return; + } + + if (monitor_netdev_ratelimit.burst != get_netratelimit_burst()) + monitor_netdev_ratelimit.burst = get_netratelimit_burst(); + + if (!__ratelimit(&monitor_netdev_ratelimit)) + return; + + (void)memset(&msg, 0, sizeof(struct netmonitor_info)); + if (event == FIB_EVENT_ENTRY_DEL) { + msg.event = FIB_DEL; + } else if (event == FIB_EVENT_ENTRY_ADD) { + msg.event = FIB_ADD; + } else if (event == FIB_EVENT_ENTRY_REPLACE) { + msg.event = FIB_REPLACE; + } else if (event == FIB_EVENT_ENTRY_APPEND) { + msg.event = FIB_APPEND; + } else { + return; + } + + ret = save_msg_process_info(&msg); + if (ret != 0) { + return; + } + + msg.tb_id = (int)fib_entry_info->tb_id; + msg.plen = fib_entry_info->dst_len; + msg.addr.in.s_addr = htonl(fib_entry_info->dst); + (void)save_msg(NETWORK, &msg, sizeof(struct netmonitor_info)); +} + +static void print_fib6_table_status(const struct fib6_entry_notifier_info *fib6_entry_info, unsigned long event) +{ + struct netmonitor_info msg; + int ret; + + if (fib6_entry_info == NULL) { + printk("[monitor_netdev]print_fib6_table_status: fib6_entry_info is null\n"); + return; + } + if (fib6_entry_info->rt == NULL) { + printk("[monitor_netdev]print_fib6_table_status: fib6_entry_info->rt is null\n"); + return; + } + + if (monitor_netdev_ratelimit.burst != get_netratelimit_burst()) + monitor_netdev_ratelimit.burst = get_netratelimit_burst(); + + if (!__ratelimit(&monitor_netdev_ratelimit)) + return; + + (void)memset(&msg, 0, sizeof(struct netmonitor_info)); + + if (event == FIB_EVENT_ENTRY_DEL) { + msg.event = FIB6_DEL; + } else if (event == FIB_EVENT_ENTRY_ADD) { + msg.event = FIB6_ADD; + } else if (event == FIB_EVENT_ENTRY_REPLACE) { + 
msg.event = FIB6_REPLACE; + } else if (event == FIB_EVENT_ENTRY_APPEND) { + msg.event = FIB6_APPEND; + } else { + return; + } + + ret = save_msg_process_info(&msg); + if (ret != 0) { + return; + } + + msg.plen = fib6_entry_info->rt->fib6_dst.plen; + + msg.addr.in6 = fib6_entry_info->rt->fib6_dst.addr; + (void)save_msg(NETWORK, &msg, sizeof(struct netmonitor_info)); +} + +static void print_fib_table_status(const struct fib_notifier_info *fib_info, unsigned long event) +{ + if (fib_info == NULL) { + printk("[monitor_netdev]print_fib_table_status: fib_info is null\n"); + return; + } + + if (fib_info->family == AF_INET) { + struct fib_entry_notifier_info *fib_entry_info = + container_of(fib_info, struct fib_entry_notifier_info, info); + + print_fib4_table_status(fib_entry_info, event); + } else if (fib_info->family == AF_INET6) { + struct fib6_entry_notifier_info *fib6_entry_info = + container_of(fib_info, struct fib6_entry_notifier_info, info); + + print_fib6_table_status(fib6_entry_info, event); + } +} + +static int monitor_fib_table_event(struct notifier_block *unused, unsigned long event, void *ptr) +{ + struct fib_notifier_info *fib_info = (struct fib_notifier_info *)ptr; + + if (!(event == FIB_EVENT_ENTRY_REPLACE || event == FIB_EVENT_ENTRY_APPEND || + event == FIB_EVENT_ENTRY_ADD || event == FIB_EVENT_ENTRY_DEL)) + return NOTIFY_DONE; + + if (fib_info != NULL) + print_fib_table_status(fib_info, event); + + return NOTIFY_DONE; +} + +static struct notifier_block g_fib_table_notifier = { + .notifier_call = monitor_fib_table_event, +}; + +void monitor_netdev_init(void) +{ + int err; + + g_func_available_bit_mask = 0; + + /* init net device status monitor */ + g_func_available_bit_mask |= LOOKUP_REG_NETDEV_FUNC_BIT; + err = register_netdevice_notifier(&g_test_dev_notifier); + if (err < 0) { + printk(KERN_ERR "[monitor_netdev] register_netdevice_notifier fail\n"); + } else { + g_func_available_bit_mask |= REG_NETDEV_NOTIFIER_BIT; + } + + /* init net device ip monitor 
*/ + err = register_inetaddr_notifier(&g_test_inet_notifier); + if (err < 0) { + printk(KERN_ERR "[monitor_netdev] register_inetaddr_notifier fail\n"); + } else { + g_func_available_bit_mask |= REG_INETADDR_NOTIFIER_BIT; + } + + err = register_inet6addr_notifier(&g_test_inet6_notifier); + if (err < 0) { + printk(KERN_ERR "[monitor_netdev] register_inet6addr_notifier fail\n"); + } else { + g_func_available_bit_mask |= REG_INET6ADDR_NOTIFIER_BIT; + } + + /* init fib table monitor */ + err = register_fib_notifier(&init_net, &g_fib_table_notifier, NULL, NULL); + if (err < 0) { + printk(KERN_ERR "[monitor_netdev] register_fib_notifier fail\n"); + } else { + g_func_available_bit_mask |= REG_FIB_TBL_NOTIFIER_BIT; + } + + if (!(g_func_available_bit_mask & ALL_FUNC_AVAILABLE_MASK)) { + printk("[monitor_netdev] all functions are unavailable(0x%x), has to exit.\n", + g_func_available_bit_mask); + return; + } + + printk("[monitor_netdev] initial finished. function available: 0x%x\n", + g_func_available_bit_mask); +} + +void monitor_netdev_exit(void) +{ + g_is_exiting = 1; + + if (g_func_available_bit_mask & REG_NETDEV_NOTIFIER_BIT) { + unregister_netdevice_notifier(&g_test_dev_notifier); + } + + if (g_func_available_bit_mask & REG_INETADDR_NOTIFIER_BIT) { + unregister_inetaddr_notifier(&g_test_inet_notifier); + } + + if (g_func_available_bit_mask & REG_INET6ADDR_NOTIFIER_BIT) { + unregister_inet6addr_notifier(&g_test_inet6_notifier); + } + + if (g_func_available_bit_mask & REG_FIB_TBL_NOTIFIER_BIT) { + unregister_fib_notifier(&init_net, &g_fib_table_notifier); + } + + printk("[monitor_netdev] exit\n"); +} diff --git a/sysmonitor-1.3.2/module/monitor_netdev.h b/sysmonitor-1.3.2/module/monitor_netdev.h new file mode 100644 index 0000000000000000000000000000000000000000..5cca9b40c50c9e1e00c7b0979f92865ec223c5d0 --- /dev/null +++ b/sysmonitor-1.3.2/module/monitor_netdev.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2016-2019. All rights reserved. 
+ * Description: network device event monitor, structure for net event + * Author: xuchunmei + * Create: 2016-1-1 + */ +#ifndef MONITOR_NETDEV_H +#define MONITOR_NETDEV_H + +#include +#include +#include +#include + +enum netmonitor_event { + UP, + DOWN, + DELADDR, + NEWADDR, + DELADDR6, + NEWADDR6, + FIB_DEL, + FIB_ADD, + FIB_REPLACE, + FIB_APPEND, + FIB6_DEL, + FIB6_ADD, + FIB6_REPLACE, + FIB6_APPEND +}; + +struct netmonitor_info { + int event; + pid_t pid; + char comm[TASK_COMM_LEN]; + pid_t parent_pid; + char parent_comm[TASK_COMM_LEN]; + char dev[IFNAMSIZ]; + int plen; + int tb_id; + union nf_inet_addr addr; +}; + +void monitor_netdev_init(void); +void monitor_netdev_exit(void); +#endif diff --git a/sysmonitor-1.3.2/module/signo_catch.c b/sysmonitor-1.3.2/module/signo_catch.c new file mode 100644 index 0000000000000000000000000000000000000000..5273f29b28dadb3bf290f69c0b4ce16df55d69e8 --- /dev/null +++ b/sysmonitor-1.3.2/module/signo_catch.c @@ -0,0 +1,289 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2016-2019. All rights reserved. + * Description: signal catch module + * Author: xuchunmei + * Create: 2016-1-1 + */ +#include "signo_catch.h" + +#include +#include +#include +#include +#include +#include + +#include "sysmonitor_main.h" + +#define SIGNAL_COUNT 31 + +/* qemu kill -9 process, for libvirt use, do not change !!!! 
*/ +#define QEMU_SIG +#ifdef QEMU_SIG +static DECLARE_WAIT_QUEUE_HEAD(g_qemu_wait); +static ulong g_qemu_log_seq; /* index for logged buffer */ +static ulong g_qemu_buf_seq; /* index for reader */ +#define SIG_BUFSIZE 256 +#define SIG_BUFMASK (SIG_BUFSIZE - 1) +static qemu_signo_msg g_qemu_buf[SIG_BUFSIZE]; +struct proc_dir_entry *g_proc_qemu; + +static ssize_t qemu_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) +{ + int error, index; + + if (buf == NULL || count < sizeof(qemu_signo_msg)) { + return -EINVAL; + } + + /* ring buf size is SIG_BUFSIZE, so we can't read more than that */ + if ((g_qemu_buf_seq - g_qemu_log_seq) >= SIG_BUFSIZE) { + g_qemu_log_seq = g_qemu_buf_seq - SIG_BUFSIZE + 1; + } + + /* it will return immediately if secend arg is not 0 */ + error = wait_event_interruptible(g_qemu_wait, g_qemu_buf_seq != g_qemu_log_seq); + if (error != 0) { + return error; + } + + index = g_qemu_log_seq & SIG_BUFMASK; + g_qemu_log_seq++; + + error = copy_to_user(buf, g_qemu_buf + index, sizeof(qemu_signo_msg)); + if (error != 0) { + return -EFAULT; + } + return sizeof(qemu_signo_msg); +} + +static unsigned int qemu_poll(struct file *file, poll_table *wait) +{ + poll_wait(file, &g_qemu_wait, wait); + if (g_qemu_buf_seq != g_qemu_log_seq) { + return POLLIN | POLLRDNORM; + } + + return 0; +} + +static int qemu_open(struct inode *inode, struct file *file) +{ + if (!try_module_get(THIS_MODULE)) { + return -ENOENT; + } + + return 0; +} + +static int qemu_release(struct inode *inode, struct file *file) +{ + module_put(THIS_MODULE); + return 0; +} + +static const struct proc_ops g_proc_qemu_operations = { + .proc_read = qemu_read, + .proc_poll = qemu_poll, + .proc_open = qemu_open, + .proc_release = qemu_release, + .proc_lseek = generic_file_llseek, +}; +#endif + +#ifdef CONFIG_EULEROS_SYSMONITOR_SIGNAL +/* Here introduce euler_get_mm_exe_file and euler_get_task_exe_file + * to solve the build and insmod error. 
+ */ +static struct file *euler_get_mm_exe_file(const struct mm_struct *mm) +{ + struct file *exe_file = NULL; + + rcu_read_lock(); + exe_file = rcu_dereference(mm->exe_file); + if (exe_file && !get_file_rcu(exe_file)) { + exe_file = NULL; + } + rcu_read_unlock(); + return exe_file; +} + +static struct file *euler_get_task_exe_file(struct task_struct *task) +{ + struct file *exe_file = NULL; + struct mm_struct *mm = NULL; + + /* in oom_killer_process, task lock will be hold before send signal */ + if (spin_is_locked(&task->alloc_lock)) { + return NULL; + } + + task_lock(task); + mm = task->mm; + if (mm != NULL) { + if (!(task->flags & PF_KTHREAD)) { + exe_file = euler_get_mm_exe_file(mm); + } + } + task_unlock(task); + return exe_file; +} + +static int save_exe_info(char *exe, int exe_size, struct task_struct *task) +{ + struct file *exe_file = NULL; + int ret; + + exe_file = euler_get_task_exe_file(task); + if (exe_file != NULL) { + ret = memcpy(exe, + exe_file->f_path.dentry->d_name.name, + exe_file->f_path.dentry->d_name.len); + if (ret != 0) { + fput(exe_file); + return ret; + } + fput(exe_file); + } + return 0; +} + +static int save_msg_info(ce_signo_msg *msg, const send_sig_info_data_t *notifier_call_data) +{ + int ret, i; + struct task_struct *ptask = NULL; + + (void)memset(msg, 0, sizeof(ce_signo_msg)); + + msg->send_pid = current->pid; + (void)memcpy(msg->send_comm, current->comm, TASK_COMM_LEN); + + msg->send_parent_pid = current->parent->pid; + (void)memcpy(msg->send_parent_comm, current->parent->comm, TASK_COMM_LEN); + + rcu_read_lock(); + ptask = rcu_dereference(current->parent); + for (i = 0; i < CALL_CHAIN_NUM; i++) { + if ((ptask->pid == 0) || (ptask->pid == 1)) + break; + + ptask = rcu_dereference(ptask->real_parent); + msg->send_chain_pid[i] = task_pid_nr(ptask); + (void)memcpy(msg->send_chain_comm[i], ptask->comm, TASK_COMM_LEN); + } + rcu_read_unlock(); + + msg->recv_pid = notifier_call_data->p->pid; + (void)memcpy(msg->recv_comm, 
notifier_call_data->p->comm, TASK_COMM_LEN); + + msg->signo = notifier_call_data->sig; + ret = save_exe_info(msg->send_exe, NAME_MAX, current); + if (ret != 0) { + pr_err("[signo]: memcpy msg->send_exe failed, ret: %d\n", ret); + return -1; + } + + ret = save_exe_info(msg->send_parent_exe, NAME_MAX, current->parent); + if (ret != 0) { + pr_err("[signo]: memcpy msg->send_parent_exe failed, ret: %d\n", ret); + return -1; + } + + ret = save_exe_info(msg->recv_exe, NAME_MAX, notifier_call_data->p); + if (ret != 0) { + pr_err("[signo]: memcpy msg->recv_exe failed, ret: %d\n", ret); + return -1; + } + + return 0; +} + +static int save_qemu_msg_info(qemu_signo_msg *qemu_msg, const send_sig_info_data_t *notifier_call_data) +{ + int ret; + + (void)memset(qemu_msg, 0, sizeof(qemu_signo_msg)); + qemu_msg->send_pid = current->pid; + (void)memcpy(qemu_msg->send_comm, current->comm, TASK_COMM_LEN); + qemu_msg->send_parent_pid = current->parent->pid; + (void)memcpy(qemu_msg->send_parent_comm, current->parent->comm, TASK_COMM_LEN); + qemu_msg->recv_pid = notifier_call_data->p->pid; + (void)memcpy(qemu_msg->recv_comm, notifier_call_data->p->comm, TASK_COMM_LEN); + qemu_msg->signo = notifier_call_data->sig; + return 0; +} + +static int do_store_sig_info(struct notifier_block *self, unsigned long val, void *data) +{ + send_sig_info_data_t *notifier_call_data = (send_sig_info_data_t *)data; + ce_signo_msg msg; + ulong index; + qemu_signo_msg *qemu_msg = NULL; + unsigned long sigcatchmask = get_sigcatchmask(); + int ret; + + if ((notifier_call_data->sig <= SIGNAL_COUNT) && + (sigcatchmask & (1ul << (unsigned int)(notifier_call_data->sig - 1)))) { + ret = save_msg_info(&msg, notifier_call_data); + if (ret != 0) { + goto out; + } + + (void)save_msg(SIGNAL, &msg, sizeof(ce_signo_msg)); + } + +#ifdef QEMU_SIG + if ((notifier_call_data->sig == SIGKILL) && + !strcmp(notifier_call_data->p->comm, "qemu-kvm")) { + index = g_qemu_buf_seq & SIG_BUFMASK; + qemu_msg = g_qemu_buf + index; + + ret = 
save_qemu_msg_info(qemu_msg, notifier_call_data); + if (ret) { + goto out; + } + + g_qemu_buf_seq++; + + if (waitqueue_active(&g_qemu_wait)) { + wake_up_interruptible(&g_qemu_wait); + } + } +#endif +out: + return NOTIFY_DONE; +} + +static struct notifier_block g_signo_catch_nb = { + .notifier_call = do_store_sig_info, + .priority = NOTIFY_CALL_PRIORITY, +}; +#endif + +void signo_catch_init(void) +{ +#ifdef QEMU_SIG + g_proc_qemu = proc_create("sig_catch", 0400, NULL, &g_proc_qemu_operations); + if (g_proc_qemu == NULL) { + printk(KERN_ERR "signo_catch: create /proc/sig_catch failed.\n"); + } +#endif +#ifdef CONFIG_EULEROS_SYSMONITOR_SIGNAL + (void)register_signo_catch_notifier(&g_signo_catch_nb); +#endif + printk(KERN_INFO "signo_catch: Planted send_sig_info_notifier_list register\n"); +} + +void signo_catch_exit(void) +{ +#ifdef CONFIG_EULEROS_SYSMONITOR_SIGNAL + (void)unregister_signo_catch_notifier(&g_signo_catch_nb); +#endif +#ifdef QEMU_SIG + if (g_proc_qemu != NULL) { + proc_remove(g_proc_qemu); + } +#endif + printk(KERN_INFO "signo_catch: send_sig_info_notifier_list unregistered\n"); +} + diff --git a/sysmonitor-1.3.2/module/signo_catch.h b/sysmonitor-1.3.2/module/signo_catch.h new file mode 100644 index 0000000000000000000000000000000000000000..0c2b9f7030766fdbe0d13e48ead8032e60a4669c --- /dev/null +++ b/sysmonitor-1.3.2/module/signo_catch.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2016-2019. All rights reserved. 
+ * Description: define variable, structure and function for signal catch module + * Author: xuchunmei + * Create: 2016-1-1 + */ +#ifndef SIGNO_CATCH_H +#define SIGNO_CATCH_H + +#include +#include +#include + +#define CALL_CHAIN_NUM 4 + +typedef struct _signo_msg { + unsigned long signo; + pid_t send_pid; + char send_comm[TASK_COMM_LEN]; + char send_exe[NAME_MAX]; + pid_t send_parent_pid; + char send_parent_comm[TASK_COMM_LEN]; + char send_parent_exe[NAME_MAX]; + pid_t recv_pid; + char recv_comm[TASK_COMM_LEN]; + char recv_exe[NAME_MAX]; + pid_t send_chain_pid[CALL_CHAIN_NUM]; + char send_chain_comm[CALL_CHAIN_NUM][TASK_COMM_LEN]; +} ce_signo_msg; + +typedef struct __signo_msg { + unsigned long signo; + pid_t send_pid; + char send_comm[TASK_COMM_LEN]; + pid_t send_parent_pid; + char send_parent_comm[TASK_COMM_LEN]; + pid_t recv_pid; + char recv_comm[TASK_COMM_LEN]; +} qemu_signo_msg; + +void signo_catch_init(void); +void signo_catch_exit(void); + +#endif diff --git a/sysmonitor-1.3.2/module/sysmonitor_main.c b/sysmonitor-1.3.2/module/sysmonitor_main.c new file mode 100644 index 0000000000000000000000000000000000000000..a55403787cbb9d5efb48eb5a1efb6afa08cba63c --- /dev/null +++ b/sysmonitor-1.3.2/module/sysmonitor_main.c @@ -0,0 +1,196 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2019-2020. All rights reserved. 
+ * Description: sysmonitor event msg handler, include signal, fd and network + * Author: xuchunmei + * Create: 2019-3-20 + */ +#include "sysmonitor_main.h" + +#include +#include +#include +#include +#include + +//#include +#include "signo_catch.h" +#include "fdstat.h" +#include "monitor_netdev.h" + +#define NET_RATELIMIT_BURST_MIN 0 +#define NET_RATELIMIT_BURST_MAX 100 +#define SYSMONITOR_MSG_MAX_LEN 1024 +struct sysmonitor_msg { + int type; + char msg[SYSMONITOR_MSG_MAX_LEN]; +}; + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("sysmonitor module, monitor for signal, fd and net device"); + +static ulong sigcatchmask; +module_param(sigcatchmask, ulong, 0600); +MODULE_PARM_DESC(sigcatchmask, + "mask for signal catch, set corresponding bit to 1 to enable signal catch"); + +static int pararm_set_netratelimit_burst(const char *val, const struct kernel_param *kp); +const struct kernel_param_ops netratelimit_burst_param_ops = { + .set = pararm_set_netratelimit_burst, + .get = param_get_int, +}; + +static int netratelimit_burst = 5; +module_param_cb(netratelimit_burst, &netratelimit_burst_param_ops, &netratelimit_burst, 0600); +MODULE_PARM_DESC(netratelimit_burst, "network fib route event messgae rate limit"); +struct proc_dir_entry *g_proc_sysmonitor; +static unsigned long g_msg_log_seq; +static unsigned long g_msg_buf_seq; +#define MSG_BUFSIZE 256 +#define MSG_BUFMASK (MSG_BUFSIZE - 1) +static struct sysmonitor_msg g_msg_buf[MSG_BUFSIZE]; +DECLARE_WAIT_QUEUE_HEAD(g_msg_wait); +DEFINE_SPINLOCK(g_msg_buf_lock); + +static int pararm_set_netratelimit_burst(const char *val, const struct kernel_param *kp) +{ + int pre_value = netratelimit_burst; + int res = param_set_int(val, kp); + if (res == 0) { + if (netratelimit_burst < NET_RATELIMIT_BURST_MIN || netratelimit_burst > NET_RATELIMIT_BURST_MAX) { + (void)printk(KERN_WARNING "set netratelimit_burst out of range, keep the original\n"); + netratelimit_burst = pre_value; + } + return 0; + } + return -1; +} + +ulong 
get_sigcatchmask(void) +{ + return sigcatchmask; +} + +int get_netratelimit_burst(void) +{ + return netratelimit_burst; +} + +static ssize_t sysmonitor_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) +{ + int error, index; + + if (buf == NULL || count < sizeof(struct sysmonitor_msg)) { + return -EINVAL; + } + + /* ring buf size is MSG_BUFSIZE, so we can't read more than that */ + if ((g_msg_buf_seq - g_msg_log_seq) >= MSG_BUFSIZE) { + g_msg_log_seq = g_msg_buf_seq - MSG_BUFSIZE + 1; + } + + /* it will return immediately if secend arg is not 0 */ + error = wait_event_interruptible(g_msg_wait, g_msg_buf_seq != g_msg_log_seq); + if (error != 0) { + return error; + } + + index = g_msg_log_seq & MSG_BUFMASK; + g_msg_log_seq++; + + error = copy_to_user(buf, g_msg_buf + index, sizeof(struct sysmonitor_msg)); + if (error != 0) { + return -EFAULT; + } + return sizeof(struct sysmonitor_msg); +} + +static unsigned int sysmonitor_poll(struct file *file, poll_table *wait) +{ + poll_wait(file, &g_msg_wait, wait); + if (g_msg_buf_seq != g_msg_log_seq) { + return POLLIN | POLLRDNORM; + } + + return 0; +} + +static int sysmonitor_open(struct inode *inode, struct file *file) +{ + if (!try_module_get(THIS_MODULE)) { + return -ENOENT; + } + + return 0; +} + +static int sysmonitor_release(struct inode *inode, struct file *file) +{ + module_put(THIS_MODULE); + return 0; +} + +static const struct proc_ops g_proc_sysmonitor_operations = { + .proc_read = sysmonitor_read, + .proc_poll = sysmonitor_poll, + .proc_open = sysmonitor_open, + .proc_release = sysmonitor_release, + .proc_lseek = generic_file_llseek, +}; + +int save_msg(int type, const void *msg, int msg_size) +{ + struct sysmonitor_msg *tmp_msg = NULL; + unsigned int index; + int ret; + unsigned long flags; + + if (msg_size <= 0) { + pr_err("[sysmonitor]: save_msg, msg size is illegal\n"); + return -1; + } + + if (msg_size > SYSMONITOR_MSG_MAX_LEN) { + pr_err("[sysmonitor]: msg_size[%d] is larger than msg max 
size[%d]\n", + msg_size, SYSMONITOR_MSG_MAX_LEN); + return -1; + } + + spin_lock_irqsave(&g_msg_buf_lock, flags); + index = g_msg_buf_seq & MSG_BUFMASK; + tmp_msg = g_msg_buf + index; + (void)memset(tmp_msg, 0, sizeof(struct sysmonitor_msg)); + tmp_msg->type = type; + (void)memcpy(tmp_msg->msg, msg, msg_size); + g_msg_buf_seq++; + spin_unlock_irqrestore(&g_msg_buf_lock, flags); + + if (ret == 0) { + if (waitqueue_active(&g_msg_wait)) + wake_up_interruptible(&g_msg_wait); + } + + return ret; +} + +static int __init sysmonitor_module_init(void) +{ + g_proc_sysmonitor = proc_create("sysmonitor", 0400, NULL, &g_proc_sysmonitor_operations); + if (g_proc_sysmonitor == NULL) { + pr_err("[sysmonitor]: create /proc/sysmonitor failed.\n"); + return -1; + } + signo_catch_init(); + fdstat_init(); + monitor_netdev_init(); + return 0; +} + +static void __exit sysmonitor_module_exit(void) +{ + proc_remove(g_proc_sysmonitor); + signo_catch_exit(); + fdstat_exit(); + monitor_netdev_exit(); +} +module_init(sysmonitor_module_init); +module_exit(sysmonitor_module_exit); diff --git a/sysmonitor-1.3.2/module/sysmonitor_main.h b/sysmonitor-1.3.2/module/sysmonitor_main.h new file mode 100644 index 0000000000000000000000000000000000000000..41da5f3859e2296354d3f61cf9b8d4f30ab9febd --- /dev/null +++ b/sysmonitor-1.3.2/module/sysmonitor_main.h @@ -0,0 +1,20 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2019-2020. All rights reserved. 
+ * Description: define for sysmonitor event msg and function + * Author: xuchunmei + * Create: 2019-3-20 + */ +#ifndef SYSMONITOR_H +#define SYSMONITOR_H + +#define NOTIFY_CALL_PRIORITY 100 +enum sysmonitor_event_type { + SIGNAL, + FDSTAT, + NETWORK +}; + +unsigned long get_sigcatchmask(void); +int get_netratelimit_burst(void); +int save_msg(int type, const void *msg, int msg_size); +#endif diff --git a/sysmonitor-1.3.2/script/check_cron.sh b/sysmonitor-1.3.2/script/check_cron.sh new file mode 100755 index 0000000000000000000000000000000000000000..2308cc07073c8356048946168b62e6e8f91232da --- /dev/null +++ b/sysmonitor-1.3.2/script/check_cron.sh @@ -0,0 +1,42 @@ +#!/bin/bash + +# Copyright (c) Huawei Technologies Co., Ltd. 2017-2019. All rights reserved. +# Description: check cron service enable/disable +# Author: +# Create: 2017-09-29 + +# -------------------------- variables ------------------------------------- # +G_CRON_NUM=0 +CRON_STATUS="" +CRON_BIN=/usr/sbin/crond +# -------------------------- main ------------------------------------------ # +function crond_process_exist() +{ + status=$(systemctl status crond -n 0 | grep "Active:" | grep running) + [ -n "${status}" ] && return 0 + #if crond was stopped normally, do not report monitor error + status=$(systemctl status crond -n 0 | grep "Active:" | grep 'inactive (dead)') + [ -n "${status}" ] && return 0 + return 1 +} + +for((i=0;i<2;i++)) +do + crond_process_exist + CRON_STATUS=$? 
+    if [ $CRON_STATUS -eq 0 ]; then
+        break;
+    fi
+    sleep 4
+done
+
+G_CRON_PID=$(ps -ef | grep "$CRON_BIN" |awk '{if($3==1)print $2}')
+G_CRON_NUM=$(echo $G_CRON_PID | wc -w)
+
+if [ $CRON_STATUS -ne 0 ];then
+    exit 1
+fi
+if [ $G_CRON_NUM -gt 1 ]; then
+    kill -9 $G_CRON_PID 2>/dev/null
+    exit 2
+fi
diff --git a/sysmonitor-1.3.2/script/check_dbus.sh b/sysmonitor-1.3.2/script/check_dbus.sh
new file mode 100755
index 0000000000000000000000000000000000000000..6e41fc41d88297511b900b53b4c72aacab3c6306
--- /dev/null
+++ b/sysmonitor-1.3.2/script/check_dbus.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+# Copyright (c) Huawei Technologies Co., Ltd. 2016-2022. All rights reserved.
+# Description: check dbus status
+# Author:
+# Create: 2022-7-25
+
+DBUS_STRING=":1"
+# Return 0 if busctl is not installed or the GetNameOwner reply contains ":1"; otherwise 1.
+function can_dbus_process()
+{
+    which busctl > /dev/null 2>&1
+    if [ $? -ne 0 ]; then
+        return 0
+    fi
+
+    result=$(timeout 26s busctl call org.freedesktop.DBus /org/freedesktop/DBus org.freedesktop.DBus GetNameOwner "s" "org.freedesktop.systemd1" 2>&1)
+    if [[ $result =~ $DBUS_STRING ]]; then
+        return 0
+    fi
+
+    return 1
+}
+
+can_dbus_process
+exit $?
diff --git a/sysmonitor-1.3.2/script/check_sshd.sh b/sysmonitor-1.3.2/script/check_sshd.sh
new file mode 100755
index 0000000000000000000000000000000000000000..34899a29cd757b1a8315e81edf2032660f207da5
--- /dev/null
+++ b/sysmonitor-1.3.2/script/check_sshd.sh
@@ -0,0 +1,37 @@
+#!/bin/bash
+
+# Copyright (c) Huawei Technologies Co., Ltd. 2016-2019. All rights reserved.
+# Description: check sshd service
+# Author:
+# Create: 2016-9-1
+
+SSHD_STATUS=""
+SSHD_PID=""
+NUM=2
+count=$(expr $NUM - 1)
+# Return 0 when "systemctl status sshd" prints an "Active:" line containing "running".
+function sshd_process_exist()
+{
+    status=$(systemctl status sshd -n 0 | grep "Active:" | grep running)
+    [ -n "${status}" ] && return 0
+    return 1
+}
+
+for((i=0;i<$NUM;i++))
+do
+    sshd_process_exist
+    SSHD_STATUS=$?
+    if [ $SSHD_STATUS -eq 0 ]; then
+        break;
+    fi
+    if [ "$i" -lt "$count" ];then
+        sleep 2
+    fi
+done
+# Still not running after $NUM checks: kill the sshd processes whose parent is pid 1 and exit 1.
+if [ $SSHD_STATUS -ne 0 ];then
+    SSHD_PID=$(ps -ef | grep -w "/usr/sbin/sshd" | grep -v grep | awk '{if ($3==1) print $2}')
+    kill -9 $SSHD_PID 2>/dev/null
+    exit 1
+fi
+exit 0
diff --git a/sysmonitor-1.3.2/script/check_syslog.sh b/sysmonitor-1.3.2/script/check_syslog.sh
new file mode 100755
index 0000000000000000000000000000000000000000..c31047ed2aa0da40abfcdfe4a7f8dc30517688c2
--- /dev/null
+++ b/sysmonitor-1.3.2/script/check_syslog.sh
@@ -0,0 +1,82 @@
+#!/bin/bash
+
+# Copyright (c) Huawei Technologies Co., Ltd. 2017-2019. All rights reserved.
+# Description: check syslog service enable/disable
+# Author:
+# Create: 2017-08-31
+
+# -------------------------- variables ------------------------------------- #
+declare -i MESSAGE_AVAILABLE_LIMIT
+G_SYSLOG_PID=""
+G_MESSAGE_AVAILABLE=""
+MESSAGE_AVAILABLE_LIMIT=8
+DEV_MSG=""
+DEV_TYPE=""
+MEM_RESTART=0
+DISK_RESTART=0
+DISK_FLAG=0
+MEM_LIST=""
+DISK_LIST=""
+# -------------------------- main ------------------------------------------ #
+G_SYSLOG_PID=$(systemctl status rsyslog | grep "Main PID:" | awk '{print $3}')
+if [ ! -d /proc/${G_SYSLOG_PID}/fd ]; then
+    logger -t "sysmonitor" "[$(date +"%Y-%m-%d:%H:%M:%S")]sysmonitor[$(pidof sysmonitor)]: The directory of /proc/${G_SYSLOG_PID}/fd does not exist, now restart rsyslog"
+    exit 1
+fi
+# Deleted journal files are skipped on purpose: per the original note, rsyslog only
+# reads the journal and keeps reading it correctly after such a file is deleted.
+for i in $(ls -l /proc/${G_SYSLOG_PID}/fd | grep " \-> .* (deleted)$" | grep -wv journal | awk '{print $9}')
+do
+    DELETE_FILE="$(ls -l /proc/${G_SYSLOG_PID}/fd/$i | awk -F '-> ' '{print $2}')"
+    FD_STATUS="$(file /proc/${G_SYSLOG_PID}/fd/$i)"
+    # file(1) output differs between versions (with or without "broken"); accept both
+    if [ "$FD_STATUS" != "/proc/${G_SYSLOG_PID}/fd/$i: broken symbolic link to $DELETE_FILE" ] && \
+       [ "$FD_STATUS" != "/proc/${G_SYSLOG_PID}/fd/$i: symbolic link to $DELETE_FILE" ];then
+        continue
+    fi
+    DELETE_FILE="${DELETE_FILE% (deleted)}"
+    DELETE_PATH="${DELETE_FILE%/*}"
+    DEV_MSG=$(df -mT "$DELETE_PATH" | awk 'NR>1')
+    DEV_TYPE=$(echo $DEV_MSG | awk '{print $2}' | grep -v rootfs | grep -v tmpfs)
+    if [ -z "$DEV_TYPE" ];then
+        MEM_RESTART=1
+        if [ -n "$MEM_LIST" ];then
+            MEM_LIST="${MEM_LIST};${DELETE_FILE}"
+        else
+            MEM_LIST="$DELETE_FILE"
+        fi
+        continue
+    fi
+    DISK_FLAG=1
+    G_MESSAGE_AVAILABLE=$(echo $DEV_MSG | awk '{print $5}')
+    if [ "${G_MESSAGE_AVAILABLE}" -ge "${MESSAGE_AVAILABLE_LIMIT}" ]; then
+        DISK_RESTART=1
+        if [ -n "$DISK_LIST" ];then
+            DISK_LIST="${DISK_LIST};${DELETE_FILE}"
+        else
+            DISK_LIST="$DELETE_FILE"
+        fi
+    else
+        DISK_RESTART=0
+        break;
+    fi
+done
+if [ $DISK_FLAG -eq 0 ];then
+    if [ $MEM_RESTART -eq 1 ];then
+        logger -t "sysmonitor" "[$(date +"%Y-%m-%d:%H:%M:%S")]sysmonitor[$(pidof sysmonitor)]: The fd of $MEM_LIST in rsyslog is abnormal, now restart rsyslog"
+        exit 1
+    else
+        exit 0
+    fi
+else
+    if [ $DISK_RESTART -eq 1 ];then
+        if [ -z "$MEM_LIST" ];then
+            logger -t "sysmonitor" "[$(date +"%Y-%m-%d:%H:%M:%S")]sysmonitor[$(pidof sysmonitor)]: The fd of $DISK_LIST in rsyslog is abnormal, now restart rsyslog"
+        else
+            logger -t "sysmonitor" "[$(date +"%Y-%m-%d:%H:%M:%S")]sysmonitor[$(pidof sysmonitor)]: The fd of $DISK_LIST;$MEM_LIST in rsyslog is abnormal, now restart rsyslog"
+        fi
+        exit 1
+    else
+        exit 0
+    fi
+fi
diff --git a/sysmonitor-1.3.2/script/clean_remain_process.sh
b/sysmonitor-1.3.2/script/clean_remain_process.sh
new file mode 100755
index 0000000000000000000000000000000000000000..f47fa9780506f4fdb13c6c315e47de9239141c36
--- /dev/null
+++ b/sysmonitor-1.3.2/script/clean_remain_process.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+# Copyright (c) Huawei Technologies Co., Ltd. 2021-2021. All rights reserved.
+# Description: clean remaining main process when stop sysmonitor service
+# Create: 2021-8-24
+
+REMAIN_PID=""
+SYSMONITOR_DAEMON="/usr/bin/sysmonitor --daemon"
+# Only daemon instances whose parent is pid 1 are sent SIGTERM.
+REMAIN_PID=$(ps -ef | grep -w "$SYSMONITOR_DAEMON" | grep -v grep | awk '{if($3==1) print $2}')
+if [ -n "$REMAIN_PID" ]; then
+    kill -TERM $REMAIN_PID 2>/dev/null
+fi
+
diff --git a/sysmonitor-1.3.2/script/clocktransition.py b/sysmonitor-1.3.2/script/clocktransition.py
new file mode 100755
index 0000000000000000000000000000000000000000..4db95c326f8dd5e68d9b42057615d629c96bc508
--- /dev/null
+++ b/sysmonitor-1.3.2/script/clocktransition.py
@@ -0,0 +1,188 @@
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+# Copyright (c) Huawei Technologies Co., Ltd. 2016-2019. All rights reserved.
+
+"""
+Description: check time change and handle NetworkManager service
+Author:
+Create: 2016-12-14
+"""
+
+from __future__ import absolute_import
+import time
+import os
+import subprocess
+import signal
+import syslog
+
+CLOCK_TIME_FILE = "/usr/libexec/sysmonitor/data/clocktime.data"
+
+
+def start_dhclient_task(cmd):
+    """Bring DHCP back after the old dhclient was killed.
+
+    If systemctl lists NetworkManager.service, NetworkManager is restarted
+    and nothing else is done. Otherwise cmd is run through the shell and the
+    pid of every matching thread is written to the network.service cgroup
+    (the original note: otherwise a sysmonitor restart would kill it).
+
+    :param cmd: dhclient command line to start again
+    """
+    ret = os.system("/usr/bin/systemctl |grep NetworkManager.service")
+    if ret == 0:
+        syslog.syslog(syslog.LOG_INFO, "wait for restarting dhclient")
+        restart_cmd = "systemctl restart NetworkManager"
+        ret, _ = subprocess.getstatusoutput(restart_cmd)
+        if ret != 0:
+            syslog.syslog(syslog.LOG_ERR, "restart NetworkManager failed.")
+        return
+
+    ret = os.system(cmd)
+    if ret != 0:
+        syslog.syslog(syslog.LOG_ERR, "start dhclient failed.")
+
+    check_cmd = "ps -eLwwo pid,args|grep \"{0}\"|grep -v grep".format(cmd)
+    ret, ps_result = subprocess.getstatusoutput(check_cmd)
+    if ret != 0:
+        syslog.syslog(syslog.LOG_ERR, ("create dhcliet error,"
+                                       " need network restart!"))
+        return
+
+    # The cgroup directory only has to be created once, not once per pid.
+    os.system(("mkdir -p /sys/fs/cgroup/systemd/system.slice/"
+               "network.service"))
+    for line in ps_result.splitlines():
+        # Get the pid and change the systemd cgroup
+        pid = int(line.strip().split()[0])
+        res_cmd = ("echo {0} > /sys/fs/cgroup/systemd/system.slice/"
+                   "network.service/tasks").format(pid)
+
+        ret = os.system(res_cmd)
+        if ret != 0:
+            syslog.syslog(syslog.LOG_ERR, "write pid of dhclient failed.")
+
+
+def check_cmd_user(cmd_user):
+    """Return True only when cmd_user is exactly "root"."""
+    return cmd_user == "root"
+
+
+def check_cmd_name(cmd_line):
+    """Return True when field 2 of "pid args..." is /sbin/dhclient."""
+    if cmd_line is None:
+        return False
+    fields = cmd_line.split()
+    return len(fields) >= 2 and fields[1] == "/sbin/dhclient"
+
+
+def reset_dhclient():
+    """Kill each root-owned /sbin/dhclient reported by ps and start it again.
+
+    A ps line is skipped when its user is not root, its command is not
+    /sbin/dhclient, it contains a character from invalidstr (the command is
+    later run through a shell), or ifconfig fails for its last word.
+    """
+    ret, ps_result = subprocess.getstatusoutput(("ps -eLwwo user,pid,args|"
+                                                 "grep -w /sbin/dhclient|grep -v grep"))
+    if ret != 0:
+        return
+
+    sp_result = ps_result.splitlines()
+    invalidstr = [
+        '!', '\\n', ';', '|', '&', '$', '>', '<', '(', ')',
+        './', '/.', '?', '*', '`', '\\', '[', ']', '\''
+    ]
+    for line in sp_result:
+        inval_flag = False
+        # Remove space,find the dev device and restart dhclient
+        line = line.strip(' ')
+        i = line.find(' ')
+        cmd_user = line[:i]
+        line = line[i + 1:]
+        line = line.strip(' ')
+        ret_flag = check_cmd_user(cmd_user)
+        if not ret_flag:
+            syslog.syslog(syslog.LOG_ERR, "invaild cmd user, continue")
+            continue
+        ret_flag = check_cmd_name(line)
+        if not ret_flag:
+            syslog.syslog(syslog.LOG_ERR, "invaild cmd name, continue")
+            continue
+        for inval in invalidstr:
+            if line.find(inval) != -1:
+                inval_flag = True
+                str_info = ("invaild symbol in line cmd is {0}, "
+                            "continue").format(inval)
+                syslog.syslog(syslog.LOG_ERR, str_info)
+                break
+        if inval_flag is True:
+            continue
+        i = line.rfind(' ')
+        dev = line[i + 1:]
+        cmd = "/usr/sbin/ifconfig {0}".format(dev)
+
+        ret, _ = subprocess.getstatusoutput(cmd)
+        if ret != 0:
+            # Dev not found
+            continue
+
+        i = line.find(' ')
+        pid = int(line[:i])
+        cmd = line[i + 1:]
+
+        try:
+            os.kill(pid, signal.SIGKILL)
+            # Wait process killed
+            time.sleep(1)
+        except OSError:
+            syslog.syslog(syslog.LOG_ERR, "killed dhclient failed.")
+        else:
+            syslog.syslog(syslog.LOG_INFO, "killed dhclient successed.")
+        finally:
+            syslog.syslog(syslog.LOG_INFO, "process kill dhclient end.")
+
+        start_dhclient_task(cmd)
+
+
+def read_time_file():
+    """Return the raw content of CLOCK_TIME_FILE, or None if it is missing."""
+    tmp_time = None
+    if os.path.exists(CLOCK_TIME_FILE):
+        with open(CLOCK_TIME_FILE, mode='r') as tmp_file:
+            tmp_time = tmp_file.read()
+    return tmp_time
+
+
+def write_time_file(now):
+    """Store now in CLOCK_TIME_FILE; a newly created file gets mode 0640."""
+    need_chmod = not os.path.exists(CLOCK_TIME_FILE)
+    with open(CLOCK_TIME_FILE, mode='w') as tmp_file:
+        if need_chmod:
+            os.chmod(CLOCK_TIME_FILE, 0o640)
+        tmp_file.write(str(now))
+        tmp_file.flush()
+
+
+def check_time_change():
+    """Reset dhclient when the stored time is over an hour ahead of now."""
+    reset_time = 3620
+    now = time.time()
+    tmp_time = read_time_file()
+    if tmp_time:
+        try:
+            last_time = float(tmp_time)
+        except ValueError:
+            # Unparsable content must not abort the run, or the file would never
+            # be rewritten and every later run would fail the same way.
+            syslog.syslog(syslog.LOG_WARNING, "invalid clock time data")
+            last_time = now
+        # More than one hour
+        if (last_time - now) > reset_time:
+            str_time = ("time change catched, before is {0},"
+                        " now is {1}").format(tmp_time, now)
+            syslog.syslog(syslog.LOG_WARNING, str_time)
+            reset_dhclient()
+    write_time_file(now)
+
+if __name__ == '__main__':
+    check_time_change()
diff --git a/sysmonitor-1.3.2/script/get_local_disk.sh b/sysmonitor-1.3.2/script/get_local_disk.sh
new file mode 100755
index 0000000000000000000000000000000000000000..318374cbffdd3113a32a5266f4f8f6bf46b20548
--- /dev/null
+++ b/sysmonitor-1.3.2/script/get_local_disk.sh
@@ -0,0 +1,82 @@
+#!/bin/bash
+
+# Copyright (c) Huawei Technologies Co., Ltd. 2016-2019. All rights reserved.
+# Description: get local disk
+# Author:
+# Create: 2016-12-14
+
+fc_disk_file=""
+local_disk_file=""
+if uname -a | grep -q aarch64; then
+    fc_local_disk=`ls -l /sys/block/ | grep -E 'pci|HISI0162' 2> /dev/null`
+else
+    fc_local_disk=`ls -l /sys/block/ | grep pci 2> /dev/null`
+fi
+disk_list=""
+
+# **************************************************************************** #
+# Function Name: OS_CREATE_TMP_FILE
+# Description: Create a secure tmp file
+# Parameter: tmp file
+# Return: 0-succ, 1-failed
+# **************************************************************************** #
+OS_CREATE_TMP_FILE()
+{
+    local file_name=$1
+    local tmp_file=""
+    local save_mask=$(umask)
+
+    umask 077
+    tmp_file=$(mktemp "${file_name}_XXXXXXXXXX" 2>/dev/kmsg)
+    if [ $? -ne 0 ]
+    then
+        umask "${save_mask}"
+        return 1
+    fi
+    umask "${save_mask}"
+    echo "${tmp_file}"
+    return 0
+}
+
+fc_disk_file=$(OS_CREATE_TMP_FILE "/tmp/fc_disk")
+if [ $? -eq 1 ];then
+    rm -rf "${fc_disk_file}"
+    exit 1
+fi
+
+local_disk_file=$(OS_CREATE_TMP_FILE "/tmp/local_disk")
+if [ $? -eq 1 ];then
+    rm -rf "${fc_disk_file}" "${local_disk_file}"
+    exit 1
+fi
+
+ls -l /sys/class/fc_host/ > ${fc_disk_file} 2> /dev/null
+while read line
+do
+    total_line=`echo ${line} | grep total`
+    if [ ! -z "${total_line}" ];then
+        continue
+    fi
+    host=$(echo "${line}" | awk -F "/" '{print $NF}' 2> /dev/null)
+    [ -z "${host}" ] && continue    # an empty pattern would make grep drop every disk
+    fc_local_disk=`echo "${fc_local_disk}" | grep -v -w "${host}" 2> /dev/null`
+done < ${fc_disk_file}
+
+echo "${fc_local_disk}" > ${local_disk_file} 2> /dev/null
+while read line
+do
+    disk=`echo ${line} | awk -F "/" '{print $NF}' 2> /dev/null`
+    cd_rom=`echo "${disk}" | grep "sr[0-9]\{1,\}$"`
+    if [ ! -z "${cd_rom}" ];then
+        continue
+    fi
+    if [ -z "${disk_list}" ];then
+        disk_list=${disk}
+    else
+        disk_list="${disk_list},${disk}"
+    fi
+done < ${local_disk_file}
+echo -e "${disk_list}\c"
+
+rm -f ${fc_disk_file} ${local_disk_file}
+
diff --git a/sysmonitor-1.3.2/script/getzombieparent.py b/sysmonitor-1.3.2/script/getzombieparent.py
new file mode 100755
index 0000000000000000000000000000000000000000..0f915db4e78625a159615834d58aa2fe8e70e33d
--- /dev/null
+++ b/sysmonitor-1.3.2/script/getzombieparent.py
@@ -0,0 +1,38 @@
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+# Copyright (c) Huawei Technologies Co., Ltd. 2019-2019. All rights reserved.
+
+"""
+Description: get and print the information of the zombies' parent process
+Author:
+Create: 2019-7-22
+"""
+from __future__ import absolute_import
+import subprocess
+import syslog
+
+
+def zombie_get_parent_process():
+    """Log pid and first command word of every process that has a zombie child."""
+    check_cmd = "ps -eLwwo pid,stat,ppid,args| awk 'NR>1'"
+    ret, ps_result = subprocess.getstatusoutput(check_cmd)
+    if ret != 0:
+        syslog.syslog(syslog.LOG_WARNING, "Failed to get all process info!")
+        return
+    ps_result = ps_result.splitlines()
+    all_process = {}
+    zombie_parent = {}
+    for line in ps_result:
+        sp_result = line.strip().split()
+        all_process[sp_result[0]] = sp_result[3]
+        if sp_result[1].startswith(('Z', 'z')):
+            zombie_parent[sp_result[2]] = sp_result[0]
+
+    for ppid in zombie_parent:  # the parent may be missing from this snapshot
+        str_log = ("zombie parent process: pid is {0}, "
+                   "args is {1}").format(ppid, all_process.get(ppid, "unknown"))
+        syslog.syslog(syslog.LOG_ERR, str_log)
+
+if __name__ == '__main__':
+    zombie_get_parent_process()
+
diff --git a/sysmonitor-1.3.2/script/iomonitor_daemon b/sysmonitor-1.3.2/script/iomonitor_daemon
new file mode 100755
index 0000000000000000000000000000000000000000..da99333eb3bb7d6bf81b8de2b87c854166f77b7b
--- /dev/null
+++ b/sysmonitor-1.3.2/script/iomonitor_daemon
@@ -0,0 +1,120 @@
+#!/bin/bash
+
+# Copyright (c) Huawei Technologies Co., Ltd. 2018-2019. All rights reserved.
+# Description: io monitor
+# Author:
+# Create: 2018-11-2
+
+#remove old tmp file
+rm -rf /tmp/io_sample.log
+rm -rf /tmp/iomonitor_*.log
+IO_THRESHOLD=50
+SAVE_TOPN=3
+CMD_NAME="iomonitor_daemon"
+
+trap 'echo "`date` [$0] trap exceptional signal! exit." > /dev/kmsg; rm -rf /tmp/iomonitor_*.log >/dev/null 2>&1; exit 0;' SIGHUP SIGINT SIGTERM SIGQUIT
+
+PATH=$PATH:/usr/sbin:/usr/bin:/bin:/sbin
+# Log "$1" at priority info under the sysmonitor tag.
+function iotop_logger()
+{
+    logger -it "sysmonitor" "[LOC $(date +"%Y-%m-%d:%H:%M:%S")]sysmonitor[$(pidof sysmonitor)]:" -p info "$1"
+}
+# Log the content of /sys/block/<disk>/inflight for every entry in /sys/block.
+function get_inflight_log()
+{
+    disk_name_all=`ls /sys/block`
+    for disk_name in $disk_name_all
+    do
+        iotop_logger "$disk_name inflight info:`cat /sys/block/$disk_name/inflight`"
+    done
+}
+# Run iotop for 3 samples and log the last sample's header plus up to $SAVE_TOPN rows at or above $IO_THRESHOLD.
+function get_iotop_log()
+{
+    local first_line=0
+    local head_line=2
+    local print_i=0
+    local tmp_file=""
+
+    iotop_logger "$CMD_NAME start"
+
+    head_line=2
+    save_umask=$(umask)
+    umask 077
+    tmp_file=`mktemp /tmp/iomonitor_XXXXXXXXXX.log`
+    if [ $? -ne 0 ]
+    then
+        iotop_logger "create tmp file failed."
+        umask "${save_umask}"
+        exit 1
+    fi
+    umask "${save_umask}"
+    iotop -n 3 -b -t -d 1 -o > $tmp_file
+    last_line=`grep -n "Total DISK READ" $tmp_file | awk 'END {print}' | awk -F: '{print $1}'`
+    if [ -z "$last_line" ]
+    then
+        iotop_logger "iotop run fail."
+        rm -rf "$tmp_file" >/dev/null 2>&1
+        return
+    fi
+    #some version iotop display "Actual DISK READ"
+    if [ -n "`grep 'Actual DISK READ' $tmp_file`" ]
+    then
+        head_line=$((head_line+1))
+    fi
+
+    #skip head
+    first_line=$((last_line+head_line))
+    io_size=`cat $tmp_file | tail -n +$first_line | head -n 1 | awk '{print $11}'`
+    if [ -z "$io_size" ]
+    then
+        #no io info
+        rm -rf "$tmp_file" >/dev/null 2>&1
+        return
+    fi
+    io_size=${io_size%%.*}
+    if [ "${io_size:-0}" -lt "$IO_THRESHOLD" ]    # ".5" leaves an empty integer part
+    then
+        rm -rf "$tmp_file" >/dev/null 2>&1
+        return
+    fi
+    #the header info
+    print_i=0
+    while [ $print_i -lt $head_line ]
+    do
+        iotop_logger "`sed -n ${last_line}p $tmp_file`"
+        last_line=$((last_line+1))
+        print_i=$((print_i+1))
+    done
+
+    #log the io info
+    iotop_logger "`sed -n ${first_line}p $tmp_file`"
+    get_inflight_log
+    first_line=$((first_line+1))
+    print_i=1
+    while [ $print_i -lt $SAVE_TOPN ]
+    do
+        io_size=`cat $tmp_file | tail -n +$first_line | head -n 1 | awk '{print $11}'`
+        if [ -z "$io_size" ]
+        then
+            #no io info
+            break
+        fi
+        io_size=${io_size%%.*}
+        if [ "${io_size:-0}" -lt "$IO_THRESHOLD" ]
+        then
+            break
+        fi
+
+        iotop_logger "`sed -n ${first_line}p $tmp_file`"
+        get_inflight_log
+        first_line=$((first_line+1))
+        print_i=$((print_i+1))
+    done
+
+    rm -rf "$tmp_file" >/dev/null 2>&1
+}
+
+get_iotop_log
+exit 0
diff --git a/sysmonitor-1.3.2/script/ko.sh b/sysmonitor-1.3.2/script/ko.sh
new file mode 100755
index 0000000000000000000000000000000000000000..8c91046a1cc230df5443bae5d290b7f6272582bd
--- /dev/null
+++ b/sysmonitor-1.3.2/script/ko.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+# Copyright (c) Huawei Technologies Co., Ltd. 2018-2019. All rights reserved.
+# Description: handle sysmonitor module insmod and rmmod
+# Author:
+# Create: 2018-8-14
+# install: rmmod signo_catch/fdstat/monitor_netdev, then insmod sysmonitor.ko; rm: rmmod sysmonitor
+function install_ko()
+{
+    local ko_list=
+
+    ko_list[0]="signo_catch"
+    ko_list[1]="fdstat"
+    ko_list[2]="monitor_netdev"
+
+    for i in ${ko_list[*]}
+    do
+        rmmod $i 2>/dev/null 1>/dev/null
+    done
+    insmod /lib/modules/sysmonitor/sysmonitor.ko 2>/dev/null 1>/dev/null
+}
+
+function rm_ko()
+{
+    local ko_list=
+
+    ko_list[0]="sysmonitor"
+
+    for i in ${ko_list[*]}
+    do
+        rmmod $i 2>/dev/null 1>/dev/null
+    done
+}
+
+case "$1" in
+install)
+    install_ko
+    ;;
+rm)
+    rm_ko
+    ;;
+*)
+    exit 1
+esac
diff --git a/sysmonitor-1.3.2/script/logind_clear.sh b/sysmonitor-1.3.2/script/logind_clear.sh
new file mode 100755
index 0000000000000000000000000000000000000000..5c1a5994f80f5c06ac3cc93296265500100a351a
--- /dev/null
+++ b/sysmonitor-1.3.2/script/logind_clear.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+# Copyright (c) Huawei Technologies Co., Ltd. 2017-2019. All rights reserved.
+# Description: clear login session
+# Author:
+# Create: 2017-3-15
+
+session_path="/run/systemd/sessions"
+scope_path="/run/systemd/system"
+clear_start=100
+
+session_files=$(ls ${session_path})
+session_array=(${session_files})
+count=${#session_array[@]}
+
+if [ ${count} -le ${clear_start} ];then
+    exit 0
+fi
+
+for files in ${session_array[*]}
+do
+    ref=`echo ${files} | grep ref`
+    if [ ! -z "${ref}" ];then
+        continue
+    fi
+    session_file=${session_path}/${files}
+    state=`cat ${session_file} | grep STATE | awk -F '=' '{print $2}'`
+    scope=`cat ${session_file} | grep '^SCOPE=' | awk -F '=' '{print $2}'`
+    if [ "${state}" == "closing" ];then
+        rm -f ${session_file}
+        rm -f ${session_file}.ref
+        # Without a scope name the paths below would be ${scope_path}/ and ${scope_path}/.d
+        if [ -n "${scope}" ];then
+            rm -f "${scope_path}/${scope}"
+            rm -rf "${scope_path}/${scope}.d"
+        fi
+    fi
+done
+
diff --git a/sysmonitor-1.3.2/script/process_clock_data.sh b/sysmonitor-1.3.2/script/process_clock_data.sh
new file mode 100755
index 0000000000000000000000000000000000000000..49353ae24091b3d1e24f048d15437ac380d21378
--- /dev/null
+++ b/sysmonitor-1.3.2/script/process_clock_data.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+# Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved.
+# Description: check clocktime.data
+# Author:
+# Create: 2020-04-16
+
+CLOCK_TIME_FILE="/usr/libexec/sysmonitor/data/clocktime.data"
+
+function init_clockdata()
+{
+    if [ -f $CLOCK_TIME_FILE ];then
+        rm -rf $CLOCK_TIME_FILE
+    fi
+    umask 026
+    touch $CLOCK_TIME_FILE
+}
+
+function rm_clockdata()
+{
+    if [ -f $CLOCK_TIME_FILE ];then
+        rm -rf $CLOCK_TIME_FILE
+    fi
+}
+
+case "$1" in
+init)
+    init_clockdata
+    ;;
+rm)
+    rm_clockdata
+    ;;
+*)
+    exit 1
+esac
diff --git a/sysmonitor-1.3.2/script/rm_duplicat_conf.sh b/sysmonitor-1.3.2/script/rm_duplicat_conf.sh
new file mode 100755
index 0000000000000000000000000000000000000000..1ebd44f77d7e1c05a1832a84b797c2d325a3d6ef
--- /dev/null
+++ b/sysmonitor-1.3.2/script/rm_duplicat_conf.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+
+# Copyright (c) Huawei Technologies Co., Ltd. 2018-2019. All rights reserved.
+# Description: remove duplicat config file
+# Author:
+# Create: 2018-6-5
+
+function fn_replace_conf_file()
+{
+    local process_path="/etc/sysmonitor/process"
+    local conf_file_list="libvirtd UVPHostd vBMC_agentd"
+
+    for i in $conf_file_list
+    do
+        if [ -f ${process_path}/${i}-monitor ]&&[ -f ${process_path}/${i}-daemon ];then
+            rm -f ${process_path}/${i}-monitor
+            if [ $? -ne 0 ];then
+                logger -it rm_duplicat_conf.sh "Delete ${i}-monitor failed."
+            else
+                logger -it rm_duplicat_conf.sh "Deleted ${i}-monitor."
+            fi
+        fi
+    done
+}
+
+fn_replace_conf_file
+
diff --git a/sysmonitor-1.3.2/script/sysmonitor_log_dump.sh b/sysmonitor-1.3.2/script/sysmonitor_log_dump.sh
new file mode 100755
index 0000000000000000000000000000000000000000..816c899c0153f99fa33021618e2337e0256bd1d5
--- /dev/null
+++ b/sysmonitor-1.3.2/script/sysmonitor_log_dump.sh
@@ -0,0 +1,121 @@
+#!/bin/bash
+
+# Copyright (c) Huawei Technologies Co., Ltd. 2017-2019. All rights reserved.
+# Description: log dump for sysmonitor
+# Author:
+# Create: 2017-10-19
+
+logdump_dir=/var/log/logdump/sysmonitor
+logrotate_conf_file=/usr/libexec/sysmonitor/sysmonitor-logrotate
+lock_file=$logdump_dir/sysmonitor-logrotate.lock
+status_file=$logdump_dir/sysmonitor-logrotate.status
+
+function mkdir_logdump_dir()
+{
+    local logdump_dir=$1
+    if [ -e "${logdump_dir%/*}" -a ! -d "${logdump_dir%/*}" ];then
+        logger -it sysmonitor_log_dump.sh "${logdump_dir%/*} is not a directory,now remove it" -p warning
+        rm -rf ${logdump_dir%/*}
+    elif [ -e "$logdump_dir" -a ! -d "$logdump_dir" ];then
+        logger -it sysmonitor_log_dump.sh "$logdump_dir is not a directory,now remove it" -p warning
+        rm -rf $logdump_dir
+    fi
+
+    mkdir_out=$(mkdir -m 700 -p $logdump_dir 2>&1)
+    if [ $? -ne 0 ];then
+        logger -it sysmonitor_log_dump.sh "$mkdir_out" -p warning
+        rm -rf $lock_file
+        exit 1
+    fi
+    chmod 700 $logdump_dir/..
+}
+
+function exec_logrotate()
+{
+    out=$(logrotate $logrotate_conf_file -s $status_file 2>&1)
+    if [ $? -ne 0 ];then
+        logger -it sysmonitor_log_dump.sh "$out" -p warning
+        # if logrotate.status is invalid ,remove it
+        if [ -n "$(echo $out | grep sysmonitor-logrotate.status)" ];then
+            rm -f $status_file
+            out=$(logrotate $logrotate_conf_file -s $status_file 2>&1)
+            if [ $? -ne 0 ];then
+                logger -it sysmonitor_log_dump.sh "$out" -p warning
+                rm -rf $lock_file
+                exit 1
+            fi
+        else
+            rm -rf $lock_file
+            exit 1
+        fi
+    fi
+    chmod 400 $logdump_dir/*
+}
+
+function get_save_cnt()
+{
+    max_save_cnt=$(cat $logrotate_conf_file | grep -w rotate | awk '{print $2}')
+    if [ -z "$max_save_cnt" ];then
+        max_save_cnt=30
+    fi
+    echo $max_save_cnt
+}
+
+function check_rotate_file()
+{
+    rootbak_dir=$1
+    sysmonitor_rootbak_dir=$rootbak_dir/logdump/sysmonitor
+    rotate_cnt=$(ls $logdump_dir/sysmonitor.log.*.gz | wc -l)
+    if [ $rotate_cnt -ne 1 ];then
+        return
+    fi
+    rootbak_type=$(df -T $rootbak_dir | awk 'NR>1' | egrep -wv "/dev/(ram|loop)[0-9]{0,}" | awk '{print $2}' | grep -v rootfs | grep -v tmpfs)
+    if [ -z "$rootbak_type" ];then
+        return
+    fi
+    mkdir_logdump_dir $sysmonitor_rootbak_dir
+    logdump_id=$(ls $sysmonitor_rootbak_dir/sysmonitor.log.*.gz | awk -F . '{print $3}' | sort -nr | head -n1)
+    rotate_id=$(($logdump_id+1))
+    rotate_date=$(date "+%Y%m%d%H%M%S")
+
+    mv_result=$(mv $logdump_dir/sysmonitor.log.*.gz $sysmonitor_rootbak_dir/sysmonitor.log.$rotate_id.$rotate_date.gz 2>&1)
+    if [ $? -ne 0 ];then
+        logger -it sysmonitor_log_dump.sh "$mv_result" -p warning
+    fi
+    logdump_file_count=$(ls $sysmonitor_rootbak_dir/sysmonitor.log.*.gz | wc -l)
+    max_save_cnt=$(get_save_cnt)
+    if [ $logdump_file_count -le $max_save_cnt ];then
+        return
+    fi
+
+    delet_file=$(ls $sysmonitor_rootbak_dir/sysmonitor.log.*.gz | sort -n -k 3 -t . | head -n $(($logdump_file_count-$max_save_cnt)))
+    rm -rf $delet_file
+}
+
+#main
+mkdir_logdump_dir $logdump_dir
+
+exec 7<>$lock_file
+flock 7
+
+varlog_type=$(df -T /var/log | awk 'NR>1' | egrep -wv "/dev/(ram|loop)[0-9]{0,}" | awk '{print $2}' | grep -v rootfs | grep -v tmpfs)
+if [ -n "$varlog_type" ];then
+    exec_logrotate
+    rm -rf $lock_file
+    exit 0
+fi
+rm -rf $logdump_dir/*
+exec_logrotate
+log_bak_dir=$(cat /etc/esyslog/oslogdump.conf | grep LOG_BAK_DIR= | awk -F = '{print $2}')
+if [ -z "$log_bak_dir" ];then
+    #on logical part
+    check_rotate_file /opt/udisk/log/transfer
+    #on memory file system of CE
+    check_rotate_file /rootbak/var/log
+else
+    check_rotate_file $log_bak_dir
+fi
+if [ -z "$(ls $logdump_dir/sysmonitor.log.*.gz)" ];then
+    rm -rf $logdump_dir
+fi
+rm -rf $lock_file
diff --git a/sysmonitor-1.3.2/service/sysmonitor.service b/sysmonitor-1.3.2/service/sysmonitor.service
new file mode 100644
index 0000000000000000000000000000000000000000..488f2fdf3c0bf1ed00ebe709bc12f16efcbac889
--- /dev/null
+++ b/sysmonitor-1.3.2/service/sysmonitor.service
@@ -0,0 +1,24 @@
+[Unit]
+Before=network-pre.target
+Wants=network-pre.target
+After=ksecurec.service
+Description=System Monitor Service
+
+[Service]
+Type=forking
+PIDFile=/var/run/sysmonitor.pid
+ExecStartPre=-/bin/bash /usr/libexec/sysmonitor/ko.sh install
+ExecStartPre=-/bin/bash /usr/libexec/sysmonitor/rm_duplicat_conf.sh
+ExecStartPre=-/bin/bash /usr/libexec/sysmonitor/process_clock_data.sh init
+ExecStart=/usr/bin/sysmonitor --daemon
+ExecStop=/usr/bin/sleep 0.5
+ExecStopPost=-/bin/bash /usr/libexec/sysmonitor/ko.sh rm
+ExecStopPost=-/bin/bash /usr/libexec/sysmonitor/process_clock_data.sh rm
+ExecStopPost=-/bin/bash /usr/libexec/sysmonitor/clean_remain_process.sh
+ExecReload=/bin/kill -USR2 $MAINPID
+KillMode=process
+StandardOutput=null
+Restart=on-failure
+
+[Install]
+WantedBy=multi-user.target
diff --git a/sysmonitor-1.3.2/src/CMakeLists.txt b/sysmonitor-1.3.2/src/CMakeLists.txt
new file mode
100644
index 0000000000000000000000000000000000000000..e5af6122dd673c23daeccc01a8f910f794533c4c
--- /dev/null
+++ b/sysmonitor-1.3.2/src/CMakeLists.txt
@@ -0,0 +1,22 @@
+# Copyright (c) Huawei Technologies Co., Ltd. 2018-2019. All rights reserved.
+# Description: cmake file of sysmonitor src
+# Author: xuchunmei
+# Create: 2018-12-15
+
+project(sysmonitor)
+
+# Append to CMAKE_C_FLAGS so flags given by the user or the packaging environment are kept.
+set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Werror -D_FORTIFY_SOURCE=2 -O2 -fPIE -fstack-protector-strong -g")
+
+# Pass each define as one token: a separate "-D" is de-duplicated when VERSION and CUSTOM are both set.
+if (VERSION)
+    add_compile_options(-D${VERSION})
+endif(VERSION)
+
+if (CUSTOM)
+    add_compile_options(-D${CUSTOM})
+endif(CUSTOM)
+
+add_executable(sysmonitor common.c custom.c disk.c fsmonitor.c filemonitor.c process.c sys_resources.c sys_event.c sysmonitor.c zombie.c monitor_thread.c)
+set_target_properties(sysmonitor PROPERTIES LINK_FLAGS "-Wl,-z,relro -Wl,-z,noexecstack -Wl,-z,now -Wtrampolines -pie")
+target_link_libraries(sysmonitor boundscheck pthread)
diff --git a/sysmonitor-1.3.2/src/common.c b/sysmonitor-1.3.2/src/common.c
new file mode 100644
index 0000000000000000000000000000000000000000..129ee7494e170eca45b74e8c38f0c3310ac636b1
--- /dev/null
+++ b/sysmonitor-1.3.2/src/common.c
@@ -0,0 +1,1000 @@
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2016-2019. All rights reserved.
+ * sysmonitor licensed under the Mulan PSL v2.
+ * You can use this software according to the terms and conditions of the Mulan PSL v2.
+ * You may obtain a copy of Mulan PSL v2 at:
+ *     http://license.coscl.org.cn/MulanPSL2
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
+ * PURPOSE.
+ * See the Mulan PSL v2 for more details.
+ * Description: common function
+ * Author: xuchunmei
+ * Create: 2016-1-1
+ */
+
+#include "common.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#include
+
+#define PFD_NUM 2
+
+/*
+ * Stop the process group -pid and reap pid into *status.
+ * SIGTERM first, then poll waitpid() once a second PROCESS_EXIT_TIMEOUT times;
+ * if pid is still alive after that, send SIGKILL and wait for it.
+ */
+static void process_exit(pid_t pid, int *status)
+{
+    int ret;
+    int timeout = PROCESS_EXIT_TIMEOUT;
+
+    (void)kill(-pid, SIGTERM);
+    (void)sleep(1);
+    while (timeout--) {
+        ret = waitpid(pid, status, WNOHANG);
+        if (ret > 0) {
+            return;
+        }
+        (void)sleep(1);
+    }
+    log_printf(LOG_INFO, "task[%d] process SIGTERM timeout,use SIGKILL.", pid);
+    (void)kill(-pid, SIGKILL);
+    (void)waitpid(pid, status, 0);
+}
+/* Make pfd non-blocking and wrap it in a read stream; on failure return NULL and set *ret. */
+static FILE *get_pfd_file(int pfd, int *ret)
+{
+    int flags;
+    FILE *fp = NULL;
+
+    flags = fcntl(pfd, F_GETFL, 0);
+    if (flags < 0) {
+        log_printf(LOG_ERR, "monitor_popen: fcntl F_GETFL error [%d]", errno);
+        *ret = ERROR_FCNTL;
+        return NULL;
+    }
+
+    flags = fcntl(pfd, F_SETFL, (unsigned int)flags | O_NONBLOCK);
+    if (flags < 0) {
+        log_printf(LOG_ERR, "monitor_popen: fcntl F_SETFL error [%d]", errno);
+        *ret = ERROR_FCNTL;
+        return NULL;
+    }
+
+    fp = fdopen(pfd, "r");
+    if (fp == NULL) {
+        *ret = ERROR_FDOPEN;
+        return NULL;
+    }
+
+    return fp;
+}
+/* Return the exit code for a normally exited child (logged when non-zero), else 0. */
+static int get_child_exit_code(int status)
+{
+    int ret = 0;
+
+    if (WIFEXITED(status)) {
+        ret = WEXITSTATUS(status);
+        if (ret != 0) {
+            log_printf(LOG_INFO, "get child exit code error ret[%d]", ret);
+        }
+    }
+    return ret;
+}
+/* Log the timeout, run psz_stop_cmd via "sh -c" if given and wait for it; ERROR_FORK if fork fails, else ERROR_TIMEOUT. */
+static int process_monitor_popen_timeout(const char *psz_cmd, const char *psz_stop_cmd, int pfd)
+{
+    int status;
+    pid_t pid;
+    pid_t child_pid;
+    int ret;
+
+    log_printf(LOG_INFO, "execute \"%s\" timeout", psz_cmd);
+    if (psz_stop_cmd == NULL) {
+        return ERROR_TIMEOUT;
+    }
+
+    pid = fork();
+    if (pid < 0) {
+        log_printf(LOG_ERR, "monitor_popen: timeout fork error [%d]", errno);
+        return ERROR_FORK;
+    } else if (pid == 0) {
+        (void)close(pfd);
+        (void)execl("/bin/sh", "sh", "-c", psz_stop_cmd, NULL);
+        exit(errno);
+    }
+
+    child_pid = waitpid(pid, &status, 0);
+    if (child_pid == pid) {
+        ret = get_child_exit_code(status);
+        if (ret != 0) {
+            log_printf(LOG_WARNING, "monitor popen: psz_stop_cmd[%s] execl error[%d]", psz_stop_cmd, ret);
+        }
+    }
+    return ERROR_TIMEOUT;
+}
+/* Child side: make pfd the stdout, point stdin and stderr at /dev/null. */
+static void process_pfd_and_fd(int pfd)
+{
+    int fd = -1;
+
+    if (pfd != STDOUT_FILENO) {
+        (void)dup2(pfd, STDOUT_FILENO);
+        (void)close(pfd);
+    }
+
+    fd = open("/dev/null", O_RDWR, 0);
+    if (fd >= 0) {
+        (void)dup2(fd, STDIN_FILENO);
+        (void)dup2(fd, STDERR_FILENO);
+        if (fd > STDERR_FILENO) { /* never close fd when it already is a standard descriptor */
+            (void)close(fd);
+        }
+    }
+}
+/* Return 0 once pid is reaped (blocking when timeout is 0); else sleep 1s, bump *sec, return 1. */
+static int process_timeout_waitpid(pid_t pid, int *status, long timeout, int *sec)
+{
+    if (waitpid(pid, status, WNOHANG) > 0) {
+        return 0;
+    } else if (timeout > 0) {
+        (void)sleep(1);
+        *sec = *sec + 1;
+        return 1;
+    } else {
+        /* timeout is 0 */
+        (void)waitpid(pid, status, 0);
+        return 0;
+    }
+}
+
+/*
+ * exec psz_cmd via "sh -c" and store up to size bytes of its stdout in psz_buffer
+ * if timeout > 0 and it expires, run psz_stop_cmd and return the timeout error
+ */
+int monitor_popen(const char *psz_cmd, char *psz_buffer, unsigned int size, long timeout, const char *psz_stop_cmd)
+{
+    int result = 0;
+    FILE *fp = NULL;
+    int pfd[PFD_NUM] = {0};
+    pid_t pid;
+    fd_set rfds;
+    struct timeval tv;
+    int retval;
+    int status = 0; /* stays defined even if a waitpid() below fails */
+    char *stdout_str = psz_buffer;
+    int sec = 0;
+    int ret;
+    unsigned int len;
+
+    ret = memset_s(psz_buffer, size, 0, size);
+    if (ret) {
+        log_printf(LOG_ERR, "monitor_popen: memset_s psz_buffer failed, ret: %d", ret);
+        return -1;
+    }
+
+    if (pipe(pfd) < 0) {
+        log_printf(LOG_ERR, "pipe error [%d]", errno);
+        return ERROR_PIPE;
+    }
+
+    pid = fork();
+    if (pid < 0) {
+        log_printf(LOG_ERR, "monitor_popen: fork error [%d]", errno);
+        (void)close(pfd[0]);
+        (void)close(pfd[1]);
+        return ERROR_FORK;
+    } else if (pid == 0) {
+        (void)setpgrp();
+        (void)prctl(PR_SET_PDEATHSIG, SIGTERM);
+
+        (void)close(pfd[0]);
+
+        process_pfd_and_fd(pfd[1]);
+
+        (void)execl("/bin/sh", "sh", "-c", psz_cmd, NULL);
+        exit(errno);
+    }
+
+    (void)close(pfd[1]);
+    fp = get_pfd_file(pfd[0], &result);
+    if (fp == NULL) {
+        process_exit(pid, &status);
+        (void)close(pfd[0]);
+        return result;
+    }
+
+    for (;;) {
+        /* Watch pfd[0] to see when it has input. */
+        FD_ZERO(&rfds);
+        FD_SET(pfd[0], &rfds);
+
+        if (timeout > 0) {
+            if (sec >= timeout) {
+                result = process_monitor_popen_timeout(psz_cmd, psz_stop_cmd, pfd[0]);
+                process_exit(pid, &status);
+                break;
+            }
+            tv.tv_sec = 1;
+            tv.tv_usec = 0;
+            retval = select(pfd[0] + 1, &rfds, NULL, NULL, &tv);
+        } else {
+            retval = select(pfd[0] + 1, &rfds, NULL, NULL, NULL);
+        }
+
+        if (retval == -1) {
+            log_printf(LOG_ERR, "select error [%d]", errno);
+            result = ERROR_SELECT;
+            process_exit(pid, &status);
+            break;
+        } else if (retval) {
+            if (!FD_ISSET(pfd[0], &rfds)) {
+                continue;
+            }
+            len = fread(stdout_str, 1, size, fp); /* NOTE(review): a full buffer has no terminating NUL - confirm callers cope */
+            /* Pipe is closed, which means the child process has already exited. */
+            if (len == 0) {
+                (void)waitpid(pid, &status, 0);
+                break;
+            }
+
+            if (size > len) {
+                stdout_str += len;
+                size -= len;
+                continue;
+            }
+
+            /* len is larger than size, so waitpid to exit */
+            stdout_str += size;
+            size = 0;
+            ret = process_timeout_waitpid(pid, &status, timeout, &sec);
+            if (ret == 0) {
+                break;
+            }
+        } else {
+            /* Grandson process could inherit the pipe fd. */
+            if (waitpid(pid, &status, WNOHANG) > 0) {
+                break;
+            }
+            sec++;
+        }
+    }
+    /* keep an error set above (timeout, fork, select); only an otherwise clean run reports the exit code */
+    if (result == 0 && WIFEXITED(status)) {
+        result = WEXITSTATUS(status);
+        if (result != 0) {
+            log_printf(LOG_WARNING, "monitor popen: psz_cmd[%s] execl error[%d]", psz_cmd, result);
+        }
+    }
+
+    (void)fclose(fp);
+
+    return result;
+}
+
+/*
+ * copy len bytes of src into a new EXEC_MAX buffer stored at (*dst)[pos]
+ * src and len are promised by the caller; returns 0, or -1 with the slot left NULL
+ */
+static int save_args(char ***dst, int pos, const char *src, int len)
+{
+    int ret;
+    char **args = *dst;
+
+    if (pos >= ARGS_MAX) {
+        log_printf(LOG_INFO, "save_args: too many args.");
+        return -1;
+    }
+
+    if (len >= EXEC_MAX) {
+        log_printf(LOG_INFO, "save_args: args len is longer than %d.", EXEC_MAX);
+        return -1;
+    }
+
+    args[pos] = malloc(sizeof(char) * EXEC_MAX);
+    if (args[pos] == NULL) {
+        log_printf(LOG_ERR, "save_args: malloc for args failed.");
+        return -1;
+    }
+
+    ret = memset_s(args[pos], EXEC_MAX, 0, EXEC_MAX);
+    if (ret != 0) {
+        log_printf(LOG_ERR, "save_args: memset_s args[%d] failed.", pos);
+        goto err;
+    }
+
+    ret = strncpy_s(args[pos], EXEC_MAX, src, (size_t)len);
+    if (ret != 0) {
+        log_printf(LOG_ERR, "save_args: strncpy_s dst failed.");
+        goto err;
+    }
+    return 0;
+
+err:
+    free(args[pos]);
+    args[pos] = NULL;
+    return -1;
+}
+/* Remember i as the start of the current argument unless a start is already recorded. */
+static void get_arg_begin_pos(int *arg_begin_pos, int i)
+{
+    if (*arg_begin_pos == -1) {
+        *arg_begin_pos = i;
+    }
+}
+/* Split cmd on spaces and double quotes into *cmdline; *args_count gets the number saved, also on error. */
+static int parse_args_from_cmd(const char *cmd, char ***cmdline, int *args_count)
+{
+    int i = 0;
+    int arg_begin_pos = -1;
+    bool quota_flag = false;
+    int count = 0;
+    int ret;
+
+    while (cmd[i] != '\0') {
+        if (cmd[i] == '\"') {
+            if (quota_flag == false) {
+                quota_flag = true;
+                goto next_cmd;
+            }
+            quota_flag = false;
+            if (arg_begin_pos == -1) {
+                goto next_cmd;
+            }
+            ret = save_args(cmdline, count, cmd + arg_begin_pos, i - arg_begin_pos);
+            if (ret < 0) {
+                goto err;
+            }
+            count++;
+            arg_begin_pos = -1;
+        } else if (cmd[i] == ' ') {
+            if (quota_flag == true) {
+                get_arg_begin_pos(&arg_begin_pos, i);
+                goto next_cmd;
+            }
+            if (arg_begin_pos == -1) {
+                goto next_cmd;
+            }
+            ret = save_args(cmdline, count, cmd + arg_begin_pos, i - arg_begin_pos);
+            if (ret < 0) {
+                goto err;
+            }
+            count++;
+            arg_begin_pos = -1;
+        } else {
+            get_arg_begin_pos(&arg_begin_pos, i);
+        }
+next_cmd:
+        i++;
+    }
+
+    if (quota_flag == true) {
+        log_printf(LOG_ERR, "get_exec_and_args, cmd[%s] config illegal.", cmd);
+        goto err;
+    }
+
+    if (arg_begin_pos != -1) {
+        ret = save_args(cmdline, count, cmd + arg_begin_pos, i - arg_begin_pos);
+        if (ret < 0) {
+            goto err;
+        }
+        count++;
+    }
+
+    *args_count = count;
+    return 0;
+
+err:
+    log_printf(LOG_ERR, "get_exec_and_args, parse cmd[%s] for exec and args failed.", cmd);
+    *args_count = count;
+    return -1;
+}
+
+/* parse args, split by spaces and ""; returns the arg count (>0) or -1, exec gets args[0] */
+int get_exec_and_args(const char *cmd, char *exec, char ***cmdline)
+{
+    char **args = NULL;
+    int i;
+    int count = 0;
+    int ret;
+
+    *cmdline = malloc(sizeof(char *) * (ARGS_MAX + 1)); /* +1: room for the NULL after ARGS_MAX args */
+    if (*cmdline == NULL) {
+        log_printf(LOG_ERR, "get_exec_and_args: malloc for cmdline failed.");
+        return -1;
+    }
+
+    args = *cmdline;
+    for (i = 0; i <= ARGS_MAX; i++) {
+        args[i] = NULL;
+    }
+
+    ret = parse_args_from_cmd(cmd, cmdline, &count);
+    if (ret < 0) {
+        goto err;
+    }
+
+    if (count == 0) {
+        log_printf(LOG_INFO, "get_exec_and_args, exec and args is empty, cmd[%s]", cmd);
+        goto err;
+    }
+    /* the last of args[] should be NULL when use execvp */
+    args[count] = NULL;
+
+    if (count > 0) {
+        ret = strncpy_s(exec, EXEC_MAX, args[0], strlen(args[0]));
+        if (ret != 0) {
+            log_printf(LOG_ERR, "get_exec_and_args, strncpy_s exec failed.");
+            goto err;
+        }
+    }
+    return count;
+
+err:
+    for (i = 0; i < count; i++) {
+        if (args[i] != NULL) {
+            free(args[i]);
+        }
+    }
+    free(args);
+    *cmdline = NULL;
+    return -1;
+}
+
+/*
+ * free the first args_num entries of args and the array itself
+ */
+void free_args(char **args, int args_num)
+{
+    int i;
+
+    if (args == NULL) {
+        return;
+    }
+    for (i = 0; i < args_num; i++) {
+        if (args[i] != NULL) {
+            free(args[i]);
+        }
+    }
+    free(args);
+    /* args is a by-value copy: the caller's pointer dangles after this call */
+}
+
+/*
+ * exec psz_cmd, when bash_cmd is true, use "/bin/sh -c" to exec psz_cmd
+ * otherwise split psz_cmd to exec and args and use execvp to exec command.
+ */
+static pid_t exec_cmd(uid_t uid, const char *psz_cmd, bool bash_cmd)
+{
+    char exec[EXEC_MAX] = {0};
+    char **args = NULL;
+    int args_num = 0;
+    pid_t pid;
+    int fd = -1;
+
+    if (!bash_cmd) {
+        args_num = get_exec_and_args(psz_cmd, exec, &args);
+        if (args_num < 0) {
+            return -1;
+        }
+    }
+
+    pid = fork();
+    if (pid < 0) {
+        log_printf(LOG_ERR, "exec_cmd: fork error [%d]", errno);
+        goto err;
+    } else if (pid == 0) {
+        (void)setpgrp();
+        (void)prctl(PR_SET_PDEATHSIG, SIGTERM);
+
+        fd = open("/dev/null", O_RDWR, 0);
+        if (fd >= 0) {
+            (void)dup2(fd, STDIN_FILENO);
+            (void)dup2(fd, STDERR_FILENO);
+            if (fd > STDERR_FILENO) { /* never close fd when it already is a standard descriptor */
+                (void)close(fd);
+            }
+        }
+
+        if (uid != DEFAULT_USER_ID) {
+            if (setuid(uid) != 0) { /* NOTE(review): gid and supplementary groups are not dropped here */
+                exit(ERROR_SETUID);
+            }
+        }
+        if (bash_cmd) {
+            (void)execl("/bin/sh", "sh", "-c", psz_cmd, NULL);
+        } else {
+            (void)execvp(exec, args);
+        }
+        exit(errno);
+    }
+
+err:
+    if (!bash_cmd) {
+        free_args(args, args_num);
+    }
+    return pid;
+}
+/* Run stop_cmd (if not NULL) through exec_cmd, wait for it into *status and log a non-zero exit code. */
+static void handle_monitor_cmd_timeout(uid_t uid, const char *stop_cmd, bool bash_cmd, int *status)
+{
+    pid_t pid;
+    pid_t child_pid;
+    int ret;
+
+    if (stop_cmd == NULL) {
+        return;
+    }
+
+    pid = exec_cmd(uid, stop_cmd, bash_cmd);
+    if (pid > 0) {
+        child_pid = waitpid(pid, status, 0);
+        if (child_pid == pid) {
+            ret = get_child_exit_code(*status);
+            if (ret != 0) {
+                log_printf(LOG_WARNING, "handle monitor cmd timeout: stop_cmd[%s] execl error[%d]", stop_cmd, ret);
+            }
+        }
+    }
+}
+
+/*
+ * process monitor: exec monitor cmd, when timeout and stop cmd is not NULL, exec stop cmd
+ * return 0 means success, otherwise means exception
+ */
+int monitor_cmd(uid_t uid, const char *psz_cmd, long timeout, const char *psz_stop_cmd, bool bash_cmd)
+{
+    int result = 0;
+    pid_t pid;
+    int status = 0;
+    int msec = 0;
+    struct timespec ts =
{0}; + + pid = exec_cmd(uid, psz_cmd, bash_cmd); + if (pid < 0) { + return pid; + } + + for (;;) { + if (timeout > 0) { + if (waitpid(pid, &status, WNOHANG) > 0) { + break; + } + + ts.tv_nsec = PROCESS_SLEEP_INTERVAL; + ts.tv_sec = 0; + (void)nanosleep(&ts, NULL); + msec++; + /* msec++ every 100ms, so divid 10 to compare with timeout */ + if (msec / 10 >= timeout) { + log_printf(LOG_INFO, "execute \"%s\" timeout", psz_cmd); + handle_monitor_cmd_timeout(uid, psz_stop_cmd, bash_cmd, &status); + result = ERROR_TIMEOUT; + process_exit(pid, &status); + break; + } + } else { + (void)waitpid(pid, &status, 0); + break; + } + } + + if (WIFEXITED(status) && result != ERROR_TIMEOUT) { + result = WEXITSTATUS(status); + if (result != 0) { + log_printf(LOG_WARNING, "monitor cmd: psz_cmd[%s] execl error[%d]", psz_cmd, result); + } + } + + return result; +} + +/* + * get value from config, the format is like this: + * MONITOR_SWITCH="on" + * value must be in "" + */ +void get_value(const char *config, unsigned int item_size, char *value, unsigned int value_len) +{ + char *ptr = NULL; + unsigned int size; + int ret; + + /* item="value", so here skip 2 to get value */ + config += item_size + 2; + ptr = strchr(config, '\"'); + if (ptr != NULL) { + size = (unsigned int)(ptr - config); + size = size < value_len ? 
size : value_len - 1; + ret = strncpy_s(value, value_len, config, size); + if (ret) { + log_printf(LOG_ERR, "get_value: strncpy_s value failed, ret: %d", ret); + return; + } + } +} + +/* + * parse config specified by conf + */ +bool parse_config(const char *conf, bool (*parse_line)(const char *line)) +{ + char config[MAX_CONFIG]; + bool ret = true; + FILE *fp = NULL; + + fp = fopen(conf, "r"); + if (fp == NULL) { + if (get_log_interface_flag() == NORMAL_WRITE && get_flag_log_ok() == false) { + (void)printf("[sysmonitor] open '%s' failed, errno [%d]\n", conf, errno); + } else { + log_printf(LOG_ERR, "open %s error [%d]", conf, errno); + } + return false; + } + + for (;;) { + if (!fgets(config, MAX_CONFIG - 1, fp)) { + break; + } + + if (parse_line != NULL) { + if (parse_line(config) == false) { + ret = false; + } + } + } + + (void)fclose(fp); + return ret; +} + +/* + * open config file and check file mode + */ +FILE *open_cfgfile(const char *d_name, int *config_fd) +{ + struct stat sb; + FILE *file = NULL; + int ret; + + ret = memset_s(&sb, sizeof(sb), 0, sizeof(sb)); + if (ret) { + log_printf(LOG_ERR, "open_cfgfile: memset_s sb failed, ret: %d", ret); + return NULL; + } + *config_fd = open(d_name, O_RDONLY | O_NONBLOCK | O_CLOEXEC, 0); + if (*config_fd < OK) { + log_printf(LOG_ERR, "open %s error [%d]", d_name, errno); + return NULL; + } + if (stat(d_name, &sb) || !S_ISREG(sb.st_mode)) { + goto err; + } + /* config file mode should be 700 */ + if (sb.st_mode & (S_IRWXG | S_IRWXO)) { + log_printf(LOG_ERR, "%s: bad file mode", d_name); + goto err; + } + file = fdopen(*config_fd, "r"); + if (file == NULL) { + log_printf(LOG_ERR, "fdopen %s error [%d]", d_name, errno); + goto err; + } + return file; + +err: + (void)close(*config_fd); + *config_fd = -1; + return NULL; +} + +/* + * check if the input is only number + */ +bool check_int(const char *input) +{ + const char *p = input; + + if (p == NULL) { + log_printf(LOG_ERR, "check_int failed, input is NULL."); + return 
false; + } + + do { + /* also return false if empty, ie, the first character is '\0' */ + if (*p < '0' || *p > '9') { + return false; + } + p++; + } while (*p); + + return true; +} + +/* + * check if the input is only decimal + */ +bool check_decimal(const char *input) +{ + const char *p = input; + + if (p == NULL) { + log_printf(LOG_ERR, "check_decimal failed, input is NULL."); + return false; + } + + do { + /* also return false if empty, ie, the first character is '\0' */ + if ((*p < '0' || *p > '9') && *p != '.') { + return false; + } + p++; + } while (*p); + + return true; +} + +/* + * exec the cmdstring, used to restart sysalarm + */ +int lovs_system(const char *cmdstring) +{ + pid_t pid; + int status = 0; + + if (cmdstring == NULL) { + return -1; + } + + pid = fork(); + if (pid < 0) { + status = -1; + } else if (pid == 0) { + (void)execl("/bin/sh", "sh", "-c", cmdstring, (char *)0); + exit(errno); + } else { + while (waitpid(pid, &status, 0) < 0) { + if (errno != EINTR) { + status = -1; + break; + } + } + } + + return status; +} + +const char *g_invalid_string[] = { ";", "|", "&", "$", ">", + "<", "(", ")", "./", "/.", + "?", "*", "`", "\\", "[", + "]", "'", "!" 
}; + +/* + * check config with illegal parameter + */ +int check_conf_file_valid(const char *config) +{ + unsigned int i; + + for (i = 0; i < array_size(g_invalid_string); i++) { + if (strstr(config, g_invalid_string[i])) { + log_printf(LOG_INFO, "ERROR: \"%s\" include nonsecure character!", config); + return -1; + } + } + + return 0; +} + +/* + * check realpath of file + */ +bool check_file(const char *file) +{ + char *real_path = NULL; + + if (file == NULL || strlen(file) == 0) { + return false; + } + + if (access(file, F_OK) != 0) { + log_printf(LOG_INFO, "access %s failed, errno: %d.", file, errno); + return false; + } + + real_path = realpath(file, NULL); + if (real_path == NULL) { + log_printf(LOG_INFO, "realpath %s failed, errno: %d.", file, errno); + return false; + } + + if (strcmp(real_path, file) != 0) { + log_printf(LOG_INFO, "%s should be absolute path.", file); + free(real_path); + return false; + } + + free(real_path); + return true; +} + +/* + * convert value to int + */ +bool parse_value_int(const char *item, const char *value, unsigned int *result) +{ + if (check_int(value) == false || strtol(value, NULL, STRTOL_NUMBER_BASE) < 0) { + log_printf(LOG_INFO, "%s config illegal, check %s.", item, value); + return false; + } + + *result = (unsigned int)strtol(value, NULL, STRTOL_NUMBER_BASE); + return true; +} + +bool parse_value_ulong(const char *item, const char *value, unsigned long *result) +{ + if (check_int(value) == false) { + log_printf(LOG_INFO, "%s config illegal, check %s.", item, value); + return false; + } + *result = strtoul(value, NULL, 0); + return true; +} + +/* + * save value to result + * result and size are promissed by caller + */ +bool parse_value_string(const char *item, const char *value, char *result, unsigned int size) +{ + int ret; + + if (strlen(value) >= size) { + log_printf(LOG_INFO, "parse %s failed, %s: too long (>%u)", item, value, size - 1); + return false; + } + + ret = strcpy_s(result, size, value); + if (ret) { + 
log_printf(LOG_ERR, "parse config failed, strcpy_s %s failed.", value); + return false; + } + return true; +} + +/* + * parse value to bool + * ON/on to true + * OFF/off to false + */ +bool parse_value_bool(const char *item, const char *value, bool *result) +{ + if (strcmp(value, "on") == 0 || strcmp(value, "ON") == 0) { + *result = true; + } else if (strcmp(value, "off") == 0 || strcmp(value, "OFF") == 0) { + *result = false; + } else { + log_printf(LOG_INFO, "%s config illegal, check %s.", item, value); + return false; + } + return true; +} + +/* + * parse value to float + * for cpu, memory, sysfd alarm_value and resume_value check + */ +bool parse_value_float(const char *item, const char *value, float *result) +{ + if (check_decimal(value) == false) { + return false; + } + *result = strtof(value, NULL); + return true; +} + +bool check_log_path(const char *log_path) +{ + char tmp[LOG_FILE_LEN] = {0}; + char *dir = NULL; + int ret; + + if (!access(log_path, F_OK)) { + return check_file(log_path); + } + + /* file not exist, so check file directory realpath */ + ret = strncpy_s(tmp, LOG_FILE_LEN, log_path, LOG_FILE_LEN - 1); + if (ret) { + (void)printf("check_log_path: strncpy_s tmp failed, ret: %d.", ret); + return false; + } + + dir = dirname(tmp); + return check_file(dir); +} + +/* + * write msg to kernel mod file + */ +int set_value_to_file(const char *msg, const char *path) +{ + ssize_t ret; + int fd = -1; + + fd = open(path, O_WRONLY | O_APPEND | O_CREAT | O_CLOEXEC, KERNELMODE_FILE_PERMISSION); + if (fd < 0) { + log_printf(LOG_ERR, "set_value_to_file open %s failed, errno[%d].", path, errno); + return -1; + } + + ret = write(fd, msg, strlen(msg)); + if (ret == -1) { + log_printf(LOG_ERR, "set_value_to_file write failed, errno[%d].", errno); + (void)close(fd); + return -1; + } + + (void)close(fd); + return 0; +} + +/* + * return value: + * 1: do not find value + * 0: get value from string successfully + * -1: value length exceeds outsize or memcpy_s failed + 
*/ +int get_string(const char *config, const char *value, char *outstr, unsigned int outsize, const char *item) +{ + char *begin = NULL; + char *end = NULL; + unsigned int size; + int ret; + + begin = strstr(config, value); + if (begin == NULL) { + return 1; + } + begin += strlen(value); + end = strstr(begin, "\""); + if (end == NULL) { + return 1; + } + + size = (unsigned int)(end - begin); + if (size >= outsize) { + log_printf(LOG_ERR, "parse %s failed, length exceeds %d", item, outsize - 1); + return -1; + } + + if (size == 0) { + return 1; + } + + ret = memset_s(outstr, outsize, 0, outsize); + if (ret != 0) { + log_printf(LOG_ERR, "get_string: memset_s outstr failed, ret: %d", ret); + return -1; + } + ret = memcpy_s(outstr, outsize, begin, size); + if (ret != 0) { + log_printf(LOG_ERR, "get_string: memcpy_s outstr failed, ret: %d", ret); + return -1; + } + + return 0; +} diff --git a/sysmonitor-1.3.2/src/common.h b/sysmonitor-1.3.2/src/common.h new file mode 100644 index 0000000000000000000000000000000000000000..e31f2aa8b1f632a9d999b402f00f2f995907df59 --- /dev/null +++ b/sysmonitor-1.3.2/src/common.h @@ -0,0 +1,308 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2016-2019. All rights reserved. + * sysmonitor licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. 
+ * Description: define common functions and variables + * Author: xuchunmei + * Create: 2016-1-1 + */ +#ifndef COMMON_H +#define COMMON_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#define DAEMON_SYSLOG 0 +#define NORMAL_WRITE 1 +#define OK 0 +#define ERR (-1) +#define LOG_FILE_LEN 128 +#define MAX_LOG_LEN (4096 + (2 * MAX_TEMPSTR)) +#define MAX_TEMPSTR 200 +#define MAX_CONFIG 4096 + +#define POPEN_TIMEOUT 30 +#define WORKER_TASK_TIMEOUT 30 +#define ERROR_FORK (-1) +#define ERROR_FDOPEN (-2) +#define ERROR_SELECT (-3) +#define ERROR_TIMEOUT (-4) +#define ERROR_PIPE (-5) +#define ERROR_FCNTL (-6) +#define ERROR_ARGS_WRONG (-7) +#define ERROR_CONF (-8) +#define ERROR_OPEN (-9) +#define ERROR_SETUID (-10) +#define ERROR_PARSE (-11) +#define ERROR_CREATE_THREAD (-12) +#define ERROR_NO_CONF (-13) + +#define DEFAULT_USER_ID 0xffffffff +#define QUEUE_SIZE 1000 +#define TASK_QUEUE_SIZE 100 +#define PARAS_LEN 256 + +#define ITEM_LEN 50 +#define VALUE_LEN 10 + +#define COMMON_ALARM_TYPE_EVENT 2 +#define COMMON_ALARM_TYPE_OCCUR 1 +#define COMMON_ALARM_TYPE_RESUME 0 + +#define CPU_ABNORMAL 1001 +#define MEM_ABNORMAL 1002 +#define DISK_ABNORMAL 1003 +#define FS_ABNORMAL 1004 +#define PS_ABNORMAL 1005 +#define FILE_ABNORMAL 1006 +#define NET_ABNORMAL 1007 +#define SIG_ABNORMAL 1008 +#define PSCNT_ABNORMAL 1009 +#define FDCNT_ABNORMAL 1010 +#define DISK_INODE_ABNORMAL 1011 +#define DISK_IO_DELAY_ABNORMAL 1012 +#define PROCESS_FD_NUM_ABNORMAL 1014 +#define PROCESS_FD_LEAK_ABNORMAL 1015 +#define ZOMBIE_ABNORMAL 1016 +#define FS_EXT4_ABNORMAL 1019 +#define PS_THREADS_ABNORMAL 1025 + +#define ALARM_LEVEL_CRITICAL 1 +#define ALARM_LEVEL_MAJOR 2 +#define ALARM_LEVEL_MINOR 3 +#define ALARM_LEVEL_WARNING 4 +#define ALARM_LEVEL_INDETERMINATE 5 + +#define MAX_STRERROR_SIZE 1024 +#ifndef O_CLOEXEC +#define O_CLOEXEC 0 +#endif + +#define DEFALUT_PROCESS_RESTART_TIMEOUT 90 +#define TASK_COMM_LEN 16 
+#define EXEC_MAX 256 +#define ARGS_MAX 64 +#define TM_YEAR_BEGIN 1900 +#define LOG_FILE_PERMISSION 0640 +#define KERNELMODE_FILE_PERMISSION 0600 +#define PROCESS_EXIT_TIMEOUT 10 +#define PROCESS_SLEEP_INTERVAL (100 * 1000 * 1000) +#define THREAD_PID_OFFSET 16 +#define POLL_TIMEOUT_DEFAULT (30 * 1000) +#define FAIL_NUM 3 + +#define RET_SUCCESS 0 +#define RET_BREAK (-1) +#define RET_CONTINUE 1 + +#define array_size(arr) (sizeof(arr) / sizeof((arr)[0])) +#define SYSMONITOR_PERIOD 2 + +#define STRTOL_NUMBER_BASE 10 +#define STRTOL_HEX_NUMBER_BASE 16 +#define STRTOULL_NUMBER_BASE 10 + +typedef enum monitor_type { + PS_ITEM, + FS_ITEM, + FILE_ITEM, + DISK_ITEM, + INODE_ITEM, + CUSTOM_DAEMON_ITEM, + CUSTOM_PERIODIC_ITEM, + IO_DELAY_ITEM, + SYSTEM_ITEM, + SYS_EVENT_ITEM, + ZOMBIE_ITEM, + MONITOR_ITEMS_CNT +} monitor_item_type; + +struct list_head { + struct list_head *next, *prev; +}; + +typedef struct monitor_thread_s { + pthread_t tid; + bool monitor; + bool alarm; + bool reload; + int period; + void (*init)(void); +} monitor_thread; + +typedef enum task_state_type { + RUNNING_STATE = 1, + EXITED_STATE, + EXITING_STATE +} task_state; + +typedef struct worker_task_s { + pid_t cpid; + int time_count; + task_state state; +} worker_task; + +enum heart_msg_type { + PID_TYPE = 0, + STOP_TYPE, + START_TYPE +}; + +/* + * type == 0 pid + * type == 1 service stop + * type == 2 service start + */ +typedef struct heart_msg_s { + int type; + pid_t pid; +} heart_message; + +struct alarm_level_info { + unsigned short alarm_id; + unsigned char alarm_level; +}; + +extern int get_log_interface_flag(void); +extern bool get_flag_log_ok(void); +extern void log_printf(int priority, const char *format, ...); +extern bool get_thread_item_reload_flag(monitor_item_type type); +extern void set_thread_item_reload_flag(monitor_item_type type, bool flag); +extern void set_thread_item_tid(monitor_item_type type, pthread_t tid); +extern int get_thread_item_period(monitor_item_type type); +extern void 
set_thread_item_period(monitor_item_type type, int period); +extern bool get_thread_item_monitor_flag(monitor_item_type type); +extern void set_thread_item_monitor_flag(monitor_item_type type, bool flag); +extern bool get_thread_item_alarm_flag(monitor_item_type type); + +/* exec command */ +int monitor_popen(const char *psz_cmd, char *psz_buffer, unsigned int size, long timeout, const char *psz_stop_cmd); +int lovs_system(const char *cmdstring); +int monitor_cmd(uid_t uid, const char *psz_cmd, long timeout, const char *psz_stop_cmd, bool bash_cmd); + +/* parse config */ +void get_value(const char *config, unsigned int item_size, char *value, unsigned int value_len); +bool parse_config(const char *conf, bool (*parse_line)(const char *line)); +FILE *open_cfgfile(const char *d_name, int *config_fd); +bool check_int(const char *input); +bool check_decimal(const char *input); +int check_conf_file_valid(const char *config); +bool check_file(const char *file); +bool parse_value_int(const char *item, const char *value, unsigned int *result); +bool parse_value_string(const char *item, const char *value, char *result, unsigned int size); +bool parse_value_bool(const char *item, const char *value, bool *result); +bool parse_value_float(const char *item, const char *value, float *result); +bool parse_value_ulong(const char *item, const char *value, unsigned long *result); +bool check_log_path(const char *log_path); + +/* parse command */ +int get_exec_and_args(const char *cmd, char *exec, char ***cmdline); +void free_args(char **args, int args_num); + +static inline void init_list_head(struct list_head *list) +{ + list->next = list; + list->prev = list; +} + +#define m_offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER) + +/* + * container_of - cast a member of a structure out to the containing structure + * @ptr: the pointer to the member. + * @type: the type of the container struct this is embedded in. + * @member: the name of the member within the struct. 
+ */ +#define container_of(ptr, type, member) ({ \ + const typeof(((type *)0)->member) * __mptr = (ptr); \ + (type *)((char *)__mptr - m_offsetof(type, member)); \ +}) + +/* refer to linux source code: include/linux/list.h */ + +/* + * list_entry - get the struct for this entry + * @ptr: the &struct list_head pointer. + * @type: the type of the struct this is embedded in. + * @member: the name of the list_struct within the struct. + */ +#define list_entry(ptr, type, member) container_of(ptr, type, member) + +/* + * list_for_each_entry - iterate over list of given type + * @pos: the type * to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the list_struct within the struct. + */ +#define list_for_each_entry(pos, head, member) \ + for ((pos) = list_entry((head)->next, typeof(*(pos)), member); \ + &(pos)->member != (head); \ + (pos) = list_entry((pos)->member.next, typeof(*(pos)), member)) + +/* + * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry + * @pos: the type * to use as a loop counter. + * @n: another type * to use as temporary storage + * @head: the head for your list. + * @member: the name of the list_struct within the struct. + */ +#define list_for_each_entry_safe(pos, n, head, member) \ + for ((pos) = list_entry((head)->next, typeof(*(pos)), member), \ + (n) = list_entry((pos)->member.next, typeof(*(pos)), member); \ + &(pos)->member != (head); \ + (pos) = (n), (n) = list_entry((n)->member.next, typeof(*(n)), member)) + +static inline void list_add(struct list_head *new, struct list_head *head) +{ + head->next->prev = new; + new->next = head->next; + new->prev = head; + head->next = new; +} + +/* + * Delete a list entry by making the prev/next entries + * point to each other. + * + * This is only for internal list manipulation where we know + * the prev/next entries already! 
+ */ +static inline void _list_del(struct list_head *prev, struct list_head *next) +{ + next->prev = prev; + prev->next = next; +} + +/* delete a list entry */ +static inline void list_del(struct list_head *entry) +{ + _list_del(entry->prev, entry->next); +} + +static inline int list_empty(struct list_head *head) +{ + return head->next == head; +} + +extern int set_value_to_file(const char *msg, const char *path); +extern int get_string(const char *config, const char *value, char *outstr, unsigned int outsize, const char *item); + +#endif diff --git a/sysmonitor-1.3.2/src/custom.c b/sysmonitor-1.3.2/src/custom.c new file mode 100644 index 0000000000000000000000000000000000000000..6e40fdc9ebe17e4f80945aeaf9410de9763566f6 --- /dev/null +++ b/sysmonitor-1.3.2/src/custom.c @@ -0,0 +1,1457 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2016-2019. All rights reserved. + * sysmonitor licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. 
+ * Description: custom process monitor + * Author: xuchunmei + * Create: 2016-1-1 + */ +#include "custom.h" + +#include +#include +#include +#include + +#include +#include "monitor_thread.h" + +static struct list_head g_custom_daemon_head; /* daemon monitor items */ +static struct list_head g_custom_periodic_head; /* periodic monitor items */ +static worker_task g_task_queue[TASK_QUEUE_SIZE]; +static pthread_mutex_t g_task_mtx = PTHREAD_MUTEX_INITIALIZER; + +static int worker_task_init(const worker_task *task, int *index); +static void worker_task_add(const worker_task *task, int index); + +static bool parse_monitor_switch(const char *item, const char *value, str_custom *t) +{ + bool result = parse_value_bool(item, value, &t->monitor_switch); + if (!result) { + log_printf(LOG_ERR, "custom monitor: monitor switch configuration error!"); + return false; + } + return true; +} + +static bool parse_type(const char *item, const char *value, str_custom *t) +{ + if (!strcmp(value, "daemon")) { + t->type = CUSTOM_DAEMON; + } else if (!strcmp(value, "periodic")) { + t->type = CUSTOM_PERIODIC; + } else { + log_printf(LOG_INFO, "custom monitor: type configuration error!"); + return false; + } + + return true; +} + +static bool check_cmd_valid(const char *cmd) +{ + int args_num; + char exec[EXEC_MAX] = {0}; + char **args = NULL; + + args_num = get_exec_and_args(cmd, exec, &args); + if (args_num < 0) { + return false; + } + free_args(args, args_num); + return true; +} + +static bool parse_exec_start(const char *item, const char *value, str_custom *t) +{ + bool ret = false; + + if (strlen(value) == 0) { + log_printf(LOG_ERR, "custom monitor: execstart configuration error!"); + return false; + } + ret = parse_value_string(item, value, t->start_cmd, MAX_CUSTOM_CMD_LEN); + if (!ret) { + return false; + } + + return check_cmd_valid(t->start_cmd); +} + +static bool parse_exec_other(const char *item, const char *value, str_custom *t) +{ + return true; +} + +static bool 
parse_period(const char *item, const char *value, str_custom *t) +{ + bool result = parse_value_int(item, value, &t->period); + if (!result) { + log_printf(LOG_ERR, "custom monitor: period configuration error!"); + return false; + } + return true; +} + +static bool parse_environmentfile(const char *item, const char *value, str_custom *t) +{ + if (strlen(value) == 0) { + log_printf(LOG_ERR, "custom monitor: enviromentfile configuration error!"); + return false; + } + if (strlen(value) >= MAX_CFG_NAME_LEN) { + log_printf(LOG_ERR, "custom monitor: enviromentfile path should be less than %d, error!", MAX_CFG_NAME_LEN); + return false; + } + return parse_value_string(item, value, t->enviroment_file, MAX_CFG_NAME_LEN); +} + +static custom_item_func g_custom_item_func_table[] = { + { "MONITOR_SWITCH", parse_monitor_switch }, + { "TYPE", parse_type }, + { "EXECSTART", parse_exec_start }, + { "EXECSTARTPRE", parse_exec_other }, + { "EXECSTARTPOST", parse_exec_other }, + { "EXECSTOP", parse_exec_other }, + { "EXECSTOPPRE", parse_exec_other }, + { "EXECSTOPPOST", parse_exec_other }, + { "PERIOD", parse_period }, + { "ENVIROMENTFILE", parse_environmentfile }, +}; + +static const char *g_custom_cfg_type[] = { + "daemon", + "periodic", +}; + +static bool get_value_custom(const char *config, unsigned int key_size, char *value, unsigned int value_len) +{ + char *ptr = NULL; + unsigned int size; + int ret; + + /* key="value", so here skip 2 to get value */ + config += key_size + 2; + ptr = strchr(config, '\"'); + if (ptr != NULL) { + size = (unsigned int)(ptr - config); + if (size >= MAX_CUSTOM_CMD_LEN) { + log_printf(LOG_ERR, "custom monitor: size should be less than %d, error!", MAX_CUSTOM_CMD_LEN); + return false; + } + ret = strncpy_s(value, value_len, config, size); + if (ret) { + log_printf(LOG_ERR, "custom parse_line strncpy_s value error, ret: %d", ret); + return false; + } + } + return true; +} + +/* + * parse /etc/sysmonitor.d/ config files + */ +static bool 
parse_line(str_custom *t, const char *config) +{ + unsigned int size; + char *ptr = NULL; + char item[ITEM_LEN] = {0}; + char value[MAX_CONFIG] = {0}; + int ret; + unsigned int i; + + while (*config == ' ' || *config == '\t') { + config++; + } + + if ((*config == '#') || (*config == '\n')) { + return true; + } + + if (check_conf_file_valid(config) == -1) { + return false; + } + + ptr = strstr(config, "=\""); + if (ptr == NULL) { + return false; + } + + size = (unsigned int)(ptr - config); + if (size >= sizeof(item)) { + log_printf(LOG_ERR, "custom parse_line: item length(%u) too long(>%lu).", size, sizeof(item)); + return false; + } + ret = strncpy_s(item, sizeof(item), config, size); + if (ret != 0) { + log_printf(LOG_ERR, "custom parse_line: strncpy_s item failed, ret: %d", ret); + return false; + } + + if (get_value_custom(config, size, value, sizeof(value)) == false) { + return false; + } + + for (i = 0; i < array_size(g_custom_item_func_table); i++) { + if (!strcmp(g_custom_item_func_table[i].item, item)) { + return g_custom_item_func_table[i].func(item, value, t); + } + } + + log_printf(LOG_ERR, "%s not supported", item); + return false; +} + +/* + * get environment variables from config files + * the number of environment variables cannot exceed MAX_ENV_CONFIG + */ +static bool get_envp(const char *file_dir, char **envp, unsigned int *cout) +{ + char config[MAX_CONFIG] = {0}; + FILE *fp = NULL; + size_t len; + unsigned int i; + unsigned int env = 0; + int ret; + + if (check_file(file_dir) == false) { + return false; + } + + fp = fopen(file_dir, "r"); + if (fp == NULL) { + log_printf(LOG_INFO, "open %s error [%d]", file_dir, errno); + return false; + } + + while (fgets(config, MAX_CONFIG, fp)) { + i = 0; + while (config[i] == ' ' || config[i] == '\t') { + i++; + continue; + } + + if ((config[i] == '#') || (config[i] == '\n')) { + continue; + } + + len = strlen(&config[i]); + if (len == 0) { + continue; + } + if (config[i + len - 1] == '\n') { + config[i + len 
- 1] = '\0'; + len -= 1; + } + + envp[env] = malloc(len + 1); + if (envp[env] == NULL) { + log_printf(LOG_ERR, "malloc envp error."); + *cout = env; + (void)fclose(fp); + return false; + } + ret = memcpy_s(envp[env], len + 1, &config[i], len + 1); + if (ret != 0) { + log_printf(LOG_ERR, "get_envp: memcpy_s envp failed, ret: %d", ret); + free(envp[env]); + envp[env] = NULL; + *cout = env; + (void)fclose(fp); + return false; + } + + env++; + + if (env >= MAX_ENV_CONFIG) { + break; + } + } + + *cout = env; + (void)fclose(fp); + return true; +} + +static void free_custom_env(str_custom **t) +{ + unsigned int i; + + if ((*t)->envp != NULL) { + free((*t)->envp); + (*t)->envp = NULL; + } + + for (i = 0; i < (*t)->envp_config_count; i++) { + if ((*t)->envp_config[i] != NULL) { + free((*t)->envp_config[i]); + (*t)->envp_config[i] = NULL; + } + } + + (*t)->envp_config_count = 0; +} + +/* + * free custom task + */ +static void free_custom_t(str_custom **t) +{ + if (*t == NULL) { + return; + } + + free_custom_env(t); + + free(*t); + *t = NULL; +} + +/* + * close all fds of current process + */ +static int close_all_fd(void) +{ + struct rlimit lim; + unsigned int i; + + if (getrlimit(RLIMIT_NOFILE, &lim) < 0) { + return -1; + } + if (lim.rlim_cur > MAX_CLOSE_FD_NUM) { + lim.rlim_cur = MAX_CLOSE_FD_NUM; + } + for (i = 0; i < lim.rlim_cur; i++) { + (void)close((int)i); + } + + return 0; +} + +static void dup2_fd_in_child_process(void) +{ + int fd = -1; + + fd = open("/dev/null", O_RDWR, 0); + if (fd >= 0) { + (void)dup2(fd, STDIN_FILENO); + (void)dup2(fd, STDOUT_FILENO); + (void)dup2(fd, STDERR_FILENO); + if (fd > STDERR_FILENO) { + (void)close(fd); + } + } +} + +/* + * exec daemon monitor + * do not wait child process exit + */ +static int execle_daemon(const char *psz_cmd, char **envp, pid_t *child_pid) +{ + pid_t pid; + char exec[EXEC_MAX] = {0}; + char **args = NULL; + int args_num; + + args_num = get_exec_and_args(psz_cmd, exec, &args); + if (args_num < 0) { + return 
ERROR_PARSE; + } + + /* child process inherited the lock of the parent process, maybe deadlock, + so don't operate lock between fork and execvp/execve */ + pid = fork(); + if (pid < 0) { + log_printf(LOG_ERR, "execle_daemon: fork error [%d]", errno); + free_args(args, args_num); + return ERROR_FORK; + } else if (pid == 0) { + (void)setpgrp(); + (void)prctl(PR_SET_PDEATHSIG, SIGTERM); + (void)close_all_fd(); + + dup2_fd_in_child_process(); + + if (envp == NULL) { + (void)execvp(exec, args); + } else { + (void)execve(exec, args, envp); + } + + exit(errno); + } + + if (child_pid != NULL) { + *child_pid = pid; + } + + free_args(args, args_num); + return 0; +} + +/* + * exec periodic monitor + * wait child process to exit, exec cmd cannot timeout + */ +static int execle_periodic(str_custom *t) +{ + pid_t pid; + int fd = -1; + char exec[EXEC_MAX] = {0}; + char **args = NULL; + int args_num; + + args_num = get_exec_and_args(t->start_cmd, exec, &args); + if (args_num < 0) { + return ERROR_PARSE; + } + + pid = fork(); + if (pid < 0) { + log_printf(LOG_ERR, "execle_periodic: fork error [%d]", errno); + free_args(args, args_num); + return ERROR_FORK; + } else if (pid == 0) { + (void)setpgrp(); + (void)prctl(PR_SET_PDEATHSIG, SIGTERM); + (void)close_all_fd(); + + fd = open("/dev/null", O_RDWR, 0); + if (fd >= 0) { + (void)dup2(fd, STDIN_FILENO); + (void)dup2(fd, STDOUT_FILENO); + (void)dup2(fd, STDERR_FILENO); + if (fd > STDERR_FILENO) { + (void)close(fd); + } + } + + if (t->envp == NULL) { + (void)execvp(exec, args); + } else { + (void)execve(exec, args, t->envp); + } + + exit(errno); + } + + t->pid = pid; + free_args(args, args_num); + return 0; +} + +/* + * parse environment file specified by config and add system env + */ +static bool parse_env_file(str_custom *t) +{ + bool env_ret = false; + unsigned int i; + int ret; + unsigned int len; + + env_ret = get_envp(t->enviroment_file, t->envp_config, &t->envp_config_count); + if (env_ret != true) { + return false; + } + + /* 
get global environment variables number */ + for (i = 0; environ[i] != NULL; i++) { + } + t->envp = malloc((i + t->envp_config_count + 1) * sizeof(char *)); + if (t->envp == NULL) { + log_printf(LOG_ERR, "malloc error."); + return false; + } + + ret = memcpy_s(t->envp, i * sizeof(char *), environ, i * sizeof(char *)); + if (ret != 0) { + log_printf(LOG_ERR, "parse_env_file: memcpy_s t->envp failed, ret: %d", ret); + return false; + } + + if (t->envp_config_count != 0) { + len = t->envp_config_count * sizeof(char *); + ret = memcpy_s(t->envp + i, len, t->envp_config, len); + if (ret != 0) { + log_printf(LOG_ERR, "parse_env_file: memcpy_s t->envp + i failed, ret: %d", ret); + return false; + } + } + /* the last args of execl functions should be NULL */ + t->envp[i + t->envp_config_count] = NULL; + return true; +} + +static void check_to_add_list(str_custom *t, custom_type type) +{ + str_custom *ori_t = NULL; + bool null_flag = true; + bool is_new_cfg = true; + struct list_head *head = (type == CUSTOM_DAEMON) ? 
&g_custom_daemon_head : &g_custom_periodic_head; + + list_for_each_entry(ori_t, head, list) { + null_flag = false; + if (ori_t != NULL && !strcmp(ori_t->conf_name, t->conf_name) && + ori_t->state != EXITING_STATE) { + free_custom_t(&t); + is_new_cfg = false; + break; + } + } + + if (null_flag == true || is_new_cfg == true) { + log_printf(LOG_INFO, "type[%s] conf_name[%s] is added to monitor list", + g_custom_cfg_type[t->type - 1], t->conf_name); + list_add(&t->list, head); + } +} + +static bool check_config(str_custom *t, const char *file_name, bool flag, custom_type type) +{ + int ret; + + if (flag == false || !strlen(t->start_cmd) || t->type == 0 || + (t->type == CUSTOM_PERIODIC && t->period == 0)) { + free_custom_t(&t); + return false; + } + + if (t->type != type || t->monitor_switch == false) { + free_custom_t(&t); + return true; + } + + if (strlen(t->enviroment_file) != 0) { + if (parse_env_file(t) == false) { + free_custom_t(&t); + return false; + } + } + + ret = memcpy_s(t->conf_name, MAX_CFG_NAME_LEN, file_name, strlen(file_name)); + if (ret != 0) { + log_printf(LOG_ERR, "check_config: memcpy_s t->conf_name failed, ret: %d", ret); + free_custom_t(&t); + return false; + } + + check_to_add_list(t, type); + + return true; +} + +/* + * parse custom config + */ +static bool custom_parse_config(FILE *file, const char *file_name, custom_type type) +{ + str_custom *t = NULL; + char config[MAX_CONFIG] = {0}; + bool flag = true; + int ret; + size_t len; + + t = malloc(sizeof(str_custom)); + if (t == NULL) { + return false; + } + ret = memset_s(t, sizeof(str_custom), 0, sizeof(str_custom)); + if (ret != 0) { + log_printf(LOG_ERR, "custom_parse_config: memset_s t failed, ret: %d", ret); + free(t); + return false; + } + t->state_index = -1; + + for (;;) { + if (!fgets(config, MAX_CONFIG - 1, file)) { + break; + } + + len = strlen(config); + if (len > 0 && config[len - 1] == '\n') { + config[len - 1] = '\0'; + } + + if (parse_line(t, config) == false) { + flag = false; + 
break; + } + } + + return check_config(t, file_name, flag, type); +} + +/* + * load task from config + */ +static bool load_task(custom_type type, bool update) +{ + struct dirent *direntp = NULL; + int config_fd = -1; + FILE *fp = NULL; + DIR *dir = NULL; + char cfg_full_name[MAX_CFG_NAME_LEN + 18] = {0}; /* 18 is length if "/etc/sysmonitor.d/" */ + int ret; + + if (!update) { + if (type == CUSTOM_DAEMON) { + init_list_head(&g_custom_daemon_head); + } else { + init_list_head(&g_custom_periodic_head); + } + } + + dir = opendir(CUSTOM_CONFIG_DIR); + if (dir == NULL) { + log_printf(LOG_ERR, "load_task: %s not exist", CUSTOM_CONFIG_DIR); + return false; + } + + direntp = readdir(dir); + while (direntp != NULL) { + if (strlen(direntp->d_name) >= MAX_CFG_NAME_LEN) { + log_printf(LOG_ERR, "load_task: config file name should be less than 128, file: %s", + direntp->d_name); + direntp = readdir(dir); + continue; + } + ret = memset_s(cfg_full_name, sizeof(cfg_full_name), 0, sizeof(cfg_full_name)); + if (ret != 0) { + log_printf(LOG_ERR, "load_task: memset_s cfg_full_name failed, ret: %d", ret); + (void)closedir(dir); + return false; + } + ret = snprintf_s(cfg_full_name, sizeof(cfg_full_name), sizeof(cfg_full_name) - 1, + "%s%s", CUSTOM_CONFIG_DIR, direntp->d_name); + if (ret == -1) { + log_printf(LOG_ERR, "load_task: snprintf_s cfg_full_name failed, ret: %d", ret); + (void)closedir(dir); + return false; + } + + fp = open_cfgfile(cfg_full_name, &config_fd); + if (fp == NULL) { + direntp = readdir(dir); + continue; + } + if (custom_parse_config(fp, direntp->d_name, type) == false) { + log_printf(LOG_ERR, "parse %s error", direntp->d_name); + } + (void)fclose(fp); + direntp = readdir(dir); + } + + (void)closedir(dir); + return true; +} + +/* + * daemon process monitor + */ +static void monitor_daemon(struct list_head *head) +{ + str_custom *t = NULL; + struct timespec ts; + + list_for_each_entry(t, head, list) { + if (t != NULL && t->pid == 0) { + 
(void)execle_daemon(t->start_cmd, t->envp, &t->pid); + ts.tv_nsec = PROCESS_SLEEP_INTERVAL; + ts.tv_sec = 0; + (void)nanosleep(&ts, NULL); + } + } +} + +/* + * periodic process monitor + */ +static void monitor_periodic(void) +{ + str_custom *t = NULL; + worker_task wtask = {0}; + struct timespec ts; + int ret; + + list_for_each_entry(t, &g_custom_periodic_head, list) { + if (t->state == EXITING_STATE) { + continue; + } + ret = execle_periodic(t); + if (ret == 0) { + wtask.cpid = t->pid; + wtask.time_count = 0; + wtask.state = RUNNING_STATE; + (void)worker_task_init(&wtask, &t->state_index); + t->time_count = 0; + } else { + log_printf(LOG_INFO, "execle_periodic ret [%d] error", ret); + } + + ts.tv_nsec = PROCESS_SLEEP_INTERVAL; + ts.tv_sec = 0; + (void)nanosleep(&ts, NULL); + } +} + +static bool check_cfg_exist_or_updated(const char *conf_name, const str_custom *del_task, str_custom *task) +{ + int ret; + + ret = memcpy_s(task->conf_name, MAX_CFG_NAME_LEN, conf_name, strlen(conf_name)); + if (ret != 0) { + log_printf(LOG_ERR, "find_cfg_exist_or_updated: memcpy_s task.conf_name failed, ret: %d", ret); + return false; + } + if (!strcmp(task->conf_name, del_task->conf_name)) { + if (task->monitor_switch == del_task->monitor_switch && task->monitor_switch == true && + !strcmp(task->start_cmd, del_task->start_cmd) && + !strcmp(task->enviroment_file, del_task->enviroment_file) && + task->type == del_task->type && task->type == CUSTOM_DAEMON && + task->state != EXITING_STATE) { + return true; + } + } + + return false; +} + +/* + * find same config in list + */ +static bool find_cfg_exist_or_updated(FILE *file, const char *conf_name, const str_custom *del_task) +{ + char config[MAX_CONFIG] = {0}; + str_custom task; + int ret; + + ret = memset_s(&task, sizeof(task), 0, sizeof(task)); + if (ret) { + log_printf(LOG_ERR, "find_cfg_exist_or_updated: memset_s task failed, ret: %d", ret); + return false; + } + if (file == NULL || conf_name == NULL || del_task == NULL) { + return 
false; + } + + for (;;) { + if (fgets(config, MAX_CONFIG - 1, file)) { + (void)parse_line(&task, config); + continue; + } + break; + } + + return check_cfg_exist_or_updated(conf_name, del_task, &task); +} + +static bool process_daemon_task_reload(str_custom *t, DIR *dir) +{ + struct dirent *direntp = NULL; + char cfg_full_name[MAX_CFG_NAME_LEN + 18] = {0}; /* 18 is length of "/etc/sysmonitor.d/" */ + int config_fd = -1; + bool find = false; + FILE *fp = NULL; + int ret; + + direntp = readdir(dir); + while (direntp != NULL) { + if (strlen(direntp->d_name) >= MAX_CFG_NAME_LEN) { + log_printf(LOG_ERR, "reload_task: config file name should be less than 128, file: %s", direntp->d_name); + direntp = readdir(dir); + continue; + } + ret = memset_s(cfg_full_name, sizeof(cfg_full_name), 0, sizeof(cfg_full_name)); + if (ret != 0) { + log_printf(LOG_ERR, "reload_task: memset_s cfg_full_name failed, ret: %d", ret); + return false; + } + ret = snprintf_s(cfg_full_name, sizeof(cfg_full_name), sizeof(cfg_full_name) - 1, + "%s%s", CUSTOM_CONFIG_DIR, direntp->d_name); + if (ret == -1) { + log_printf(LOG_ERR, "reload_task: snprintf_s cfg_full_name failed, ret: %d", ret); + return false; + } + + fp = open_cfgfile(cfg_full_name, &config_fd); + if (fp == NULL) { + direntp = readdir(dir); + continue; + } + find = find_cfg_exist_or_updated(fp, direntp->d_name, t); + (void)fclose(fp); + if (find == true) { + break; + } + direntp = readdir(dir); + } + + if (find == false) { + if (t->pid != 0) { + (void)kill(-(t->pid), SIGTERM); + t->state = EXITING_STATE; + } else { + list_del(&t->list); + free_custom_t(&t); + } + } + rewinddir(dir); + return true; +} + +/* + * reload config + * if reload config failed, then continue with old config, daemon process will not be killed + * if daemon process config is same as old config, daemon process will not restart + * if periodic process, config is same or not as old config, periodic process will restart + * if reload config has new config items, then old 
process will be killed and custom task should be free + */ +static bool reload_task(custom_type type) +{ + str_custom *t = NULL; + str_custom *ptr = NULL; + struct list_head *head = NULL; + DIR *dir = NULL; + + head = (type == CUSTOM_DAEMON) ? &g_custom_daemon_head : &g_custom_periodic_head; + dir = opendir(CUSTOM_CONFIG_DIR); + if (dir == NULL) { + log_printf(LOG_ERR, "reload_task: %s not exist", CUSTOM_CONFIG_DIR); + return false; + } + + list_for_each_entry_safe(t, ptr, head, list) { + if (type == CUSTOM_DAEMON) { + if (process_daemon_task_reload(t, dir) == false) { + (void)closedir(dir); + return false; + } + } else { + if (t->state != EXITING_STATE) { + list_del(&t->list); + free_custom_t(&t); + } + } + } + + (void)closedir(dir); + return load_task(type, true); +} + +static void process_worker_task_running(str_custom *t, worker_task *wtask) +{ + int ret; + int status; + + ret = waitpid(wtask->cpid, &status, WNOHANG); + if (ret == 0) { + (void)kill(-wtask->cpid, SIGTERM); + wtask->state = EXITING_STATE; + t->state = EXITING_STATE; + } else { + log_printf(LOG_INFO, "process_worker_task_running: waitpid error [%d] ", errno); + } +} + +static void process_worker_task_exiting(str_custom *t, worker_task *wtask) +{ + int ret; + int status; + + ret = waitpid(wtask->cpid, &status, WNOHANG); + if (ret == 0) { + log_printf(LOG_INFO, + "process_worker_task_exiting: task[%d] process SIGTERM timeout, use SIGKILL.", wtask->cpid); + (void)kill(-wtask->cpid, SIGKILL); + (void)waitpid(wtask->cpid, &status, 0); + } + wtask->state = EXITED_STATE; + t->state = EXITED_STATE; +} + +/* + * process periodic tasks + * when reload config, process periodic tasks in the list + * this has completion with thread monitor-worker + * when task state is running, send SIGTERM to task + * when task state is exiting, send SIGKILL to task and wait pid, free custom task + * when task state is exited, clear task_queue info + */ +static void process_worker_task(void) +{ + str_custom *t = NULL; + 
str_custom *n = NULL; + int index; + int ret; + + (void)sleep(1); /* wait monitor-worker thread to recycke child process */ + + (void)pthread_mutex_lock(&g_task_mtx); + list_for_each_entry_safe(t, n, &g_custom_periodic_head, list) { + index = t->state_index; + if (index >= TASK_QUEUE_SIZE || index < 0) { + log_printf(LOG_INFO, "process_worker_task: index[%d] error", index); + continue; + } + if (g_task_queue[index].state == RUNNING_STATE) { + process_worker_task_running(t, &g_task_queue[index]); + } else if (g_task_queue[index].state == EXITING_STATE) { + process_worker_task_exiting(t, &g_task_queue[index]); + list_del(&t->list); + free_custom_t(&t); + } + if (g_task_queue[index].state == EXITED_STATE) { + ret = memset_s(&g_task_queue[index], sizeof(worker_task), 0, sizeof(worker_task)); + if (ret != 0) { + log_printf(LOG_ERR, "process_worker_task: memset_s task_queue[%d] failed, ret: %d", index, ret); + (void)pthread_mutex_unlock(&g_task_mtx); + return; + } + } + } + (void)pthread_mutex_unlock(&g_task_mtx); +} + +static void handle_daemon_task_exiting(str_custom *task) +{ + int status = 0; + pid_t child_pid; + + child_pid = waitpid(task->pid, &status, WNOHANG); + if (child_pid == 0) { + log_printf(LOG_INFO, "task[%d] process SIGTERM timeout, use SIGKILL.", task->pid); + (void)kill(-(task->pid), SIGKILL); + (void)waitpid(task->pid, &status, 0); + } + list_del(&task->list); + free_custom_t(&task); +} + +static bool custom_parse_single_config_init(str_custom *t, FILE *file, custom_type type) +{ + char config[MAX_CONFIG] = {0}; + bool flag = true; + + if (t == NULL || file == NULL) { + return false; + } + + for (;;) { + if (!fgets(config, MAX_CONFIG - 1, file)) { + break; + } + if (parse_line(t, config) == false) { + flag = false; + break; + } + } + + if (flag == false || !strlen(t->start_cmd) || t->type == 0 || + (t->type == CUSTOM_PERIODIC && t->period == 0) || + t->monitor_switch == false || t->type != type) { + return false; + } + return true; +} + +/* + * parse 
single config + * if return false, we should free memory for t->envp_config and t->envp + */ +static bool custom_parse_single_config(FILE *file, custom_type type, str_custom *t) +{ + bool env_ret = false; + int i; + unsigned int len; + int ret; + + env_ret = custom_parse_single_config_init(t, file, type); + if (!env_ret) { + return false; + } + + /* before parse environment variables, we should free memory for envp_config and envp */ + free_custom_env(&t); + /* get environment variables */ + if (strlen(t->enviroment_file) != 0) { + env_ret = get_envp(t->enviroment_file, t->envp_config, + &t->envp_config_count); + if (!env_ret) { + return false; + } + + /* get number of global environment variables */ + for (i = 0; environ[i] != NULL; i++) {} + + t->envp = malloc((i + t->envp_config_count + 1) * sizeof(char *)); + if (t->envp == NULL) { + log_printf(LOG_INFO, "malloc error."); + return false; + } + + ret = memcpy_s(t->envp, i * sizeof(char *), environ, i * sizeof(char *)); + if (ret != EOK) { + log_printf(LOG_ERR, "custom_parse_single_config memcpy_s error [%d]", ret); + return false; + } + + if (t->envp_config_count != 0) { + len = t->envp_config_count * sizeof(char *); + ret = memcpy_s(t->envp + i, len, t->envp_config, len); + if (ret != EOK) { + log_printf(LOG_ERR, "custom_parse_single_config memcpy_s envp_config error [%d]", ret); + return false; + } + } + + t->envp[i + t->envp_config_count] = NULL; /* the last arg of execle must be NULL */ + } + return true; +} + +static bool reload_single_task(custom_type type, str_custom *t) +{ + int config_fd = -1; + int ret; + FILE *fp = NULL; + char cfg_full_name[MAX_CFG_NAME_LEN + sizeof(CUSTOM_CONFIG_DIR)] = {0}; + + if (t == NULL) { + log_printf(LOG_INFO, "Custom process is NULL"); + return false; + } + + ret = snprintf_s(cfg_full_name, sizeof(cfg_full_name), sizeof(cfg_full_name) - 1, + "%s%s", CUSTOM_CONFIG_DIR, t->conf_name); + if (ret < 0) { + log_printf(LOG_ERR, "reload_single_task snprintf_s error [%d]", ret); + 
return false; + } + + fp = open_cfgfile(cfg_full_name, &config_fd); + if (fp == NULL) { + log_printf(LOG_INFO, "fail to open single config file %s.", cfg_full_name); + return false; + } + + if (custom_parse_single_config(fp, type, t) == false) { + log_printf(LOG_INFO, "reload single config: parse %s error", t->conf_name); + + if (type != t->type) { + log_printf(LOG_INFO, "single custom type is changed, reload sysmonitor"); + set_thread_item_reload_flag(CUSTOM_DAEMON_ITEM, true); + set_thread_item_reload_flag(CUSTOM_PERIODIC_ITEM, true); + } + + if (t->monitor_switch == false) { + log_printf(LOG_INFO, "single custom monitor is switched off."); + } + + (void)fclose(fp); + return false; + } + + (void)fclose(fp); + return true; +} + +static void handle_daemon_task_exit(str_custom *t, int status) +{ + int exit_code = 0; + int ret; + + if (WIFEXITED(status)) { + exit_code = WEXITSTATUS(status); + if (exit_code != 0) { + log_printf(LOG_WARNING, "custom daemon monitor: name[%s] execle start_cmd[%s] error[%d]", + t->conf_name, t->start_cmd, exit_code); + } + } + + if (t->daemon_restart_times < FAIL_NUM) { + t->daemon_restart_times++; + log_printf(LOG_INFO, "custom daemon monitor: child process[%d] name %s exit code[%d], [%u] times.", + t->pid, t->conf_name, exit_code, t->daemon_restart_times); + } + + /* reload single task failed, delete from list and free task */ + ret = reload_single_task(CUSTOM_DAEMON, t); + if (ret == false) { + list_del(&t->list); + free_custom_t(&t); + return; + } + + /* initialize the pid number to avoid other process using the number */ + t->pid = 0; + ret = execle_daemon(t->start_cmd, t->envp, &t->pid); + if (ret != 0) { + log_printf(LOG_WARNING, "execle_daemon error[%d]", ret); + } +} + +static void handle_daemon_task_failed(str_custom *t, int err) +{ + int ret; + + log_printf(LOG_INFO, "custom daemon monitor: waitpid ret[%d] error", err); + if (err == ECHILD) { + t->pid = 0; + ret = execle_daemon(t->start_cmd, t->envp, &t->pid); + if (ret != 0) { 
+ log_printf(LOG_WARNING, "execle_daemon error[%d]", ret); + } + } +} + +static void check_custom_daemon_monitor(void) +{ + str_custom *t = NULL; + str_custom *n = NULL; + int status = 0; + pid_t child_pid; + + list_for_each_entry_safe(t, n, &g_custom_daemon_head, list) { + if (t->pid == 0) { + continue; + } + + if (t->state == EXITING_STATE) { + handle_daemon_task_exiting(t); + continue; + } + child_pid = waitpid(t->pid, &status, WNOHANG); + if (t->pid == child_pid) { + handle_daemon_task_exit(t, status); + } else if (child_pid < 0) { + handle_daemon_task_failed(t, errno); + } else if ((child_pid == 0 && (t->daemon_restart_times > 0 || !t->daemon_thread_start))) { + t->daemon_restart_times = 0; + t->daemon_thread_start = 1; + log_printf(LOG_INFO, "custom daemon monitor: child process[%d] name %s started", t->pid, t->conf_name); + } + } +} + +/* + * daemon monitor start from here + */ +static void *custom_daemon_monitor_start(void *arg) +{ + bool ret = false; + unsigned int period; + int result; + + /* prctl does not return false if arg2 is right when arg1 is PR_SET_NAME */ + (void)prctl(PR_SET_NAME, "monitor-daemon"); + log_printf(LOG_INFO, "custom daemon monitor starting up"); + + set_thread_item_reload_flag(CUSTOM_DAEMON_ITEM, false); + (void)load_task(CUSTOM_DAEMON, false); + period = (unsigned int)get_thread_item_period(CUSTOM_DAEMON_ITEM); + log_printf(LOG_INFO, "custom daemon monitor, period:%u", period); + result = set_thread_check_value(THREAD_CUSTOM_DAEMON_ITEM, true, period); + if (result == -1) { + log_printf(LOG_ERR, "custom daemon monitor set check flag or period error"); + return NULL; + } + monitor_daemon(&g_custom_daemon_head); + + for (;;) { + if (get_thread_item_reload_flag(CUSTOM_DAEMON_ITEM)) { + log_printf(LOG_INFO, "custom daemon monitor, start reload"); + set_thread_item_reload_flag(CUSTOM_DAEMON_ITEM, false); + ret = reload_task(CUSTOM_DAEMON); + if (ret == true) { + monitor_daemon(&g_custom_daemon_head); + } else { + log_printf(LOG_INFO, 
"reload daemon custom monitor configuration failed"); + } + } + + /* daemon process exit in exception */ + check_custom_daemon_monitor(); + + result = feed_thread_status_count(THREAD_CUSTOM_DAEMON_ITEM); + if (result == -1) { + log_printf(LOG_ERR, "custom daemon monitor feed error"); + break; + } + /* daemon monitor interval is 10 seconds */ + (void)sleep(period); + } + return NULL; +} + +static void check_and_exec_periodic(str_custom *t, int index, unsigned long count) +{ + worker_task wtask = {0}; + struct timespec ts; + int ret; + + if (g_task_queue[index].state == EXITED_STATE && + ((count - t->time_count) >= t->period || t->pid == 0) && + t->state != EXITING_STATE) { + t->pid = 0; + ret = execle_periodic(t); + if (ret == 0) { + wtask.cpid = t->pid; + wtask.time_count = 0; + wtask.state = RUNNING_STATE; + worker_task_add(&wtask, index); + t->time_count = (unsigned int)count; + } else { + log_printf(LOG_INFO, "execle_periodic ret [%d] error", ret); + } + ts.tv_nsec = PROCESS_SLEEP_INTERVAL; + ts.tv_sec = 0; + (void)nanosleep(&ts, NULL); + } +} + +/* + * periodic monitor start from here + */ +static void *custom_periodic_monitor_start(void *arg) +{ + str_custom *t = NULL; + str_custom *n = NULL; + unsigned long count = 0; + int index; + int ret; + + /* prctl does not return false if arg2 is right when arg1 is PR_SET_NAME */ + (void)prctl(PR_SET_NAME, "monitor-period"); + log_printf(LOG_INFO, "custom periodic monitor starting up"); + set_thread_item_reload_flag(CUSTOM_PERIODIC_ITEM, false); + (void)load_task(CUSTOM_PERIODIC, false); + ret = set_thread_check_value(THREAD_CUSTOM_PERIODIC_ITEM, true, CISTOM_PERIODIC_TIME); + if (ret == -1) { + log_printf(LOG_ERR, "custom periodic monitor set check flag or period error"); + return NULL; + } + monitor_periodic(); + + for (;;) { + if (get_thread_item_reload_flag(CUSTOM_PERIODIC_ITEM)) { + log_printf(LOG_INFO, "custom periodic monitor, start reload"); + set_thread_item_reload_flag(CUSTOM_PERIODIC_ITEM, false); + 
process_worker_task(); + (void)reload_task(CUSTOM_PERIODIC); + monitor_periodic(); + count = 0; + } + + list_for_each_entry_safe(t, n, &g_custom_periodic_head, list) { + index = t->state_index; + if (index >= TASK_QUEUE_SIZE || index < 0) { + log_printf(LOG_INFO, "custom_periodic_monitor_start: index[%d] error", index); + continue; + } + + check_and_exec_periodic(t, index, count); + if (t->state == EXITING_STATE && g_task_queue[index].state == EXITED_STATE) { + list_del(&t->list); + free_custom_t(&t); + } + } + (void)sleep(1); + count++; + ret = feed_thread_status_count(THREAD_CUSTOM_PERIODIC_ITEM); + if (ret == -1) { + log_printf(LOG_ERR, "custom periodic monitor feed error"); + break; + } + } + return NULL; +} + +void custom_daemon_monitor_init(void) +{ + pthread_t tid; + + if (pthread_create(&tid, NULL, custom_daemon_monitor_start, NULL)) { + log_printf(LOG_ERR, "create daemon custom monitor thread error [%d]", errno); + return; + } + set_thread_item_tid(CUSTOM_DAEMON_ITEM, tid); +} + +void custom_periodic_monitor_init(void) +{ + pthread_t tid; + + if (pthread_create(&tid, NULL, custom_periodic_monitor_start, NULL)) { + log_printf(LOG_ERR, "create periodic custom monitor thread error [%d]", errno); + return; + } + set_thread_item_tid(CUSTOM_PERIODIC_ITEM, tid); +} + +static int worker_task_init(const worker_task *task, int *index) +{ + int i; + unsigned int count = 0; + int ret; + + (void)pthread_mutex_lock(&g_task_mtx); + for (i = 0; i < TASK_QUEUE_SIZE; i++) { + if (g_task_queue[i].cpid != 0) { + count++; + } else { + break; + } + } + + if (count >= TASK_QUEUE_SIZE) { + log_printf(LOG_INFO, "task queue is full! 
no index!"); + (void)pthread_mutex_unlock(&g_task_mtx); + return -1; + } else { + *index = i; + ret = memcpy_s(&g_task_queue[i], sizeof(worker_task), task, sizeof(worker_task)); + if (ret) { + log_printf(LOG_ERR, "worker_task_init: memcpy_s task_queue failed, ret: %d", ret); + (void)pthread_mutex_unlock(&g_task_mtx); + return -1; + } + } + + (void)pthread_mutex_unlock(&g_task_mtx); + return 0; +} + +static void worker_task_add(const worker_task *task, int index) +{ + int ret; + + (void)pthread_mutex_lock(&g_task_mtx); + if (index < TASK_QUEUE_SIZE && index >= 0) { + ret = memcpy_s(&g_task_queue[index], sizeof(worker_task), task, sizeof(worker_task)); + if (ret) { + log_printf(LOG_ERR, "worker_task_add: memcpy_s task_queue failed, ret: %d", ret); + } + } + (void)pthread_mutex_unlock(&g_task_mtx); +} + +static void handle_periodic_task_exiting(worker_task *task) +{ + pid_t pid; + int status; + int exit_code; + + pid = waitpid(task->cpid, &status, WNOHANG); + if (task->cpid == pid) { + task->cpid = 0; + task->time_count = 0; + task->state = EXITED_STATE; + if (WIFEXITED(status)) { + exit_code = WEXITSTATUS(status); + if (exit_code != 0) { + log_printf(LOG_WARNING, "worker_routine: periodic pid[%d] exec error[%d]", + pid, exit_code); + } + } + } else if (pid == 0) { + task->time_count++; + } else { + log_printf(LOG_INFO, "worker_routine: waitpid error [%d] ", errno); + if (errno == ECHILD) { + task->state = EXITED_STATE; + task->cpid = 0; + task->time_count = 0; + } + } + + if (task->time_count > WORKER_TASK_TIMEOUT) { + log_printf(LOG_INFO, "execute periodic monitoring timeout [%d]", task->cpid); + (void)kill(-task->cpid, SIGTERM); + task->state = EXITING_STATE; + } +} + +static void handle_periodic_task_exit(worker_task *task) +{ + pid_t pid; + int status; + + pid = waitpid(task->cpid, &status, WNOHANG); + if (pid == 0) { + log_printf(LOG_INFO, "task[%d] process SIGTERM timeout, use SIGKILL.", task->cpid); + (void)kill(-task->cpid, SIGKILL); + 
(void)waitpid(task->cpid, &status, 0); + } + task->state = EXITED_STATE; + task->cpid = 0; + task->time_count = 0; +} + +/* + * worker thread, check and waitpid for custom periodic task + */ +static void *worker_routine(void *arg) +{ + unsigned int i; + + /* prctl does not return false if arg2 is right when arg1 is PR_SET_NAME */ + (void)prctl(PR_SET_NAME, "monitor-worker"); + for (;;) { + (void)pthread_mutex_lock(&g_task_mtx); + for (i = 0; i < TASK_QUEUE_SIZE; i++) { + if (g_task_queue[i].cpid == 0) { + continue; + } + + if (g_task_queue[i].state == RUNNING_STATE) { + handle_periodic_task_exiting(&g_task_queue[i]); + } else if (g_task_queue[i].state == EXITING_STATE) { + handle_periodic_task_exit(&g_task_queue[i]); + } + } + (void)pthread_mutex_unlock(&g_task_mtx); + (void)sleep(1); + } + return NULL; +} + +bool worker_task_struct_init(void) +{ + int ret; + + ret = memset_s(g_task_queue, sizeof(worker_task) * TASK_QUEUE_SIZE, 0, + sizeof(worker_task) * TASK_QUEUE_SIZE); + if (ret) { + (void)printf("worker_task_struct_init: memset_s task_queue failed, ret: %d.", ret); + return false; + } + return true; +} + +/* + * worker thread init, if custom periodic monitor is not enable + * do not create worker thread + */ +bool worker_thread_init(pthread_t *tid) +{ + if (!get_thread_item_monitor_flag(CUSTOM_PERIODIC_ITEM)) { + return true; + } + + if (pthread_create(tid, NULL, worker_routine, NULL)) { + log_printf(LOG_ERR, "create worker thread error [%d]", errno); + return false; + } + + return true; +} diff --git a/sysmonitor-1.3.2/src/custom.h b/sysmonitor-1.3.2/src/custom.h new file mode 100644 index 0000000000000000000000000000000000000000..0125c5f2956251692aee2bd7c4a0b4dd4e64a43a --- /dev/null +++ b/sysmonitor-1.3.2/src/custom.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2016-2019. All rights reserved. + * sysmonitor licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. 
/*
 * One custom monitor task, parsed from a config file under CUSTOM_CONFIG_DIR.
 * Tasks are linked through `list` into the daemon or the periodic task list.
 */
typedef struct str_custom_s {
    struct list_head list; /* list node linking the task into its monitor list */
    pid_t pid; /* pid of child process, 0 when no child has been started */
    custom_type type; /* custom monitor type: daemon or periodic */
    char start_cmd[MAX_CUSTOM_CMD_LEN]; /* custom monitor exec cmd */
    char conf_name[MAX_CFG_NAME_LEN]; /* custom monitor config name */
    char enviroment_file[MAX_CFG_NAME_LEN]; /* environment file name: absolute path + name */
    char **envp; /* environment variables: those of the current process followed by the configured ones */
    char *envp_config[MAX_ENV_CONFIG]; /* environment variables read from the environment file only, exclude inherited */
    unsigned int envp_config_count; /* the number of configured environment variables */
    unsigned int period; /* monitor period of periodic monitor */
    unsigned int time_count; /* time counts for periodic monitor */
    unsigned int daemon_restart_times; /* counted up on daemon child exit, reset when the child is seen running */
    int daemon_thread_start; /* set to 1 once the daemon child has been seen running */
    int state_index; /* index of task state */
    int state; /* task state: running, exiting, exited */
    bool monitor_switch; /* monitor switch */
} str_custom;
const char *value, str_custom *t); +} custom_item_func; + +void custom_daemon_monitor_init(void); +void custom_periodic_monitor_init(void); +bool worker_thread_init(pthread_t *tid); +bool worker_task_struct_init(void); + +#endif diff --git a/sysmonitor-1.3.2/src/disk.c b/sysmonitor-1.3.2/src/disk.c new file mode 100644 index 0000000000000000000000000000000000000000..5b435ca856cda1ee451144b63c7c9fb926964387 --- /dev/null +++ b/sysmonitor-1.3.2/src/disk.c @@ -0,0 +1,1129 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2016-2019. All rights reserved. + * sysmonitor licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. 
+ * Description: disk, inode, io_delay monitor + * Author: xuchunmei + * Create: 2016-1-1 + */ +#include "disk.h" + +#include + +#include +#include "common.h" +#include "monitor_thread.h" + +#define DISK_CFG_PATH "/etc/sysmonitor/disk" +#define DISK_INODE_CFG_PATH "/etc/sysmonitor/inode" +#define MAX_DISK_CONFIG 200 +#define MAX_DISK_NAME 64 +#define DISK_RETRY_TIMES 3 +#define MAX_DISK_ID 32 +#define DISK_STATS_COUNT 2 +#define MAX_COUNT 60 +#define DISK_ALARM_DEFAULT 90 +#define DISK_RESUME_DEFAULT 80 +#define DISK_BUFFER_LEN 1024 +#define DISK_USAGE_LEN 4 +#define MAX_DELAY_ABNORMAL 30 +#define MAX_DISK_ALARM 100 +#define MAX_DISK_RESUME 100 + +#define DELAY_INFO_BUF_LEN 500 +#define DELAY_DATA_BUF_LEN 10 + +#define DISK_STATS_DATA_COUNT 4 +typedef enum _disk_status { + NORMAL, + ALARM +} disk_status; + +typedef struct _mdisk { + struct _mdisk *next; + char disk[MAX_DISK_NAME + 1]; /* monitor disk name */ + char mount[MAX_DISK_NAME + 1]; /* mount dir name */ + int alarm; /* block or inode alarm value */ + int resume; /* block or inode resume value */ + disk_status last_status; /* last status, alarm or normal */ + int times; /* alarm times */ +} mdisk; + +typedef struct _disk_states_info { + unsigned long rio; /* read request sum */ + unsigned long wio; /* write request sum */ + unsigned long r_use; /* read spend time */ + unsigned long w_use; /* write spend time */ +} disk_states_info; + +typedef struct _disk_io_info { + bool alarm; /* alarm status */ + char disk_id[MAX_DISK_ID]; /* disk id */ + disk_states_info disk_stats[DISK_STATS_COUNT]; /* disk io status */ + unsigned long delay[MAX_COUNT]; /* disk io delay */ +} disk_io_info; + +typedef struct _local_disk { + struct _local_disk *next; + disk_io_info disk_io_info; +} local_disk; + +static mdisk *g_mdisk_head; +static mdisk *g_mdisk_inode_head; + +static int g_disk_thread_start = 1; +static int g_inode_thread_start = 1; +static int g_disk_io_delay; +static int g_disk_io_thread_start = 1; + +#define 
custom_list_for_each(list_head, list_node) \ + for ((list_node) = (list_head)->next; \ + (list_node) != NULL; \ + (list_node) = (list_node)->next) + +static void free_disk_list(mdisk **disklist) +{ + mdisk *t = NULL; + mdisk *disk = NULL; + + if (*disklist == NULL) { + return; + } + + disk = *disklist; + t = disk; + while (t->next != NULL) { + disk = t->next; + free(t); + t = disk; + } + free(disk); + *disklist = NULL; + return; +} + +static bool mdisk_add(const mdisk *add_disk, mdisk **disk_list) +{ + mdisk *disk = NULL; + int ret; + + if (add_disk == NULL) { + return false; + } + + disk = malloc(sizeof(mdisk)); + if (disk == NULL) { + log_printf(LOG_ERR, "malloc mdisk error [%d]", errno); + return false; + } + + ret = memcpy_s(disk, sizeof(mdisk), add_disk, sizeof(mdisk)); + if (ret != 0) { + log_printf(LOG_ERR, "mdisk_add: memcpy_s disk failed, ret: %d", ret); + free(disk); + return false; + } + + disk->next = NULL; + + if (*disk_list == NULL) { + *disk_list = disk; + } else { + disk->next = *disk_list; + *disk_list = disk; + } + return true; +} + +static void free_local_disk(local_disk *disklist) +{ + local_disk *tmp_disk = NULL; + + if (disklist == NULL) { + return; + } + + while (disklist != NULL) { + tmp_disk = disklist->next; + free(disklist); + disklist = tmp_disk; + } + return; +} + +static bool local_disk_add(local_disk *disk_head, const local_disk *add_local_disk) +{ + local_disk *local_disk_node = NULL; + int ret; + + if (add_local_disk == NULL) { + return false; + } + + local_disk_node = malloc(sizeof(local_disk)); + if (local_disk_node == NULL) { + log_printf(LOG_ERR, "malloc local disk error [%d]", errno); + return false; + } + ret = memset_s(local_disk_node, sizeof(local_disk), 0, sizeof(local_disk)); + if (ret != 0) { + log_printf(LOG_ERR, "local_disk_add: memset_s local_disk_node failed, ret: %d", ret); + free(local_disk_node); + return false; + } + ret = strcpy_s(local_disk_node->disk_io_info.disk_id, MAX_DISK_ID, 
add_local_disk->disk_io_info.disk_id); + if (ret != 0) { + log_printf(LOG_ERR, "local_disk_add: strcpy_s disk_io_info failed, ret: %d", ret); + free(local_disk_node); + return false; + } + local_disk_node->next = disk_head->next; + disk_head->next = local_disk_node; + return true; +} + +static int get_mount(mdisk *disk) +{ + int ret; + char buffer[DISK_BUFFER_LEN] = {0}; + char tmp_cmd[MAX_TEMPSTR] = {0}; + + ret = snprintf_s(tmp_cmd, sizeof(tmp_cmd), MAX_TEMPSTR - 1, + "df %s | awk \'{print $6 }\'| tail -1", disk->disk); + if (ret == -1) { + log_printf(LOG_ERR, "get_mount: snprintf_s tmp_cmd failed, ret: %d", ret); + return -1; + } + ret = monitor_popen(tmp_cmd, buffer, sizeof(buffer) - 1, POPEN_TIMEOUT, NULL); + if (strchr(buffer, '/') && (ret >= 0)) { + ret = memset_s(disk->mount, sizeof(disk->mount), 0, sizeof(disk->mount)); + if (ret != 0) { + log_printf(LOG_ERR, "get_mount: memset_s tmp_cmd failed, ret: %d", ret); + return -1; + } + ret = memcpy_s(disk->mount, sizeof(disk->mount), buffer, strlen(buffer) - 1); + if (ret != 0) { + log_printf(LOG_ERR, "get_mount: memcpy_s mount failed, ret: %d", ret); + return -1; + } + return 0; + } + return -1; +} + +static int get_diskname_from_config(const char *config, mdisk *disk) +{ + char key[MAX_DISK_NAME + 1] = {0}; + int ret; + + if (get_string(config, "DISK=\"", key, sizeof(key), "DISK") != 0) { + log_printf(LOG_INFO, "get_string DISK failed"); + return -1; + } + + if (check_conf_file_valid(key) == -1) { + return -1; + } + + ret = strncpy_s(disk->disk, sizeof(disk->disk), key, sizeof(disk->disk) - 1); + if (ret != 0) { + log_printf(LOG_ERR, "parse_diskline: strncpy_s disk failed, ret: %d", ret); + return -1; + } + + return 0; +} + +static int get_alarm_from_config(const char *config, mdisk *disk) +{ + char key[MAX_DISK_NAME + 1] = {0}; + + if (get_string(config, "ALARM=\"", key, sizeof(key), "ALARM") != 0) { + disk->alarm = DISK_ALARM_DEFAULT; + } else { + if (check_int(key)) { + disk->alarm = (int)strtol(key, NULL, 
STRTOL_NUMBER_BASE); + } else { + return -1; + } + } + return 0; +} + +static int get_resume_from_config(const char *config, mdisk *disk) +{ + char key[MAX_DISK_NAME + 1] = {0}; + + if (get_string(config, "RESUME=\"", key, sizeof(key), "RESUME") != 0) { + disk->resume = DISK_RESUME_DEFAULT; + } else { + if (check_int(key)) { + disk->resume = (int)strtol(key, NULL, STRTOL_NUMBER_BASE); + } else { + return -1; + } + } + return 0; +} + +static int check_alarm_and_resume(const mdisk *disk) +{ + if ((disk->alarm <= disk->resume) || (disk->alarm < 0) || (disk->alarm > MAX_DISK_ALARM) || + (disk->resume < 0) || (disk->resume > MAX_DISK_RESUME)) { + log_printf(LOG_ERR, "alarm:%d or resume:%d invalided", disk->alarm, disk->resume); + return -1; + } + return 0; +} + +static int parse_and_check_config(const char *config, mdisk *disk) +{ + if (get_diskname_from_config(config, disk) == -1) { + return -1; + } + + if (get_alarm_from_config(config, disk) == -1) { + return -1; + } + + if (get_resume_from_config(config, disk) == -1) { + return -1; + } + + if (check_alarm_and_resume(disk) == -1) { + return -1; + } + + return 0; +} + +/* + * parse one line in the config file + * parse item word DISK= + * verify mount points + * verify alarm and resume value + */ +static int parse_diskline(mdisk *disk, const char *config, mdisk **head) +{ + /* skip space and tab */ + while (*config == ' ' || *config == '\t') { + config++; + continue; + } + + /* comment start with '#' */ + if ((*config == '#') || (*config == '\n')) { + return 1; + } + + if (parse_and_check_config(config, disk) == -1) { + return -1; + } + + /* get mount point */ + if (get_mount(disk) == -1) { + log_printf(LOG_ERR, "get_mount:%s failed", disk->disk); + return -1; + } + + /* keep status and times before reload */ + mdisk *t = *head; + + while (t != NULL) { + if (strcmp(disk->disk, t->disk) == 0) { + disk->last_status = t->last_status; + disk->times = t->times; + break; + } + t = t->next; + } + + return 0; +} + +static bool 
check_list(mdisk **disk_list, int type) +{ + mdisk *disk = *disk_list; + mdisk *new_list = NULL; + mdisk *t = NULL; + bool useful = false; + + while (disk != NULL) { + t = *disk_list; + useful = true; + while (t != disk) { + if (!strcmp(disk->mount, t->mount)) { + useful = false; + log_printf(LOG_INFO, "[%s]disk:%s mount:%s alarm:%d resume:%d has monitored", + type == INODE_ITEM ? "disk inode" : "disk space", + disk->disk, disk->mount, disk->alarm, disk->resume); + break; + } + t = t->next; + } + + if (useful) { + if (mdisk_add(disk, &new_list) == false) { + free_disk_list(disk_list); + *disk_list = new_list; + return false; + } + } + + disk = disk->next; + } + + free_disk_list(disk_list); + *disk_list = new_list; + return true; +} + +static void parse_diskline_failed(char *config, unsigned int len, int ret) +{ + if (ret != -1) { + return; + } + + if (len > 0 && config[len - 1] == '\n') { + config[len - 1] = '\0'; + } + log_printf(LOG_INFO, "parse_diskline error:%s", config); +} + +static int reload_file(const char *cfg_path, mdisk **head) +{ + int ret; + FILE *fp = NULL; + int config_fd = -1; + char config[MAX_DISK_CONFIG]; + mdisk disk_tmp; + mdisk *new_disk = NULL; + int type; + + fp = open_cfgfile(cfg_path, &config_fd); + if (fp == NULL) { + return 1; + } + + for (;;) { + if (!fgets(config, sizeof(config), fp)) { + break; + } + + ret = memset_s(&disk_tmp, sizeof(disk_tmp), 0, sizeof(disk_tmp)); + if (ret != 0) { + log_printf(LOG_ERR, "reload_file: memset_s disk_tmp failed, ret: %d", ret); + goto err; + } + /* compare with old config, if exists keep old status and times */ + ret = parse_diskline(&disk_tmp, config, head); + if (ret != 0) { + parse_diskline_failed(config, (unsigned int)strlen(config), ret); + continue; + } + + /* add disk to new list */ + if (mdisk_add(&disk_tmp, &new_disk) == false) { + free_disk_list(head); + *head = new_disk; + goto err; + } + } + + /* free old list and set new list */ + free_disk_list(head); + *head = new_disk; + + if 
(!strcmp(DISK_CFG_PATH, cfg_path)) { + type = DISK_ITEM; + set_thread_item_reload_flag(DISK_ITEM, false); + } else { + type = INODE_ITEM; + set_thread_item_reload_flag(INODE_ITEM, false); + } + + /* check and merge same root mount point, the first config works */ + if (check_list(head, type) == false) { + goto err; + } + + (void)fclose(fp); + return 0; +err: + free_disk_list(head); + (void)fclose(fp); + return -1; +} + +static int get_disk(const mdisk *disk) +{ + char *str = NULL; + char tmp[DISK_USAGE_LEN] = {0}; + int used = -1; + char buffer[DISK_BUFFER_LEN] = {0}; + char tmp_cmd[MAX_TEMPSTR] = {0}; + int ret; + + ret = snprintf_s(tmp_cmd, sizeof(tmp_cmd), MAX_TEMPSTR - 1, + "df %s | awk \'{print $5 }\'| tail -1", disk->disk); + if (ret == -1) { + log_printf(LOG_ERR, "get_disk: snprintf_s tmp_cmd failed, ret: %d", ret); + return -1; + } + (void)monitor_popen(tmp_cmd, buffer, sizeof(buffer) - 1, POPEN_TIMEOUT, NULL); + str = strchr(buffer, '%'); + if (str != NULL) { + ret = memcpy_s(tmp, sizeof(tmp), buffer, (size_t)(str - buffer)); + if (ret != 0) { + log_printf(LOG_ERR, "get_disk: memcpy_s tmp failed, ret: %d", ret); + return -1; + } + used = (int)strtol(tmp, NULL, STRTOL_NUMBER_BASE); + return used; + } + + return used; +} + +static int get_disk_inode(const mdisk *disk) +{ + char *str = NULL; + char tmp[DISK_USAGE_LEN] = {0}; + int used = -1; + char buffer[DISK_BUFFER_LEN] = {0}; + char tmp_cmd[MAX_TEMPSTR] = {0}; + int ret; + ret = snprintf_s(tmp_cmd, sizeof(tmp_cmd), MAX_TEMPSTR - 1, + "df -i %s | awk \'{print $5 }\'| tail -1", disk->disk); + if (ret == -1) { + log_printf(LOG_ERR, "get_disk_inode: snprintf_s tmp_cmd failed, ret: %d", ret); + return -1; + } + (void)monitor_popen(tmp_cmd, buffer, sizeof(buffer) - 1, POPEN_TIMEOUT, NULL); + str = strchr(buffer, '%'); + if (str != NULL) { + ret = memcpy_s(tmp, sizeof(tmp), buffer, (size_t)(str - buffer)); + if (ret != 0) { + log_printf(LOG_ERR, "get_disk_inode: memcpy_s tmp_cmd failed, ret: %d", ret); + return 
-1; + } + + used = (int)strtol(tmp, NULL, STRTOL_NUMBER_BASE); + return used; + } + + return used; +} + +/* return value 0 means not alarm, 1 means alarm */ +static int check_disk(int used, mdisk *disk, int thread_start, unsigned char *alarm_type) +{ + if ((used >= disk->alarm) && (disk->last_status == NORMAL)) { + disk->times++; + if (disk->times >= DISK_RETRY_TIMES) { + disk->last_status = ALARM; + disk->times = 0; + *alarm_type = COMMON_ALARM_TYPE_OCCUR; + return 1; + } + } + + if (((used < disk->resume) && (disk->last_status == ALARM)) || + ((used < disk->resume) && thread_start)) { + disk->times = 0; + disk->last_status = NORMAL; + *alarm_type = COMMON_ALARM_TYPE_RESUME; + return 1; + } + + return 0; +} + +/* run the queue to check all the disk cfg in the list */ +static void disk_runqueue(void) +{ + mdisk *t = NULL; + int used; + unsigned char alarm_type; + + for (t = g_mdisk_head; t;) { + /* get usage of disk */ + used = get_disk(t); + if (used < 0) { + log_printf(LOG_INFO, "get_disk_used %s failed", t->disk); + t = t->next; + continue; + } + + /* check alarm or not */ + if (check_disk(used, t, g_disk_thread_start, &alarm_type)) { + if (alarm_type == COMMON_ALARM_TYPE_OCCUR) { + log_printf(LOG_WARNING, "report disk alarm, %s used:%d%% alarm:%d%%", t->disk, used, t->alarm); + } else { + log_printf(LOG_INFO, "report disk recovered, %s used:%d%% resume:%d%%", t->disk, used, t->resume); + } + } + + t = t->next; + } + + g_disk_thread_start = 0; +} + +/* run the queue to check all the disk cfg in the list */ +static void inode_runqueue(void) +{ + mdisk *t = NULL; + int used; + unsigned char alarm_type; + + for (t = g_mdisk_inode_head; t;) { + /* get inode usage */ + used = get_disk_inode(t); + if (used < 0) { + log_printf(LOG_WARNING, "get_disk_inode used %s failed", t->disk); + t = t->next; + continue; + } + + /* check alarm or not */ + if (check_disk(used, t, g_inode_thread_start, &alarm_type)) { + if (alarm_type == COMMON_ALARM_TYPE_OCCUR) { + 
log_printf(LOG_WARNING, "report disk inode alarm, %s used:%d%% alarm:%d%%", t->disk, used, t->alarm); + } else { + log_printf(LOG_INFO, "report disk inode recovered, %s used:%d%% resume:%d%%", t->disk, used, t->resume); + } + } + + t = t->next; + } + + g_inode_thread_start = 0; +} + +static int disk_reload_file(void) +{ + int ret; + + ret = reload_file(DISK_CFG_PATH, &g_mdisk_head); + if (ret != 0) { + log_printf(LOG_INFO, "reload disk monitor configuration failed"); + ret = set_thread_status_check_flag(THREAD_DISK_ITEM, false); + if (ret == -1) { + log_printf(LOG_ERR, "reload disk monitor set check flag error"); + } + return RET_BREAK; + } + return RET_SUCCESS; +} + +static void *disk_monitor_start(void *arg) +{ + unsigned int period; + int ret; + /* prctl does not return false if arg2 is right when arg1 is PR_SET_NAME */ + (void)prctl(PR_SET_NAME, "monitor-disk"); + log_printf(LOG_INFO, "disk monitor starting up"); + period = (unsigned int)get_thread_item_period(DISK_ITEM); + log_printf(LOG_INFO, "disk monitor period:[%u]\n", period); + ret = set_thread_check_value(THREAD_DISK_ITEM, true, period); + if (ret == -1) { + log_printf(LOG_ERR, "disk monitor set check flag or period error"); + return NULL; + } + + for (;;) { + if (get_thread_item_reload_flag(DISK_ITEM)) { + ret = disk_reload_file(); + if (ret == RET_BREAK) { + break; + } + } + disk_runqueue(); + ret = feed_thread_status_count(THREAD_DISK_ITEM); + if (ret == -1) { + log_printf(LOG_ERR, "disk monitor feed error"); + break; + } + (void)sleep(period); + } + return NULL; +} + +void disk_monitor_init(void) +{ + pthread_t tid; + + if (pthread_create(&tid, NULL, disk_monitor_start, NULL)) { + log_printf(LOG_ERR, "create disk monitor thread error [%d]", errno); + return; + } + set_thread_item_tid(DISK_ITEM, tid); +} + +static int inode_reload_file(void) +{ + int ret; + + ret = reload_file(DISK_INODE_CFG_PATH, &g_mdisk_inode_head); + if (ret != 0) { + log_printf(LOG_INFO, "reload disk inode monitor configuration 
failed"); + ret = set_thread_status_check_flag(THREAD_INODE_ITEM, false); + if (ret == -1) { + log_printf(LOG_ERR, "reload disk inode monitor set check flag error"); + } + return RET_BREAK; + } + return RET_SUCCESS; +} + +static void *inode_monitor_start(void *arg) +{ + int ret; + unsigned int period; + /* prctl does not return false if arg2 is right when arg1 is PR_SET_NAME */ + (void)prctl(PR_SET_NAME, "monitor-inode"); + log_printf(LOG_INFO, "disk inode monitor starting up"); + period = (unsigned int)get_thread_item_period(INODE_ITEM); + log_printf(LOG_INFO, "disk inode monitor period:%u\n", period); + ret = set_thread_check_value(THREAD_INODE_ITEM, true, period); + if (ret == -1) { + log_printf(LOG_ERR, "disk inode monitor set check flag or period error"); + return NULL; + } + + for (;;) { + if (get_thread_item_reload_flag(INODE_ITEM)) { + ret = inode_reload_file(); + if (ret == RET_BREAK) { + break; + } + } + inode_runqueue(); + ret = feed_thread_status_count(THREAD_INODE_ITEM); + if (ret == -1) { + log_printf(LOG_ERR, "disk inode monitor feed error"); + break; + } + (void)sleep(period); + } + return NULL; +} + +void inode_monitor_init(void) +{ + pthread_t tid; + + if (pthread_create(&tid, NULL, inode_monitor_start, NULL)) { + log_printf(LOG_ERR, "create disk inode monitor thread error [%d]", errno); + return; + } + set_thread_item_tid(INODE_ITEM, tid); +} + +static bool parse_line(const char *config) +{ + char item[ITEM_LEN] = {0}; + char value[VALUE_LEN] = {0}; + char *ptr = NULL; + unsigned int size; + int ret; + + while (*config == ' ' || *config == '\t') { + config++; + } + + if (*config == '#') { + return true; + } + + ptr = strstr(config, "=\""); + if (ptr != NULL) { + size = (unsigned int)(ptr - config); + if (size >= sizeof(item)) { + log_printf(LOG_ERR, "disk parse_line: item length(%u) too long(>%lu).", size, sizeof(item)); + return false; + } + ret = strncpy_s(item, sizeof(item), config, size); + if (ret != 0) { + log_printf(LOG_ERR, "disk 
parse_line: strncpy_s item failed, ret: %d", ret); + return false; + } + get_value(config, size, value, sizeof(value)); + if (!strlen(value)) { + return true; + } + + if (!strcmp(item, "DELAY_VALUE")) { + if (check_int(value) == false) { + return false; + } + g_disk_io_delay = (int)strtol(value, NULL, STRTOL_NUMBER_BASE); + } + } + return true; +} + +static void get_io_delay(local_disk *local_disk, unsigned int delay_id) +{ + unsigned long temp = 0; + unsigned long io_req; + + /* second value is equal or larger than first value */ + io_req = (local_disk->disk_io_info.disk_stats[1].rio - local_disk->disk_io_info.disk_stats[0].rio) + + (local_disk->disk_io_info.disk_stats[1].wio - local_disk->disk_io_info.disk_stats[0].wio); + + if (io_req != 0) { + temp = ((local_disk->disk_io_info.disk_stats[1].w_use - + local_disk->disk_io_info.disk_stats[0].w_use) + + (local_disk->disk_io_info.disk_stats[1].r_use - + local_disk->disk_io_info.disk_stats[0].r_use)) / io_req; + } + local_disk->disk_io_info.delay[delay_id] = temp; + return; +} + +static void get_disk_stats(local_disk *local_disk, unsigned int stats_id, const char *buf) +{ + int num; + + num = sscanf_s(buf, "%*lu %*lu %*s %lu %*lu %*lu %lu %lu %*lu %*lu %lu %*lu %*lu %*lu", + &local_disk->disk_io_info.disk_stats[stats_id].rio, + &local_disk->disk_io_info.disk_stats[stats_id].r_use, + &local_disk->disk_io_info.disk_stats[stats_id].wio, + &local_disk->disk_io_info.disk_stats[stats_id].w_use); + if (num < DISK_STATS_DATA_COUNT) { + log_printf(LOG_INFO, "failed get diskstats [%d]", errno); + } + return; +} + +static void display_delay_info(const local_disk *tmp_local_disk) +{ + /* + * I/O delay data ususally has 1~4 bits, there is a space between every two data. + * So 500 bytes is enough for 60 data. 
+ */ + char delay_info[DELAY_INFO_BUF_LEN] = {0}; + size_t delay_info_size = sizeof(delay_info); + char *pointer = delay_info; + char delay_data[DELAY_DATA_BUF_LEN]; /* 10:data for one I/O delay data */ + size_t data_size; + unsigned int i; + int rc; + + for (i = 0; i < sizeof(tmp_local_disk->disk_io_info.delay) / sizeof(tmp_local_disk->disk_io_info.delay[0]); i++) { + rc = memset_s(delay_data, sizeof(delay_data), 0, sizeof(delay_data)); + if (rc != EOK) { + log_printf(LOG_ERR, "memset_s in function display_delay_info error"); + continue; + } + rc = sprintf_s(delay_data, sizeof(delay_data), "%lu", tmp_local_disk->disk_io_info.delay[i]); + if (rc < 0) { + log_printf(LOG_ERR, "sprintf_s in function display_delay_info error"); + continue; + } + data_size = strlen(delay_data); + /* magic number 1: a space after %s */ + rc = snprintf_s(pointer, delay_info_size, data_size + 1, "%s ", delay_data); + if (rc < 0) { + log_printf(LOG_ERR, "snprintf_s in function display_delay_info error"); + break; + } + delay_info_size -= rc; + /* if the remaining space can just store one '\0' in the tail, then break the loop */ + if (delay_info_size <= 1) { + break; + } + /* cover the tailed '\0' every time snprintf_s is called, only reserve the last one when break the loop */ + pointer += rc; + } + log_printf(LOG_INFO, "disk is %s, io delay data: %s", tmp_local_disk->disk_io_info.disk_id, delay_info); +} + +static void handle_io_delay_alarm(const local_disk *local_disk, unsigned int delay_abnomal, bool alarm) +{ + if (alarm) { + log_printf(LOG_WARNING, "local disk:%s IO delay is too large. I/O delay threshold is %d.", + local_disk->disk_io_info.disk_id, g_disk_io_delay); + } else { + log_printf(LOG_INFO, "local disk:%s IO delay is normal. 
I/O delay threshold is %d.", + local_disk->disk_io_info.disk_id, g_disk_io_delay); + } + display_delay_info(local_disk); +} + +static void check_report_alarm(local_disk *local_disk) +{ + unsigned int delay_abnormal = 0; + unsigned int i; + + for (i = 0; i < MAX_COUNT; i++) { + if (local_disk->disk_io_info.delay[i] > (unsigned long)g_disk_io_delay) { + delay_abnormal++; + } + } + + if (delay_abnormal > MAX_DELAY_ABNORMAL && local_disk->disk_io_info.alarm == false) { + handle_io_delay_alarm(local_disk, delay_abnormal, true); + local_disk->disk_io_info.alarm = true; + } else if ((delay_abnormal <= MAX_DELAY_ABNORMAL && local_disk->disk_io_info.alarm == true) || + (delay_abnormal <= MAX_DELAY_ABNORMAL && g_disk_io_thread_start == 1)) { + handle_io_delay_alarm(local_disk, delay_abnormal, false); + local_disk->disk_io_info.alarm = false; + } +} + +static bool check_and_add_disk(char *cnt_buf, int size, local_disk *local_disk_head) +{ + char *disk = NULL; + char *p_save = NULL; + local_disk local_disk_info; + local_disk *tmp_local_disk = NULL; + bool add_disk_flag = true; + int ret; + + /* output of get_local_disk.sh is split by ',' */ + disk = strtok_r(cnt_buf, ",", &p_save); + while (disk != NULL) { + ret = memset_s(&local_disk_info, sizeof(local_disk), 0, sizeof(local_disk)); + if (ret != 0) { + log_printf(LOG_ERR, "monitor_io_delay: memset_s local_disk_info failed, ret: %d", ret); + return false; + } + + ret = strcpy_s(local_disk_info.disk_io_info.disk_id, MAX_DISK_ID, disk); + if (ret != 0) { + log_printf(LOG_ERR, "monitor_io_delay: strcpy_s disk_io_info failed, ret: %d", ret); + return false; + } + + /* check exists in local disk list */ + custom_list_for_each(local_disk_head, tmp_local_disk) { + if (strcmp(local_disk_info.disk_io_info.disk_id, tmp_local_disk->disk_io_info.disk_id) == 0) { + add_disk_flag = false; + } + } + + /* if not exist in local disk list, then add to local disk list */ + if (add_disk_flag == true) { + if (local_disk_add(local_disk_head, 
&local_disk_info) == false) { + return false; + } + } + add_disk_flag = true; + disk = strtok_r(NULL, ",", &p_save); + } + + return true; +} + +static bool check_new_disk(local_disk *local_disk_head) +{ + char cmd[MAX_TEMPSTR] = {0}; + char cnt_buf[DISK_BUFFER_LEN] = {0}; + int ret; + ret = strcpy_s(cmd, sizeof(cmd) - 1, "/usr/libexec/sysmonitor/get_local_disk.sh"); + if (ret != 0) { + log_printf(LOG_ERR, "monitor_io_delay: strcpy_s cmd failed, ret: %d", ret); + return false; + } + + if (monitor_popen(cmd, cnt_buf, sizeof(cnt_buf) - 1, POPEN_TIMEOUT, NULL)) { + log_printf(LOG_INFO, "failed to get local disk"); + return false; + } + + if (strlen(cnt_buf) == 0) { + log_printf(LOG_INFO, "get local disk failed"); + return false; + } + + return check_and_add_disk(cnt_buf, DISK_BUFFER_LEN, local_disk_head); +} + +static bool get_disk_stats_first(local_disk *local_disk_head) +{ + local_disk *tmp_local_disk = NULL; + char cmd[MAX_TEMPSTR] = {0}; + char cnt_buf[DISK_BUFFER_LEN] = {0}; + int ret; + + custom_list_for_each(local_disk_head, tmp_local_disk) { + ret = snprintf_s(cmd, sizeof(cmd), sizeof(cmd) - 1, "cat /proc/diskstats | grep -w %s", + tmp_local_disk->disk_io_info.disk_id); + if (ret == -1) { + log_printf(LOG_ERR, "monitor_io_delay: snprintf_s cmd[1] failed, ret: %d", ret); + return false; + } + + ret = monitor_popen(cmd, cnt_buf, sizeof(cnt_buf) - 1, POPEN_TIMEOUT, NULL); + if (ret != 0) { + if (ret < 0) { + log_printf(LOG_INFO, "failed to get diskstats ID %s", tmp_local_disk->disk_io_info.disk_id); + } + continue; + } + get_disk_stats(tmp_local_disk, 0, cnt_buf); + } + return true; +} + +static bool get_disk_stats_second(local_disk *local_disk_head, unsigned int count) +{ + local_disk *tmp_local_disk = NULL; + char cmd[MAX_TEMPSTR] = {0}; + char cnt_buf[DISK_BUFFER_LEN] = {0}; + int ret; + + custom_list_for_each(local_disk_head, tmp_local_disk) { + ret = snprintf_s(cmd, sizeof(cmd), sizeof(cmd) - 1, "cat /proc/diskstats | grep -w %s", + 
tmp_local_disk->disk_io_info.disk_id); + if (ret == -1) { + log_printf(LOG_ERR, "monitor_io_delay:snprintf_s cmd[2] failed, ret: %d", ret); + return false; + } + ret = monitor_popen(cmd, cnt_buf, sizeof(cnt_buf) - 1, POPEN_TIMEOUT, NULL); + if (ret != 0) { + if (ret < 0) { + log_printf(LOG_INFO, "failed to get diskstats ID %s", tmp_local_disk->disk_io_info.disk_id); + } + continue; + } + + get_disk_stats(tmp_local_disk, 1, cnt_buf); + /* get disk io delay and if 1 do not have io request, svctm is 0 */ + get_io_delay(tmp_local_disk, count); + /* get data 60 times in five minutes, check alarm or not */ + if (count == MAX_COUNT - 1) { + check_report_alarm(tmp_local_disk); + } + } + + return true; +} + +static bool monitor_io_delay(unsigned int count, local_disk *local_disk_head) +{ + /* check new disk every five minutes */ + if (count == 0) { + if (check_new_disk(local_disk_head) == false) { + return false; + } + } + + /* get disk io delay first time */ + if (get_disk_stats_first(local_disk_head) == false) { + return false; + } + + (void)sleep(1); + /* get disk io delay second time after one second */ + if (get_disk_stats_second(local_disk_head, count) == false) { + return false; + } + + if (count == MAX_COUNT - 1) { + g_disk_io_thread_start = 0; + } + return true; +} + +static int io_delay_parse_and_set_config(int period) +{ + bool ret = false; + int result; + + set_thread_item_reload_flag(IO_DELAY_ITEM, false); + ret = parse_config(IO_DELAY_CONF, parse_line); + if ((ret == false) || (period < 0)) { + log_printf(LOG_INFO, "io delay monitor: configuration illegal"); + ret = false; + result = set_thread_status_check_flag(THREAD_IO_DELAY_ITEM, false); + if (result == -1) { + log_printf(LOG_ERR, "reload io delay monitor set check flag error"); + return RET_BREAK; + } + } + + if (ret) { + result = set_thread_check_value(THREAD_IO_DELAY_ITEM, true, (unsigned int)(period - 1)); + if (result == -1) { + log_printf(LOG_ERR, "io delay monitor set check flag or period error"); + 
return RET_BREAK; + } + return RET_SUCCESS; + } + return RET_CONTINUE; +} + +static void *io_delay_monitor_start(void *arg) +{ + unsigned int cnt = 0; + local_disk *disk_head = NULL; + int period = 0; + int result = -1; + + /* prctl does not return false if arg2 is right when arg1 is PR_SET_NAME */ + (void)prctl(PR_SET_NAME, "monitor-iodelay"); + log_printf(LOG_INFO, "local disk io delay monitor starting up"); + + disk_head = malloc(sizeof(local_disk)); + if (disk_head == NULL) { + log_printf(LOG_ERR, "malloc local_disk head error [%d]", errno); + return NULL; + } + disk_head->next = NULL; + + for (;;) { + if (get_thread_item_reload_flag(IO_DELAY_ITEM)) { + period = get_thread_item_period(IO_DELAY_ITEM); + result = io_delay_parse_and_set_config(period); + if (result == RET_BREAK) { + break; + } + } + + if (result == RET_SUCCESS) { + if (monitor_io_delay(cnt, disk_head) == false) { + goto out; + } + } + + if (cnt == MAX_COUNT - 1) { + cnt = 0; + } else { + cnt++; + } + result = feed_thread_status_count(THREAD_IO_DELAY_ITEM); + if (result == -1) { + log_printf(LOG_ERR, "io delay monitor feed error"); + break; + } + (void)sleep((unsigned int)(period - 1)); + } +out: + result = set_thread_status_check_flag(THREAD_IO_DELAY_ITEM, false); + if (result == -1) { + log_printf(LOG_ERR, "io delay monitor exit set check flag error"); + } + free_local_disk(disk_head); + return NULL; +} + +void io_delay_monitor_init(void) +{ + pthread_t tid; + + if (pthread_create(&tid, NULL, io_delay_monitor_start, NULL)) { + log_printf(LOG_ERR, "create io delay monitor thread error [%d]", errno); + return; + } + set_thread_item_tid(IO_DELAY_ITEM, tid); +} diff --git a/sysmonitor-1.3.2/src/disk.h b/sysmonitor-1.3.2/src/disk.h new file mode 100644 index 0000000000000000000000000000000000000000..c693767b8af20605ce4e72d933c8cbf6b7795b1a --- /dev/null +++ b/sysmonitor-1.3.2/src/disk.h @@ -0,0 +1,24 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2016-2019. All rights reserved. 
+ * sysmonitor licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. + * Description: define variable and function for disk monitor + * Author: xuchunmei + * Create: 2016-1-1 + */ +#ifndef __DISK_H__ +#define __DISK_H__ + +#define IO_DELAY_CONF "/etc/sysmonitor/iodelay" + +void disk_monitor_init(void); +void inode_monitor_init(void); +void io_delay_monitor_init(void); + +#endif diff --git a/sysmonitor-1.3.2/src/filemonitor.c b/sysmonitor-1.3.2/src/filemonitor.c new file mode 100644 index 0000000000000000000000000000000000000000..2342c3b885c7acce7b02525c4f3dae7b93354ec3 --- /dev/null +++ b/sysmonitor-1.3.2/src/filemonitor.c @@ -0,0 +1,777 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2016-2019. All rights reserved. + * sysmonitor licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. 
+ * Description: file monitor + * Author: xuchunmei + * Create: 2016-1-1 + */ +#include "filemonitor.h" + +#include +#include +#include +#include +#include +#include +#include + +#include + +#define FILE_WATCH_SELECT_TIMEOUT 60 +#define FILE_WATCH_MAX_FAIL_NUM 3 +#define INOTIFY_IOC_SET_SYSMONITOR_FM 0xABAB +static int g_watched_items; +static struct list_head g_conf_head; +static struct list_head g_event_head; +static bool g_watch_flag = true; +static bool g_save_process = false; +static int g_select_timeout = FILE_WATCH_SELECT_TIMEOUT; + +void set_file_monitor_select_timeout(int timeout) +{ + if (timeout <= 0) { + return; + } + g_select_timeout = timeout; +} + +static fqueue_entry *find_queue(int wd) +{ + fqueue_entry *entry = NULL; + + list_for_each_entry(entry, &g_conf_head, list) { + if (entry->wd == wd) { + return entry; + } + } + return NULL; +} + +static fqueue_entry *find_queue_byname(const char *name) +{ + fqueue_entry *entry = NULL; + + list_for_each_entry(entry, &g_conf_head, list) { + if (!strcmp(entry->file_path, name)) { + return entry; + } + } + return NULL; +} + +static void free_fqueue(void) +{ + fqueue_entry *entry = NULL; + fqueue_entry *next = NULL; + + list_for_each_entry_safe(entry, next, &g_conf_head, list) { + list_del(&entry->list); + free(entry); + } +} + +static bool check_before_add(const char *file_path, const char *tmp_path, const char *real_path) +{ + struct stat info = {0}; + + if (access(file_path, F_OK) != 0) { + return true; + } + + if (stat(file_path, &info) != 0) { + log_printf(LOG_ERR, "stat %s error [%d]", file_path, errno); + return false; + } + /* distinguish between files and directories */ + if (S_ISDIR(info.st_mode)) { + if (find_queue_byname(tmp_path) || find_queue_byname(real_path)) { + log_printf(LOG_INFO, "File path %s is already configed, ignore this conf item.", file_path); + return false; + } + } else if (S_ISREG(info.st_mode)) { + if (find_queue_byname(real_path)) { + log_printf(LOG_INFO, "File path %s is already 
configed, ignore this conf item.", file_path); + return false; + } + } else { + log_printf(LOG_INFO, "%s is not a directory or regular file, can not watch it.", file_path); + return false; + } + + return true; +} + +static bool add_file_monitor(const char *file_path, char *tmp_path, const char *real_path, unsigned long wt_mask) +{ + fqueue_entry *wt_file = NULL; + int ret; + + if (check_before_add(file_path, tmp_path, real_path) == false) { + return false; + } + + wt_file = malloc(sizeof(struct _fqueue_entry)); + if (wt_file == NULL) { + log_printf(LOG_ERR, "wt_file malloc error!\n"); + return false; + } + + ret = memset_s(wt_file, sizeof(struct _fqueue_entry), 0, sizeof(struct _fqueue_entry)); + if (ret) { + log_printf(LOG_ERR, "filemonitor parse_line: memset_s wt_file failed, ret: %d", ret); + free(wt_file); + return false; + } + /* remove last / if the last of file_path is not / */ + if (file_path[strlen(file_path) - 1] != '/') { + tmp_path[strlen(tmp_path) - 1] = '\0'; + } + + ret = strcpy_s(wt_file->file_path, MAX_PATH_LEN, tmp_path); + if (ret) { + log_printf(LOG_ERR, "filemonitor parse_line: strcpy_s wt_file file_path failed, ret: %d", ret); + free(wt_file); + return false; + } + wt_file->flag = false; + wt_file->wt_mask = wt_mask; + wt_file->count = 0; + list_add(&wt_file->list, &g_conf_head); + return true; +} + +static int get_file_and_mask_from_config(const char *line, char *file_path, int size, unsigned long *mask) +{ + int ret; + char str_mask[MAX_MASK_LEN] = {0}; + char *tmp = NULL; + unsigned long wt_mask; + + ret = sscanf_s(line, "%s %s", file_path, size, str_mask, sizeof(str_mask)); + if (ret == -1) { + log_printf(LOG_ERR, "Get path and mask failed [%d]", errno); + return -1; + } + + if (!strlen(file_path) || strlen(file_path) >= MAX_PATH_LEN - 1) { + log_printf(LOG_INFO, + "The path can't be recognised. The path length should be less than 4096 characters. 
error."); + return -1; + } + + if (strlen(str_mask) > 0) { + /* use hex conversion */ + wt_mask = (unsigned long)strtol(str_mask, &tmp, STRTOL_HEX_NUMBER_BASE); + if (wt_mask & 0xFFFFFCFF) { + log_printf(LOG_INFO, "Mask is %s, it is more than add and delete, error.", str_mask); + return -1; + } + } else { + wt_mask = 0x200; + } + + *mask = wt_mask; + return 0; +} + +static int parse_from_file_path(const char *file_path, unsigned int file_size, char *tmp_path, unsigned int size) +{ + int ret; + unsigned int i; + int j = 0; + size_t len; + + ret = memcpy_s(tmp_path, size, file_path, file_size); + if (ret) { + log_printf(LOG_ERR, "filemonitor parse_line: memcpy_s tmp_path failed, ret: %d", ret); + return -1; + } + if (strstr(file_path, "//") != NULL) { + ret = memset_s(tmp_path, size, 0, size); + if (ret) { + log_printf(LOG_ERR, "filemonitor parse_line: memset_s tmp_path failed, ret: %d", ret); + return -1; + } + len = strlen(file_path); + for (i = 0; i < len; i++) { + if (file_path[i] == '/' && file_path[i + 1] == '/') { + continue; + } else { + tmp_path[j++] = file_path[i]; + } + } + } + + /* remove last /, realpath results do not include last / */ + if (strlen(tmp_path) > 0 && strcmp(tmp_path, "/") != 0 && tmp_path[strlen(tmp_path) - 1] == '/') { + tmp_path[strlen(tmp_path) - 1] = '\0'; + } + return 0; +} + +static int check_tmpfs_dir(const char *tmp_path, const char *file_path, unsigned long *mask) +{ + /* check /proc/ /sys/ /dev/ */ + if (!memcmp(tmp_path, "/proc/", strlen("/proc/")) || + !memcmp(tmp_path, "/sys/", strlen("/sys/")) || !memcmp(tmp_path, "/dev/", strlen("/dev/"))) { + log_printf(LOG_INFO, "(/proc /sys /dev)file %s no need to monitor.", file_path); + return -1; + } + + /* check /var/log/ only monitor delete event */ + if (!memcmp(tmp_path, "/var/log/", strlen("/var/log/"))) { + log_printf(LOG_INFO, "Watch path is in /var/log, watch %s for only delete event", file_path); + *mask = 0x200; + } + return 0; +} + +static int parse_line(const char *line) 
+{ + char file_path[MAX_PATH_LEN] = {0}; + char real_path[PATH_MAX] = {0}; + char tmp_path[MAX_PATH_LEN] = {0}; + unsigned long wt_mask; + int ret; + + while (*line == ' ' || *line == '\t') { + line++; + continue; + } + + if (*line == '#') { + return 0; + } + + if (*line == '\0') { + return 0; + } + + ret = get_file_and_mask_from_config(line, file_path, MAX_PATH_LEN, &wt_mask); + if (ret != 0) { + return -1; + } + + ret = parse_from_file_path(file_path, MAX_PATH_LEN, tmp_path, MAX_PATH_LEN); + if (ret != 0) { + return -1; + } + + if (!access(file_path, F_OK)) { + if (realpath(file_path, real_path) == NULL) { + log_printf(LOG_ERR, "realpath error [%d]", errno); + return -1; + } + if (!strlen(tmp_path) || (strcmp(real_path, tmp_path) && strcmp(real_path, file_path))) { + log_printf(LOG_ERR, "%s should be absolute path.", file_path); + return -1; + } + } + + /* add / to last to check /proc/ /sys/ /dev/ and /var/log/ */ + if (tmp_path[strlen(tmp_path) - 1] != '/') { + tmp_path[strlen(tmp_path)] = '/'; + } + + ret = check_tmpfs_dir(tmp_path, file_path, &wt_mask); + if (ret == -1) { + return 0; + } + + if (add_file_monitor(file_path, tmp_path, real_path, wt_mask) == false) { + return -1; + } + + return 0; +} + +static void parse_conf(FILE *fp) +{ + char conf_line[MAX_LINE_LEN] = {0}; + + for (;;) { + if (!fgets(conf_line, MAX_LINE_LEN, fp)) { + break; + } + if (strlen(conf_line) == 1 || strlen(conf_line) == 0) { + continue; + } + if (conf_line[strlen(conf_line) - 1] == '\n') { + conf_line[strlen(conf_line) - 1] = '\0'; + } + if (strlen(conf_line) >= (MAX_LINE_LEN - 1)) { + log_printf(LOG_INFO, "Config file line len is invalid. [%s]", conf_line); + continue; + } + if (parse_line(conf_line)) { + log_printf(LOG_ERR, "Parse line error. 
[%s]", conf_line); + } + } + return; +} + +static int fm_load_config(void) +{ + FILE *fp = NULL; + struct dirent *entry_dirent = NULL; + int config_fd = 0; + DIR *dirp = NULL; + int ret = -1; + char cfg_full_name[FM_MAX_CFG_NAME_LEN + sizeof(FM_MONITOR_CONFIG_DIR)] = {0}; + + fp = fopen(FM_MONITOR_CONF, "r"); + if (fp != NULL) { + parse_conf(fp); + (void)fclose(fp); + ret = 0; + } + + dirp = opendir(FM_MONITOR_CONFIG_DIR); + if (dirp == NULL) { + log_printf(LOG_INFO, "%s not exist", FM_MONITOR_CONFIG_DIR); + return ret; + } + + for (;;) { + entry_dirent = readdir(dirp); + if (entry_dirent == NULL) { + break; + } + + if (strlen(entry_dirent->d_name) >= FM_MAX_CFG_NAME_LEN) { + log_printf(LOG_ERR, "file monitor:config file name is too long. file: %s", entry_dirent->d_name); + continue; + } + ret = memset_s(cfg_full_name, sizeof(cfg_full_name), 0, sizeof(cfg_full_name)); + if (ret != 0) { + log_printf(LOG_ERR, "fm_load_config memset_s cfg_full_name error [%d]", ret); + continue; + } + ret = snprintf_s(cfg_full_name, sizeof(cfg_full_name), sizeof(cfg_full_name) - 1, + "%s%s", FM_MONITOR_CONFIG_DIR, entry_dirent->d_name); + if (ret < 0) { + log_printf(LOG_ERR, "fm_load_config snprintf_s cfg_full_name error [%d]", ret); + continue; + } + + fp = open_cfgfile(cfg_full_name, &config_fd); + if (fp == NULL) { + continue; + } + + parse_conf(fp); + (void)fclose(fp); + } + (void)closedir(dirp); + + return 0; +} + +static int open_inotify_fd(void) +{ + int fd = -1; + int ret; + + g_watched_items = 0; + g_save_process = false; + fd = inotify_init1(IN_CLOEXEC); + if (fd < 0) { + log_printf(LOG_ERR, "Init file monitor thread error [%d]", errno); + return fd; + } + + ret = ioctl(fd, INOTIFY_IOC_SET_SYSMONITOR_FM); + if (ret == 0) { + log_printf(LOG_INFO, "ioctl set inotify save process info success."); + g_save_process = true; + } + return fd; +} + +/* Close the open file descriptor that was opened with inotify_init() */ +static void close_inotify_fd(int fd) +{ + if (fd < 0) { + 
return; + } + if (close(fd) < 0) { + log_printf(LOG_ERR, "Close file monitor thread error [%d]", errno); + } + + g_watched_items = 0; +} + +struct event_msg { + unsigned int flag; + const char *name; +}; + +static const struct event_msg g_event_msg[] = { + { IN_DELETE, "deleted" }, + { IN_CREATE, "added" }, +}; + +static int set_event_msg(const queue_entry *event, char *msg, size_t size, const char *file_path) +{ + int ret; + unsigned int i; + unsigned int flag = event->inot_ev.mask & (IN_ALL_EVENTS | IN_UNMOUNT | IN_Q_OVERFLOW | IN_IGNORED); + bool b_dir = (event->inot_ev.mask & IN_ISDIR) ? true : false; + + for (i = 0; i < array_size(g_event_msg); i++) { + if (g_event_msg[i].flag == flag) { + if (!g_save_process) { + ret = snprintf_s(msg, size, size - 1, "%s \"%s\" under \"%s\" was %s.", + b_dir ? "Subdir" : "Subfile", event->inot_ev.name, file_path, g_event_msg[i].name); + } else { + ret = snprintf_s(msg, size, size - 1, + "%s \"%s\" under \"%s\" was %s, comm: %s[%d], parent comm: %s[%d]", + b_dir ? "Subdir" : "Subfile", event->inot_ev.name, file_path, g_event_msg[i].name, + event->info.comm, event->info.pid, event->info.parent_comm, event->info.parent_pid); + } + if (ret == -1) { + log_printf(LOG_ERR, "snprintf_s event[%u] msg failed.", flag); + } + return ret; + } + } + return -1; +} + +static int handle_del_self_and_ignore(const queue_entry *event, fqueue_entry *conf, + char *alarm_msg, size_t size) +{ + int ret; + + if (!access(conf->file_path, F_OK)) { + /* file exist, log info and add watch again */ + if (!g_save_process) { + ret = snprintf_s(alarm_msg, size, size - 1, + "File \"%s\" was deleted. It's maybe changed", conf->file_path); + } else { + ret = snprintf_s(alarm_msg, size, size - 1, + "File \"%s\" was deleted. It's maybe changed. 
comm: %s[%d], parent comm: %s[%d]", + conf->file_path, event->info.comm, event->info.pid, event->info.parent_comm, event->info.parent_pid); + } + conf->flag = false; + g_watch_flag = false; + } else { + if (!g_save_process) { + ret = snprintf_s(alarm_msg, size, size - 1, + "File \"%s\" was deleted", conf->file_path); + } else { + ret = snprintf_s(alarm_msg, size, size - 1, + "File \"%s\" was deleted. comm: %s[%d], parent comm: %s[%d]", conf->file_path, + event->info.comm, event->info.pid, event->info.parent_comm, event->info.parent_pid); + } + } + return ret; +} + +static void handle_event(const queue_entry *event) +{ + char alarm_msg[PARAS_LEN] = {0}; + int cur_event_wd = event->inot_ev.wd; + fqueue_entry *conf = find_queue(cur_event_wd); + + if (conf == NULL) { + log_printf(LOG_ERR, "Monitor a event not in conf file, the wd is %d", cur_event_wd); + return; + } + + switch (event->inot_ev.mask & (IN_ALL_EVENTS | IN_UNMOUNT | IN_Q_OVERFLOW | IN_IGNORED)) { + case IN_DELETE: + case IN_CREATE: + set_event_msg(event, alarm_msg, sizeof(alarm_msg), conf->file_path); + break; + + case IN_DELETE_SELF: + case IN_IGNORED: + handle_del_self_and_ignore(event, conf, alarm_msg, sizeof(alarm_msg)); + break; + + default: + snprintf_s((char *)alarm_msg, sizeof(alarm_msg), + sizeof(alarm_msg) - 1, "UNKNOWN EVENT on \"%s\".", conf->file_path); + break; + } + log_printf(LOG_INFO, "%s", alarm_msg); +} + +static void handle_events(void) +{ + queue_entry *f_event = NULL; + queue_entry *next = NULL; + int count = 0; + + list_for_each_entry_safe(f_event, next, &g_event_head, list) { + log_printf(LOG_INFO, "%dth event handled", ++count); + handle_event(f_event); + list_del(&f_event->list); + free(f_event); + } +} +static int check_size(size_t event_size, size_t q_event_size, size_t buffer_i) +{ + if (event_size == 0 || q_event_size == 0) { + log_printf(LOG_INFO, "read_events: event_size or q_event_size is not right."); + return RET_BREAK; + } + if (event_size > EVENT_BUF - 1 - buffer_i) { + 
log_printf(LOG_INFO, "read_events: not enough buffer for event."); + return RET_BREAK; + } + + return RET_SUCCESS; +} + +static int add_event_list(const char *buffer, ssize_t r) +{ + size_t buffer_i = 0; + struct inotify_event *pevent = NULL; + queue_entry *event = NULL; + size_t event_size, q_event_size; + size_t info_size = sizeof(inotify_event_process_info); + int count = 0; + int ret; + + while (buffer_i < (size_t)r) { + pevent = (struct inotify_event *)&buffer[buffer_i]; + event_size = offsetof(struct inotify_event, name) + pevent->len; + q_event_size = offsetof(struct _queue_entry, inot_ev.name) + pevent->len; + ret = check_size(event_size, q_event_size, buffer_i); + if (ret == RET_BREAK) { + break; + } + event = malloc(q_event_size); + if (event == NULL) { + log_printf(LOG_ERR, "event malloc error!\n"); + break; + } + ret = memset_s(event, q_event_size, 0, q_event_size); + if (ret) { + log_printf(LOG_ERR, "read_events: memset_s event failed, ret: %d", ret); + free(event); + break; + } + ret = memcpy_s(&(event->inot_ev), event_size, pevent, event_size); + if (ret) { + log_printf(LOG_ERR, "read_events: memcpy_s event inot_ev failed, ret: %d", ret); + free(event); + break; + } + buffer_i += event_size; + if (!g_save_process) { + list_add(&event->list, &g_event_head); + count++; + continue; + } + if (info_size > EVENT_BUF - 1 - buffer_i) { + log_printf(LOG_INFO, "read_events: not enough buffer for event process info."); + free(event); + break; + } + ret = memcpy_s(&(event->info), info_size, (inotify_event_process_info *)&buffer[buffer_i], info_size); + if (ret) { + log_printf(LOG_ERR, "read_events: memcpy_s event info failed, ret: %d", ret); + free(event); + break; + } + buffer_i += info_size; + list_add(&event->list, &g_event_head); + count++; + } + + if (count > 0) { + log_printf(LOG_INFO, "%d events queued", count); + } + + return count; +} + +static int read_events(int fd) +{ + char *buffer = NULL; + ssize_t r; + int ret; + + buffer = malloc(EVENT_BUF); + if 
(buffer == NULL) { + log_printf(LOG_ERR, "buffer malloc error!\n"); + return -1; + } + ret = memset_s(buffer, EVENT_BUF, 0, EVENT_BUF); + if (ret) { + log_printf(LOG_ERR, "read_events: memset_s buffer failed, ret: %d", ret); + free(buffer); + return -1; + } + r = read(fd, buffer, EVENT_BUF); + if (r <= 0) { + free(buffer); + return (int)r; + } + + ret = add_event_list(buffer, r); + free(buffer); + buffer = NULL; + return ret; +} + +static int event_check(int fd) +{ + fd_set rfds; + + FD_ZERO(&rfds); + FD_SET(fd, &rfds); + struct timeval timeout; + + timeout.tv_sec = g_select_timeout; + timeout.tv_usec = 0; + return select(FD_SETSIZE, &rfds, NULL, NULL, &timeout); +} + +static int watch_dir(int fd, const char *dirname, unsigned long mask) +{ + int wd; + + wd = inotify_add_watch(fd, dirname, (unsigned int)mask); + if (wd <= 0) { + if (fflush(stdout) == EOF) { + log_printf(LOG_INFO, "fflush failed, check filesystem"); + } + } else { + g_watched_items++; + } + + return wd; +} + +static void fm_add_watch(int fd) +{ + int wd; + int i_fd = fd; + fqueue_entry *entry = NULL; + + g_watch_flag = true; + list_for_each_entry(entry, &g_conf_head, list) { + if (entry->flag) { + continue; + } + wd = watch_dir(i_fd, entry->file_path, entry->wt_mask); + if (wd > 0) { + entry->wd = wd; + entry->flag = true; + log_printf(LOG_INFO, "file name is \"%s\", watch event is 0x%lX", entry->file_path, entry->wt_mask); + entry->count = 0; + } else { + if (entry->count < FILE_WATCH_MAX_FAIL_NUM) { + log_printf(LOG_INFO, "Cannot add watch for \"%s\" with event mask 0x%lX", + entry->file_path, entry->wt_mask); + } + g_watch_flag = false; + entry->count++; + } + } +} + +static int handle_filemonitor_reload(int *inotify_fd) +{ + int fd = *inotify_fd; + + if (!get_thread_item_reload_flag(FILE_ITEM)) { + return 0; + } + + set_thread_item_reload_flag(FILE_ITEM, false); + log_printf(LOG_INFO, "Conf file is modified, reload conf and watch again."); + close_inotify_fd(fd); + fd = open_inotify_fd(); + if 
(fd <= 0) { + *inotify_fd = -1; + return -1; + } + + free_fqueue(); + + if (fm_load_config()) { + log_printf(LOG_INFO, "Reload file monitor configuration failed."); + } + + fm_add_watch(fd); + if (g_watched_items == 0) { + log_printf(LOG_INFO, "No watcher add to FD."); + } + + *inotify_fd = fd; + return 0; +} + +static void *file_monitor_start(void *arg) +{ + int inotify_fd = -1; + + /* prctl does not return false if arg2 is right when arg1 is PR_SET_NAME */ + (void)prctl(PR_SET_NAME, "monitor-file"); + log_printf(LOG_INFO, "file monitor starting up"); + + set_thread_item_reload_flag(FILE_ITEM, false); + inotify_fd = open_inotify_fd(); + if (inotify_fd <= 0) { + return NULL; + } + init_list_head(&g_conf_head); + init_list_head(&g_event_head); + + if (fm_load_config()) { + log_printf(LOG_INFO, "load file monitor configuration failed"); + } + fm_add_watch(inotify_fd); + if (g_watched_items == 0) { + log_printf(LOG_INFO, "No watcher add to FD"); + } + for (;;) { + if (handle_filemonitor_reload(&inotify_fd) == -1) { + break; + } + + if (g_watch_flag == false) { + fm_add_watch(inotify_fd); + } + + if (event_check(inotify_fd) > 0) { + int r; + + r = read_events(inotify_fd); + if (r < 0) { + break; + } else { + handle_events(); + } + } else { + continue; + } + } + + close_inotify_fd(inotify_fd); + free_fqueue(); + return NULL; +} + +void file_monitor_init(void) +{ + pthread_t tid; + + if (pthread_create(&tid, NULL, file_monitor_start, NULL)) { + log_printf(LOG_ERR, "create file monitor thread error [%d]", errno); + return; + } + set_thread_item_tid(FILE_ITEM, tid); +} diff --git a/sysmonitor-1.3.2/src/filemonitor.h b/sysmonitor-1.3.2/src/filemonitor.h new file mode 100644 index 0000000000000000000000000000000000000000..8f006dc2e792df70c5f8d9c8671211096f2663b1 --- /dev/null +++ b/sysmonitor-1.3.2/src/filemonitor.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2016-2019. All rights reserved. + * sysmonitor licensed under the Mulan PSL v2. 
+ * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. + * Description: define variable, structure and function for file monitor + * Author: xuchunmei + * Create: 2016-1-1 + */ +#ifndef __FILE_MONITOR_H +#define __FILE_MONITOR_H + +#include +#include +#include +#include +#include + +#include "common.h" + +#define FM_MONITOR_CONF "/etc/sysmonitor/file" +#define FM_MONITOR_CONFIG_DIR "/etc/sysmonitor/file.d/" +#define FM_MAX_CFG_NAME_LEN 128 +/* PATH_MAX 4096, security function requires less 1 bit */ +#define MAX_PATH_LEN 4097 +/* PATH_MAX 4096, operation code and blank */ +#define MAX_LINE_LEN 4116 +#define MAX_MASK_LEN 16 +#define EVENT_BUF 16384 + +typedef struct _fqueue_entry { + struct list_head list; + char file_path[MAX_PATH_LEN]; + unsigned long wt_mask; + int wd; + bool flag; + unsigned int count; /* counts of watch failed */ +} fqueue_entry; + +typedef struct _inotify_event_process_info { + int pid; + int parent_pid; + char comm[TASK_COMM_LEN]; + char parent_comm[TASK_COMM_LEN]; +} inotify_event_process_info; + +typedef struct _queue_entry { + struct list_head list; + inotify_event_process_info info; + struct inotify_event inot_ev; +} queue_entry; + +void file_monitor_init(void); +void set_file_monitor_select_timeout(int timeout); + +#endif diff --git a/sysmonitor-1.3.2/src/fsmonitor.c b/sysmonitor-1.3.2/src/fsmonitor.c new file mode 100644 index 0000000000000000000000000000000000000000..521b746ef7cd0c67e50e3717037ae502fbcf5624 --- /dev/null +++ b/sysmonitor-1.3.2/src/fsmonitor.c @@ -0,0 +1,301 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2016-2019. All rights reserved. 
+ * sysmonitor licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. + * Description: ext3/ext4 file system monitor + * Author: xuchunmei + * Create: 2016-1-1 + */ +#include "fsmonitor.h" + +#include +#include +#include +#include + +#include +#include + +#include +#include "common.h" +#include "monitor_thread.h" + +static struct nlmsghdr *g_nlh; +static int g_sockfd = -1; + +static void resume_ext_alarm(void) +{ + int ret, i, j, start; + char out[PATH_MAX] = {0}; + char dev[PATH_MAX] = {0}; + char devpath[FILE_SYSTEM_LENTH] = {0}; + const int timeout = 3; + + ret = monitor_popen("cat /proc/mounts | grep -E -w 'ext3|ext4' |awk '{print $1}'", + out, sizeof(out), timeout, NULL); + if (ret < 0) { + log_printf(LOG_ERR, "get system mounts failed"); + return; + } + + start = 0; + out[PATH_MAX - 1] = 0; + + /* search for dev name and report resume */ + for (i = 0; out[i] != 0; i++) { + if (out[i] != '\n') { + continue; + } + + if (i == 0) { + log_printf(LOG_INFO, "no ext disk found"); + break; + } + + /* so here is the end of line, get this line for one dev */ + j = i - start; + if (j >= FILE_SYSTEM_LENTH) { + log_printf(LOG_ERR, "dev name too long ,something error"); + return; + } else if (j <= 0) { + continue; + } + + ret = memcpy_s(devpath, FILE_SYSTEM_LENTH, out + start, (unsigned int)j); + if (ret != 0) { + log_printf(LOG_ERR, "resume ext alarm memcpy_s error [%d]", ret); + } + devpath[j] = 0; + start = i + 1; + + /* chang /dev/mapper/xxx to /dev/dm-xx for real path */ + ret = memset_s(dev, sizeof(dev), 0, sizeof(dev)); + if (ret != 0) { + log_printf(LOG_ERR, 
"resume ext alarm memset_s error [%d]", ret); + } + if (realpath(devpath, dev) == NULL) { + log_printf(LOG_ERR, "get real path for %s failed", devpath); + continue; + } + + /* + * we get the dev name, record resume log , + * dev name is /dev/dm-x or /dev/sdx, simply cut first 5 char for name only + */ + log_printf(LOG_INFO, "%s ext-fs resume.", dev + FIRST_FIVE_DEV_CHAR); + } + + return; +} + +static void resume_alarm(void) +{ + char out[MAX_TEMPSTR] = {0}; + int ret; + const unsigned int sleep_time = 10; + const int timeout = 3; + + ret = monitor_popen("systemctl is-system-running", out, sizeof(out), timeout, NULL); + if (ret < 0) { + log_printf(LOG_ERR, "get system status error"); + return; + } + /* do not resume alarm if system is already started */ + if (strstr(out, "running") || strstr(out, "degraded")) { + log_printf(LOG_INFO, "do not resume alarm if system is already started"); + return; + } + (void)sleep(sleep_time); + resume_ext_alarm(); + + return; +} + +static void clean_res(void) +{ + if (g_nlh != NULL) { + free(g_nlh); + g_nlh = NULL; + } + if (g_sockfd >= 0) { + (void)close(g_sockfd); + g_sockfd = -1; + } +} + +static int set_sockfd(void) +{ + int ret; + struct sockaddr_nl local; + + g_sockfd = socket(PF_NETLINK, (int)SOCK_RAW | SOCK_CLOEXEC, NETLINK_FILESYSTEM); + if (g_sockfd < 0) { + if (errno == EPROTONOSUPPORT) { + set_thread_item_tid(FS_ITEM, 0); + log_printf(LOG_INFO, "the kernel do not support filesystem monitor"); + } else { + log_printf(LOG_INFO, "create NETLINK_FILESYSTEM socket failed [%d]", errno); + } + goto err; + } + + ret = memset_s(&local, sizeof(local), 0, sizeof(local)); + if (ret != 0) { + log_printf(LOG_ERR, "fs_monitor_ext3_4: memset_s local failed, ret: %d", ret); + goto err; + } + + local.nl_family = PF_NETLINK; + local.nl_pid = (unsigned int)getpid(); + local.nl_groups = FS_ERROR_GRP_EXT3; + + if (bind(g_sockfd, (struct sockaddr *)&local, sizeof(local))) { + log_printf(LOG_ERR, "bind NETLINK_FILESYSTEM socket failed [%d]", 
errno); + goto err; + } + + return 0; + +err: + if (g_sockfd >= 0) { + (void)close(g_sockfd); + g_sockfd = -1; + } + return -1; +} + +static int alloc_for_nlmsghdr(void) +{ + int ret; + + g_nlh = (struct nlmsghdr *)malloc(NLMSG_SPACE(sizeof(struct ext4_err_msg))); + if (g_nlh == NULL) { + log_printf(LOG_ERR, "NETLINK_FILESYSTEM: can not allocate err_msg!"); + return -1; + } + + ret = memset_s(g_nlh, NLMSG_SPACE(sizeof(struct ext4_err_msg)), 0, NLMSG_SPACE(sizeof(struct ext4_err_msg))); + if (ret != 0) { + log_printf(LOG_ERR, "fs_monitor_ext3_4: memset_s nlh failed, ret: %d", ret); + free(g_nlh); + g_nlh = NULL; + return -1; + } + + g_nlh->nlmsg_len = (unsigned int)NLMSG_SPACE(sizeof(struct ext4_err_msg)); + g_nlh->nlmsg_pid = pthread_self() << THREAD_PID_OFFSET | (unsigned int)getpid(); + g_nlh->nlmsg_flags = 0; + + return 0; +} + +static int handle_fs_monitor_msg(void) +{ + struct msghdr msg; + struct iovec iov; + int ret; + ssize_t recv_ret; + struct ext4_err_msg *err_msg = NULL; + + ret = memset_s(&iov, sizeof(iov), 0, sizeof(iov)); + if (ret != 0) { + log_printf(LOG_ERR, "fs_monitor_ext3_4: memset_s iov failed, ret: %d", ret); + return -1; + } + + iov.iov_base = (void *)g_nlh; + iov.iov_len = g_nlh->nlmsg_len; + + ret = memset_s(&msg, sizeof(msg), 0, sizeof(msg)); + if (ret != 0) { + log_printf(LOG_ERR, "fs_monitor_ext3_4: memset_s msg failed, ret: %d", ret); + return -1; + } + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + for (;;) { + recv_ret = recvmsg(g_sockfd, &msg, 0); + if (recv_ret < 0) { + if (errno != EINTR) { + log_printf(LOG_ERR, "recvmsg from NETLINK_FILESYSTEM socket failed [%d]", errno); + return -1; + } + continue; + } + + err_msg = (struct ext4_err_msg *)NLMSG_DATA(g_nlh); + if (err_msg != NULL && (err_msg->magic == EXT3_ERROR_MAGIC || err_msg->magic == EXT4_ERROR_MAGIC)) { + if (err_msg->s_flags & MS_RDONLY) { + log_printf(LOG_INFO, "%s filesystem error. 
Remount filesystem read-only.", err_msg->s_id); + } else { + log_printf(LOG_ERR, "fs_monitor_ext3_4: %s filesystem error. flag is %lu.", err_msg->s_id, err_msg->s_flags); + } + } + } + return 0; +} + +static void fs_monitor_ext3_4(void) +{ + int ret; + + /* prctl does not return false if arg2 is right when arg1 is PR_SET_NAME */ + (void)prctl(PR_SET_NAME, "monitor-fs"); + log_printf(LOG_INFO, "filesystem monitor starting up"); + + ret = set_sockfd(); + if (ret != 0) { + goto err; + } + + ret = alloc_for_nlmsghdr(); + if (ret) { + goto err; + } + + ret = handle_fs_monitor_msg(); + if (ret != 0) { + goto err; + } + +err: + clean_res(); +} + +static void *fs_monitor_start(void *arg) +{ + int ret; + + ret = set_thread_check_value(THREAD_FS_ITEM, true, FILE_SYSTEM_PERIOD); + if (ret == -1) { + log_printf(LOG_ERR, "file system monitor set check flag or period error"); + return NULL; + } + resume_alarm(); + ret = feed_thread_status_count(THREAD_FS_ITEM); + if (ret == -1) { + log_printf(LOG_ERR, "file system monitor feed error"); + return NULL; + } + fs_monitor_ext3_4(); + return NULL; +} + +void fs_monitor_init(void) +{ + pthread_t tid; + + if (pthread_create(&tid, NULL, fs_monitor_start, NULL)) { + log_printf(LOG_ERR, "create file system monitor thread error [%d]", errno); + return; + } + set_thread_item_tid(FS_ITEM, tid); +} diff --git a/sysmonitor-1.3.2/src/fsmonitor.h b/sysmonitor-1.3.2/src/fsmonitor.h new file mode 100644 index 0000000000000000000000000000000000000000..828d51868e1598db3911e8bd7e6a6e77000b0950 --- /dev/null +++ b/sysmonitor-1.3.2/src/fsmonitor.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2016-2019. All rights reserved. + * sysmonitor licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. 
+ * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. + * Description: define variable, structure and function for ext3/ext4 file system monitor + * Author: xuchunmei + * Create: 2016-1-1 + */ +#ifndef EX3_NETLINK_H +#define EX3_NETLINK_H + +#define EXT4_ERROR_MAGIC 0xAE43125U +#define EXT3_ERROR_MAGIC 0xAE32014U +#define NETLINK_FILESYSTEM 28 + +#define FILE_SYSTEM_LENTH 64 +#define FIRST_FIVE_DEV_CHAR 5 +#define FILE_SYSTEM_PERIOD 20 + +enum fs_error_group { + FS_ERROR_GRP_EXT3 = 1 +}; + +/* this struct same as ext4_err_msg in kernel fs/ext4/ext4.h */ +struct ext4_err_msg { + unsigned int magic; + char s_id[32]; + unsigned long s_flags; + int ext4_errno; +}; + +void fs_monitor_init(void); + +#endif diff --git a/sysmonitor-1.3.2/src/monitor_thread.c b/sysmonitor-1.3.2/src/monitor_thread.c new file mode 100644 index 0000000000000000000000000000000000000000..d15efa9eb4423a98fe49d9ca20e048aa703985a7 --- /dev/null +++ b/sysmonitor-1.3.2/src/monitor_thread.c @@ -0,0 +1,561 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved. + * sysmonitor licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. + * Description: monitors the running status of each subthread. 
+ * Author: zhangguangzhi
+ * Create: 2020-9-17
+ */
+
+
+#include "monitor_thread.h"
+
+#include
+
+static thread_status g_thread_status[THREAD_MONITOR_ITEMS_CNT];
+static struct list_head g_thread_ps_parallel_head;
+/* set by check_thread_monitor(); while false the functions in this file return early */
+static bool g_check_thread_monitor = true;
+/* set by check_thread_failure_num() after the configured value has been validated */
+static unsigned int g_max_failure_num = CHECK_THREAD_FAILURE_NUM;
+static pthread_mutex_t g_parallel_mtx = PTHREAD_MUTEX_INITIALIZER;
+
+/* initialise g_thread_ps_parallel_head with init_list_head() */
+void init_ps_parallel_head(void)
+{
+    init_list_head(&g_thread_ps_parallel_head);
+}
+
+/*
+ * turn the thread monitor on or off from a configuration value,
+ * item is only used in the error log, value must be "on" or "off",
+ * return: true: value accepted; false: value is neither "on" nor "off"
+ */
+bool check_thread_monitor(const char *item, const char *value)
+{
+    if (strcmp(value, "on") == 0) {
+        g_check_thread_monitor = true;
+    } else if (strcmp(value, "off") == 0) {
+        g_check_thread_monitor = false;
+    } else {
+        log_printf(LOG_ERR, "item:[%s] set value error", item);
+        return false;
+    }
+    return true;
+}
+
+/*
+ * parse and validate the configured max failure number,
+ * item is only used in the error log, value is the number as text,
+ * return: true: value accepted and stored in g_max_failure_num;
+ *         false: value rejected, g_max_failure_num keeps its previous value
+ */
+bool check_thread_failure_num(const char *item, const char *value)
+{
+    long num;
+
+    /*
+     * validate a local copy first: a rejected value must not overwrite the
+     * threshold in use, and the range is checked on the full strtol result
+     * so a too large number can not wrap into range when narrowed to unsigned int
+     */
+    num = strtol(value, NULL, STRTOL_NUMBER_BASE);
+    if (check_int(value) == false || num < (long)CHECK_THREAD_FAILURE_NUM_MIN ||
+        num > (long)CHECK_THREAD_FAILURE_NUM_MAX) {
+        log_printf(LOG_ERR, "item:[%s] set check_thread_failure_num error", item);
+        return false;
+    }
+    g_max_failure_num = (unsigned int)num;
+    return true;
+}
+
+
+/*
+ * zero the whole g_thread_status table and init the ps parallel list head,
+ * nothing is done when the thread monitor is off,
+ * return: 0: success or monitor is off; -1: memset_s failed
+ */
+int thread_status_struct_init(void)
+{
+    int ret;
+
+    if (!g_check_thread_monitor) {
+        return 0;
+    }
+
+    ret = memset_s(g_thread_status, sizeof(thread_status) * THREAD_MONITOR_ITEMS_CNT, 0,
+        sizeof(thread_status) * THREAD_MONITOR_ITEMS_CNT);
+    if (ret != 0) {
+        log_printf(LOG_ERR, "thread status init memset_s error, ret:%d", ret);
+        return -1;
+    }
+    init_ps_parallel_head();
+    return 0;
+}
+
+/* if monitor is off, need to clear count, check_num and check_failure_num
+ * for next check
+ */
+void clear_thread_status(monitor_thread_item item)
+{
+    if (!g_check_thread_monitor) {
+        return;
+    }
+
+    if (item >= THREAD_MONITOR_ITEMS_CNT) {
+        log_printf(LOG_ERR, "clear thread status error, item:%d", item);
+        return;
+    }
+
+    g_thread_status[item].count = 0;
+
g_thread_status[item].check_num = 0; + g_thread_status[item].check_failure_num = 0; + return; +} + +void clear_all_thread_status(void) +{ + monitor_thread_item item; + if (!g_check_thread_monitor) { + return; + } + + for (item = 0; item < THREAD_MONITOR_ITEMS_CNT; item++) { + g_thread_status[item].check_num = 0; + g_thread_status[item].check_failure_num = 0; + /* file system only feed once, don't clear it's count */ + if (item != THREAD_FS_ITEM) { + g_thread_status[item].count = 0; + } + } + return; +} + +/* + * feed thread status, item's count + 1, + * return: -1:error; 0: feed success or no need to feed + */ +int feed_thread_status_count(monitor_thread_item item) +{ + if (!g_check_thread_monitor) { + return 0; + } + + if (item >= THREAD_MONITOR_ITEMS_CNT) { + log_printf(LOG_ERR, "feed thread status count error, item:%d", item); + return -1; + } + + if (!g_thread_status[item].check_flag) { + return 0; + } + g_thread_status[item].count += 1; + return 0; +} + +/* clear the thread status when the check_flag changes. 
*/ +static void check_flag_and_clear(monitor_thread_item item, bool new_flag) +{ + /* if check_flag change form true to false, need to clear */ + if (!new_flag && g_thread_status[item].check_flag) { + clear_thread_status(item); + } +} + +int set_thread_status_check_flag(monitor_thread_item item, bool flag) +{ + if (!g_check_thread_monitor) { + return 0; + } + + if (item >= THREAD_MONITOR_ITEMS_CNT) { + log_printf(LOG_ERR, "set thread status check flag error, item:%d", item); + return -1; + } + + /* clear the thread status when check_flag change */ + check_flag_and_clear(item, flag); + g_thread_status[item].check_flag = flag; + return 0; +} + +int set_thread_status_period(monitor_thread_item item, unsigned int period) +{ + if (!g_check_thread_monitor) { + return 0; + } + + if (item >= THREAD_MONITOR_ITEMS_CNT) { + log_printf(LOG_ERR, "set thread status period error, item:%d", item); + return -1; + } + g_thread_status[item].period = period; + return 0; +} + +int set_thread_check_value(monitor_thread_item item, bool flag, unsigned int period) +{ + int ret; + + if (!g_check_thread_monitor) { + return 0; + } + + ret = set_thread_status_check_flag(item, flag); + if (ret == -1) { + return -1; + } + + ret = set_thread_status_period(item, period); + if (ret == -1) { + return -1; + } + return 0; +} + +/* + * check thread item count and failure time + * return 0:success, -1: error,need to restart sysmonitor + */ +static int check_thread_is_normal(monitor_thread_item item) +{ + unsigned int count; + + count = g_thread_status[item].count; + if (count == 0) { + /* failed to feed dog */ + g_thread_status[item].check_failure_num += 1; + g_thread_status[item].check_num += 1; + } else { + /* success to feed dog, clear the status for next check */ + clear_thread_status(item); + return 0; + } + + if (g_thread_status[item].check_failure_num >= g_max_failure_num) { + log_printf(LOG_ERR, "need to restart sysmonitor, item:[%d] failure:[%u], count:[%u], max failure num:[%u]", + item, 
g_thread_status[item].check_failure_num, g_thread_status[item].count, g_max_failure_num); + return -1; + } + + return 0; +} + +/* check the specified item */ +static int check_thread_item_status(monitor_thread_item item) +{ + int ret = 0; + unsigned int check_num; + unsigned int check_period; + + check_period = g_thread_status[item].period * (g_thread_status[item].check_failure_num + 1); + /* if check period is 0, return for next check */ + if (check_period == 0) { + return 0; + } + /* exclude check_num is 0 */ + check_num = (g_thread_status[item].check_num + 1) * SYSMONITOR_PERIOD; + if (check_num >= check_period) { + ret = check_thread_is_normal(item); + } else { + g_thread_status[item].check_num += 1; + } + return ret; +} + +static int check_fs_failure_time(monitor_thread_item item) +{ + g_thread_status[item].check_failure_num += 1; + log_printf(LOG_INFO, "fs check status failed, item[%d] failure:[%u], count:[%u], max failure num:[%u]", + item, g_thread_status[item].check_failure_num, g_thread_status[item].count, g_max_failure_num); + if (g_thread_status[item].check_failure_num >= g_max_failure_num) { + log_printf(LOG_ERR, "fs need to restart sysmonitor, item[%u] failure:[%u], count[%u]", + item, g_thread_status[item].check_failure_num, g_thread_status[item].count); + return -1; + } + return 0; +} + +/* check fs item status */ +static int check_thread_fs_item_status(monitor_thread_item item) +{ + unsigned int count; + unsigned int check_num; + unsigned int check_period; + int ret; + + check_period = g_thread_status[item].period * (g_thread_status[item].check_failure_num + 1); + /* if check period is 0, return for next check */ + if (check_period == 0) { + return 0; + } + + count = g_thread_status[item].count; + /* check success, don't need to check the item, set check_flag false and clear status */ + if (count > 0) { + log_printf(LOG_INFO, "item[%d] check thread status success", item); + ret = set_thread_status_check_flag(item, false); + if (ret == -1) { + 
log_printf(LOG_INFO, "item[%d] check thread set flag error", item); + return -1; + } + return 0; + } + + check_num = (g_thread_status[item].check_num + 1) * SYSMONITOR_PERIOD; + if (check_num >= check_period) { + ret = check_fs_failure_time(item); + if (ret == -1) { + return -1; + } + } + g_thread_status[item].check_num += 1; + return 0; +} + + +static int processs_parallel_add_node(pthread_t id) +{ + int ret; + thread_ps_parallel_status *tmp = NULL; + + tmp = malloc(sizeof(thread_ps_parallel_status)); + if (tmp == NULL) { + return -1; + } + ret = memset_s(tmp, sizeof(thread_ps_parallel_status), 0, sizeof(thread_ps_parallel_status)); + if (ret != 0) { + log_printf(LOG_ERR, "ps parallel add list memset_s tmp failed, ret: %d.", ret); + free(tmp); + return -1; + } + tmp->status.check_flag = true; + tmp->thread_id = id; + log_printf(LOG_INFO, "ps parallel add list set status, id:%lu, check_flag:%d", + tmp->thread_id, tmp->status.check_flag); + (void)pthread_mutex_lock(&g_parallel_mtx); + list_add(&tmp->list, &g_thread_ps_parallel_head); + (void)pthread_mutex_unlock(&g_parallel_mtx); + return 0; +} + +static int process_parallel_del_node(pthread_t id) +{ + thread_ps_parallel_status *tmp = NULL; + + (void)pthread_mutex_lock(&g_parallel_mtx); + list_for_each_entry(tmp, &g_thread_ps_parallel_head, list) { + if (tmp != NULL && tmp->thread_id == id) { + log_printf(LOG_INFO, "ps parallel del list status, id:%lu", id); + list_del(&tmp->list); + free(tmp); + (void)pthread_mutex_unlock(&g_parallel_mtx); + return 0; + } + } + (void)pthread_mutex_unlock(&g_parallel_mtx); + log_printf(LOG_ERR, "ps parallel del list status error, id:%lu", id); + return -1; +} + +/* + * ps parallel check item is dynamically added, + * when flag is true: need to malloc and add list; + * when flag is false: need to del list and free memory + * return 0:success, -1: error + */ +int set_ps_parallel_check_flag(monitor_thread_item item, bool flag, pthread_t id) +{ + int ret; + + if 
(!g_check_thread_monitor) { + return 0; + } + + if (item != THREAD_PS_PARALLEL_ITEM) { + log_printf(LOG_ERR, "set thread ps parallel check flag wrong item:%d, id:%lu", item, id); + return -1; + } + + if (flag) { + ret = processs_parallel_add_node(id); + if (ret == -1) { + return -1; + } + } else { + ret = process_parallel_del_node(id); + if (ret == -1) { + return -1; + } + } + + return 0; +} + +int feed_thread_ps_parallel_count(monitor_thread_item item, pthread_t id) +{ + thread_ps_parallel_status *tmp = NULL; + + if (!g_check_thread_monitor) { + return 0; + } + + if (item != THREAD_PS_PARALLEL_ITEM) { + log_printf(LOG_ERR, "feed thread ps parallel status wrong item:%d, id:%lu", item, id); + return -1; + } + + (void)pthread_mutex_lock(&g_parallel_mtx); + list_for_each_entry(tmp, &g_thread_ps_parallel_head, list) { + if (tmp != NULL && tmp->thread_id == id) { + tmp->status.count += 1; + (void)pthread_mutex_unlock(&g_parallel_mtx); + return 0; + } + } + (void)pthread_mutex_unlock(&g_parallel_mtx); + log_printf(LOG_ERR, "ps parallel id:[%lu] feed failed", id); + return -1; +} + +int set_thread_ps_parallel_period(monitor_thread_item item, pthread_t id, unsigned int period) +{ + thread_ps_parallel_status *tmp = NULL; + + if (!g_check_thread_monitor) { + return 0; + } + + if (item != THREAD_PS_PARALLEL_ITEM) { + log_printf(LOG_ERR, "set thread ps parallel period wrong item:%d, id:%lu", item, id); + return -1; + } + + (void)pthread_mutex_lock(&g_parallel_mtx); + list_for_each_entry(tmp, &g_thread_ps_parallel_head, list) { + if (tmp != NULL && tmp->thread_id == id) { + tmp->status.period = period; + (void)pthread_mutex_unlock(&g_parallel_mtx); + return 0; + } + } + (void)pthread_mutex_unlock(&g_parallel_mtx); + + log_printf(LOG_ERR, "ps parallel id:[%lu] set period[%u] failed", id, period); + return -1; +} + +int set_ps_parallel_check_value(monitor_thread_item item, bool flag, pthread_t id, unsigned int period) +{ + int ret; + + if (!g_check_thread_monitor) { + return 0; + 
} + + ret = set_ps_parallel_check_flag(item, flag, id); + if (ret == -1) { + return -1; + } + + ret = set_thread_ps_parallel_period(item, id, period); + if (ret == -1) { + return -1; + } + + return 0; +} + +static int check_thread_ps_parallel_status(void) +{ + thread_ps_parallel_status *tmp = NULL; + pthread_t id; + unsigned int count; + unsigned int check_num; + unsigned int check_period; + + (void)pthread_mutex_lock(&g_parallel_mtx); + list_for_each_entry(tmp, &g_thread_ps_parallel_head, list) { + if (tmp == NULL) { + continue; + } + + if (!tmp->status.check_flag) { + continue; + } + + check_period = tmp->status.period * (tmp->status.check_failure_num + 1); + if (check_period == 0) { + continue; + } + + /* check success */ + count = tmp->status.count; + if (count != 0) { + tmp->status.count = 0; + tmp->status.check_num = 0; + tmp->status.check_failure_num = 0; + continue; + } + + check_num = (tmp->status.check_num + 1) * SYSMONITOR_PERIOD; + if (check_num >= check_period) { + id = tmp->thread_id; + tmp->status.check_failure_num += 1; + log_printf(LOG_ERR, "ps parallel check failed, id:[%lu], failure:[%u], count:[%u], max failure num:[%u]", + id, tmp->status.check_failure_num, count, g_max_failure_num); + if (tmp->status.check_failure_num >= g_max_failure_num) { + log_printf(LOG_ERR, "ps parallell need to restart sysmonitor, id:[%lu], failure:[%u], count[%u]", + id, tmp->status.check_failure_num, count); + (void)pthread_mutex_unlock(&g_parallel_mtx); + return -1; + } + } + tmp->status.check_num += 1; + } + (void)pthread_mutex_unlock(&g_parallel_mtx); + return 0; +} + +/* + * check all thread status + * return 0:success, -1: error,need to restart sysmonitor + */ +static int check_thread_running_status(void) +{ + int item; + int ret; + bool check_flag = false; + + /* no need to check thread status */ + if (!g_check_thread_monitor) { + return 0; + } + + for (item = 0; item < THREAD_MONITOR_ITEMS_CNT; item++) { + /* don't need to check FILE_ITEM and 
THREAD_SYS_EVENT_ITEM */ + if (item == THREAD_FILE_ITEM || item == THREAD_SYS_EVENT_ITEM) { + continue; + } + + /* need to check ps parallel before check g_thread_status check_flag, because it's check_flag is never set */ + if (item == THREAD_PS_PARALLEL_ITEM) { + ret = check_thread_ps_parallel_status(); + if (ret == -1) { + return -1; + } + continue; + } + + check_flag = g_thread_status[item].check_flag; + if (!check_flag) { + continue; + } + + if (item == THREAD_FS_ITEM) { + ret = check_thread_fs_item_status(item); + if (ret == -1) { + return -1; + } + continue; + } + /* check status */ + ret = check_thread_item_status(item); + if (ret == -1) { + return -1; + } + } + return 0; +} + +int check_thread_status(void) +{ + int ret; + ret = check_thread_running_status(); + if (ret == -1) { + ret = lovs_system(RESTART_MONITOR); + if (ret != 0) { + return -1; + } + } + return 0; +} diff --git a/sysmonitor-1.3.2/src/monitor_thread.h b/sysmonitor-1.3.2/src/monitor_thread.h new file mode 100644 index 0000000000000000000000000000000000000000..30a9b7253f3e15e620d6e623967ca65ef44d5775 --- /dev/null +++ b/sysmonitor-1.3.2/src/monitor_thread.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved. + * sysmonitor licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. + * Description: monitors the running status of each subthread. 
+ * Author: zhangguangzhi + * Create: 2020-9-17 + */ + + +#ifndef MONITOR_THREAD_H +#define MONITOR_THREAD_H + +#include "common.h" + +#include + +#define RESTART_MONITOR "systemctl restart sysmonitor &> /dev/null" +#define THREAD_SYSALARM_HRAET_PERIOD 5 +#define POLL_TIME 1000 +#define CISTOM_PERIODIC_TIME 4 +#define CHECK_THREAD_FAILURE_NUM 3 /* default check failure num is 3, range is 2-10 */ +#define CHECK_THREAD_FAILURE_NUM_MAX 10 +#define CHECK_THREAD_FAILURE_NUM_MIN 2 + +typedef enum monitor_thread_item_type { + THREAD_PS_ITEM, + THREAD_FS_ITEM, /* check only once when start thread */ + THREAD_FILE_ITEM, /* no need to check */ + THREAD_DISK_ITEM, + THREAD_INODE_ITEM, + THREAD_CUSTOM_DAEMON_ITEM, + THREAD_CUSTOM_PERIODIC_ITEM, + THREAD_IO_DELAY_ITEM, + THREAD_SYSTEM_ITEM, + THREAD_SYS_EVENT_ITEM, /* no need to check */ + THREAD_ZOMBIE_ITEM, + THREAD_PS_PARALLEL_ITEM, /* add new item for check ps parallel thread status */ + THREAD_HEART_ITEM, /* add new item for check heart thread status */ + THREAD_MONITOR_ITEMS_CNT +} monitor_thread_item; + +typedef struct thread_status_s { + unsigned int count; /* thread feed dog count */ + unsigned int period; /* thread run period */ + unsigned int check_num; /* check thread count value, value increases by 1 every 2s */ + unsigned int check_failure_num; /* check thread count value, value increases by 1 every 2s */ + bool check_flag; /* check flag, need to check item or not */ +} thread_status; + +typedef struct thread_ps_parallel_status_s { + thread_status status; + pthread_t thread_id; /* check for ps parallel status */ + struct list_head list; +} thread_ps_parallel_status; + +int thread_status_struct_init(void); +int check_thread_status(void); +int feed_thread_status_count(monitor_thread_item item); +int set_thread_status_period(monitor_thread_item item, unsigned int period); +int set_thread_status_check_flag(monitor_thread_item item, bool flag); +int set_thread_check_value(monitor_thread_item item, bool flag, unsigned 
int period); +void clear_thread_status(monitor_thread_item item); +void clear_all_thread_status(void); +int set_ps_parallel_check_flag(monitor_thread_item item, bool flag, pthread_t id); +int feed_thread_ps_parallel_count(monitor_thread_item item, pthread_t id); +int set_thread_ps_parallel_period(monitor_thread_item item, pthread_t id, unsigned int period); +int set_ps_parallel_check_value(monitor_thread_item item, bool flag, pthread_t id, unsigned int period); +bool check_thread_monitor(const char *item, const char *value); +bool check_thread_failure_num(const char *item, const char *value); +void init_ps_parallel_head(void); + +#endif \ No newline at end of file diff --git a/sysmonitor-1.3.2/src/process.c b/sysmonitor-1.3.2/src/process.c new file mode 100644 index 0000000000000000000000000000000000000000..8ee41b12cb51036bdcbc4f5696ca7c8653248a8f --- /dev/null +++ b/sysmonitor-1.3.2/src/process.c @@ -0,0 +1,1073 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2016-2019. All rights reserved. + * sysmonitor licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. + * Description: process monitor, process memory usage monitor + * Author: xuchunmei + * Create: 2016-1-1 + */ +#include "process.h" + +#include +#include + +#include +#include "monitor_thread.h" + +#define OUT_BUF_LEN 30 +#define CMD_TIMEOUT 3 +#define SERIAL_MONITOR 0 +#define PARALLEL_MONITOR 1 +/* The thread name is restricted to 16 characters, including the terminating null byte ('\0'). 
*/ +#define THREAD_NAME_MAX_LENTH 16 +#define PS_CONFIG_DIR "/etc/sysmonitor/process" +#define DELAY_TIME 300 +#define DELAY_INTERVAL 3 +#define THREAD_TIME 200 +#define DEFAULT_USER "root" + +#define MIN_PROCESS_RESTART_TIMEOUT 30 +#define MAX_PROCESS_RESTART_TIMEOUT 300 +#define MIN_RECALL_PERIOD 0 +#define MAX_RECALL_PERIOD 1440 + +/* process recover failed, recall recover cmd period, unit: minute */ +#define PS_RECALL_PERIOD 1 + +#define PS_N1_RECALL_STEP 2 + +static struct list_head g_head; +static unsigned int g_serial_task_num; +static bool g_can_ps_exit = true; +static bool g_flag_process_delay = true; /* monitor process will check systemd status when flag sets true */ +static int g_process_alarm_supress_num = 5; +static int g_process_restart_tiemout = DEFALUT_PROCESS_RESTART_TIMEOUT; +static int g_process_recall_period = PS_RECALL_PERIOD; + +static void *ps_create_parallel_thread(void *arg); +typedef void (*PARSE_FUNC)(const char *item, const char *value, mtask *task); + +struct item_value_func { + char item[ITEM_LEN]; + void (*func)(const char *item, const char *value, mtask *task); +}; + +static void parse_monitor_period(const char *item, const char *value, mtask *task) +{ + (void)parse_value_int(item, value, &task->monitor_period); +} + +static void parse_monitor_mode(const char *item, const char *value, mtask *task) +{ + if (!strcmp(value, "parallel")) { + task->monitor_mode = PARALLEL_MONITOR; + } else if (!strcmp(value, "serial")) { + task->monitor_mode = SERIAL_MONITOR; + } else { + log_printf(LOG_INFO, "%s config illegal, check %s.", item, value); + } +} + +static void parse_monitor_check_as_param(const char *item, const char *value, mtask *task) +{ + (void)parse_value_bool(item, value, &task->chk_result_as_param); +} + +static void parse_user(const char *item, const char *value, mtask *task) +{ + (void)parse_value_string(item, value, task->user, MAX_PS_CONFIG_VALUE); +} + +static void parse_name(const char *item, const char *value, mtask *task) +{ 
+ (void)parse_value_string(item, value, task->name, MAX_PS_CONFIG_VALUE); +} + +static void parse_recover_command(const char *item, const char *value, mtask *task) +{ + (void)parse_value_string(item, value, task->recover_cmd, MAX_PS_CONFIG_VALUE); +} + +static void parse_monitor_command(const char *item, const char *value, mtask *task) +{ + (void)parse_value_string(item, value, task->monitor_cmd, MAX_PS_CONFIG_VALUE); +} + +static void parse_stop_command(const char *item, const char *value, mtask *task) +{ + (void)parse_value_string(item, value, task->stop_cmd, MAX_PS_CONFIG_VALUE); +} + +static void parse_alarm_command(const char *item, const char *value, mtask *task) +{ + (void)parse_value_string(item, value, task->alarm_cmd, MAX_PS_CONFIG_VALUE); +} + +static void parse_alarm_recover_command(const char *item, const char *value, mtask *task) +{ + (void)parse_value_string(item, value, task->alarm_recover_cmd, MAX_PS_CONFIG_VALUE); +} + +static void parse_use_cmd_alarm(const char *item, const char *value, mtask *task) +{ + (void)parse_value_bool(item, value, &task->use_cmd_alarm); +} + +static struct item_value_func g_ps_opt_array[] = { + { "MONITOR_PERIOD", parse_monitor_period }, + { "MONITOR_MODE", parse_monitor_mode }, + { "CHECK_AS_PARAM", parse_monitor_check_as_param }, + { "USER", parse_user }, + { "NAME", parse_name }, + { "RECOVER_COMMAND", parse_recover_command }, + { "MONITOR_COMMAND", parse_monitor_command }, + { "STOP_COMMAND", parse_stop_command }, + { "ALARM_COMMAND", parse_alarm_command }, + { "ALARM_RECOVER_COMMAND", parse_alarm_recover_command }, + { "USE_CMD_ALARM", parse_use_cmd_alarm } +}; + +static char *strtrim(char *config, int length) +{ + char *end = NULL; + char *sp = NULL; + char *ep = NULL; + int len; + + sp = config; + end = config + length - 1; + ep = end; + + while (sp <= end && (*sp == ' ' || *sp == '\t')) { + sp++; + } + while (ep >= sp && (*ep == ' ' || *ep == '\t' || *ep == '\n')) { + ep--; + } + len = (ep < sp) ? 
0 : (int)(ep - sp) + 1; + sp[len] = '\0'; + + return sp; +} + +static void free_monitor_list(void) +{ + mtask *n = NULL; + mtask *t = NULL; + + list_for_each_entry_safe(t, n, &g_head, list) { + list_del(&t->list); + free(t); + } +} + +static bool get_value_from_config(const char *config, char *value, unsigned int value_len) +{ + int ret; + size_t size; + + while (*config == ' ' || *config == '\t') { + config++; + } + + if (*config != '=') { + return true; + } + + config++; + while (*config == ' ' || *config == '\t') { + config++; + } + + if (*config == '\0') { + return true; + } + + size = strlen(config); + if (size >= value_len) { + log_printf(LOG_ERR, "get_value_from_config: config size should be less than %u.", value_len); + return false; + } + ret = strncpy_s(value, value_len, config, size); + if (ret != 0) { + log_printf(LOG_ERR, "get_value_from_config: strncpy_s value failed, ret: %d", ret); + return false; + } + + if (value[strlen(value) - 1] == '\n') { + value[strlen(value) - 1] = '\0'; + } + return true; +} + +static bool parse_line(mtask *task, char *config, int length) +{ + char value[MAX_PS_CONFIG_VALUE] = {0}; + char item[ITEM_LEN] = {0}; + unsigned int i; + int ret; + PARSE_FUNC func = NULL; + + config = strtrim(config, length); + /* # means comment */ + if (*config == '#') { + return true; + } + + if (check_conf_file_valid(config) == -1) { + return false; + } + + for (i = 0; i < array_size(g_ps_opt_array); i++) { + if (strstr(config, g_ps_opt_array[i].item) == config) { + config += strlen(g_ps_opt_array[i].item); + ret = strcpy_s(item, ITEM_LEN - 1, g_ps_opt_array[i].item); + if (ret) { + log_printf(LOG_ERR, "parse_line: strcpy_s item failed, ret: %d.", ret); + return false; + } + func = g_ps_opt_array[i].func; + break; + } + } + + /* Not match item, and return. 
*/ + if (strlen(item) == 0) { + return true; + } + + if (get_value_from_config(config, value, sizeof(value)) == false) { + return false; + } + + if (strlen(value) == 0) { + return true; + } + + if (func != NULL) { + func(item, value, task); + } + return true; +} + +/* + * check if process is exist + * for systemd service, check with "systemctl status *.service | grep -w Active:" + * for normal process, first use monitor_cmd to check, if check failed, + * check if the process binary is exist + */ +static int check_process_exist(const mtask *t, bool check_binary) +{ + int ret; + char tmp_cmd[MAX_PS_CONFIG_VALUE] = {0}; + bool systemd_service = false; + + if (check_binary) { + ret = snprintf_s(tmp_cmd, sizeof(tmp_cmd), sizeof(tmp_cmd) - 1, + "which %s > /dev/null 2>&1", t->name); + } else { + if (strstr(t->monitor_cmd, "systemctl") && strstr(t->monitor_cmd, "status")) { + ret = snprintf_s(tmp_cmd, sizeof(tmp_cmd), sizeof(tmp_cmd) - 1, + "%s | grep -w Active:", t->monitor_cmd); + systemd_service = true; + } else { + ret = snprintf_s(tmp_cmd, sizeof(tmp_cmd), sizeof(tmp_cmd) - 1, "%s", t->monitor_cmd); + } + } + + if (ret == -1) { + log_printf(LOG_ERR, "check_process_exist: snprintf_s for check command failed"); + return ret; + } + + ret = monitor_cmd(DEFAULT_USER_ID, tmp_cmd, POPEN_TIMEOUT, NULL, true); + if (ret == 0) { + log_printf(LOG_INFO, "add %s to process monitor list", t->name); + } else if (ret < 0) { + log_printf(LOG_INFO, "execute %s error %d", tmp_cmd, ret); + } else { + if (systemd_service) { + log_printf(LOG_INFO, "The service %s may not exist, please check", t->name); + } else { + if (!check_binary) { + return check_process_exist(t, true); + } + log_printf(LOG_INFO, "The executable file %s may not exist in PATH, please check", t->name); + } + } + + return ret; +} + +/* + * check service exist, check twice, check internal is 2 seconds + */ +static bool check_service_exist(const mtask *t) +{ + int ret; + int i; + struct timespec ts; + + ts.tv_nsec = 0; + 
ts.tv_sec = PROCESS_CHECK_TIME; + + for (i = 0; i < PROCESS_CHECK_NUM; i++) { + ret = check_process_exist(t, false); + if (ret == 0) { + return true; + } else if (ret < 0) { + (void)nanosleep(&ts, NULL); + } else { + return false; + } + } + log_printf(LOG_INFO, "add %s to process monitor list failed", t->name); + return false; +} + +static bool ps_check_config_illegal(mtask *task) +{ + int ret; + struct passwd *usrinfo = NULL; + + if (!strlen(task->name) || !strlen(task->user)) { + log_printf(LOG_INFO, "someitems is empty on process monitor! \"NAME:%s;USER:%s.\"", + task->name, task->user); + return false; + } + + if (!strlen(task->monitor_cmd)) { + ret = snprintf_s(task->monitor_cmd, MAX_PS_CONFIG_VALUE, MAX_PS_CONFIG_VALUE - 1, + "pgrep -f $(which %s)", task->name); + if (ret == -1) { + log_printf(LOG_INFO, "ps_check_config_illegal: snprintf for monitor cmd failed."); + return false; + } + } + + if (task->monitor_mode == PARALLEL_MONITOR && task->monitor_period == 0) { + log_printf(LOG_INFO, "ps_check_config_illegal: MONITOR_PERIOD should not be 0 when MONITOR_MODE is parallel."); + return false; + } + + /* Check the user exists in the system */ + usrinfo = getpwnam((const char *)task->user); + if (usrinfo == NULL) { + log_printf(LOG_ERR, "error: user %s not exsit in system", task->user); + return false; + } + + task->uid = usrinfo->pw_uid; + + return true; +} + +static bool ps_parse_config(FILE *file) +{ + mtask *t = NULL; + bool config_ok = false; + int ret; + char config[MAX_CONFIG] = {0}; + + t = malloc(sizeof(mtask)); + if (t == NULL) { + return false; + } + + ret = memset_s(t, sizeof(mtask), 0, sizeof(mtask)); + if (ret) { + log_printf(LOG_ERR, "ps_parse_config: memset_s mtask failed, ret: %d.", ret); + free(t); + return false; + } + + t->start = true; + t->monitor_mode = SERIAL_MONITOR; + /* Parallel monitor period is seted with global configure by default */ + t->monitor_period = (unsigned int)(get_thread_item_period(PS_ITEM)); + + for (;;) { + if 
(fgets(config, MAX_CONFIG, file)) { + if (parse_line(t, config, (int)strlen(config)) == false) { + free(t); + return false; + } + continue; + } + break; + } + + /* + * even we do not set this configuration, use root permission instead + */ + if (t->user[0] == '\0') { + ret = strcpy_s(t->user, MAX_PS_CONFIG_VALUE, DEFAULT_USER); + if (ret) { + log_printf(LOG_ERR, "ps_parse_config: strcpy_s user failed, ret: %d.", ret); + free(t); + return false; + } + } + + config_ok = ps_check_config_illegal(t); + if (config_ok == false) { + free(t); + return false; + } + + if (check_service_exist(t)) { + if (t->monitor_mode == PARALLEL_MONITOR) { + if (pthread_create(&t->thread_id, NULL, ps_create_parallel_thread, t)) { + log_printf(LOG_ERR, "create process monitor thread error [%d]", errno); + free(t); + return false; + } + } + if (t->monitor_mode == SERIAL_MONITOR) + g_serial_task_num++; + + list_add(&t->list, &g_head); + return true; + } + + free(t); + return true; +} + +static DIR *open_cfgdir(void) +{ + struct stat sb; + DIR *dir = NULL; + int ret; + + dir = opendir(PS_CONFIG_DIR); + if (dir == NULL) { + log_printf(LOG_WARNING, "%s not exist", PS_CONFIG_DIR); + return NULL; + } + + ret = memset_s(&sb, sizeof(sb), 0, sizeof(sb)); + if (ret != 0) { + log_printf(LOG_WARNING, "open_cfgdir: memset_s sb failed, ret: %d.", ret); + (void)closedir(dir); + return NULL; + } + if (stat(PS_CONFIG_DIR, &sb) < 0) { + log_printf(LOG_WARNING, "stat %s error [%d]", PS_CONFIG_DIR, errno); + (void)closedir(dir); + return NULL; + } + /* config file mode should be 700 */ + if (sb.st_mode & (S_IRWXG | S_IRWXO)) { + log_printf(LOG_WARNING, "%s: bad file mode", PS_CONFIG_DIR); + (void)closedir(dir); + return NULL; + } + if (chdir(PS_CONFIG_DIR) != 0) { + log_printf(LOG_WARNING, "chdir error [%d]", errno); + (void)closedir(dir); + return NULL; + } + + return dir; +} + +/* read the config file to load all task needed to be monitor */ +static bool load_task(void) +{ + struct dirent *direntp = NULL; + 
int config_fd = -1; + FILE *fp = NULL; + DIR *dir = NULL; + + init_list_head(&g_head); + + dir = open_cfgdir(); + if (dir == NULL) { + return false; + } + + g_serial_task_num = 0; + direntp = readdir(dir); + while (direntp != NULL) { + fp = open_cfgfile(direntp->d_name, &config_fd); + if (fp == NULL) { + direntp = readdir(dir); + continue; + } + if (ps_parse_config(fp) == false) { + log_printf(LOG_INFO, "parse %s error", direntp->d_name); + } + (void)fclose(fp); + direntp = readdir(dir); + } + + (void)closedir(dir); + return true; +} + +/* recover the task if the task is abnormal */ +static void recover_task(int chk_ret_code, const mtask *task) +{ + char recover_cmd[MAX_CONFIG]; + int ret; + + if (task->chk_result_as_param == true) { + ret = snprintf_s(recover_cmd, MAX_CONFIG, + MAX_CONFIG - 1, "%s %d", task->recover_cmd, chk_ret_code); + } else { + ret = snprintf_s(recover_cmd, MAX_CONFIG, MAX_CONFIG - 1, "%s", task->recover_cmd); + } + + if (ret == -1) { + log_printf(LOG_ERR, "recover_task: snprintf_s recover cmd failed."); + return; + } + + ret = monitor_cmd(task->uid, recover_cmd, g_process_restart_tiemout, + task->stop_cmd, false); + if (ret != 0) { + log_printf(LOG_INFO, "use \"%s\" recover failed,errno %d", recover_cmd, ret); + } +} + +static void check_task_report_alarm_by_cmd(mtask *task) +{ + int ret; + + if ((int)task->fail % g_process_alarm_supress_num != 0) { + return; + } + + if (!strlen(task->alarm_cmd)) { + log_printf(LOG_INFO, "%s is abnormal %d times, But alarm-cmd is null,will not alarm[warn]", + task->name, g_process_alarm_supress_num); + } else { + ret = monitor_cmd(task->uid, task->alarm_cmd, POPEN_TIMEOUT, NULL, false); + if (ret == 0) { + log_printf(LOG_INFO, "%s is abnormal %d times, use cmd \"%s\" to alarm", task->name, + g_process_alarm_supress_num, task->alarm_cmd); + } else { + log_printf(LOG_INFO, "%s is abnormal %d times, use cmd \"%s\" to alarm failed,errno [%d]", + task->name, g_process_alarm_supress_num, task->alarm_cmd, ret); + } 
+ } +} + +static void check_task_report_recover(mtask *task) +{ + int ret; + + task->fail = 0; + if (!strlen(task->alarm_recover_cmd)) { + log_printf(LOG_INFO, "%s is recovered, But recover-cmd is null,will not alarm[warn]", task->name); + } else { + ret = monitor_cmd(task->uid, task->alarm_recover_cmd, POPEN_TIMEOUT, NULL, false); + if (ret == 0) { + task->resend_recover_cmd = false; + log_printf(LOG_INFO, "%s is recovered, use \"%s\" to alarm", task->name, task->alarm_recover_cmd); + } else { + task->resend_recover_cmd = true; + log_printf(LOG_INFO, "%s is recovered, use \"%s\" to alarm faied, errno [%d]", + task->name, task->alarm_recover_cmd, ret); + } + } +} + +static int process_monitor_cmd(const mtask *task) +{ + char tmp[MAX_PS_CONFIG_VALUE] = {0}; + int ret; + bool bash_cmd = false; + + ret = snprintf_s(tmp, MAX_PS_CONFIG_VALUE, MAX_PS_CONFIG_VALUE - 1, "pgrep -f $(which %s)", task->name); + if (ret == -1) { + log_printf(LOG_ERR, "process_monitor_cmd: snprintf_s for monitor command failed."); + return -1; + } + + if (strcmp(task->monitor_cmd, tmp) == 0) { + bash_cmd = true; + } + + return monitor_cmd(task->uid, task->monitor_cmd, POPEN_TIMEOUT, NULL, bash_cmd); +} + +static void handle_task_monitor_failed_cmd(mtask *task, int monitor_ret) +{ + task->resend_recover_cmd = false; + task->fail++; + if (strlen(task->recover_cmd)) { + log_printf(LOG_WARNING, "%s is abnormal, check cmd return %d, use \"%s\" to recover", + task->name, monitor_ret, task->recover_cmd); + recover_task(monitor_ret, task); + if (!process_monitor_cmd(task)) { + check_task_report_recover(task); + return; + } + } else { + log_printf(LOG_WARNING, "%s is abnormal, check cmd return %d, recover cmd is null, will not recover", + task->name, monitor_ret); + } + + check_task_report_alarm_by_cmd(task); + + if (task->fail == 0xffffffff) { + task->fail = 0x1; + } +} + +static void handle_task_report_recover_cmd(mtask *task) +{ + if (task->resend_recover_cmd || task->start || task->fail > 0) { + 
check_task_report_recover(task); + } +} + +/* + * Check process status. + * Repo alarm by alarm cmd (configure by /etc/sysmonitor/process/XXX) when process is recovered + */ +static void check_task_repo_cmd(mtask *task) +{ + int ret; + + ret = process_monitor_cmd(task); + if (ret > 0) { + handle_task_monitor_failed_cmd(task, ret); + } else if (ret < 0) { + log_printf(LOG_ERR, "execute MONITOR_COMMAND[%s] error [%d]", task->monitor_cmd, ret); + task->fail++; + task->start = false; + return; + } else { + handle_task_report_recover_cmd(task); + } + + task->start = false; +} + +static void clean_task_abnormal_info(mtask *task) +{ + task->fail = 0; + task->time_count = 0; + task->n1_recall = 0; + task->n2_recall = 0; +} + +/* + * after recover failed for FAIL_NUM times, recover interval increases + * when task->fail < FAIL_NUM, recover every mon_period, defalut is 3s + * when task->fail = FAIL_NUM, report task abnormal alarm + * when task->fail > FAIL_NUM, use task->time_count to calculate recover period + * task->fail > FAIL_NUM: recover period increases like this: + * 2 mon_peirod (6s), 3 mon_period (9s), 4 mon_period (12s), 5 mon_period (15s) + * 6 mon_peirod (18s) + * after n1_recall, defalut is 1 minute, recover every minute. 
+ */ +static void handle_task_recover_extend(mtask *task, int monitor_ret) +{ + unsigned int mon_period; + + if (!strlen(task->recover_cmd)) { + log_printf(LOG_INFO, "%s is abnormal, check cmd return %d, recover cmd is null, will not recover", + task->name, monitor_ret); + return; + } + + if (task->monitor_mode == PARALLEL_MONITOR) { + mon_period = task->monitor_period; + } else { + mon_period = (unsigned int)get_thread_item_period(PS_ITEM); + } + + task->time_count++; + if ((task->time_count == (task->n1_recall + 1) * (task->n1_recall + PS_N1_RECALL_STEP) / PS_N1_RECALL_STEP) && + (task->time_count * mon_period <= (unsigned int)g_process_recall_period * 60)) { + log_printf(LOG_INFO, "%s is abnormal, check cmd return %d, use \"%s\" to recover", + task->name, monitor_ret, task->recover_cmd); + recover_task(monitor_ret, task); + task->n1_recall++; + } else if (task->time_count * mon_period >= (unsigned int)g_process_recall_period * 60 * (task->n2_recall + 1)) { + log_printf(LOG_INFO, "%s is abnormal, check cmd return %d, use \"%s\" to recover", + task->name, monitor_ret, task->recover_cmd); + recover_task(monitor_ret, task); + task->n2_recall++; + } +} + +static void handle_task_alarm(mtask *task, int monitor_ret) +{ + log_printf(LOG_INFO, "%s is abnormal, check cmd return %d", task->name, monitor_ret); + + task->fail++; + task->start = false; +} + +static void handle_task_report_recover(mtask *task) +{ + clean_task_abnormal_info(task); + task->start = false; + + log_printf(LOG_INFO, "%s is recovered", task->name); +} + +static void handle_task_check_failed_pri(mtask *task, int monitor_ret) +{ + int ret; + + task->fail++; + if (!strlen(task->recover_cmd)) { + log_printf(LOG_INFO, "%s is abnormal, check cmd return %d, recover cmd is null, will not recover", + task->name, monitor_ret); + return; + } + + log_printf(LOG_INFO, "%s is abnormal, check cmd return %d, use \"%s\" to recover", + task->name, monitor_ret, task->recover_cmd); + recover_task(monitor_ret, task); + 
ret = process_monitor_cmd(task); + if (ret == 0) { + if (task->start) { + handle_task_report_recover(task); + return; + } + clean_task_abnormal_info(task); + log_printf(LOG_INFO, "%s is recovered", task->name); + } +} + +static void handle_task_monitor_failed(mtask *task, int monitor_ret) +{ + if (task->fail < FAIL_NUM) { + handle_task_check_failed_pri(task, monitor_ret); + } else if (task->fail == FAIL_NUM) { + handle_task_alarm(task, monitor_ret); + } else { + handle_task_recover_extend(task, monitor_ret); + } +} + +/* + * Check process status. Repo alarm by sysalarm service when process is recoverd. + */ +static void check_task_repo_alarm(mtask *task) +{ + int ret; + + ret = process_monitor_cmd(task); + if (ret > 0) { + handle_task_monitor_failed(task, ret); + } else if (ret < 0) { + log_printf(LOG_ERR, "execute MONITOR_COMMAND[%s] error [%d]", task->monitor_cmd, ret); + } else if ((!ret && task->fail > 0) || (!ret && task->start)) { + handle_task_report_recover(task); + } +} + +/* check if the task is abnormal */ +static void check_task(mtask *task) +{ + if (task->use_cmd_alarm == false) { + check_task_repo_alarm(task); + } else { + check_task_repo_cmd(task); + } +} + +static int ps_parallel_check_task(long *exe_time, mtask *task, const char *tname) +{ + int ret; + struct timespec time_start; + struct timespec time_end; + + if (*exe_time >= (long)task->monitor_period) { + if (clock_gettime(CLOCK_MONOTONIC, &time_start) != 0) { + log_printf(LOG_ERR, "get clock time faild,monitor %s thread will exit", tname); + return RET_BREAK; + } + + check_task(task); + + if (clock_gettime(CLOCK_MONOTONIC, &time_end) != 0) { + log_printf(LOG_ERR, "get clock time faild,monitor %s thread will exit", tname); + return RET_BREAK; + } + + *exe_time = time_end.tv_sec - time_start.tv_sec; + if (*exe_time >= (long)task->monitor_period) { + ret = feed_thread_ps_parallel_count(THREAD_PS_PARALLEL_ITEM, task->thread_id); + if (ret == -1) { + return RET_BREAK; + } + return RET_CONTINUE; + } 
+ } + ret = feed_thread_ps_parallel_count(THREAD_PS_PARALLEL_ITEM, task->thread_id); + if (ret == -1) { + return RET_BREAK; + } + *exe_time += 1; + return RET_SUCCESS; +} + +static void *ps_create_parallel_thread(void *arg) +{ + char tname[THREAD_NAME_MAX_LENTH] = {0}; + mtask *task = arg; + long exe_time = (long)task->monitor_period; + int ret; + char *tmp = NULL; + unsigned int period; + + ret = pthread_detach(pthread_self()); + if (ret) { + log_printf(LOG_ERR, "ps_create_parallel_thread: pthread_detach failed, ret: %d.", ret); + return NULL; + } + + tmp = task->name; + if (strlen(task->name) >= THREAD_NAME_MAX_LENTH) { + tmp = task->name + strlen(task->name) - THREAD_NAME_MAX_LENTH + 1; + } + + ret = strncpy_s(tname, sizeof(tname), tmp, sizeof(tname) - 1); + if (ret) { + log_printf(LOG_ERR, "ps_create_parallel_thread: strncpy_s tname failed, ret: %d.", ret); + return NULL; + } + (void)prctl(PR_SET_NAME, tname); + period = POPEN_TIMEOUT * PARALLEL_POPEN_TIMEOUT_NUM + (unsigned int)g_process_restart_tiemout + 1; + ret = set_ps_parallel_check_value(THREAD_PS_PARALLEL_ITEM, true, task->thread_id, period); + if (ret == -1) { + log_printf(LOG_ERR, "ps create parallel thread: set check flag or period failed"); + return NULL; + } + + for (;;) { + if (get_thread_item_reload_flag(PS_ITEM) && g_can_ps_exit == true) { + break; + } + + ret = ps_parallel_check_task(&exe_time, task, tname); + if (ret == RET_BREAK) { + break; + } else if (ret == RET_SUCCESS) { + (void)sleep(1); + } + } + ret = set_ps_parallel_check_flag(THREAD_PS_PARALLEL_ITEM, false, task->thread_id); + if (ret == -1) { + log_printf(LOG_ERR, "ps create parallel thread exit, set check flag failed"); + } + task->thread_id = 0; + return NULL; +} + +/* run the queue to check all the task in the list */ +static void serial_monitor_runqueue(void) +{ + mtask *t = NULL; + mtask *n = NULL; + struct timespec ts; + + ts.tv_nsec = PROCESS_SLEEP_INTERVAL; + ts.tv_sec = 0; + + list_for_each_entry(t, &g_head, list) { + if 
(t->monitor_mode == SERIAL_MONITOR) { + check_task(t); + (void)nanosleep(&ts, NULL); + } + } + + list_for_each_entry_safe(t, n, &g_head, list) { + if (t->monitor_mode == PARALLEL_MONITOR && t->thread_id == 0) { + list_del(&t->list); + free(t); + } + } +} + +static bool reload_task(void) +{ + mtask *t = NULL; + bool all_parallel_thread_exit = false; + bool ret = false; + + /* + * Waiting for all parallel monitoring thread to exit. + * Notes: There is not needed to free list, pthread will exit and free list when receive reload-signal. + * Related func: ps_create_parallel_thread(). + */ + for (;;) { + all_parallel_thread_exit = true; + list_for_each_entry(t, &g_head, list) { + if (t->monitor_mode == PARALLEL_MONITOR && t->thread_id != 0) { + all_parallel_thread_exit = false; + (void)sleep(1); + break; + } + } + + if (all_parallel_thread_exit == true) { + break; + } + } + + set_thread_item_reload_flag(PS_ITEM, false); + /* Free serial-moitor list */ + free_monitor_list(); + + g_can_ps_exit = false; + ret = load_task(); + return ret; +} + +static void check_system_state(void) +{ + char *cmd = "systemctl is-system-running"; + char out[OUT_BUF_LEN] = {0}; + int i = 0; + + if (get_log_interface_flag() == DAEMON_SYSLOG && g_flag_process_delay == true) { + for (;;) { + (void)monitor_popen(cmd, out, sizeof(out) - 1, CMD_TIMEOUT, NULL); + if (strstr(out, "running") || strstr(out, "degraded")) { + break; + } + + (void)sleep(DELAY_INTERVAL); + i++; + if (i >= DELAY_TIME / DELAY_INTERVAL) { + log_printf(LOG_INFO, "wait system running over %d seconds. 
break!", DELAY_TIME); + break; + } + } + } +} + +static unsigned int get_process_check_period(void) +{ + unsigned int period; + period = POPEN_TIMEOUT * POPEN_TIMEOUT_NUM + PROCESS_CHECK_NUM * PROCESS_CHECK_TIME + + (unsigned int)get_thread_item_period(PS_ITEM) + (unsigned int)g_process_restart_tiemout + PROCESS_OTHER_TIME; + return period; +} + +static void *ps_monitor_start(void *arg) +{ + bool ret = false; + struct timespec ts; + int time_reduce; + unsigned int period; + int result; + + /* prctl does not return false if arg2 is right when arg1 is PR_SET_NAME */ + (void)prctl(PR_SET_NAME, "monitor-process"); + log_printf(LOG_INFO, "process monitor starting up"); + check_system_state(); + log_printf(LOG_INFO, "process monitor started"); + + set_thread_item_reload_flag(PS_ITEM, false); + period = get_process_check_period(); + result = set_thread_check_value(THREAD_PS_ITEM, true, period); + if (result == -1) { + log_printf(LOG_ERR, "process monitor set check flag or period error"); + return NULL; + } + log_printf(LOG_INFO, "process monitor, period:%u", period); + ret = load_task(); + for (;;) { + if (get_thread_item_reload_flag(PS_ITEM)) { + log_printf(LOG_INFO, "process monitor, start reload"); + ret = reload_task(); + if (ret == false) { + log_printf(LOG_INFO, "reload process monitor configuration failed"); + } + + g_can_ps_exit = true; + log_printf(LOG_INFO, "reload process monitor end"); + } + + if (ret == true) { + serial_monitor_runqueue(); + } + + /* time_reduce unit: ms */ + time_reduce = get_thread_item_period(PS_ITEM) * 1000 - THREAD_TIME * (int)g_serial_task_num; + if (time_reduce > 0) { + ts.tv_nsec = (time_reduce % 1000) * 1000 * 1000; + ts.tv_sec = time_reduce / 1000; + (void)nanosleep(&ts, NULL); + } + result = feed_thread_status_count(THREAD_PS_ITEM); + if (result == -1) { + log_printf(LOG_ERR, "process monitor feed error"); + break; + } + } + return NULL; +} + +void ps_monitor_init(void) +{ + pthread_t tid; + + if (pthread_create(&tid, NULL, 
ps_monitor_start, NULL)) { + log_printf(LOG_ERR, "create process monitor thread error [%d]", errno); + return; + } + set_thread_item_tid(PS_ITEM, tid); +} + +/* + * Parse the process monitor delay switch named by item. + * "off" clears g_flag_process_delay, "on" leaves it untouched; both return true. + * Any other value is logged and rejected with false. + */ +bool parse_process_monitor_delay(const char *item, const char *value) +{ + if (!strcmp(value, "off")) { + g_flag_process_delay = false; + } else if (strcmp(value, "on")) { + log_printf(LOG_INFO, "%s set error", item); + return false; + } + return true; +} + +/* + * Parse PROCESS_ALARM_SUPRESS_NUM, which must be an integer greater than 0. + * The global is written only after validation, so a rejected value such as "0" + * is never left behind as the divisor of "fail % g_process_alarm_supress_num". + * Returns true when the value was accepted, false otherwise. + */ +bool parse_process_alarm_supress(const char *value) +{ + int num = (int)strtol(value, NULL, STRTOL_NUMBER_BASE); + + if (check_int(value) == false || num <= 0) { + log_printf(LOG_INFO, "PROCESS_ALARM_SUPRESS_NUM set error"); + return false; + } + g_process_alarm_supress_num = num; + return true; +} + +/* + * Parse PROCESS_RESTART_TIMEOUT, which must lie within + * [MIN_PROCESS_RESTART_TIMEOUT, MAX_PROCESS_RESTART_TIMEOUT]. + * g_process_restart_tiemout keeps its previous value when the input is rejected. + * Returns true when the value was accepted, false otherwise. + */ +bool parse_process_restart_tiemout(const char *value) +{ + int timeout = (int)strtol(value, NULL, STRTOL_NUMBER_BASE); + + if (check_int(value) == false || + timeout < MIN_PROCESS_RESTART_TIMEOUT || + timeout > MAX_PROCESS_RESTART_TIMEOUT) { + log_printf(LOG_INFO, "PROCESS_RESTART_TIMEOUT set error, the value must between %d and %d", + MIN_PROCESS_RESTART_TIMEOUT, MAX_PROCESS_RESTART_TIMEOUT); + return false; + } + g_process_restart_tiemout = timeout; + return true; +} + +/* + * Parse PROCESS_RECALL_PERIOD, which must be greater than MIN_RECALL_PERIOD and + * not greater than MAX_RECALL_PERIOD. + * g_process_recall_period keeps its previous value when the input is rejected. + * Returns true when the value was accepted, false otherwise. + */ +bool parse_process_recall_period(const char *value) +{ + int period = (int)strtol(value, NULL, STRTOL_NUMBER_BASE); + + if (check_int(value) == false || period <= MIN_RECALL_PERIOD || + period > MAX_RECALL_PERIOD) { + log_printf(LOG_INFO, "PROCESS_RECALL_PERIOD set error"); + return false; + } + g_process_recall_period = period; + return true; +} diff --git a/sysmonitor-1.3.2/src/process.h b/sysmonitor-1.3.2/src/process.h new file mode 100644 index 0000000000000000000000000000000000000000..3742f9c45c9063837cc8d583753866b99a12a8b6 --- /dev/null +++ b/sysmonitor-1.3.2/src/process.h @@ -0,0 +1,57 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2016-2019. All rights reserved. + * sysmonitor licensed under the Mulan PSL v2.
+ * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. + * Description: define variable, structure and function for process monitor + * Author: xuchunmei + * Create: 2016-1-1 + */ +#ifndef PROCESS_H +#define PROCESS_H +#include + +#include "common.h" + +#define MAX_PS_CONFIG_VALUE 200 +#define PROCESS_CHECK_TIME 2 +#define PROCESS_CHECK_NUM 2 +#define POPEN_TIMEOUT_NUM 5 /* total number of timeouts in a check period */ +#define PARALLEL_POPEN_TIMEOUT_NUM 3 /* total number of timeouts in a check period */ +#define PROCESS_OTHER_TIME 100 /* extra time required, the value is the empirical value. */ + +typedef struct _mtask { + int monitor_mode; + uid_t uid; + char user[MAX_PS_CONFIG_VALUE]; + char name[MAX_PS_CONFIG_VALUE]; /* monitor task name */ + char recover_cmd[MAX_PS_CONFIG_VALUE]; /* recover command */ + char monitor_cmd[MAX_PS_CONFIG_VALUE]; /* monitor command */ + char stop_cmd[MAX_PS_CONFIG_VALUE]; /* stop command, when exec monitor_cmd timeout, exec stop_cmd */ + char alarm_cmd[MAX_PS_CONFIG_VALUE]; /* alarm command, when use_cmd_alarm, use this to alarm */ + char alarm_recover_cmd[MAX_PS_CONFIG_VALUE]; /* alarm recover command, when use_cmd_alarm, use this to recover */ + bool chk_result_as_param; /* monitor_cmd ret set as param of recover_cmd */ + bool resend_recover_cmd; /* flag used to mark resend recover-cmd */ + bool start; /* task is start or not */ + bool use_cmd_alarm; /* use alarm_cmd to alarm and alarm_recover_cmd to recover */ + pthread_t thread_id; + unsigned int monitor_period; /* only use at PARALLEL_MONITOR mode */ + unsigned int fail; /* fail times */ + unsigned int 
time_count; /* monitor period counts after recover failed 3 times */ + unsigned int n1_recall; /* recall times when timeout less than RECALL_PERIOD */ + unsigned int n2_recall; /* recall times when timeout exceed RECALL_PERIOD */ + struct list_head list; +} mtask; + +void ps_monitor_init(void); +bool parse_process_monitor_delay(const char *item, const char *value); +bool parse_process_alarm_supress(const char *value); +bool parse_process_restart_tiemout(const char *value); +bool parse_process_recall_period(const char *value); + +#endif diff --git a/sysmonitor-1.3.2/src/sys_event.c b/sysmonitor-1.3.2/src/sys_event.c new file mode 100644 index 0000000000000000000000000000000000000000..61701ec4ed147d4d82766f1f23f6763c8b22cb63 --- /dev/null +++ b/sysmonitor-1.3.2/src/sys_event.c @@ -0,0 +1,1017 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2019-2019. All rights reserved. + * sysmonitor licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. 
+ * Description: sysmonitor event monitor, handle msg from sysmonitor module + * Author: xuchunmei + * Create: 2019-3-21 + */ +#include "sys_event.h" + +#include +#include +#include +#include +#include "common.h" +#include "monitor_thread.h" + +#define SYS_EVENT_FD_PATH "/proc/sysmonitor" +#define PROC_FDTHRESHOLD "/proc/fdthreshold" +#define RROC_FDENABLE "/proc/fdenable" +#define SIGCATCHMAK "/sys/module/sysmonitor/parameters/sigcatchmask" +#define NETRATELIMIT_BURST "/sys/module/sysmonitor/parameters/netratelimit_burst" +#define NET_RATELIMIT_BURST_MAX 100 +#define PR_FD_ALARM_MAX 100 + +#define SIGNAL_COUNT 31 +#define SIG_NAME_LEN 12 +#define MAX_EVENT 20 +#define AUP 0x0001 +#define ADOWN 0x0002 +#define ANEWADDR 0x0004 +#define ADELADDR 0x0008 +#define IP_ADDR_LEN 64 +#define FIB_INFO_LEN 256 +#define CMD_LEN 100 + +typedef struct system_event_info_s { + bool monitor; + bool alarm; +} system_event_info; + +typedef struct _netask { + char dev[MAX_DEV]; + unsigned int event; + struct list_head list; +} netask; + +static int g_sys_event_fd = -1; +static unsigned long g_signo; +static unsigned long g_pr_alarm_ratio = 80; /* process fd usage alarm value, default 80 */ +static struct list_head g_net_head; + +#define FD_MONITOR_LOG_FILE "/var/log/fd_monitor.log" +#define FD_LOG_FILE_MAX_SIZE (512 * 1024) +char g_fd_log_path[LOG_FILE_LEN] = {0}; +static int g_fd_log_fd = -1; +static pthread_mutex_t g_fd_log_fd_mutex = PTHREAD_MUTEX_INITIALIZER; + +static system_event_info g_sys_event_info[SYS_EVENT_CNT]; + +static char g_signal_string[SIGNAL_COUNT][SIG_NAME_LEN] = { + "SIGHUP", "SIGINT", "SIGQUIT", "SIGILL", "SIGTRAP", "SIGABRT", "SIGBUS", "SIGFPE", + "SIGKILL", "SIGUSR1", "SIGSEGV", "SIGUSR2", "SIGPIPE", "SIGALRM", "SIGTERM", "SIGSTKFLT", + "SIGCHLD", "SIGCONT", "SIGSTOP", "SIGTSTP", "SIGTTIN", "SIGTTOU", "SIGURG", "SIGXCPU", + "SIGXFSZ", "SIGVTALRM", "SIGPROF", "SIGWINCH", "SIGIO", "SIGPWR", "SIGSYS", +}; + +static int g_net_ratelimit_burst = 5; +static int 
g_poll_timeout = POLL_TIMEOUT_DEFAULT; + +void set_poll_timeout(int timeout) +{ + if (timeout <= 0) { + return; + } + g_poll_timeout = timeout; +} + +void close_sys_event_fd(void) +{ + if (g_sys_event_fd >= 0) { + (void)close(g_sys_event_fd); + g_sys_event_fd = -1; + } +} + +bool parse_net_ratelimit_burst(const char *value) +{ + g_net_ratelimit_burst = (int)strtol(value, NULL, STRTOL_NUMBER_BASE); + if (check_int(value) == false || g_net_ratelimit_burst < 0 || + g_net_ratelimit_burst > NET_RATELIMIT_BURST_MAX) { + log_printf(LOG_INFO, "NET_RATE_LIMIT_BURST set error"); + return false; + } + return true; +} + +bool parse_fd_monitor_log_path(const char *value) +{ + int ret; + + if (!check_log_path(value)) { + return false; + } + + ret = strncpy_s(g_fd_log_path, LOG_FILE_LEN, value, LOG_FILE_LEN - 1); + if (ret) { + log_printf(LOG_ERR, "parse fd monitor log path failed."); + return false; + } + return true; +} + +static void close_fd_log_fd(void) +{ + if (g_fd_log_fd >= 0) { + (void)close(g_fd_log_fd); + g_fd_log_fd = -1; + } +} + +static int rewrite_log_file(int *fd) +{ + off_t file_size; + int new_fd = -1; + char file_name[MAX_TEMPSTR] = {0}; + char msg_buffer[MAX_LOG_LEN] = {0}; + int ret; + + ret = snprintf_s(file_name, sizeof(file_name), sizeof(file_name) - 1, "%s.old", g_fd_log_path); + if (ret == -1) { + log_printf(LOG_ERR, "rewrite_log_file: snprintf_s file_name failed"); + return -1; + } + ret = snprintf_s(msg_buffer, sizeof(msg_buffer), sizeof(msg_buffer) - 1, + "#################fd info####################\n%-23s%-12s%-24s%-12s\n", "TIME", "PID", "CMD", "FD"); + if (ret == -1) { + log_printf(LOG_ERR, "rewrite_log_file: snprintf_s msg_buffer failed"); + return -1; + } + + file_size = lseek(*fd, 0, SEEK_END); + if (file_size == 0) { + if (write(*fd, msg_buffer, strlen(msg_buffer)) == ERR) { + log_printf(LOG_ERR, "write log to %s failed,error num [%d]", g_fd_log_path, errno); + return -1; + } + } + + if (file_size >= FD_LOG_FILE_MAX_SIZE) { + (void)close(*fd); 
+ if (rename(g_fd_log_path, file_name) != 0) { + log_printf(LOG_ERR, "rename %s failed,err:%s.\n", g_fd_log_path, strerror(errno)); + } + new_fd = open(g_fd_log_path, O_CREAT | O_RDWR | O_APPEND | O_CLOEXEC, LOG_FILE_PERMISSION); + if (new_fd < 0) { + /* the old descriptor was already closed, do not hand a dangling fd back to the caller */ + *fd = -1; + return -1; + } + /* write title to log file */ + if (write(new_fd, msg_buffer, strlen(msg_buffer)) == ERR) { + log_printf(LOG_ERR, "write log to %s failed,error num [%d]", g_fd_log_path, errno); + (void)close(new_fd); + /* neither the old nor the new descriptor is open any more */ + *fd = -1; + return -1; + } + *fd = new_fd; + } + return 0; +} + +/* + * Prefix log_msg with a local timestamp and append it to the fd monitor log file. + * Under g_fd_log_fd_mutex, g_fd_log_fd is (re)opened when the log file is missing or + * when no descriptor is currently held; rewrite_log_file() then writes the title + * line or rotates the file before the message itself is written. + */ +static void write_log2file(const char *log_msg) +{ + int ret; + char msg[MAX_LOG_LEN + MAX_TEMPSTR] = {0}; + time_t cur_time; + struct tm ret_t; + struct tm *t = NULL; + + ret = memset_s(&ret_t, sizeof(ret_t), 0, sizeof(ret_t)); + if (ret) { + log_printf(LOG_ERR, "write_log2file: memset_s ret_t failed, ret: %d", ret); + return; + } + + cur_time = time((time_t)0); + + t = localtime_r(&cur_time, &ret_t); + if (t == NULL) { + return; + } + + ret = snprintf_s(msg, MAX_LOG_LEN + MAX_TEMPSTR, MAX_LOG_LEN + MAX_TEMPSTR - 1, + "%04d-%02d-%02d %02d:%02d:%02d %s\n", + ret_t.tm_year + TM_YEAR_BEGIN, ret_t.tm_mon + 1, ret_t.tm_mday, ret_t.tm_hour, + ret_t.tm_min, ret_t.tm_sec, log_msg); + if (ret == -1) { + log_printf(LOG_ERR, "write_log2file: snprintf_s aMsgBuffer failed"); + return; + } + + (void)pthread_mutex_lock(&g_fd_log_fd_mutex); + + /* AT_FDCWD: resolve a relative path against the working directory, not against fd 0 */ + ret = faccessat(AT_FDCWD, g_fd_log_path, F_OK, 0); + /* also open when the file exists but no descriptor is held yet (first write, or after a failed rotation) */ + if (ret != 0 || g_fd_log_fd < 0) { + close_fd_log_fd(); + g_fd_log_fd = open(g_fd_log_path, O_WRONLY | O_APPEND | O_CREAT | O_CLOEXEC, LOG_FILE_PERMISSION); + if (g_fd_log_fd < 0) { + (void)pthread_mutex_unlock(&g_fd_log_fd_mutex); + log_printf(LOG_ERR, "create file %s failed.", g_fd_log_path); + return; + } + } + + ret = rewrite_log_file(&g_fd_log_fd); + if (ret != 0) { + (void)pthread_mutex_unlock(&g_fd_log_fd_mutex); + return; + } + + if (write(g_fd_log_fd, msg, strlen(msg)) < 0) { + log_printf(LOG_ERR, "write log to %s failed.", g_fd_log_path); + } + +
(void)pthread_mutex_unlock(&g_fd_log_fd_mutex); +} + +/* + * printf-style front end of write_log2file(): format the arguments into a + * MAX_LOG_LEN buffer and append the result to the fd monitor log. + * Nothing is written when formatting fails. + */ +static void fd_log_printf(const char *format, ...) +{ + char msg_buffer[MAX_LOG_LEN] = {0}; + int ret; + + va_list arg_list; + + va_start(arg_list, format); + ret = vsnprintf_s(msg_buffer, sizeof(msg_buffer), sizeof(msg_buffer) - 1, format, arg_list); + if (ret == -1) { + log_printf(LOG_ERR, "fd_log_printf: vsnprintf_s msg_buffer failed"); + va_end(arg_list); + return; + } + + va_end(arg_list); + write_log2file(msg_buffer); +} + +/* + * Apply one SIGNAME="on|off" setting of the signal monitor configuration. + * item is looked up in g_signal_string; "on" sets bit <index> in g_signo, + * "off" is accepted without touching the mask. + * Returns false (after logging) for an unknown signal name or any other value. + */ +static bool parse_signal_value(const char *item, const char *value) +{ + unsigned int i; + + for (i = 0; i < SIGNAL_COUNT; i++) { + if (strcmp(item, g_signal_string[i]) != 0) { + continue; + } + if (!strcmp(value, "on")) { + g_signo |= (unsigned long)(1ul << i); + return true; + } + if (!strcmp(value, "off")) { + /* a disabled signal is a valid setting, not an unsupported one */ + return true; + } + log_printf(LOG_INFO, "%s set error", g_signal_string[i]); + return false; + } + + log_printf(LOG_INFO, "%s not supported", item); + return false; +} + +/* + * Apply one item of the process fd monitor configuration. + * PR_FD_ALARM must be an integer strictly between 0 and PR_FD_ALARM_MAX and is + * stored in g_pr_alarm_ratio; other items are accepted and ignored. + * Returns false for an empty value or an invalid PR_FD_ALARM. + */ +static bool parse_process_fd_value(const char *item, const char *value) +{ + long ratio; + + if (!strlen(value)) { + return false; + } + + if (!strcmp(item, "PR_FD_ALARM")) { + if (check_int(value) == false) { + return false; + } + /* reject everything outside (0, PR_FD_ALARM_MAX), not only the two boundary values */ + ratio = strtol(value, NULL, STRTOL_NUMBER_BASE); + if (ratio <= 0 || ratio >= PR_FD_ALARM_MAX) { + return false; + } + g_pr_alarm_ratio = (unsigned long)ratio; + return true; + } + + return true; +} + +struct config_parse_func { + char config_file[ITEM_LEN]; + bool (*parse_line_func)(const char *config); + void (*check_config)(bool parse_ok); +}; + +static bool parse_line(const char *config, int type) +{ + char item[ITEM_LEN] = {0}; + char value[VALUE_LEN] = {0}; + char *ptr = NULL; + unsigned int size; + int ret; + + while (*config == ' ' || *config == '\t') { + config++; + } + + if (*config == '#') { + return true; + } + + ptr = strstr(config, "=\""); + if (ptr != NULL) { + size = (unsigned int)(ptr - config); + if (size >= sizeof(item)) { + log_printf(LOG_ERR,
"parse_line: item length(%u) too long(>%lu).", size, sizeof(item)); + return false; + } + ret = strncpy_s(item, sizeof(item), config, size); + if (ret != 0) { + log_printf(LOG_ERR, "parse_line: strncpy_s item failed, ret: %d", ret); + return false; + } + get_value(config, size, value, sizeof(value)); + + if (type == SIGNAL) { + return parse_signal_value(item, value); + } else if (type == FDSTAT) { + return parse_process_fd_value(item, value); + } + } + return true; +} + +static bool parse_signal_line(const char *config) +{ + return parse_line(config, SIGNAL); +} + +static bool parse_process_fd_line(const char *config) +{ + return parse_line(config, FDSTAT); +} + +static netask *find_netask(const char *dev) +{ + netask *t = NULL; + + list_for_each_entry(t, &g_net_head, list) { + if (!strcmp(t->dev, dev)) { + return t; + } + } + return NULL; +} + +static void free_netask_list(void) +{ + netask *t = NULL; + netask *n = NULL; + + list_for_each_entry_safe(t, n, &g_net_head, list) { + list_del(&t->list); + free(t); + } +} + +static bool set_rtnetlink_group(const char *event, netask *e) +{ + if (!strcmp(event, "UP")) { + e->event = e->event | AUP; + } else if (!strcmp(event, "DOWN")) { + e->event = e->event | ADOWN; + } else if (!strcmp(event, "NEWADDR")) { + e->event = e->event | ANEWADDR; + } else if (!strcmp(event, "DELADDR")) { + e->event = e->event | ADELADDR; + } else if (!strlen(event)) { + e->event = e->event | AUP | ADOWN | ANEWADDR | ADELADDR; + } else { + log_printf(LOG_ERR, "event %s not supported", event); + return false; + } + + return true; +} + +static netask *alloc_for_netask(const char *dev, bool *find) +{ + netask *e = NULL; + int ret; + + e = find_netask(dev); + if (e != NULL) { + return e; + } + + *find = false; + e = malloc(sizeof(netask)); + if (e == NULL) { + log_printf(LOG_ERR, "malloc error"); + return NULL; + } + + ret = memset_s(e, sizeof(netask), 0, sizeof(netask)); + if (ret != 0) { + log_printf(LOG_ERR, "network parse_line: memset_s e 
failed, ret: %d", ret); + goto err; + } + + ret = strncpy_s(e->dev, sizeof(e->dev), dev, sizeof(e->dev) - 1); + if (ret != 0) { + log_printf(LOG_ERR, "network parse_line: strncpy_s e dev failed, ret: %d", ret); + goto err; + } + return e; + +err: + free(e); + e = NULL; + + return NULL; +} + +/* + * Copy the event name that starts at config (up to the end of the line) and add + * the matching event bits to e through set_rtnetlink_group(). + * find tells whether e was already in g_net_head: a new entry is linked into the + * list on success and freed on failure, an existing one is left in place. + * Returns true on success, false when the event is too long or not supported. + */ +static bool parse_and_set_event(const char *config, netask *e, bool find) +{ + char event[MAX_EVENT] = {0}; + int i = 0; + + while (*config != '\n' && *config != '\0') { + /* keep the last byte for the terminating NUL so event stays a valid string */ + if (i >= MAX_EVENT - 1) { + log_printf(LOG_ERR, "event too long"); + goto err; + } + event[i++] = *config; + config++; + } + + if (set_rtnetlink_group(event, e) == false) { + goto err; + } + + if (find == false) { + list_add(&e->list, &g_net_head); + } + + return true; + +err: + if (find == false) { + free(e); + e = NULL; + } + return false; +} + +/* + * Parse one line of the netcard monitor configuration: "<dev> [<event>]\n". + * Empty lines and lines starting with '#' are accepted and ignored. + * The device name is copied into a local buffer, its netask is looked up or + * allocated, and the remainder of the line is handed to parse_and_set_event(). + * Returns false for an empty or unterminated line, a too long device name, + * an allocation failure or an invalid event. + */ +static bool parse_network_line(const char *config) +{ + char dev[MAX_DEV] = {0}; + netask *e = NULL; + bool find = true; + int i; + + if (strlen(config) == 0) { + log_printf(LOG_ERR, "The length of netcard monitor configuration is 0"); + return false; + } + + if (config[strlen(config) - 1] != '\n') { + log_printf(LOG_ERR, "The configuration line of netcard monitor is too long"); + return false; + } + + while (*config == ' ' || *config == '\t') { + config++; + } + + if (*config == '#' || *config == '\n') { + return true; + } + + i = 0; + for (;;) { + if (*config == ' ' || *config == '\t' || *config == '\n') { + break; + } + /* keep the last byte for the terminating NUL so dev stays a valid string */ + if (i >= MAX_DEV - 1) { + log_printf(LOG_ERR, "netcard name too long (>16)"); + return false; + } + dev[i] = *config; + config++; + i++; + } + + e = alloc_for_netask(dev, &find); + if (e == NULL) { + return false; + } + + while (*config == ' ' || *config == '\t') { + config++; + } + + return parse_and_set_event(config, e, find); +} + +static void sig_set_mask(void) +{ + char cmd[CMD_LEN] = {0}; + int ret; + + ret = snprintf_s(cmd, sizeof(cmd), sizeof(cmd) - 1, "%lu", g_signo); + if (ret == -1) { + log_printf(LOG_ERR, "sig_set_mask: snprintf_s cmd failed"); + return; +
} + + ret = set_value_to_file(cmd, SIGCATCHMAK); + if (ret == -1) { + log_printf(LOG_ERR, "sig_set_mask: set_value_to_file failed"); + return; + } + + log_printf(LOG_INFO, "set signo mask %lu", g_signo); +} + +static void check_signal_config(bool parse_ok) +{ + log_printf(LOG_INFO, "signal monitor starting up"); + sig_set_mask(); +} + +static int chang_kernel_interface_value(void) +{ + int ret; + char buf[MAX_TEMPSTR] = {0}; + + ret = snprintf_s(buf, sizeof(buf), sizeof(buf) - 1, "%lu", g_pr_alarm_ratio); + if (ret == -1) { + log_printf(LOG_ERR, "chang_kernel_interface_value: snprintf_s buf failed"); + return -1; + } + + ret = set_value_to_file(buf, PROC_FDTHRESHOLD); + if (ret == -1) { + log_printf(LOG_ERR, "chang_kernel_interface_value: set_value_to_file failed"); + return -1; + } + + ret = set_value_to_file("1", RROC_FDENABLE); + if (ret == -1) { + log_printf(LOG_ERR, "chang_kernel_interface_value: set_value_to_file failed"); + return -1; + } + + return ret; +} + +static void check_process_fd_config(bool parse_ok) +{ + int ret; + + log_printf(LOG_INFO, "process fd num monitor starting up"); + if (parse_ok == false) { + log_printf(LOG_INFO, "[error]process fd num monitor: configuration illegal, will use defalut value"); + } + + ret = chang_kernel_interface_value(); + if (ret != 0) { + log_printf(LOG_INFO, "process fd num monitor: echo value to interface failed."); + } +} + +static void set_net_ratelimit(void) +{ + char buf[CMD_LEN] = {0}; + int ret; + + ret = snprintf_s(buf, CMD_LEN, CMD_LEN - 1, "%d", g_net_ratelimit_burst); + if (ret == -1) { + log_printf(LOG_ERR, "set_net_ratelimit: snprintf_s buf failed."); + return; + } + ret = set_value_to_file(buf, NETRATELIMIT_BURST); + if (ret == -1) { + log_printf(LOG_ERR, "set net ratelimit to kernel module failed: %d", ret); + return; + } + log_printf(LOG_INFO, "set net ratelimit %d", g_net_ratelimit_burst); +} + +static void check_network_config(bool parse_ok) +{ + log_printf(LOG_INFO, "netcard monitor starting up"); 
+ + if (parse_ok == false) { + log_printf(LOG_INFO, "read netcard monitor configuration error"); + } + + set_net_ratelimit(); +} + +static struct config_parse_func g_config_func[SYS_EVENT_CNT] = { + { "/etc/sysmonitor/signal", parse_signal_line, check_signal_config }, + { "/etc/sysmonitor/process_fd_conf", parse_process_fd_line, check_process_fd_config }, + { "/etc/sysmonitor/network", parse_network_line, check_network_config } +}; + +static void parse_sys_event_config(void) +{ + unsigned int i; + bool config_ok = false; + + for (i = 0; i < array_size(g_config_func); i++) { + if (g_sys_event_info[i].monitor == false) { + continue; + } + config_ok = parse_config(g_config_func[i].config_file, g_config_func[i].parse_line_func); + g_config_func[i].check_config(config_ok); + } +} + +static int handle_signo_callchain(const signo_mesg *sg_msg, char *palarm_msg, size_t size, unsigned int len) +{ + int ret; + + if (sg_msg->send_chain_comm[0][0] == '\0') { + ret = 0; + } else if (sg_msg->send_chain_comm[1][0] == '\0') { + ret = snprintf_s(palarm_msg + size - len, len, len - 1, + "(%s[%d])", + sg_msg->send_chain_comm[0], sg_msg->send_chain_pid[0]); + } else if (sg_msg->send_chain_comm[2][0] == '\0') { + ret = snprintf_s(palarm_msg + size - len, len, len - 1, + "(%s[%d]<-%s[%d])", + sg_msg->send_chain_comm[0], sg_msg->send_chain_pid[0], + sg_msg->send_chain_comm[1], sg_msg->send_chain_pid[1]); + } else if (sg_msg->send_chain_comm[3][0] == '\0') { + ret = snprintf_s(palarm_msg + size - len, len, len - 1, + "(%s[%d]<-%s[%d]<-%s[%d])", + sg_msg->send_chain_comm[0], sg_msg->send_chain_pid[0], + sg_msg->send_chain_comm[1], sg_msg->send_chain_pid[1], + sg_msg->send_chain_comm[2], sg_msg->send_chain_pid[2]); + } else { + ret = snprintf_s(palarm_msg + size - len, len, len - 1, + "(%s[%d]<-%s[%d]<-%s[%d]<-%s[%d])", + sg_msg->send_chain_comm[0], sg_msg->send_chain_pid[0], + sg_msg->send_chain_comm[1], sg_msg->send_chain_pid[1], + sg_msg->send_chain_comm[2], sg_msg->send_chain_pid[2], + 
sg_msg->send_chain_comm[3], sg_msg->send_chain_pid[3]); + } + return ret; +} + +static int handle_signo_msg(const sysmonitor_event_msg *event_msg) +{ + signo_mesg *sg_msg = NULL; + int ret; + unsigned int len; + char alarm_msg[PARAS_LEN]; + + sg_msg = (signo_mesg *)event_msg->msg; + if ((sg_msg->signo <= SIGNAL_COUNT) && (g_signo & (1ul << (sg_msg->signo - 1)))) { + + ret = snprintf_s((char *)alarm_msg, sizeof(alarm_msg), sizeof(alarm_msg) - 1, + "comm:%s exe:%s[%d](parent comm:%s parent exe:%s[%d]) send %s to comm:%s exe:%s[%d].", + sg_msg->send_comm, sg_msg->send_exe, sg_msg->send_pid, + sg_msg->send_parent_comm, sg_msg->send_parent_exe, sg_msg->send_parent_pid, + g_signal_string[sg_msg->signo - 1], sg_msg->recv_comm, sg_msg->recv_exe, sg_msg->recv_pid); + if (ret == -1 && alarm_msg[0] == '\0') { + log_printf(LOG_ERR, "sig_monitor_start: snprintf_s alarm_msg failed."); + return -1; + } + + len = sizeof(alarm_msg) - ret; + ret = handle_signo_callchain(sg_msg, alarm_msg, sizeof(alarm_msg), len); + if (ret == -1) { + log_printf(LOG_ERR, "sig_monitor_start: snprintf_s alarm_msg failed."); + return -1; + } + } + + return 0; +} + +static int handle_fdstat_msg(const sysmonitor_event_msg *event_msg) +{ + struct fdstat *fdinfo = NULL; + + fdinfo = (struct fdstat *)event_msg->msg; + log_printf(LOG_INFO, "pid [%d] cmd [%s] fd more than [%u]", fdinfo->pid, fdinfo->comm, fdinfo->total_fd_num); + fd_log_printf("%-12d%-24s%-12u", fdinfo->pid, fdinfo->comm, fdinfo->total_fd_num); + + return 0; +} + +struct net_event_handle_func { + int event; + int (*func)(const netmonitor_info *info); +}; + +static int handle_net_device_event(const netmonitor_info *info) +{ + unsigned int net_event = AUP | ADOWN; + netask *e = NULL; + + if (!list_empty(&g_net_head)) { + e = find_netask(info->dev); + if (e == NULL) { + return 0; + } + net_event = e->event; + } + + if ((info->event == UP) && (net_event & AUP)) { + log_printf(LOG_INFO, "%s: device is up, comm: %s[%d], parent comm: %s[%d]", + 
info->dev, info->comm, info->pid, info->parent_comm, info->parent_pid); + } else if ((info->event == DOWN) && (net_event & ADOWN)) { + log_printf(LOG_INFO, "%s: device is down, comm: %s[%d], parent comm: %s[%d]", + info->dev, info->comm, info->pid, info->parent_comm, info->parent_pid); + } + + return 0; +} + +static int handle_address_event(const netmonitor_info *info) +{ + char ip_addr[IP_ADDR_LEN] = {0}; + bool ipv6 = false; + netask *e = NULL; + unsigned int net_event = AUP | ADOWN | ANEWADDR | ADELADDR; + + if (!list_empty(&g_net_head)) { + e = find_netask(info->dev); + if (e == NULL) { + return 0; + } + net_event = e->event; + } + + if (info->event == NEWADDR6 || info->event == DELADDR6) { + ipv6 = true; + } + + if (!ipv6) { + if (!inet_ntop(AF_INET, (void *)&info->addr.in, ip_addr, sizeof(ip_addr))) { + log_printf(LOG_INFO, "convert ipv4 address failed"); + return -1; + } + } else { + if (!inet_ntop(AF_INET6, (void *)&info->addr.in6, ip_addr, sizeof(ip_addr))) { + log_printf(LOG_INFO, "convert ipv6 address failed"); + return -1; + } + } + + if ((info->event == NEWADDR || info->event == NEWADDR6) && (net_event & ANEWADDR)) { + log_printf(LOG_INFO, "%s: ip[%s] prefixlen[%d] is added, comm: %s[%d], parent comm: %s[%d]", + info->dev, ip_addr, info->plen, info->comm, info->pid, info->parent_comm, info->parent_pid); + } else if ((info->event == DELADDR || info->event == DELADDR6) && (net_event & ADELADDR)) { + log_printf(LOG_INFO, "%s: ip[%s] prefixlen[%d] is deleted, comm: %s[%d], parent comm: %s[%d]", + info->dev, ip_addr, info->plen, info->comm, info->pid, info->parent_comm, info->parent_pid); + } + + return 0; +} + +struct event_msg { + int event; + const char *name; +}; + +static const struct event_msg g_event_msg[] = { + { FIB_DEL, "Fib4 deleting" }, + { FIB_ADD, "Fib4 insert" }, + { FIB_REPLACE, "Fib4 replace" }, + { FIB_APPEND, "Fib4 append" }, + { FIB6_DEL, "Fib6 deleting" }, + { FIB6_ADD, "Fib6 insert" }, + { FIB6_REPLACE, "Fib6 replace" }, + { 
FIB6_APPEND, "Fib6 append" }, +}; + +static int handle_fib_event(const netmonitor_info *info) +{ + char ip_addr[IP_ADDR_LEN] = {0}; + char fib_info[FIB_INFO_LEN] = {0}; + unsigned int i; + int ret; + struct in_addr ipv4_addr = info->addr.in; + + if (!inet_ntop(AF_INET, (void *)&ipv4_addr, ip_addr, sizeof(ip_addr))) { + log_printf(LOG_INFO, "convert ipv4 address failed"); + return -1; + } + + for (i = 0; i < array_size(g_event_msg); i++) { + if (g_event_msg[i].event == info->event) { + ret = snprintf_s(fib_info, FIB_INFO_LEN, FIB_INFO_LEN - 1, + "%s table=%d %s/%d, comm: %s[%d], parent comm: %s[%d]", + g_event_msg[i].name, info->tb_id, ip_addr, info->plen, + info->comm, info->pid, info->parent_comm, info->parent_pid); + if (ret == -1 && fib_info[0] == '\0') { + log_printf(LOG_ERR, "snprintf_s msg failed, ret: %d", ret); + return -1; + } + log_printf(LOG_INFO, "%s", fib_info); + return 0; + } + } + + return 0; +} + +static int handle_fib6_event(const netmonitor_info *info) +{ + char ip_addr[IP_ADDR_LEN] = {0}; + char fib_info[FIB_INFO_LEN] = {0}; + unsigned int i; + int ret; + + if (!inet_ntop(AF_INET6, (void *)&info->addr.in6, ip_addr, sizeof(ip_addr))) { + log_printf(LOG_INFO, "convert ipv6 address failed"); + return -1; + } + + for (i = 0; i < array_size(g_event_msg); i++) { + if (g_event_msg[i].event == info->event) { + ret = snprintf_s(fib_info, FIB_INFO_LEN, FIB_INFO_LEN - 1, + "%s %s/%d, comm: %s[%d], parent comm: %s[%d]", + g_event_msg[i].name, ip_addr, info->plen, + info->comm, info->pid, info->parent_comm, info->parent_pid); + if (ret == -1 && fib_info[0] == '\0') { + log_printf(LOG_ERR, "snprintf_s msg failed, ret: %d", ret); + return -1; + } + log_printf(LOG_INFO, "%s", fib_info); + return 0; + } + } + + return 0; +} + +static const struct net_event_handle_func g_net_event_array[] = { + { UP, handle_net_device_event }, + { DOWN, handle_net_device_event }, + { DELADDR, handle_address_event }, + { NEWADDR, handle_address_event }, + { DELADDR6, 
handle_address_event }, + { NEWADDR6, handle_address_event }, + { FIB_DEL, handle_fib_event }, + { FIB_ADD, handle_fib_event }, + { FIB_REPLACE, handle_fib_event }, + { FIB_APPEND, handle_fib_event }, + { FIB6_DEL, handle_fib6_event }, + { FIB6_ADD, handle_fib6_event }, + { FIB6_REPLACE, handle_fib6_event }, + { FIB6_APPEND, handle_fib6_event } +}; + +static int handle_net_msg(const sysmonitor_event_msg *event_msg) +{ + netmonitor_info *info = NULL; + unsigned int i; + + info = (netmonitor_info *)event_msg->msg; + for (i = 0; i < array_size(g_net_event_array); i++) { + if (info->event == g_net_event_array[i].event) { + return g_net_event_array[i].func(info); + } + } + return 0; +} + +struct event_msg_handle_func { + int type; + int (*handler)(const sysmonitor_event_msg *msg); +}; + +static const struct event_msg_handle_func g_msg_handler[SYS_EVENT_CNT] = { + { SIGNAL, handle_signo_msg }, + { FDSTAT, handle_fdstat_msg }, + { NETWORK, handle_net_msg } +}; + +static int handle_sys_event_msg(const sysmonitor_event_msg *msg) +{ + unsigned int i; + + for (i = 0; i < array_size(g_msg_handler); i++) { + if (g_msg_handler[i].type == msg->type) { + if (g_sys_event_info[i].monitor == false) { + return 0; + } + return g_msg_handler[i].handler(msg); + } + } + return 0; +} + +static void *sys_event_monitor_start(void *arg) +{ + struct pollfd pollfd; + sysmonitor_event_msg msg; + int ret; + ssize_t read_ret; + + (void)prctl(PR_SET_NAME, "monitor-sysent"); + log_printf(LOG_INFO, "system event starting up"); + + init_list_head(&g_net_head); + + g_fd_log_fd = open(g_fd_log_path, O_WRONLY | O_APPEND | O_CREAT, LOG_FILE_PERMISSION); + if (g_fd_log_fd < 0) { + log_printf(LOG_INFO, "open %s failed, fd monitor info will not log, errno[%d]\n", g_fd_log_path, errno); + } + + g_sys_event_fd = open(SYS_EVENT_FD_PATH, O_CLOEXEC); + if (g_sys_event_fd < 0) { + set_thread_item_tid(SYS_EVENT_ITEM, 0); + log_printf(LOG_INFO, "sys_event: open %s failed, sysmonitor init module failed.", 
SYS_EVENT_FD_PATH); + goto err; + } + + pollfd.fd = g_sys_event_fd; + pollfd.events = POLLIN; + pollfd.revents = 0; + + for (;;) { + if (get_thread_item_reload_flag(SYS_EVENT_ITEM)) { + log_printf(LOG_INFO, "system event monitor, start reload"); + free_netask_list(); + parse_sys_event_config(); + set_thread_item_reload_flag(SYS_EVENT_ITEM, false); + } + + ret = poll(&pollfd, 1, g_poll_timeout); + if (ret < 0) { + log_printf(LOG_ERR, "poll from sys event fd error[%d]", ret); + break; + } else if (ret == 0) { + /* poll timeout */ + continue; + } + + read_ret = read(g_sys_event_fd, &msg, sizeof(msg)); + if (read_ret < 0) { + if (errno != EINTR) { + log_printf(LOG_INFO, "read from sys event fd error[%d]", errno); + break; + } + continue; + } + + ret = handle_sys_event_msg(&msg); + if (ret != 0) { + break; + } + } +err: + close_sys_event_fd(); + close_fd_log_fd(); + free_netask_list(); + return NULL; +} + +void sys_event_item_init_early(void) +{ + int i; + int ret; + + for (i = 0; i < SYS_EVENT_CNT; i++) { + g_sys_event_info[i].monitor = true; + g_sys_event_info[i].alarm = false; + } + + /* set default fd monitor log path */ + ret = strncpy_s(g_fd_log_path, LOG_FILE_LEN, FD_MONITOR_LOG_FILE, LOG_FILE_LEN - 1); + if (ret != 0) { + log_printf(LOG_ERR, "init fd monitor log path[%s] failed, ret: %d", FD_MONITOR_LOG_FILE, ret); + } +} + +void sys_event_item_init(void) +{ + int i; + + set_thread_item_monitor_flag(SYS_EVENT_ITEM, false); + for (i = 0; i < SYS_EVENT_CNT; i++) { + if (g_sys_event_info[i].monitor == true) { + set_thread_item_monitor_flag(SYS_EVENT_ITEM, true); + break; + } + } +} + +bool sys_event_monitor_parse(const char *item, const char *value, int type, bool monitor) +{ + return parse_value_bool(item, value, + monitor ? 
&g_sys_event_info[type].monitor : &g_sys_event_info[type].alarm); +} + +void sys_event_monitor_init(void) +{ + pthread_t tid; + + if (pthread_create(&tid, NULL, sys_event_monitor_start, NULL)) { + log_printf(LOG_ERR, "create sys event monitor thread error [%d]", errno); + return; + } + + set_thread_item_tid(SYS_EVENT_ITEM, tid); +} diff --git a/sysmonitor-1.3.2/src/sys_event.h b/sysmonitor-1.3.2/src/sys_event.h new file mode 100644 index 0000000000000000000000000000000000000000..5d14af9b15e3bf8ea292b13c17fbf8484cea8720 --- /dev/null +++ b/sysmonitor-1.3.2/src/sys_event.h @@ -0,0 +1,101 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2019-2019. All rights reserved. + * sysmonitor licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. 
+ * Description: define structure for sysmonitor event, this is same as sysmonitor module defined + * Author: xuchunmei + * Create: 2019-3-21 + */ +#ifndef SYS_EVENT_H +#define SYS_EVENT_H + +#include +#include +#include +#include +#include +#include "common.h" + +#define EVENT_MSG_SIZE 1024 +#define MAX_DEV 16 + +struct fdstat { + pid_t pid; + unsigned int total_fd_num; + char comm[TASK_COMM_LEN]; +}; + +#define CALL_CHAIN_NUM 4 + +typedef struct _signo_msg { + unsigned long signo; + pid_t send_pid; + char send_comm[TASK_COMM_LEN]; + char send_exe[NAME_MAX]; + pid_t send_parent_pid; + char send_parent_comm[TASK_COMM_LEN]; + char send_parent_exe[NAME_MAX]; + pid_t recv_pid; + char recv_comm[TASK_COMM_LEN]; + char recv_exe[NAME_MAX]; + pid_t send_chain_pid[CALL_CHAIN_NUM]; + char send_chain_comm[CALL_CHAIN_NUM][TASK_COMM_LEN]; +} signo_mesg; + +enum netmonitor_event { + UP, + DOWN, + DELADDR, + NEWADDR, + DELADDR6, + NEWADDR6, + FIB_DEL, + FIB_ADD, + FIB_REPLACE, + FIB_APPEND, + FIB6_DEL, + FIB6_ADD, + FIB6_REPLACE, + FIB6_APPEND +}; + +typedef struct _netmonitor_info { + int event; + pid_t pid; + char comm[TASK_COMM_LEN]; + pid_t parent_pid; + char parent_comm[TASK_COMM_LEN]; + char dev[MAX_DEV]; + int plen; + int tb_id; + union nf_inet_addr addr; +} netmonitor_info; + +enum sysmonitor_event_type { + SIGNAL, + FDSTAT, + NETWORK, + SYS_EVENT_CNT +}; + +typedef struct _sysmonitor_event_msg { + int type; + char msg[EVENT_MSG_SIZE]; +} sysmonitor_event_msg; + + +bool sys_event_monitor_parse(const char *item, const char *value, int type, bool monitor); +void close_sys_event_fd(void); +void sys_event_item_init_early(void); +void sys_event_item_init(void); +void sys_event_monitor_init(void); +bool parse_net_ratelimit_burst(const char *value); +bool parse_fd_monitor_log_path(const char *value); +void set_poll_timeout(int timeout); +#endif diff --git a/sysmonitor-1.3.2/src/sys_resources.c b/sysmonitor-1.3.2/src/sys_resources.c new file mode 100644 index 
0000000000000000000000000000000000000000..fdf52561e07d1e1a4d02a37adb2ff090f9ce6074 --- /dev/null +++ b/sysmonitor-1.3.2/src/sys_resources.c @@ -0,0 +1,2195 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2019-2019. All rights reserved. + * sysmonitor licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. + * Description: system resources monitor, include cpu, memory, process count, and system fd num + * Author: xuchunmei + * Create: 2019-2-14 + */ +#include "sys_resources.h" + +#include +#include +#include +#include +#include +#include "monitor_thread.h" + +#define MEM_STAT_TIMES 3 +#define PSCNT_ALARM_RATIO 90 +#define PSCNT_RESUME_RATIO 80 +#define PSCNT_ALARM_VALUE 1600 +#define PSCNT_RESUME_VALUE 1500 +#define CACHE_TWEAK_FACTOR 64 +#define SMLBUFSIZ (256 + CACHE_TWEAK_FACTOR) +#define trimz(x) ((tz = (long long)(x)) < 0 ? 
0 : tz) +#define SYS_RES_MONITOR_PERIOD_MIN 1 +#define MEM_INFO_BUFFER 4096 +#define ALARM_RATIO_DEFAULT 90.0 +#define RESUME_RATIO_DEFAULT 80.0 +#define MONITOR_PERIOD_DEFAULT 60 +#define SYSFD_ALARM_VALUE 80.0 +#define SYSFD_RESUME_VALUE 70.0 +#define SYSFD_MONITOR_PERIOD 600 +#define CPU_STAT_PERIOD 300 +/* monitor_count init value set to -1, when monitor startup, will exec item monitor */ +#define MONITOR_COUNT_INIT (-1) +#define PROC_SYSRQ_TRIGGER "/proc/sysrq-trigger" + +#ifndef isdigit +#define isdigit(c) ((c) >= '0' && (c) <= '9') +#endif +#define FD_TMP_BUF 400 +#define FD_PATH_MAX 50 +#define PS_CMD_MAX 300 +#define PID_BUF_LEN 30 +#define FD_BUF_LEN 50 +#define TOPFD_PROCESS_NUM 3 +#define TOP_PROCESS_THREADS_NUM 10 +#define TOP_PROCESS_THREADS_NUM_MAX 1024 +#define PSCNT_RET_TRUE 1 +#define PSCNT_RET_CONTINUE 0 +#define PSCNT_RET_ERROR (-1) +#define PSCNT_COUNT_FOR_PROCESS 1 +#define RATIO 100 + +#define FLOAT_VALUE_LEN 8 +#define DOMAIN_DESC_LEN 256 +#define MAX_DOMAIN_CPU_COUNT 256 +#ifdef __x86_64__ +#define DOMAIN_CPU_LEN 1024 /* x86 supports max 8192 CPUs */ +#else +#define DOMAIN_CPU_LEN 128 /* arm64 supports max 1024 CPUs */ +#endif +#define CHAR_BITS 8 +#define REPORT_CMD_LEN 160 +#define REPORT_CMD_TIMEOUT 60 +#define COMMON_ALARM_MAX 100 +#define COMMON_RESUME_MAX 100 + +typedef struct _ps_fd_info { + char cmd[PS_CMD_MAX + 1]; /* process name, max 300 characters */ + unsigned long num; /* num of process opened */ + char pid[PID_BUF_LEN]; /* pid of process */ +} ps_fd_info; + +typedef struct _ps_threads_info { + char cmd[PS_CMD_MAX + 1]; /* process name, max 300 characters */ + unsigned long num; /* num of process threads opened */ + char pid[PID_BUF_LEN]; /* pid of process */ +} ps_threads_info; + +typedef struct system_monitor_info_s { + float alarm_value; + float resume_value; + unsigned int monitor_period; + unsigned int stat_period; + int monitor_count; + bool monitor; + bool alarm; + bool config_ok; + bool status; + void 
(*monitor_func)(void); +} system_monitor_info; + +typedef struct str_cpu_s { + unsigned long long u, n, s, i, w, x, y, z; /* as represented in /proc/stat */ + unsigned long long u_sav, s_sav, n_sav, i_sav, w_sav, x_sav, y_sav, z_sav; /* in the order of our display */ +} str_cpu; + +typedef struct _cpu_domain { + struct _cpu_domain *next; + float alarm_value; + float resume_value; + unsigned int cpu_num; + bool status; + bool broken; /* when some cpu is offline, set broken to true */ + bool first_collected; /* cpustat has been refreshed for the first time */ + unsigned char desc[DOMAIN_DESC_LEN]; /* store DOMAIN config */ + unsigned char cpus[DOMAIN_CPU_LEN]; /* store domain cpu id by bit */ + str_cpu cpustat; +} cpu_domain; + +struct item_value_func { + char item[ITEM_LEN]; + bool (*func)(const char *item, const char *value, int type); +}; + +struct config_parse_func { + char config_file[ITEM_LEN]; + bool (*parse_line_func)(const char *config); + bool (*check_config)(bool parse_ok); +}; + +struct mem_info { + unsigned long total; + unsigned long cached; + unsigned long sreclaimable; + unsigned long free; + unsigned long buffers; + unsigned long shmem; +}; + +static cpu_domain *g_domain_head = NULL; +static cpu_domain *g_new_domain_list = NULL; +static bool g_monitor_domain_flag = false; /* deal with change of monitor mode */ +static bool g_has_reported_flag = false; /* report_cmd execute most once during one monitor */ +static char g_cpu_report_cmd[REPORT_CMD_LEN] = {0}; + +static system_monitor_info g_system_monitor_info[SYSTEM_MONITOR_ITEM_CNT]; +static unsigned int g_sys_res_period; +static struct mem_info g_mem_info; +static float g_pscnt_alarm_ratio = PSCNT_ALARM_RATIO; +static float g_pscnt_resume_ratio = PSCNT_RESUME_RATIO; +static unsigned int g_pscnt_threads_top_num = TOP_PROCESS_THREADS_NUM; +static bool g_pscnt_threads_status_flag = false; +static bool g_pscnt_threads_create_flag = true; +static ps_threads_info *g_top_process_threads = NULL; + +static 
void get_ps_cmd(char *cmd, const char *pid, size_t cmd_len); +static int get_file_nr(unsigned long *file_nr, unsigned long *file_max); +/* + * check config before parse + * for pscnt and system fd num, alarm and resume should be int + */ +static bool check_before_parse(const char *value, int type) +{ + if (type == PSCNT || type == SYSTEM_FDCNT) { + return check_int(value); + } + return true; +} + +static bool parse_alarm(const char *item, const char *value, int type) +{ + return check_before_parse(value, type) && + parse_value_float(item, value, &g_system_monitor_info[type].alarm_value); +} + +static bool parse_resume(const char *item, const char *value, int type) +{ + return check_before_parse(value, type) && + parse_value_float(item, value, &g_system_monitor_info[type].resume_value); +} + +static bool parse_monitor_period(const char *item, const char *value, int type) +{ + return parse_value_int(item, value, &g_system_monitor_info[type].monitor_period); +} + +static bool parse_stat_period(const char *item, const char *value, int type) +{ + return parse_value_int(item, value, &g_system_monitor_info[type].stat_period); +} + +static bool parse_alarm_ratio(const char *item, const char *value, int type) +{ + if (type != PSCNT) { + return false; + } + + return parse_value_float(item, value, &g_pscnt_alarm_ratio); +} + +static bool parse_resume_ratio(const char *item, const char *value, int type) +{ + if (type != PSCNT) { + return false; + } + + return parse_value_float(item, value, &g_pscnt_resume_ratio); +} + +static bool parse_threads_top_num(const char *item, const char *value, int type) +{ + if (type != PSCNT) { + return false; + } + + return parse_value_int(item, value, &g_pscnt_threads_top_num); +} + +static bool clear_report_cmd(void) +{ + int ret = 0; + + ret = memset_s(g_cpu_report_cmd, REPORT_CMD_LEN, 0, REPORT_CMD_LEN); + if (ret) { + log_printf(LOG_ERR, "clear_report_cmd: memset_s g_cpu_report_cmd failed, ret: %d", ret); + return false; + } + + return true; 
+} + +static bool parse_report_command(const char *item, const char *value, int type) +{ + if (type != CPU) { + return false; + } + + if (clear_report_cmd() == false) { + return false; + } + + if (strlen(value) == 0) { + return true; + } + + if (check_conf_file_valid(value) == -1) { + return false; + } + + return parse_value_string(item, value, g_cpu_report_cmd, REPORT_CMD_LEN); +} + +static void free_domain_list(cpu_domain **domainlist) +{ + cpu_domain *t = NULL; + cpu_domain *domain = NULL; + + if (*domainlist == NULL) { + return; + } + + domain = *domainlist; + t = domain; + while (t->next != NULL) { + domain = t->next; + free(t); + t = domain; + } + free(domain); + *domainlist = NULL; +} + +static bool domain_add(const cpu_domain *add_domain, cpu_domain **domain_list) +{ + int ret = 0; + cpu_domain *domain = NULL; + + if (add_domain == NULL) { + return false; + } + + domain = malloc(sizeof(cpu_domain)); + if (domain == NULL) { + log_printf(LOG_ERR, "malloc cpu_domain error [%d]", errno); + return false; + } + + ret = memcpy_s(domain, sizeof(cpu_domain), add_domain, sizeof(cpu_domain)); + if (ret != 0) { + log_printf(LOG_ERR, "domain_add: memcpy_s domain failed, ret: %d", ret); + free(domain); + return false; + } + + domain->next = NULL; + + if (*domain_list == NULL) { + *domain_list = domain; + } else { + domain->next = *domain_list; + *domain_list = domain; + } + return true; +} + +static void free_set_domain_head(void) +{ + cpu_domain *t = NULL; + + free_domain_list(&g_domain_head); + g_domain_head = g_new_domain_list; + g_new_domain_list = NULL; + + if (g_system_monitor_info[CPU].monitor && g_system_monitor_info[CPU].config_ok) { + t = g_domain_head; + while (t != NULL) { + log_printf(LOG_INFO, "[cpu monitor]domain:%s alarm:%4.1f%% resume:%4.1f%% has monitored", + t->desc, t->alarm_value, t->resume_value); + t = t->next; + } + } +} + +static bool check_and_set_cpuid(cpu_domain *domain, const unsigned int cpu) +{ + unsigned int index = 0; + unsigned int 
offset = 0; + int nprocs = get_nprocs_conf(); + cpu_domain *t = NULL; + + if (nprocs < 0) { + log_printf(LOG_ERR, "failed to get number of system processors"); + return false; + } + + index = cpu / CHAR_BITS; + /* check cpu id valid */ + if (cpu >= (unsigned int)nprocs || index >= DOMAIN_CPU_LEN) { + log_printf(LOG_ERR, "invalid CPU ID: %u", cpu); + return false; + } + + /* check cpu id repeated */ + offset = CHAR_BITS - cpu % CHAR_BITS - 1; + if ((domain->cpus[index] >> offset) & 1) { + log_printf(LOG_ERR, "repeated CPU ID %u in DOMAIN %s", cpu, domain->desc); + return false; + } + + t = g_new_domain_list; + while (t != NULL) { + if ((t->cpus[index] >> offset) & 1) { + log_printf(LOG_ERR, "repeated CPU ID %u in DOMAIN %s", cpu, domain->desc); + return false; + } + t = t->next; + } + + domain->cpus[index] |= (unsigned char)((unsigned int)1 << offset); + domain->cpu_num++; + if (domain->cpu_num > MAX_DOMAIN_CPU_COUNT) { + log_printf(LOG_ERR, "cpu num exceeds %d in one domain", MAX_DOMAIN_CPU_COUNT); + return false; + } + return true; +} + +static bool get_domain_cpuid_dash(cpu_domain *domain, char *domain_value, unsigned int size) +{ + char *p_cpu = NULL; + char *p_save = NULL; + unsigned int cpu_start; + unsigned int cpu_end; + unsigned int i; + + if (size == 0) { + return false; + } + + p_cpu = strtok_r(domain_value, "-", &p_save); + if (p_cpu != NULL) { + if (!parse_value_int("DOMAIN", p_cpu, &cpu_start) || !parse_value_int("DOMAIN", p_save, &cpu_end)) { + return false; + } + if (cpu_start >= cpu_end) { + log_printf(LOG_ERR, "invalid CPU range: %u-%u", cpu_start, cpu_end); + return false; + } + /* first check border to increase efficiency */ + if (!check_and_set_cpuid(domain, cpu_start) || !check_and_set_cpuid(domain, cpu_end)) { + return false; + } + + for (i = cpu_start + 1; i < cpu_end; i++) { + if (!check_and_set_cpuid(domain, i)) { + return false; + } + } + return true; + } + + log_printf(LOG_ERR, "DOMAIN config illegal, check %s.", domain_value); + return 
false; +} + +static bool get_domain_cpuid_comma(cpu_domain *domain, char *domain_value, unsigned int size) +{ + unsigned int cpu = 0; + char *p_cpu = NULL; + char *p_save = NULL; + + if (size == 0) { + return false; + } + + p_cpu = strtok_r(domain_value, ",", &p_save); + if (p_cpu == NULL) { + log_printf(LOG_ERR, "DOMAIN config illegal, check %s.", domain_value); + return false; + } + + while (p_cpu != NULL) { + /* contains X-Y */ + if (strstr(p_cpu, "-") != NULL) { + if (!get_domain_cpuid_dash(domain, p_cpu, (unsigned int)strlen(p_cpu))) { + return false; + } + p_cpu = strtok_r(NULL, ",", &p_save); + continue; + } + /* only contains N1,N2 */ + if (!parse_value_int("DOMAIN", p_cpu, &cpu) || !check_and_set_cpuid(domain, cpu)) { + return false; + } + p_cpu = strtok_r(NULL, ",", &p_save); + } + return true; +} + +static bool regs_check_domain(const char *domain_value) +{ + regex_t reg; + int flags = REG_EXTENDED; + const char *pattern = "^[0-9]+(-[0-9]+)?(,[0-9]+(-[0-9]+)?)*$"; + bool ret = true; + + if (regcomp(®, pattern, flags)) { + return false; + } + + if (regexec(®, domain_value, 0, NULL, 0)) { + log_printf(LOG_ERR, "DOMAIN config illegal, check %s.", domain_value); + ret = false; + } + + regfree(®); + return ret; +} + +static bool get_domain_cpuid(cpu_domain *domain, char *domain_value, unsigned int size) +{ + int ret = 0; + + ret = strncpy_s((char *)domain->desc, DOMAIN_DESC_LEN, domain_value, DOMAIN_DESC_LEN - 1); + if (ret != 0) { + log_printf(LOG_ERR, "get_domain_cpuid: strncpy_s domain_value failed, ret: %d", ret); + return false; + } + + domain->cpu_num = 0; + if (regs_check_domain(domain_value) == false) { + return false; + } + + /* parse format "N1,N2,N3,X-Y" */ + return get_domain_cpuid_comma(domain, domain_value, size); +} + +static bool get_domain_alarm(cpu_domain *domain, const char *config) +{ + char key[FLOAT_VALUE_LEN] = {0}; + int ret = 0; + + ret = get_string(config, "ALARM=\"", key, sizeof(key), "ALARM"); + if (ret > 0) { + domain->alarm_value 
= ALARM_RATIO_DEFAULT; + return true; + } + if (ret < 0) { + return false; + } + + if (!parse_value_float(NULL, key, &(domain->alarm_value))) { + log_printf(LOG_ERR, "invalid CPU alarm value: %s", key); + return false; + } + return true; +} + +static bool get_domain_resume(cpu_domain *domain, const char *config) +{ + char key[FLOAT_VALUE_LEN] = {0}; + int ret = 0; + + ret = get_string(config, "RESUME=\"", key, sizeof(key), "RESUME"); + if (ret > 0) { + domain->resume_value = RESUME_RATIO_DEFAULT; + return true; + } + if (ret < 0) { + return false; + } + + if (!parse_value_float(NULL, key, &(domain->resume_value))) { + log_printf(LOG_ERR, "invalid CPU resume value: %s", key); + return false; + } + return true; +} + +static bool parse_domainline(char *domain_value, unsigned int size, cpu_domain *domain, const char *config) +{ + if (get_domain_cpuid(domain, domain_value, size) == false) { + return false; + } + + if (get_domain_alarm(domain, config) == false) { + return false; + } + + if (get_domain_resume(domain, config) == false) { + return false; + } + + if (domain->alarm_value < 0 || domain->alarm_value > COMMON_ALARM_MAX || domain->resume_value < 0 || + domain->resume_value > COMMON_RESUME_MAX || domain->resume_value >= domain->alarm_value) { + log_printf(LOG_ERR, "invalid CPU alarm/resume value: %4.1f%%,%4.1f%%", domain->alarm_value, + domain->resume_value); + return false; + } + + /* keep remaining domain status before reload */ + cpu_domain *t = g_domain_head; + while (t != NULL) { + if (memcmp(domain->cpus, t->cpus, DOMAIN_CPU_LEN) == 0) { + domain->status = t->status; + domain->cpustat = t->cpustat; + domain->first_collected = t->first_collected; + domain->broken = t->broken; + break; + } + t = t->next; + } + + return true; +} + +static bool parse_domain(const char *config, char *domain_value, unsigned int size, int type) +{ + int ret; + cpu_domain domain_tmp; + + if (type != CPU) { + return false; + } + + ret = memset_s(&domain_tmp, sizeof(domain_tmp), 0, 
sizeof(domain_tmp)); + if (ret != 0) { + log_printf(LOG_ERR, "parse_domain: memset_s domain_tmp failed, ret: %d", ret); + return false; + } + + if (!parse_domainline(domain_value, size, &domain_tmp, config)) { + return false; + } + + return domain_add(&domain_tmp, &g_new_domain_list); +} + +static const struct item_value_func g_item_array[] = { + { "ALARM", parse_alarm }, + { "MONITOR_PERIOD", parse_monitor_period }, + { "PERIOD", parse_monitor_period }, + { "RESUME", parse_resume }, + { "STAT_PERIOD", parse_stat_period }, + { "SYS_FD_ALARM", parse_alarm }, + { "SYS_FD_RESUME", parse_resume }, + { "SYS_FD_PERIOD", parse_monitor_period }, + { "ALARM_RATIO", parse_alarm_ratio }, + { "RESUME_RATIO", parse_resume_ratio }, + { "SHOW_TOP_PROC_NUM", parse_threads_top_num }, + { "REPORT_COMMAND", parse_report_command } +}; + +static bool parse_line(const char *config, int type) +{ + char item[ITEM_LEN] = {0}; + char value[MAX_CONFIG] = {0}; + char *ptr = NULL; + unsigned int size; + int ret; + unsigned int i; + + while (*config == ' ' || *config == '\t') { + config++; + } + + if (*config == '#') { + return true; + } + + ptr = strstr(config, "=\""); + if (ptr != NULL) { + size = (unsigned int)(ptr - config); + if (size >= sizeof(item)) { + log_printf(LOG_ERR, "parse_line: item length(%u) too long(>%u).", size, sizeof(item)); + return false; + } + ret = strncpy_s(item, sizeof(item), config, size); + if (ret != 0) { + log_printf(LOG_ERR, "parse_line: strncpy_s item failed, ret: %d", ret); + return false; + } + get_value(config, size, value, sizeof(value)); + if (!strlen(value)) { + return true; + } + for (i = 0; i < array_size(g_item_array); i++) { + if (strcmp(item, g_item_array[i].item) == 0 && g_item_array[i].func != NULL) { + return g_item_array[i].func(item, value, type); + } + } + if (strcmp(item, "DOMAIN") == 0) { + return parse_domain(config, value, sizeof(value), type); + } + } + return true; +} + +static bool parse_cpu_line(const char *config) +{ + return 
parse_line(config, CPU); +} + +static bool parse_mem_line(const char *config) +{ + return parse_line(config, MEM); +} + +static bool parse_sysfd_line(const char *config) +{ + return parse_line(config, SYSTEM_FDCNT); +} + +static bool parse_pscnt_line(const char *config) +{ + return parse_line(config, PSCNT); +} + +static bool check_config_common(int type) +{ + if (g_system_monitor_info[type].alarm_value < 0 || + g_system_monitor_info[type].alarm_value > COMMON_ALARM_MAX || + g_system_monitor_info[type].resume_value < 0 || + g_system_monitor_info[type].resume_value > COMMON_RESUME_MAX || + g_system_monitor_info[type].resume_value >= g_system_monitor_info[type].alarm_value || + g_system_monitor_info[type].monitor_period == 0) { + return false; + } + return true; +} + +static void log_item_info(const char *item, bool config_ok) +{ + if (config_ok == false) { + log_printf(LOG_INFO, "%s monitor: configuration illegal", item); + } else { + log_printf(LOG_INFO, "%s monitor starting up", item); + } +} + +static bool check_cpu_config(bool parse_ok) +{ + bool ret = parse_ok && check_config_common(CPU) && (g_system_monitor_info[CPU].stat_period != 0); + + log_item_info("cpu", ret); + return ret; +} + +static bool check_mem_config(bool parse_ok) +{ + bool ret = parse_ok && check_config_common(MEM); + + log_item_info("memory", ret); + return ret; +} + +static bool check_pscnt_ratio(void) +{ + if (g_pscnt_alarm_ratio < 0 || g_pscnt_alarm_ratio > COMMON_ALARM_MAX || g_pscnt_resume_ratio < 0 || + g_pscnt_resume_ratio > COMMON_RESUME_MAX || g_pscnt_resume_ratio >= g_pscnt_alarm_ratio) { + return false; + } + return true; +} + +static bool check_pscnt_config(bool parse_ok) +{ + if (parse_ok == false || + g_system_monitor_info[PSCNT].alarm_value < 0 || + g_system_monitor_info[PSCNT].resume_value < 0 || + g_system_monitor_info[PSCNT].resume_value >= g_system_monitor_info[PSCNT].alarm_value || + g_system_monitor_info[PSCNT].monitor_period == 0 || !check_pscnt_ratio() || + 
g_pscnt_threads_top_num > TOP_PROCESS_THREADS_NUM_MAX) { + log_item_info("process count", false); + return false; + } + log_item_info("process count", true); + return true; +} + +static void set_default_sysfd_config(void) +{ + g_system_monitor_info[SYSTEM_FDCNT].alarm_value = SYSFD_ALARM_VALUE; + g_system_monitor_info[SYSTEM_FDCNT].resume_value = SYSFD_RESUME_VALUE; + g_system_monitor_info[SYSTEM_FDCNT].monitor_period = SYSFD_MONITOR_PERIOD; +} + +static bool check_sysfd_config(bool parse_ok) +{ + bool ret = false; + + ret = parse_ok && check_config_common(SYSTEM_FDCNT); + if (ret == false) { + set_default_sysfd_config(); + log_printf(LOG_INFO, "[error]system fd num monitor: configuration illegal,use default value"); + } + log_item_info("system fd num", true); + return true; +} + +static const struct config_parse_func g_config_func[SYSTEM_MONITOR_ITEM_CNT] = { + { "/etc/sysmonitor/cpu", parse_cpu_line, check_cpu_config }, + { "/etc/sysmonitor/memory", parse_mem_line, check_mem_config }, + { "/etc/sysmonitor/pscnt", parse_pscnt_line, check_pscnt_config }, + { "/etc/sysmonitor/sys_fd_conf", parse_sysfd_line, check_sysfd_config } +}; + +static void parse_sy_resources_config(void) +{ + unsigned int i; + bool ret = false; + + for (i = 0; i < array_size(g_config_func); i++) { + if (g_system_monitor_info[i].monitor == false) { + continue; + } + ret = parse_config(g_config_func[i].config_file, g_config_func[i].parse_line_func); + g_system_monitor_info[i].config_ok = g_config_func[i].check_config(ret); + } +} + +static bool get_single_cpu_stat(cpu_domain *domain, unsigned int cpu) +{ + int ret = 0; + int tmp_cpu = 0; + unsigned long long tmp_u, tmp_n, tmp_s, tmp_i, tmp_w, tmp_x, tmp_y, tmp_z; + char buf[SMLBUFSIZ] = {0}; + char cmd[MAX_TEMPSTR] = {0}; + + tmp_u = tmp_n = tmp_s = tmp_i = tmp_w = tmp_x = tmp_y = tmp_z = 0; + ret = snprintf_s(cmd, sizeof(cmd), sizeof(cmd) - 1, "cat /proc/stat | grep -w cpu%u", cpu); + if (ret < 0) { + log_printf(LOG_ERR, "get_single_cpu_stat: 
snprintf_s cmd failed, ret: %d", ret); + return false; + } + ret = monitor_popen(cmd, buf, sizeof(buf) - 1, POPEN_TIMEOUT, NULL); + if (ret != 0) { + log_printf(LOG_WARNING, "failed to read CPU %d stats, check cpu state", cpu); + return false; + } + ret = sscanf_s(buf, "cpu%d %llu %llu %llu %llu %llu %llu %llu %llu", &tmp_cpu, + &tmp_u, &tmp_n, &tmp_s, &tmp_i, &tmp_w, &tmp_x, &tmp_y, &tmp_z); + if (ret <= 0) { + log_printf(LOG_WARNING, "failed to read CPU %d stats, check cpu state", cpu); + return false; + } + + domain->cpustat.u += tmp_u; + domain->cpustat.n += tmp_n; + domain->cpustat.s += tmp_s; + domain->cpustat.i += tmp_i; + domain->cpustat.w += tmp_w; + domain->cpustat.x += tmp_x; + domain->cpustat.y += tmp_y; + domain->cpustat.z += tmp_z; + return true; +} + +static bool cpus_refresh_domain(cpu_domain *domain) +{ + unsigned int index, offset, cpu; + unsigned int nprocs = 0; + unsigned int num = 0; + int tmp = get_nprocs_conf(); + + domain->cpustat.u = domain->cpustat.n = domain->cpustat.s = domain->cpustat.i = 0; + domain->cpustat.w = domain->cpustat.x = domain->cpustat.y = domain->cpustat.z = 0; + + if (tmp > 0) { + nprocs = (unsigned int)tmp; + } + + for (cpu = 0; cpu < nprocs; cpu++) { + if (num >= domain->cpu_num) { + break; + } + + index = cpu / CHAR_BITS; + if (index >= DOMAIN_CPU_LEN) { + break; + } + + offset = CHAR_BITS - cpu % CHAR_BITS - 1; + if (!((domain->cpus[index] >> offset) & 1)) { + continue; + } + + num++; + if (get_single_cpu_stat(domain, cpu) == false) { + domain->broken = true; + return false; + } + } + + domain->broken = false; + return true; +} + +static bool cpus_refresh(str_cpu *cpus) +{ + FILE *fp = NULL; + int num; + char buf[SMLBUFSIZ] = {0}; + + fp = fopen("/proc/stat", "r"); + if (fp == NULL) { + log_printf(LOG_ERR, "failed /proc/stat open [%d]", errno); + return false; + } + + rewind(fp); + (void)fflush(fp); + + if (!fgets(buf, sizeof(buf), fp)) { + log_printf(LOG_ERR, "failed /proc/stat read [%d]", errno); + (void)fclose(fp); 
+ return false; + } + num = sscanf_s(buf, "cpu %llu %llu %llu %llu %llu %llu %llu %llu", + &cpus->u, &cpus->n, &cpus->s, &cpus->i, &cpus->w, &cpus->x, &cpus->y, &cpus->z); + if (num <= 0) { + log_printf(LOG_INFO, "failed /proc/stat read"); + (void)fclose(fp); + return false; + } + (void)fclose(fp); + return true; +} + +static float get_usage_percent(str_cpu *cpu) +{ + long long u_frme, s_frme, n_frme, i_frme, w_frme, x_frme, y_frme, z_frme, tot_frme, tz; + + u_frme = (long long)(cpu->u - cpu->u_sav); + s_frme = (long long)(cpu->s - cpu->s_sav); + n_frme = (long long)(cpu->n - cpu->n_sav); + i_frme = trimz(cpu->i - cpu->i_sav); + w_frme = (long long)(cpu->w - cpu->w_sav); + x_frme = (long long)(cpu->x - cpu->x_sav); + y_frme = (long long)(cpu->y - cpu->y_sav); + z_frme = (long long)(cpu->z - cpu->z_sav); + tot_frme = u_frme + s_frme + n_frme + i_frme + w_frme + x_frme + y_frme + z_frme; + if (tot_frme < 1) { + tot_frme = 1; + } + + /* remember for next time around */ + cpu->u_sav = cpu->u; + cpu->s_sav = cpu->s; + cpu->n_sav = cpu->n; + cpu->i_sav = cpu->i; + cpu->w_sav = cpu->w; + cpu->x_sav = cpu->x; + cpu->y_sav = cpu->y; + cpu->z_sav = cpu->z; + + return (float)(tot_frme - i_frme) / (float)tot_frme * 100.0; +} + +static void handle_cpu_alarm(float usage, bool alarm, const cpu_domain *domain) +{ + char cpu_info[MAX_TEMPSTR]; + if (domain == NULL) { + snprintf_s(cpu_info, sizeof(cpu_info), sizeof(cpu_info) - 1, + "CPU usage"); + } else { + snprintf_s(cpu_info, sizeof(cpu_info), sizeof(cpu_info) - 1, + "CPU %s usage", domain->desc); + } + + if (alarm) { + log_printf(LOG_WARNING, "%s alarm: %4.1f%%", cpu_info, usage); + } else { + log_printf(LOG_INFO, "%s resume: %4.1f%%", cpu_info, usage); + } +} + +static void process_cpu_usage(float usage, bool thread_start, cpu_domain *domain) +{ + float alarm; + float resume; + bool *status = NULL; + int ret = 0; + + if (domain == NULL) { + alarm = g_system_monitor_info[CPU].alarm_value; + resume = 
g_system_monitor_info[CPU].resume_value; + status = &g_system_monitor_info[CPU].status; + } else { + alarm = domain->alarm_value; + resume = domain->resume_value; + status = &domain->status; + } + + if (usage >= alarm && *status == false) { + handle_cpu_alarm(usage, true, domain); + if (strlen(g_cpu_report_cmd) && g_has_reported_flag == false) { + ret = monitor_cmd(DEFAULT_USER_ID, g_cpu_report_cmd, REPORT_CMD_TIMEOUT, NULL, false); + if (ret == 0) { + log_printf(LOG_INFO, "cpu monitor: execute REPORT_COMMAND[%s] successfully", g_cpu_report_cmd); + } else { + log_printf(LOG_ERR, "cpu monitor: execute REPORT_COMMAND[%s] failed", g_cpu_report_cmd); + } + g_has_reported_flag = true; + } + *status = true; + } else if ((usage <= resume && *status == true) || (usage <= resume && thread_start)) { + handle_cpu_alarm(usage, false, domain); + *status = false; + } +} + +static void process_domain_cpustat_first(void) +{ + cpu_domain *t = NULL; + + for (t = g_domain_head; t != NULL; t = t->next) { + if (!cpus_refresh_domain(t)) { + continue; + } + (void)get_usage_percent(&t->cpustat); + t->first_collected = true; + } + g_monitor_domain_flag = true; +} + +static void process_domain_cpustat_second(bool thread_start) +{ + float usage; + cpu_domain *t = NULL; + + for (t = g_domain_head; t != NULL; t = t->next) { + if (t->broken || !cpus_refresh_domain(t)) { + t->first_collected = false; + log_printf(LOG_WARNING, "skip monitor on CPU %s", t->desc); + continue; + } + /* skip if cpustat not collected in the first refresh */ + if (t->first_collected == false) { + continue; + } + + t->first_collected = false; + usage = get_usage_percent(&t->cpustat); + process_cpu_usage(usage, thread_start, t); + } +} + +static bool process_global_cpustat_first(str_cpu *cpus) +{ + int ret = 0; + + g_monitor_domain_flag = false; + ret = memset_s(cpus, sizeof(str_cpu), 0, sizeof(str_cpu)); + if (ret != 0) { + log_printf(LOG_ERR, "process_global_cpustat_first: memset_s cpus failed, ret: %d", ret); + return 
false; + } + if (!cpus_refresh(cpus)) { + return false; + } + (void)get_usage_percent(cpus); + return true; +} + +static bool process_global_cpustat_second(str_cpu *cpus, bool thread_start) +{ + float usage; + + if (!cpus_refresh(cpus)) { + return false; + } + usage = get_usage_percent(cpus); + process_cpu_usage(usage, thread_start, NULL); + return true; +} + +static void monitor_cpu(void) +{ + static bool thread_start = true; + static unsigned int stat_count = 0; + static str_cpu cpus = {0}; + + /* when monitor mode changes, make sure go into first refresh */ + if ((g_monitor_domain_flag == true && g_domain_head == NULL) || + (g_monitor_domain_flag == false && g_domain_head != NULL)) { + stat_count = 0; + } + + /* first refresh cpustat */ + if (stat_count == 0) { + if (g_domain_head != NULL) { + process_domain_cpustat_first(); + } else { + if (process_global_cpustat_first(&cpus) == false) { + return; + } + } + } + + if ((stat_count++) * g_sys_res_period < g_system_monitor_info[CPU].stat_period) { + return; + } + stat_count = 0; + + /* second refresh cpustat and get usage */ + if (g_domain_head != NULL) { + process_domain_cpustat_second(thread_start); + } else { + if (process_global_cpustat_second(&cpus, thread_start) == false) { + return; + } + } + thread_start = false; + g_has_reported_flag = false; +} + +struct mem_info_table { + const char *name; + unsigned long *count; +}; + +static struct mem_info_table g_meminfo_table[] = { + { "Buffers", &g_mem_info.buffers }, + { "Cached", &g_mem_info.cached }, + { "MemFree", &g_mem_info.free }, + { "MemTotal", &g_mem_info.total }, + { "SReclaimable", &g_mem_info.sreclaimable }, + { "Shmem", &g_mem_info.shmem } +}; + +static int compare_mem_table_structs(const void *a, const void *b) +{ + return strcmp(((const struct mem_info_table*)a)->name, ((const struct mem_info_table*)b)->name); +} + +static int get_mem_info(void) +{ + int fd = -1; + char out_buf[MEM_INFO_BUFFER] = {0}; + char namebuf[ITEM_LEN]; + char *head = NULL; + 
char *tail = NULL; + struct mem_info_table *found = NULL; + struct mem_info_table findme = { namebuf, NULL }; + int ret; + ssize_t read_ret; + + fd = open("/proc/meminfo", O_RDONLY); + if (fd == -1) { + log_printf(LOG_ERR, "get_mem_info: open /proc/meminfo failed, errno[%d]", errno); + return -1; + } + + (void)lseek(fd, 0, SEEK_SET); + + read_ret = read(fd, out_buf, sizeof(out_buf) - 1); + if (read_ret < 0) { + log_printf(LOG_ERR, "get_mem_info: read /proc/meminfo failed, rrno[%d]", errno); + (void)close(fd); + return -1; + } + out_buf[read_ret] = '\0'; + + head = out_buf; + for (;;) { + tail = strchr(head, ':'); + if (tail == NULL) { + break; + } + *tail = '\0'; + if (strlen(head) > sizeof(namebuf)) { + head = tail + 1; + goto nextline; + } + ret = strcpy_s(namebuf, sizeof(namebuf) - 1, head); + if (ret != 0) { + log_printf(LOG_ERR, "get_mem_info: strcpy_s namebuf failed, errno[%d]", errno); + (void)close(fd); + return -1; + } + found = bsearch(&findme, g_meminfo_table, array_size(g_meminfo_table), + sizeof(struct mem_info_table), compare_mem_table_structs); + head = tail + 1; + if (found != NULL) { + *(found->count) = (unsigned long)strtoull(head, &tail, STRTOULL_NUMBER_BASE); + } +nextline: + tail = strchr(head, '\n'); + if (tail == NULL) { + break; + } + head = tail + 1; + } + + if (fd >= 0) { + (void)close(fd); + } + return 0; +} + +static void sysrq_show_memory_info(void) +{ + int ret; + char cmd[MAX_CONFIG] = {0}; + + ret = snprintf_s(cmd, MAX_CONFIG, MAX_CONFIG - 1, "echo m > %s", PROC_SYSRQ_TRIGGER); + if (ret == -1) { + log_printf(LOG_ERR, "sysrq_show_memory_info: snprintf_s failed"); + return; + } + + ret = monitor_cmd(DEFAULT_USER_ID, cmd, POPEN_TIMEOUT, NULL, true); + if (ret != 0) { + log_printf(LOG_ERR, "sysrq_show_memory_info: monitor_cmd failed"); + return; + } + log_printf(LOG_INFO, "sysrq show memory info in message."); +} + +static void show_memory_info(void) +{ + FILE *fp = NULL; + char buf[MAX_CONFIG] = {0}; + + fp = fopen("/proc/meminfo", 
"r"); + if (fp == NULL) { + log_printf(LOG_ERR, "show_memory_info: fopen /proc/meminfo error [%d]", errno); + return; + } + log_printf(LOG_INFO, "---------------show /proc/meminfo: ---------------"); + while (fgets(buf, MAX_CONFIG - 1, fp)) { + log_printf(LOG_INFO, "%s", buf); + } + + (void)fclose(fp); + log_printf(LOG_INFO, "---------------show_memory_info end.---------------"); +} + +static void handle_memory_alarm(float usage, bool alarm) +{ + + if (alarm) { + log_printf(LOG_INFO, "memory usage alarm: %4.1f%%", usage); + } else { + log_printf(LOG_INFO, "memory usage resume: %4.1f%%", usage); + } + + if (alarm) { + show_memory_info(); + sysrq_show_memory_info(); + } +} + +static void handle_memory_usage(float usage, bool thread_start) +{ + if (usage >= g_system_monitor_info[MEM].alarm_value && g_system_monitor_info[MEM].status == false) { + handle_memory_alarm(usage, true); + g_system_monitor_info[MEM].status = true; + } else if ((usage <= g_system_monitor_info[MEM].resume_value && g_system_monitor_info[MEM].status == true) || + (usage <= g_system_monitor_info[MEM].resume_value && thread_start)) { + handle_memory_alarm(usage, false); + g_system_monitor_info[MEM].status = false; + } +} + +/* + * memory usage monitor + * get three times of usage and calculate average usage + */ +static void monitor_memory(void) +{ + static float usage = 0.0; + static int times = 0; + static bool thread_start = true; + int ret; + + ret = memset_s(&g_mem_info, sizeof(struct mem_info), 0, sizeof(struct mem_info)); + if (ret != 0) { + log_printf(LOG_ERR, "memset_s meminfo failed, ret: %d", ret); + return; + } + + ret = get_mem_info(); + if (ret != 0) { + return; + } + + if (g_mem_info.total == 0) { + log_printf(LOG_INFO, "get total memory failed."); + return; + } + + usage += (float)(g_mem_info.total - g_mem_info.free - g_mem_info.cached - + g_mem_info.sreclaimable - g_mem_info.buffers + g_mem_info.shmem) * 100 / (float)g_mem_info.total; + + times++; + if (times < MEM_STAT_TIMES) { + 
return; + } + usage /= MEM_STAT_TIMES; + times = 0; + + handle_memory_usage(usage, thread_start); + thread_start = false; + usage = 0.0; +} + +static int get_item_from_proc_file(const char *file, unsigned long *result) +{ + char cnt_buf[MAX_TEMPSTR] = {0}; + FILE *fp = NULL; + + fp = fopen(file, "r"); + if (fp == NULL) { + log_printf(LOG_ERR, "open %s failed, errno[%d]", file, errno); + return -1; + } + + rewind(fp); + (void)fflush(fp); + + if (fgets(cnt_buf, sizeof(cnt_buf), fp) == NULL) { + (void)fclose(fp); + log_printf(LOG_ERR, "read %s failed", file); + return -1; + } + + *result = strtoul(cnt_buf, NULL, 0); + (void)fclose(fp); + return 0; +} + +static unsigned long get_process_use_threads_cnt(const char *dir) +{ + DIR *dir_tmp = NULL; + struct dirent *direntp = NULL; + unsigned long num = 0; + struct stat sb; + + if (stat(dir, &sb) < 0) { + return 0; + } + if (!S_ISDIR(sb.st_mode)) { + return 0; + } + + /* if dno't monitor threads, return 1 for counting process num */ + if (g_pscnt_threads_top_num == 0) { + return PSCNT_COUNT_FOR_PROCESS; + } + + dir_tmp = opendir(dir); + if (dir_tmp == NULL) { + return 0; + } + + for (;;) { + direntp = readdir(dir_tmp); + if (direntp == NULL) { + break; + } + /* check int to exclude directory . and .. 
*/ + if (check_int(direntp->d_name) == false) { + continue; + } + num++; + } + (void)closedir(dir_tmp); + return num; +} + +/* + * create for g_top_process_threads by size size, and free by using free_top_process_threads + */ +static void create_top_process_threads(unsigned int size) +{ + int ret; + + /* no need to maloc repeatedly */ + if (!g_pscnt_threads_create_flag) { + return; + } + if (g_top_process_threads != NULL) { + log_printf(LOG_ERR, "top process threads g_top_process_threads is not null, so return."); + return; + } + + if (size == 0 || size > TOP_PROCESS_THREADS_NUM_MAX) { + log_printf(LOG_ERR, "create top process threads size %d is error.", size); + return; + } + + g_top_process_threads = malloc(sizeof(ps_threads_info) * size); + if (g_top_process_threads == NULL) { + log_printf(LOG_ERR, "top process threads malloc error."); + return; + } + ret = memset_s(g_top_process_threads, sizeof(ps_threads_info) * size, 0, sizeof(ps_threads_info) * size); + if (ret != 0) { + log_printf(LOG_ERR, "top process threads memset_s error."); + free(g_top_process_threads); + g_top_process_threads = NULL; + return; + } + g_pscnt_threads_create_flag = false; + return; +} + +static void get_top_process_threads(const char *pid, unsigned long num, + unsigned int process_threads_num) +{ + int ret; + unsigned int i; + unsigned int j; + size_t len; + + create_top_process_threads(g_pscnt_threads_top_num); + if (g_top_process_threads == NULL) { + log_printf(LOG_ERR, "get top process threads is null."); + return; + } + + if (process_threads_num < 1) { + log_printf(LOG_ERR, "process_threads_num %u is error.", process_threads_num); + return; + } + + if (num < g_top_process_threads[process_threads_num - 1].num) { + return; + } + + ps_threads_info info; + ret = memset_s(&info, sizeof(info), 0, sizeof(info)); + if (ret != 0) { + log_printf(LOG_ERR, "get top process threads memset_s error [%d]", ret); + return; + } + len = sizeof(info.cmd); + get_ps_cmd(info.cmd, pid, len); + info.num = 
num; + ret = strcpy_s(info.pid, sizeof(info.pid) - 1, pid); + if (ret != 0) { + log_printf(LOG_ERR, "get top process threads strcpy_s error [%d]", ret); + return; + } + + for (i = 0; i < process_threads_num; ++i) { + if (info.num > g_top_process_threads[i].num) { + for (j = process_threads_num - 1; j > i; --j) { + g_top_process_threads[j] = g_top_process_threads[j - 1]; + } + g_top_process_threads[i] = info; + break; + } + } +} + +static int get_threads_for_count(const char *name, bool get_top_flag, unsigned long *count_threads_tmp) +{ + unsigned long count_threads; + char path[MAX_TEMPSTR] = {0}; + int ret; + + ret = snprintf_s(path, MAX_TEMPSTR, MAX_TEMPSTR - 1, "/proc/%s/task", name); + if (ret == -1) { + log_printf(LOG_ERR, "get threads: snprintf_s path failed, errno: %d", errno); + return PSCNT_RET_ERROR; + } + + count_threads = get_process_use_threads_cnt(path); + if (count_threads == 0) { + return PSCNT_RET_CONTINUE; + } + + if (get_top_flag) { + get_top_process_threads(name, count_threads, g_pscnt_threads_top_num); + } + + *count_threads_tmp = count_threads; + return PSCNT_RET_TRUE; +} + +/* + * get process count and threads count from /proc/xxx/task/ + * read /proc/xxx/task/ count for process count and count for dir, which name is number for threads count + */ +static int get_process_and_threads_count(unsigned long *result_process, unsigned long *result_threads, + bool get_top_threads_flag) +{ + struct dirent *direntp = NULL; + DIR *dir = NULL; + unsigned long count_process = 0; + unsigned long count_threads_sum = 0; + unsigned long count_threads_tmp = 0; + int ret; + + dir = opendir("/proc"); + if (dir == NULL) { + log_printf(LOG_ERR, "open /proc failed"); + return -1; + } + + for (;;) { + direntp = readdir(dir); + if (direntp == NULL) { + break; + } + if (check_int(direntp->d_name) == false) { + continue; + } + + ret = get_threads_for_count(direntp->d_name, get_top_threads_flag, &count_threads_tmp); + if (ret == PSCNT_RET_ERROR) { + (void)closedir(dir); 
+ return -1; + } else if (ret == PSCNT_RET_CONTINUE) { + continue; + } else { + count_threads_sum += count_threads_tmp; + /* calculate process count when get threads return true */ + count_process++; + } + } + *result_process = count_process; + *result_threads = count_threads_sum; + (void)closedir(dir); + return 0; +} + +static void update_alarm_value(unsigned long cnt_max, unsigned long *alarm, float set_alarm_value, + float alarm_ratio) +{ + float alarm_value = (float)cnt_max * alarm_ratio / RATIO; + + if (set_alarm_value >= alarm_value) { + *alarm = (unsigned long)set_alarm_value; + } else { + *alarm = (unsigned long)alarm_value; + } +} + +static void update_resume_value(unsigned long cnt_max, unsigned long *resume, float set_resume_value, + float resume_ratio) +{ + float resume_value = (float)cnt_max * resume_ratio / RATIO; + + if (set_resume_value >= resume_value) { + *resume = (unsigned long)set_resume_value; + } else { + *resume = (unsigned long)resume_value; + } +} + +static void handle_pscnt_and_threads_alarm(unsigned long cnt, bool alarm, const char *str, unsigned short alarmid) +{ + if (alarm) { + log_printf(LOG_WARNING, "%s alarm: %lu", str, cnt); + } else { + log_printf(LOG_INFO, "%s resume: %lu", str, cnt); + } +} + +static void ps_show_sysfd_info(const char *alarm_msg) +{ + unsigned long cnt = 0; + unsigned long max_fd_num = 0; + int ret; + + ret = get_file_nr(&cnt, &max_fd_num); + if (ret != 0 || cnt == 0 || max_fd_num == 0) { + return; + } + + log_printf(LOG_INFO, "%s, show sys fd count: %lu", alarm_msg, cnt); +} + +static void ps_show_mem_info(const char *alarm_msg) +{ + log_printf(LOG_INFO, "%s, show mem info", alarm_msg); + show_memory_info(); +} + +static void process_pscnt_usage(unsigned long cnt, unsigned long alarm, unsigned long resume, bool thread_start) +{ + if (cnt >= alarm && g_system_monitor_info[PSCNT].status == false) { + log_printf(LOG_INFO, "---------------process count alarm start: ---------------"); + 
handle_pscnt_and_threads_alarm(cnt, true, "process count", PSCNT_ABNORMAL); + ps_show_sysfd_info("process count alarm"); + ps_show_mem_info("process count alarm"); + log_printf(LOG_INFO, "---------------process count alarm end. ---------------"); + g_system_monitor_info[PSCNT].status = true; + } else if ((cnt <= resume && g_system_monitor_info[PSCNT].status == true) || + (cnt <= resume && thread_start)) { + handle_pscnt_and_threads_alarm(cnt, false, "process count", PSCNT_ABNORMAL); + g_system_monitor_info[PSCNT].status = false; + } +} + +static void free_top_process_threads(void) +{ + if (g_top_process_threads != NULL) { + free(g_top_process_threads); + g_top_process_threads = NULL; + g_pscnt_threads_create_flag = true; + } +} + + +static void print_top_threads(unsigned int process_threads_num) +{ + unsigned int i; + int ret; + char tmp_buf[FD_TMP_BUF + 1] = {0}; + unsigned int zero_num = 0; + unsigned int print_num = 0; + if (g_top_process_threads == NULL) { + log_printf(LOG_ERR, "print top threads is null."); + return; + } + + for (i = 0; i < process_threads_num; i++) { + ret = memset_s(tmp_buf, sizeof(tmp_buf), 0, sizeof(tmp_buf)); + if (ret != 0) { + log_printf(LOG_ERR, "print top threads memset_s error [%d]", ret); + continue; + } + /* if top process num in config is bigger than process num on device, need to ignore useless processes count */ + if (g_top_process_threads[i].num == 0) { + zero_num++; + continue; + } + ret = snprintf_s(tmp_buf, sizeof(tmp_buf), sizeof(tmp_buf) - 1, + "open threads most %u processes is:[top%u:pid=%s,openthreadsnum=%lu,cmd=%s]", + process_threads_num, i + 1, g_top_process_threads[i].pid, g_top_process_threads[i].num, + g_top_process_threads[i].cmd); + if (ret < 0) { + log_printf(LOG_ERR, "print top threads snprintf_s error [%d]", ret); + continue; + } + print_num++; + log_printf(LOG_INFO, "%s", tmp_buf); + } + + if (zero_num > 0) { + log_printf(LOG_INFO, "print top threads: total set num:%u, actual print num:%u, ignore useless 
num:%u.", + process_threads_num, print_num, zero_num); + } +} + +/* print top threads info, and free g_pscnt_threads_top_num after print */ +static void print_threads_info(void) +{ + print_top_threads(g_pscnt_threads_top_num); + free_top_process_threads(); +} + +static void ps_show_process_cnt(unsigned long cnt_process) +{ + log_printf(LOG_INFO, "threads count alarm, show process count %lu", cnt_process); +} +static void ps_threads_usage(unsigned long cnt, unsigned long alarm, unsigned long resume, bool thread_start_flag) +{ + int ret; + unsigned long cnt_process = 0; + unsigned long cnt_threads = 0; + + if (cnt >= alarm && g_pscnt_threads_status_flag == false) { + log_printf(LOG_INFO, "---------------threads count alarm start: ---------------"); + handle_pscnt_and_threads_alarm(cnt, true, "threads count", PS_THREADS_ABNORMAL); + /* get and print threads alarm info */ + ret = get_process_and_threads_count(&cnt_process, &cnt_threads, true); + if (ret < 0 || cnt_process == 0 || cnt_threads == 0) { + log_printf(LOG_ERR, "ps threads usage error return ret:%d, cnt_process:%lu, cnt_threads:%lu", + ret, cnt_process, cnt_threads); + return; + } + print_threads_info(); + ps_show_process_cnt(cnt_process); + ps_show_sysfd_info("threads count alarm"); + ps_show_mem_info("threads count alarm"); + log_printf(LOG_INFO, "---------------threads count alarm end. 
---------------"); + g_pscnt_threads_status_flag = true; + } else if ((cnt <= resume && g_pscnt_threads_status_flag == true) || + (cnt <= resume && thread_start_flag)) { + handle_pscnt_and_threads_alarm(cnt, false, "threads count", PS_THREADS_ABNORMAL); + g_pscnt_threads_status_flag = false; + } +} + +static void monitor_threads_cnt(unsigned long cnt_threads) +{ + unsigned long cnt_threads_max = 0; + unsigned long threads_alarm_bigger; + unsigned long threads_resume_bigger; + int ret; + static bool thread_start_flag = true; + + ret = get_item_from_proc_file("/proc/sys/kernel/threads-max", &cnt_threads_max); + if (ret < 0 || cnt_threads_max == 0) { + log_printf(LOG_ERR, "monitor threads cnt: get file error ret: %d, cnt_threads_max:%lu.", ret, cnt_threads_max); + return; + } + update_alarm_value(cnt_threads_max, &threads_alarm_bigger, g_system_monitor_info[PSCNT].alarm_value, + g_pscnt_alarm_ratio); + update_resume_value(cnt_threads_max, &threads_resume_bigger, g_system_monitor_info[PSCNT].resume_value, + g_pscnt_resume_ratio); + ps_threads_usage(cnt_threads, threads_alarm_bigger, threads_resume_bigger, thread_start_flag); + thread_start_flag = false; +} + +static void monitor_pscnt(void) +{ + static bool thread_start = true; + unsigned long cnt_process = 0; + unsigned long cnt_threads = 0; + unsigned long cnt_max = 0; + unsigned long alarm_bigger; + unsigned long resume_bigger; + int ret; + + ret = get_process_and_threads_count(&cnt_process, &cnt_threads, false); + if (ret < 0 || cnt_process == 0 || cnt_threads == 0) { + return; + } + + ret = get_item_from_proc_file("/proc/sys/kernel/pid_max", &cnt_max); + if (ret < 0 || cnt_max == 0) { + return; + } + + update_alarm_value(cnt_max, &alarm_bigger, g_system_monitor_info[PSCNT].alarm_value, + g_pscnt_alarm_ratio); + update_resume_value(cnt_max, &resume_bigger, g_system_monitor_info[PSCNT].resume_value, + g_pscnt_resume_ratio); + process_pscnt_usage(cnt_process, alarm_bigger, resume_bigger, thread_start); + thread_start 
= false; + /* monitor for threads cnt */ + if (g_pscnt_threads_top_num != 0) { + monitor_threads_cnt(cnt_threads); + } +} + +static unsigned long get_dirfilenum(const char *dir) +{ + DIR *dp = NULL; + struct dirent *entry = NULL; + struct stat statbuf; + unsigned long num = 0; + int ret; + char tmp_path[FD_PATH_MAX + 1] = {0}; + + ret = memset_s(&statbuf, sizeof(statbuf), 0, sizeof(statbuf)); + if (ret != 0) { + log_printf(LOG_ERR, "get_dirfilenum: memset_s statbuf failed, ret: %d.", ret); + return 0; + } + + dp = opendir(dir); + if (dp == NULL) { + return 0; + } + + for (;;) { + entry = readdir(dp); + if (entry == NULL) { + break; + } + + if (entry->d_type == DT_DIR) { + ret = memset_s(tmp_path, sizeof(tmp_path), 0, sizeof(tmp_path)); + if (ret != 0) { + log_printf(LOG_ERR, "monitor_io_delay memset_s error [%d]", ret); + break; + } + ret = snprintf_s(tmp_path, sizeof(tmp_path), sizeof(tmp_path) - 1, "%s/%s", dir, entry->d_name); + if (ret < 0) { + log_printf(LOG_ERR, "monitor_io_delay snprintf_s error [%d]", ret); + break; + } + continue; + } + + num++; + } + (void)closedir(dp); + return num; +} + +static void get_ps_cmd(char *cmd, const char *pid, size_t cmd_len) +{ + char cmd_line[PS_CMD_MAX + 1] = { 0 }; + int fd = -1; + char cmd_file[FD_PATH_MAX + 1] = { 0 }; + ssize_t len; + int ret; + + ret = snprintf_s(cmd_file, sizeof(cmd_file), sizeof(cmd_file) - 1, "/proc/%s/cmdline", pid); + if (ret < 0) { + log_printf(LOG_ERR, "get_ps_cmd snprintf_s error [%d]", ret); + return; + } + fd = open(cmd_file, O_RDONLY); + if (fd < 0) { + log_printf(LOG_ERR, "can't open %s", cmd_file); + return; + } + + len = read(fd, cmd_line, PS_CMD_MAX); + if (len == -1) { + log_printf(LOG_ERR, "get cmd from file [%s] failed", cmd_file); + (void)close(fd); + return; + } + + while (len > 0) { + if (((unsigned char)cmd_line[len - 1]) < ' ') { + cmd_line[len - 1] = ' '; + } + len--; + } + (void)close(fd); + cmd_line[PS_CMD_MAX] = '\0'; + ret = memset_s(cmd, cmd_len, '\0', cmd_len); + if (ret 
!= 0) { + log_printf(LOG_ERR, "get_ps_cmd memset_s error [%d]", ret); + return; + } + ret = memcpy_s(cmd, cmd_len, cmd_line, strlen(cmd_line)); + if (ret != 0) { + log_printf(LOG_ERR, "get_ps_cmd memcpy_s error [%d]", ret); + } +} + +static void get_top_fd_info(ps_fd_info *top_fd, const char *pid, unsigned long num) +{ + int ret; + int i, j; + size_t len; + ps_fd_info ps_info; + + ret = memset_s(&ps_info, sizeof(ps_info), 0, sizeof(ps_info)); + if (ret != 0) { + log_printf(LOG_ERR, "get_top_fd_info memset_s error [%d]", ret); + return; + } + len = sizeof(ps_info.cmd); + get_ps_cmd(ps_info.cmd, pid, len); + ps_info.num = num; + ret = strcpy_s(ps_info.pid, sizeof(ps_info.pid) - 1, pid); + if (ret != 0) { + log_printf(LOG_ERR, "get_top_fd_info strcpy_s error [%d]", ret); + return; + } + + /* update top 3 processes list */ + if (ps_info.num < top_fd[TOPFD_PROCESS_NUM - 1].num) { + return; + } + for (i = 0; i < TOPFD_PROCESS_NUM; ++i) { + if (ps_info.num > top_fd[i].num) { + for (j = TOPFD_PROCESS_NUM - 1; j > i; --j) { + top_fd[j] = top_fd[j - 1]; + } + top_fd[i] = ps_info; + break; + } + } +} + +static void print_processes(const ps_fd_info top_fd[], unsigned int process_num) +{ + unsigned int i; + int ret; + char tmp_buf[FD_TMP_BUF + 1] = {0}; + + for (i = 0; i < process_num; i++) { + ret = memset_s(tmp_buf, sizeof(tmp_buf), 0, sizeof(tmp_buf)); + if (ret != 0) { + log_printf(LOG_ERR, "get_maxfd_process_info memset_s error [%d]", ret); + continue; + } + ret = snprintf_s(tmp_buf, sizeof(tmp_buf), sizeof(tmp_buf) - 1, + "open fd most three processes is:[top%u:pid=%s,openfdnum=%lu,cmd=%s]", + i + 1, top_fd[i].pid, top_fd[i].num, top_fd[i].cmd); + if (ret < 0) { + log_printf(LOG_ERR, "get_maxfd_process_info snprintf_s error [%d]", ret); + continue; + } + log_printf(LOG_INFO, "%s", tmp_buf); + } +} + +static void get_maxfd_process_info(void) +{ + DIR *dp = NULL; + struct dirent *dirp = NULL; + unsigned long num; + size_t i, len; + char fd_dir[FD_BUF_LEN] = {0}; + 
ps_fd_info top_fd[TOPFD_PROCESS_NUM] = {0}; + int ret; + + dp = opendir("/proc"); + if (dp == NULL) { + log_printf(LOG_ERR, "dir [/proc] not exist,failed to get open fd most three processes"); + return; + } + for (;;) { + dirp = readdir(dp); + if (dirp == NULL) { + break; + } + + if (dirp->d_type != DT_DIR) { + continue; + } + + len = strlen(dirp->d_name); + for (i = 0; dirp->d_name[i] != 0; ++i) { + if (!isdigit(dirp->d_name[i])) { + break; + } + } + + if (len == i) { + ret = memset_s(fd_dir, sizeof(fd_dir), 0, sizeof(fd_dir)); + if (ret != 0) { + log_printf(LOG_ERR, "get_maxfd_process_info memset_s error [%d]", ret); + continue; + } + ret = snprintf_s(fd_dir, sizeof(fd_dir), sizeof(fd_dir) - 1, "/proc/%s/fd", dirp->d_name); + if (ret < 0) { + log_printf(LOG_ERR, "get_maxfd_process_info snprintf_s error [%d]", ret); + continue; + } + num = get_dirfilenum(fd_dir); + if (num == 0) { + continue; + } + get_top_fd_info(top_fd, dirp->d_name, num); + } + } + + print_processes(top_fd, TOPFD_PROCESS_NUM); + (void)closedir(dp); +} + +static int get_file_nr(unsigned long *file_nr, unsigned long *file_max) +{ + char file[MAX_TEMPSTR] = "/proc/sys/fs/file-nr"; + char cnt_buf[MAX_TEMPSTR] = {0}; + FILE *fp = NULL; + unsigned long nr_files = 0; + unsigned long nr_free_files; + unsigned long max_files = 0; + int ret; + + fp = fopen(file, "r"); + if (fp == NULL) { + log_printf(LOG_ERR, "open %s failed, errno: %d", file, errno); + return -1; + } + + rewind(fp); + (void)fflush(fp); + + if (fgets(cnt_buf, sizeof(cnt_buf), fp) == NULL) { + log_printf(LOG_ERR, "read %s failed", file); + (void)fclose(fp); + return -1; + } + + ret = sscanf_s(cnt_buf, "%lu %lu %lu", &nr_files, &nr_free_files, &max_files); + if (ret <= 0) { + log_printf(LOG_INFO, "parse %s failed", file); + (void)fclose(fp); + return -1; + } + + *file_nr = nr_files; + *file_max = max_files; + (void)fclose(fp); + return 0; +} + +static void handle_sysfd_alarm(unsigned long cnt, bool alarm, unsigned long max_fd) +{ + 
unsigned long alarm_value; + unsigned long resume_value; + + alarm_value = (unsigned long)(g_system_monitor_info[SYSTEM_FDCNT].alarm_value / 100 * max_fd); + resume_value = (unsigned long)(g_system_monitor_info[SYSTEM_FDCNT].resume_value / 100 * max_fd); + + if (alarm) { + log_printf(LOG_INFO, "sys fd count alarm: %lu (alarm: %lu, resume: %lu)", + cnt, alarm_value, resume_value); + } else { + log_printf(LOG_INFO, "sys fd count resume: %lu (alarm: %lu, resume: %lu)", + cnt, alarm_value, resume_value); + } +} + +static void monitor_sysfd(void) +{ + unsigned long cnt = 0; + unsigned long max_fd_num = 0; + float usage; + int ret; + + ret = get_file_nr(&cnt, &max_fd_num); + if (ret != 0 || cnt == 0 || max_fd_num == 0) { + return; + } + + usage = (float)((float)cnt * 100 / (float)max_fd_num); + + if (usage >= g_system_monitor_info[SYSTEM_FDCNT].alarm_value && + (g_system_monitor_info[SYSTEM_FDCNT].status == false)) { + handle_sysfd_alarm(cnt, true, max_fd_num); + g_system_monitor_info[SYSTEM_FDCNT].status = true; + get_maxfd_process_info(); + } + if ((usage < g_system_monitor_info[SYSTEM_FDCNT].resume_value) && + (g_system_monitor_info[SYSTEM_FDCNT].status == true)) { + handle_sysfd_alarm(cnt, false, max_fd_num); + g_system_monitor_info[SYSTEM_FDCNT].status = false; + } +} + +/* + * monitor item, if config_ok, exec monitor_func + * for system_fdnum, if config_ok = false, use default config + * we calculate monitor sleep period according to item monitor period and cpu stat period + * monitor_count is default to -1, this will call monitor_func at the thread startup + */ +static void monitor_item(void) +{ + int i; + + for (i = 0; i < SYSTEM_MONITOR_ITEM_CNT; i++) { + if (g_system_monitor_info[i].monitor == false || g_system_monitor_info[i].config_ok == false) { + continue; + } + + g_system_monitor_info[i].monitor_count++; + + if (g_system_monitor_info[i].monitor_count == 0) { + goto exec_monitor; + } + + if (((unsigned int)g_system_monitor_info[i].monitor_count * 
/*
 * Greatest common divisor of @a and @b (Euclid's algorithm).
 *
 * Returns the largest value dividing both arguments. If one argument is 0 the
 * other one is returned, and gcd(0, 0) is 0.
 *
 * The former subtraction-based loop never terminated when exactly one
 * argument was 0 and needed a number of iterations proportional to the ratio
 * of the operands; the remainder form terminates for every input.
 */
static unsigned int get_common_divisor(unsigned int a, unsigned int b)
{
    while (b != 0) {
        unsigned int rem = a % b;

        a = b;
        b = rem;
    }
    return a;
}
/*
 * Thread entry point of the system resources monitor.
 *
 * arg: unused.
 *
 * Loops forever: when the SYSTEM_ITEM reload flag is set, the configuration is
 * re-read and the derived state rebuilt; then every due monitor item is run,
 * the thread status counter is fed, and the thread sleeps for
 * g_sys_res_period seconds. The loop is left (and NULL returned) only when
 * setting the check flag or feeding the status counter reports -1.
 */
static void *sys_resources_monitor_start(void *arg)
{
    bool reload_flag = false;
    int ret;

    (void)prctl(PR_SET_NAME, "monitor-sysres");
    log_printf(LOG_INFO, "system resources monitor starting up");

    for (;;) {
        reload_flag = get_thread_item_reload_flag(SYSTEM_ITEM);
        if (reload_flag) {
            log_printf(LOG_INFO, "system resource monitor, start reload");
            (void)clear_report_cmd();
            parse_sy_resources_config();

            /* refresh monitor sleep period */
            get_sys_res_period();
            /* free g_top_process_threads when reload config for new malloc size */
            free_top_process_threads();
            /* free g_domain_head and set new domain list */
            free_set_domain_head();
            /* reload handled: clear the request before resetting the thread status */
            set_thread_item_reload_flag(SYSTEM_ITEM, false);
            clear_thread_status(THREAD_SYSTEM_ITEM);
            ret = set_thread_status_check_flag(THREAD_SYSTEM_ITEM, true);
            if (ret == -1) {
                log_printf(LOG_ERR, "system resource monitor set check flag error");
                break;
            }
        }

        /* run every monitor item whose period has elapsed */
        monitor_item();
        ret = feed_thread_status_count(THREAD_SYSTEM_ITEM);
        if (ret == -1) {
            log_printf(LOG_ERR, "system resource monitor feed error");
            break;
        }
        (void)sleep(g_sys_res_period);
    }

    return NULL;
}
&g_system_monitor_info[type].monitor : &g_system_monitor_info[type].alarm); +} + +void sys_resources_item_init_early(void) +{ + int ret; + int i; + + ret = memset_s(g_system_monitor_info, sizeof(system_monitor_info) * SYSTEM_MONITOR_ITEM_CNT, + 0, sizeof(system_monitor_info) * SYSTEM_MONITOR_ITEM_CNT); + if (ret != 0) { + log_printf(LOG_ERR, "sys_resources_item_init_early, memset_s system_item_info failed, ret: %d.", ret); + return; + } + + for (i = 0; i < SYSTEM_MONITOR_ITEM_CNT; i++) { + g_system_monitor_info[i].monitor = true; + g_system_monitor_info[i].alarm = false; + } +} + +void sys_resources_item_init(void) +{ + int i; + + set_thread_item_monitor_flag(SYSTEM_ITEM, false); + for (i = 0; i < SYSTEM_MONITOR_ITEM_CNT; i++) { + if (g_system_monitor_info[i].monitor == true) { + set_thread_item_monitor_flag(SYSTEM_ITEM, true); + break; + } + } + + if (!get_thread_item_monitor_flag(SYSTEM_ITEM)) { + return; + } + + /* set default value for monitor item info */ + if (g_system_monitor_info[CPU].monitor) { + g_system_monitor_info[CPU].alarm_value = ALARM_RATIO_DEFAULT; + g_system_monitor_info[CPU].resume_value = RESUME_RATIO_DEFAULT; + g_system_monitor_info[CPU].monitor_period = MONITOR_PERIOD_DEFAULT; + g_system_monitor_info[CPU].stat_period = CPU_STAT_PERIOD; + g_system_monitor_info[CPU].monitor_count = MONITOR_COUNT_INIT; + g_system_monitor_info[CPU].monitor_func = monitor_cpu; + } + + if (g_system_monitor_info[MEM].monitor) { + g_system_monitor_info[MEM].alarm_value = ALARM_RATIO_DEFAULT; + g_system_monitor_info[MEM].resume_value = RESUME_RATIO_DEFAULT; + g_system_monitor_info[MEM].monitor_period = MONITOR_PERIOD_DEFAULT; + g_system_monitor_info[MEM].monitor_count = MONITOR_COUNT_INIT; + g_system_monitor_info[MEM].monitor_func = monitor_memory; + } + + if (g_system_monitor_info[PSCNT].monitor) { + g_system_monitor_info[PSCNT].alarm_value = PSCNT_ALARM_VALUE; + g_system_monitor_info[PSCNT].resume_value = PSCNT_RESUME_VALUE; + 
g_system_monitor_info[PSCNT].monitor_period = MONITOR_PERIOD_DEFAULT; + g_system_monitor_info[PSCNT].monitor_count = MONITOR_COUNT_INIT; + g_system_monitor_info[PSCNT].monitor_func = monitor_pscnt; + } + + if (g_system_monitor_info[SYSTEM_FDCNT].monitor) { + set_default_sysfd_config(); + g_system_monitor_info[SYSTEM_FDCNT].monitor_count = MONITOR_COUNT_INIT; + g_system_monitor_info[SYSTEM_FDCNT].monitor_func = monitor_sysfd; + } +} + +void sys_resources_monitor_init(void) +{ + pthread_t tid; + + if (pthread_create(&tid, NULL, sys_resources_monitor_start, NULL)) { + log_printf(LOG_ERR, "create system resources monitor thread error [%d]", errno); + return; + } + set_thread_item_tid(SYSTEM_ITEM, tid); +} diff --git a/sysmonitor-1.3.2/src/sys_resources.h b/sysmonitor-1.3.2/src/sys_resources.h new file mode 100644 index 0000000000000000000000000000000000000000..b8e7f4e78cfee343f47eaa4f32c3e58546c2073d --- /dev/null +++ b/sysmonitor-1.3.2/src/sys_resources.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2019-2019. All rights reserved. + * sysmonitor licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. 
#ifndef SYS_RESOURCES_H
#define SYS_RESOURCES_H

#include "common.h"

/*
 * System resource monitor items. The values are used as indexes into the
 * g_system_monitor_info table; SYSTEM_MONITOR_ITEM_CNT is the number of items
 * and must stay last.
 */
enum system_monitor_item {
    CPU,            /* handled by monitor_cpu */
    MEM,            /* handled by monitor_memory */
    PSCNT,          /* handled by monitor_pscnt */
    SYSTEM_FDCNT,   /* handled by monitor_sysfd */
    SYSTEM_MONITOR_ITEM_CNT
};

/*
 * interface for parse item monitor or alarm in system resources
 * type selects the item (enum system_monitor_item); monitor selects which
 * switch of that item receives the parsed value: true for the monitor switch,
 * false for the alarm switch
 */
bool sys_resources_monitor_parse(const char *item, const char *value, int type, bool monitor);

/*
 * create system resources monitor thread
 */
void sys_resources_monitor_init(void);

/*
 * init g_system_monitor_info before parse sysmonitor.conf:
 * clear the table, then enable monitoring and disable alarm for every item
 */
void sys_resources_item_init_early(void);

/*
 * call after parse /etc/sysconfig/sysmonitor
 * init system resources monitor item default value
 */
void sys_resources_item_init(void);

#endif
/*
 * One entry of the configuration dispatch table: associates a configuration
 * key with the handler that parses its value.
 */
struct item_value_func {
    char item[ITEM_LEN];    /* configuration key, compared with strcmp() against the parsed key */
    bool (*func)(const char *item, const char *value);  /* handler called with the key and its value; its result is returned by parse_line */
};
+{ + return g_thread_item[type].monitor; +} + +void set_thread_item_monitor_flag(monitor_item_type type, bool flag) +{ + g_thread_item[type].monitor = flag; +} + +bool get_thread_item_alarm_flag(monitor_item_type type) +{ + return g_thread_item[type].alarm; +} + +int get_thread_item_period(monitor_item_type type) +{ + return g_thread_item[type].period; +} + +void set_thread_item_period(monitor_item_type type, int period) +{ + g_thread_item[type].period = period; +} + +void set_thread_item_tid(monitor_item_type type, pthread_t tid) +{ + g_thread_item[type].tid = tid; +} + +/* + * write msg to log file + */ +static void write_log(const char *msg) +{ + int ret; + ssize_t write_ret; + + (void)pthread_mutex_lock(&g_log_fd_mutex); + ret = faccessat(0, g_log_path, F_OK, 0); + if (ret != 0) { + (void)close(g_monitor_log_fd); + g_monitor_log_fd = open(g_log_path, O_WRONLY | O_APPEND | O_CREAT | O_CLOEXEC, LOG_FILE_PERMISSION); + if (g_monitor_log_fd < 0) { + (void)printf("[sysmonitor]: reopen %s failed,errno[%d].\n", g_log_path, errno); + (void)pthread_mutex_unlock(&g_log_fd_mutex); + return; + } + } + (void)lseek(g_monitor_log_fd, 0, SEEK_END); + write_ret = write(g_monitor_log_fd, msg, strlen(msg)); + if (write_ret == -1) { + (void)printf("[sysmonitor]: write to log file failed, errno[%d].\n", errno); + } + (void)pthread_mutex_unlock(&g_log_fd_mutex); +} + +static int get_log_time(struct tm *t) +{ + time_t now; + int ret; + struct tm *ret_t = NULL; + + now = time((time_t)0); + ret = memset_s(t, sizeof(struct tm), 0, sizeof(struct tm)); + if (ret != 0) { + return -1; + } + + if (g_flag_utc == true) { + ret_t = gmtime_r(&now, t); + } else { + ret_t = localtime_r(&now, t); + } + + if (ret_t == NULL) { + return -1; + } + + return 0; +} + +static void log_for_daemon(int priority, const char *detail) +{ + int ret; + char msg[MAX_LOG_LEN + MAX_TEMPSTR] = { 0 }; + + ret = snprintf_s(msg, MAX_LOG_LEN + MAX_TEMPSTR, strlen(detail) + MAX_TEMPSTR - 1, + "sysmonitor[%d]: %s", 
g_monitor_main_pid, detail); + if (ret == -1) { + syslog(priority, "log_it snprintf_s for msg error [%d]", ret); + } + syslog(priority, "%s", msg); +} + +static void log_for_normal(const char *detail, struct tm t) +{ + char msg[MAX_LOG_LEN + MAX_TEMPSTR] = { 0 }; + int ret; + + if (g_flag_utc == true) { + ret = snprintf_s(msg, MAX_LOG_LEN + MAX_TEMPSTR, MAX_LOG_LEN + MAX_TEMPSTR - 1, + "[UTC %04d-%02d-%02d:%02d:%02d:%02d]sysmonitor[%d]: %s\n", + t.tm_year + TM_YEAR_BEGIN, t.tm_mon + 1, t.tm_mday, + t.tm_hour, t.tm_min, t.tm_sec, g_monitor_main_pid, detail); + } else { + ret = snprintf_s(msg, MAX_LOG_LEN + MAX_TEMPSTR, MAX_LOG_LEN + MAX_TEMPSTR - 1, + "[LOC %04d-%02d-%02d:%02d:%02d:%02d]sysmonitor[%d]: %s\n", + t.tm_year + TM_YEAR_BEGIN, t.tm_mon + 1, t.tm_mday, + t.tm_hour, t.tm_min, t.tm_sec, g_monitor_main_pid, detail); + } + + if (ret == -1) { + (void)printf("log_it: snprintf_s msg failed"); + return; + } + + if (g_flag_log_ok) { + write_log(msg); + } else { + (void)printf("%s", msg); + } +} + +/* + * write info to log file, use syslog or write interface + */ +static void log_it(int priority, const char *detail) +{ + struct tm t; + + if (get_log_time(&t) != 0) { + return; + } + + if (g_log_interface_flag == DAEMON_SYSLOG) { + log_for_daemon(priority, detail); + } else { + log_for_normal(detail, t); + } +} + +void log_printf(int priority, const char *format, ...) 
+{ + char msg_buffer[MAX_LOG_LEN] = {0}; + int ret; + va_list arg_list; + + va_start(arg_list, format); + ret = vsnprintf_s(msg_buffer, sizeof(msg_buffer), sizeof(msg_buffer) - 1, format, arg_list); + if (ret == -1 && msg_buffer[0] == '\0') { + (void)printf("log_printf: vsnprintf_s aMsgBuffer failed"); + va_end(arg_list); + return; + } + + va_end(arg_list); + log_it(priority, msg_buffer); +} + +static void close_log(void) +{ + if (g_log_interface_flag == NORMAL_WRITE) { + if (g_monitor_log_fd >= 0) { + (void)close(g_monitor_log_fd); + g_monitor_log_fd = -1; + } + } +} + +static void handle_lock_pidfile_failed(const char *pidfile, int error_no, int fd) +{ + char buf[MAX_TEMPSTR] = {0}; + char *ep = NULL; + long other_pid; + char err_buf[MAX_STRERROR_SIZE] = {0}; + ssize_t num; + char *err_ret = NULL; + + num = read(fd, buf, sizeof(buf) - 1); + if (num > 0) { + /* use decimal conversion */ + other_pid = strtol(buf, &ep, STRTOL_NUMBER_BASE); + if (other_pid > 0 && ep != buf && *ep == '\n' && other_pid != LONG_MAX) { + err_ret = strerror_r(error_no, err_buf, sizeof(err_buf)); + log_printf(LOG_ERR, "can't lock %s, otherpid may be %ld: %s", pidfile, other_pid, err_ret); + } + } else { + err_ret = strerror_r(error_no, err_buf, sizeof(err_buf)); + log_printf(LOG_ERR, "can't lock %s, otherpid unknown: %s", pidfile, err_ret); + } +} + +static bool write_pid_to_file(pid_t pid, const char *pidfile, int fd) +{ + char buf[MAX_TEMPSTR] = {0}; + char err_buf[MAX_STRERROR_SIZE] = {0}; + int ret; + char *err_ret = NULL; + ssize_t num; + + ret = snprintf_s(buf, sizeof(buf), sizeof(buf) - 1, "%ld\n", (long)pid); + if (ret == -1) { + log_printf(LOG_ERR, "acquire_daemonlock: snprintf_s buf failed"); + return false; + } + (void)lseek(fd, (off_t)0, SEEK_SET); + num = write(fd, buf, strlen(buf)); + if (num < 0) { + err_ret = strerror_r(errno, err_buf, sizeof(err_buf)); + log_printf(LOG_ERR, "acquire_daemonlock: write %s error, %s", pidfile, err_ret); + return false; + } + if (ftruncate(fd, 
/*
 * Take the daemon lock on @pidfile and/or store a pid in it.
 *
 * pidfile: path of the pidfile
 * update:  false - open (or create) the file, take an exclusive non-blocking
 *                  flock() on it, then write @pid;
 *          true  - only rewrite @pid through the descriptor kept from the
 *                  earlier update == false call
 * pid:     process id to store in the file
 *
 * Returns true when the pid was written, false otherwise. The descriptor is
 * kept in a function-local static so that a later update call can reuse it.
 */
static bool acquire_daemonlock(const char *pidfile, bool update, pid_t pid)
{
    static int fd = -1;
    char err_buf[MAX_STRERROR_SIZE] = {0};
    char *err_ret = NULL;

    if (update == false) {
        /* Initial mode is 0600 to prevent flock() race/DoS. */
        fd = open(pidfile, O_RDWR | O_CREAT | O_CLOEXEC, 0600);
        if (fd == -1) {
            err_ret = strerror_r(errno, err_buf, sizeof(err_buf));
            log_printf(LOG_ERR, "can't open or create %s: %s", pidfile, err_ret);
            return false;
        }
        if (flock(fd, LOCK_EX | LOCK_NB) < OK) {
            /* lock not obtained: report it, reading the pid stored in the file */
            handle_lock_pidfile_failed(pidfile, errno, fd);
            (void)close(fd);
            fd = -1;
            return false;
        }
        /* lock is held: switch the file to its final mode */
        (void)fchmod(fd, SYSMONITOR_PIDFILE_MODE);
        (void)fcntl(fd, F_SETFD, 1);
    }

    if (write_pid_to_file(pid, pidfile, fd) == false) {
        /* writing failed: give up the descriptor and remove the pidfile */
        (void)close(fd);
        fd = -1;
        (void)unlink(pidfile);
        return false;
    }

    return true;
}
+ g_thread_item[INODE_ITEM].init = inode_monitor_init; + g_thread_item[FILE_ITEM].init = file_monitor_init; + g_thread_item[CUSTOM_DAEMON_ITEM].init = custom_daemon_monitor_init; + g_thread_item[CUSTOM_PERIODIC_ITEM].init = custom_periodic_monitor_init; + g_thread_item[IO_DELAY_ITEM].init = io_delay_monitor_init; + g_thread_item[SYSTEM_ITEM].init = sys_resources_monitor_init; + g_thread_item[SYS_EVENT_ITEM].init = sys_event_monitor_init; + g_thread_item[ZOMBIE_ITEM].init = zombie_monitor_init; + + g_thread_item[PS_ITEM].period = PS_PERIOD; + g_thread_item[DISK_ITEM].period = DISK_PERIOD; + g_thread_item[INODE_ITEM].period = INODE_PERIOD; + g_thread_item[IO_DELAY_ITEM].period = IODELAY_PERIOD; + g_thread_item[CUSTOM_DAEMON_ITEM].period = CUSTOM_DAEMON_PERIOD; + g_thread_item[ZOMBIE_ITEM].period = ZOMBIE_PERIOD; + + for (i = 0; i < MONITOR_ITEMS_CNT; i++) { + g_thread_item[i].monitor = true; + g_thread_item[i].alarm = false; + g_thread_item[i].reload = true; + } + + /* init system resources monitor item */ + sys_resources_item_init_early(); + sys_event_item_init_early(); +} + +static bool montor_item_root_start(void) +{ + int i; + + for (i = 0; i < MONITOR_ITEMS_CNT; i++) { + if (g_thread_item[i].monitor == true && g_thread_item[i].init) { + g_thread_item[i].init(); + if (g_thread_item[i].tid == 0) { + return false; + } + } + } + + return true; +} + +static bool monitor_start(void) +{ + if (!montor_item_root_start()) { + return false; + } + return true; +} + +static void quit_handler(int signo) +{ + (void)unlink(HEARTBEAT_SOCKET); + close_sys_event_fd(); + close_log(); + (void)unlink(PIDFILE); + _exit(EXIT_SUCCESS); +} + +static void reload_handler(int signo) +{ + int i; + + for (i = 0; i < MONITOR_ITEMS_CNT; i++) { + g_thread_item[i].reload = true; + } + clear_all_thread_status(); +} + +static void sig_setup(void) +{ + struct sigaction quit_action; + struct sigaction reload_action; + int ret; + unsigned int sig_size = sizeof(struct sigaction); + + ret = 
memset_s(&quit_action, sig_size, 0, sig_size); + if (ret) { + log_printf(LOG_ERR, "sig_setup: memset_s quit_action failed, ret: %d.", ret); + return; + } + ret = memset_s(&reload_action, sig_size, 0, sig_size); + if (ret) { + log_printf(LOG_ERR, "sig_setup: memset_s reload_action failed, ret: %d.", ret); + return; + } + + quit_action.sa_handler = quit_handler; + reload_action.sa_handler = reload_handler; + + (void)sigaction(SIGINT, &quit_action, NULL); + (void)sigaction(SIGTERM, &quit_action, NULL); + (void)sigaction(SIGUSR2, &reload_action, NULL); + + (void)signal(SIGPIPE, SIG_IGN); +} + +static bool _parse_value_off(const char *item, const char *value, bool *v) +{ + if (strcmp(value, "off") == 0) { + *v = false; + } else if (strcmp(value, "on") != 0) { + log_printf(LOG_INFO, "%s set error", item); + return false; + } + return true; +} + +static bool _parse__process_monitor(const char *item, const char *value) +{ + return _parse_value_off(item, value, &g_thread_item[PS_ITEM].monitor); +} + +static bool _parse_process_monitor_delay(const char *item, const char *value) +{ + return parse_process_monitor_delay(item, value); +} + +static bool _parse_process_alarm_supress(const char *item, const char *value) +{ + return parse_process_alarm_supress(value); +} + +static bool _parse__process_monitor_period(const char *item, const char *value) +{ + g_thread_item[PS_ITEM].period = (int)strtol(value, NULL, STRTOL_NUMBER_BASE); + if (check_int(value) == false || g_thread_item[PS_ITEM].period <= 0) { + log_printf(LOG_INFO, "PROCESS_MONITOR_PERIOD set error"); + return false; + } + return true; +} + +static bool _parse__process_recall_period(const char *item, const char *value) +{ + return parse_process_recall_period(value); +} + +static bool _parse__process_restart_timeout(const char *item, const char *value) +{ + return parse_process_restart_tiemout(value); +} + +static bool _parse__filesystem_monitor(const char *item, const char *value) +{ + return _parse_value_off(item, 
value, &g_thread_item[FS_ITEM].monitor); +} + +static bool _parse__signal_monitor(const char *item, const char *value) +{ + return sys_event_monitor_parse(item, value, SIGNAL, true); +} + +static bool _parse__disk_monitor(const char *item, const char *value) +{ + return _parse_value_off(item, value, &g_thread_item[DISK_ITEM].monitor); +} + +static bool _parse__disk_monitor_period(const char *item, const char *value) +{ + g_thread_item[DISK_ITEM].period = (int)strtol(value, NULL, STRTOL_NUMBER_BASE); + if (check_int(value) == false || g_thread_item[DISK_ITEM].period <= 0) { + log_printf(LOG_INFO, "DISK_MONITOR_PERIOD set error"); + return false; + } + return true; +} + +static bool _parse__inode_monitor(const char *item, const char *value) +{ + return _parse_value_off(item, value, &g_thread_item[INODE_ITEM].monitor); +} + +static bool _parse__inode_monitor_period(const char *item, const char *value) +{ + g_thread_item[INODE_ITEM].period = (int)strtol(value, NULL, STRTOL_NUMBER_BASE); + if (check_int(value) == false || g_thread_item[INODE_ITEM].period <= 0) { + log_printf(LOG_INFO, "INODE_MONITOR_PERIOD set error"); + return false; + } + return true; +} + +static bool _parse__netcard_monitor(const char *item, const char *value) +{ + return sys_event_monitor_parse(item, value, NETWORK, true); +} + +static bool _parse__file_monitor(const char *item, const char *value) +{ + return _parse_value_off(item, value, &g_thread_item[FILE_ITEM].monitor); +} + + +static bool _parse__cpu_monitor(const char *item, const char *value) +{ + return sys_resources_monitor_parse(item, value, CPU, true); +} + +static bool _parse__mem_monitor(const char *item, const char *value) +{ + return sys_resources_monitor_parse(item, value, MEM, true); +} + +static bool _parse__pscnt_monitor(const char *item, const char *value) +{ + return sys_resources_monitor_parse(item, value, PSCNT, true); +} + +static bool _parse__fdcnt_monitor(const char *item, const char *value) +{ + return 
sys_resources_monitor_parse(item, value, SYSTEM_FDCNT, true); +} + +static bool _parse__custom_daemon_monitor(const char *item, const char *value) +{ + return _parse_value_off(item, value, &g_thread_item[CUSTOM_DAEMON_ITEM].monitor); +} + +static bool _parse__custom_periodic_monitor(const char *item, const char *value) +{ + return _parse_value_off(item, value, &g_thread_item[CUSTOM_PERIODIC_ITEM].monitor); +} + +static bool _parse__io_delay_monitor(const char *item, const char *value) +{ + return _parse_value_off(item, value, &g_thread_item[IO_DELAY_ITEM].monitor); +} + + +static bool _parse__zombie_monitor(const char *item, const char *value) +{ + return _parse_value_off(item, value, &g_thread_item[ZOMBIE_ITEM].monitor); +} + +static bool _parse__process_fd_num_monitor(const char *item, const char *value) +{ + return sys_event_monitor_parse(item, value, FDSTAT, true); +} + +static bool _parse_net_rate_limit_burst(const char *item, const char *value) +{ + return parse_net_ratelimit_burst(value); +} + +static bool _parse_fd_monitor_log_path(const char *item, const char *value) +{ + return parse_fd_monitor_log_path(value); +} + +static bool _parse_check_thread_monitor(const char *item, const char *value) +{ + return check_thread_monitor(item, value); +} + +static bool _parse_check_thread_failure_num(const char *item, const char *value) +{ + return check_thread_failure_num(item, value); +} + +static const struct item_value_func g_opt_array[] = { + { "PROCESS_MONITOR", _parse__process_monitor }, + { "PROCESS_MONITOR_PERIOD", _parse__process_monitor_period }, + { "PROCESS_ALARM_SUPRESS_NUM", _parse_process_alarm_supress }, + { "PROCESS_MONITOR_DELAY", _parse_process_monitor_delay }, + { "PROCESS_RESTART_TIMEOUT", _parse__process_restart_timeout }, + { "PROCESS_RECALL_PERIOD", _parse__process_recall_period }, + { "FILESYSTEM_MONITOR", _parse__filesystem_monitor }, + { "SIGNAL_MONITOR", _parse__signal_monitor }, + { "DISK_MONITOR", _parse__disk_monitor }, + { 
"DISK_MONITOR_PERIOD", _parse__disk_monitor_period }, + { "INODE_MONITOR", _parse__inode_monitor }, + { "INODE_MONITOR_PERIOD", _parse__inode_monitor_period }, + { "NETCARD_MONITOR", _parse__netcard_monitor }, + { "FILE_MONITOR", _parse__file_monitor }, + { "CPU_MONITOR", _parse__cpu_monitor }, + { "MEM_MONITOR", _parse__mem_monitor }, + { "PSCNT_MONITOR", _parse__pscnt_monitor }, + { "FDCNT_MONITOR", _parse__fdcnt_monitor }, + { "CUSTOM_DAEMON_MONITOR", _parse__custom_daemon_monitor }, + { "CUSTOM_PERIODIC_MONITOR", _parse__custom_periodic_monitor }, + { "IO_DELAY_MONITOR", _parse__io_delay_monitor }, + { "PROCESS_FD_NUM_MONITOR", _parse__process_fd_num_monitor }, + { "NET_RATE_LIMIT_BURST", _parse_net_rate_limit_burst }, + { "FD_MONITOR_LOG_PATH", _parse_fd_monitor_log_path }, + { "ZOMBIE_MONITOR", _parse__zombie_monitor }, + { "CHECK_THREAD_MONITOR", _parse_check_thread_monitor }, + { "CHECK_THREAD_FAILURE_NUM", _parse_check_thread_failure_num } +}; + +static bool parse_line(const char *config) +{ + char item[ITEM_LEN]; + char value[MAX_TEMPSTR]; + char *ptr = NULL; + unsigned int size; + unsigned int i; + int ret; + + while (*config == ' ' || *config == '\t') { + config++; + } + + if (*config == '#') { + return true; + } + + ptr = strstr(config, "=\""); + if (ptr == NULL) { + return true; + } + + ret = memset_s(item, sizeof(item), 0, sizeof(item)); + if (ret) { + log_printf(LOG_ERR, "parse_line: memset_s item failed, ret: %d.", ret); + return false; + } + ret = memset_s(value, sizeof(value), 0, sizeof(value)); + if (ret) { + log_printf(LOG_ERR, "parse_line: memset_s value failed, ret: %d.", ret); + return false; + } + + size = (unsigned int)(ptr - config); + if (size >= sizeof(item)) { + log_printf(LOG_ERR, "sysmonitor parse_line: item length(%u) too long(>%u).", size, sizeof(item)); + return false; + } + ret = strncpy_s(item, sizeof(item), config, size); + if (ret) { + log_printf(LOG_ERR, "parse_line: strncpy_s item failed, ret: %d.", ret); + return false; + } 
+ get_value(config, size, value, sizeof(value)); + for (i = 0; i < array_size(g_opt_array); i++) { + if (strcmp(item, g_opt_array[i].item) != 0) { + continue; + } + if (g_opt_array[i].func != NULL) { + return g_opt_array[i].func(item, value); + } else { + return false; + } + } + + return true; +} + +static bool check_config_path_valid(const char *config) +{ + if (strlen(config) > LOG_FILE_LEN - 1) { + (void)printf("[sysmonitor] log path length is more than %d bytes\n", LOG_FILE_LEN - 1); + return false; + } + + if (*config != '/') { + (void)printf("[sysmonitor] log path must begin with /\n"); + return false; + } + + if (strchr(config, '\"') == NULL) { + (void)printf("[sysmonitor] log path must end with \"\n"); + return false; + } + + return true; +} + +static bool parse_log_path(const char *config, const char *value) +{ + int ret; + char tmp_path[LOG_FILE_LEN] = {0}; + + if (check_config_path_valid(config) == false) { + return false; + } + + if (strlen(value) == 0) { + (void)printf("[sysmonitor] log path len can`t be empty\n"); + return false; + } + + ret = strncpy_s(tmp_path, LOG_FILE_LEN, value, LOG_FILE_LEN - 1); + if (ret) { + (void)printf("parse_line_log_path: strncpy_s log_path failed, ret: %d.", ret); + return false; + } + + if (!check_log_path(tmp_path)) { + return false; + } + + ret = strncpy_s(g_log_path, LOG_FILE_LEN, tmp_path, LOG_FILE_LEN - 1); + if (ret) { + (void)printf("parse_line_log_path: strncpy_s log_path failed, ret: %d", ret); + return false; + } + + return true; +} + +static bool parse_line_log_path(const char *config) +{ + char words[ITEM_LEN] = {0}; + char value[LOG_FILE_LEN] = {0}; + char *ptr = NULL; + unsigned int size; + int ret; + + while (*config == ' ' || *config == '\t') { + config++; + } + + if (*config == '#' || *config == '\n') { + return true; + } + + if (check_conf_file_valid(config) == -1) { + return false; + } + + ptr = strstr(config, "=\""); + if (ptr != NULL) { + size = (unsigned int)(ptr - config); + if (size >= 
sizeof(words)) { + (void)printf("parse_line_log_path: key length(%u) too long(>%lu).", size, sizeof(words)); + return false; + } + ret = strncpy_s(words, sizeof(words), config, size); + if (ret) { + (void)printf("parse_line_log_path: strncpy_s words failed, ret: %d.", ret); + return false; + } + + get_value(config, size, value, sizeof(value)); + if (!strcmp(words, "WRITE_LOG_PATH")) { + return parse_log_path(ptr + SKIP_TWO_CHARS_LEN, value); + } else if (!strcmp(words, "UTC_TIME")) { + if (!strcmp(value, "on")) { + g_flag_utc = true; + } + return true; + } + } + + (void)printf("[sysmonitor] keyword '%s' not found\n", words); + return false; +} + +static int init_log(void) +{ + int ret; + + if (g_log_interface_flag == NORMAL_WRITE) { + ret = parse_config(W_LOG_CONF_FILE, parse_line_log_path); + if (ret == false || strlen(g_log_path) == 0) { + ret = strncpy_s(g_log_path, sizeof(g_log_path), W_LOG_DEFAULT_PATH, sizeof(g_log_path) - 1); + if (ret) { + (void)printf("init_log: strncpy_s log_path failed, ret: %d.", ret); + return ERROR_OPEN; + } + (void)printf("[sysmonitor] parse '%s' failed, default log path '%s' will be used\n", + W_LOG_CONF_FILE, g_log_path); + } + + g_monitor_log_fd = open(g_log_path, O_WRONLY | O_APPEND | O_CREAT | O_CLOEXEC, LOG_FILE_PERMISSION); + if (g_monitor_log_fd < OK) { + (void)printf("[sysmonitor] open '%s' failed, process will exit.errno[%d]\n", g_log_path, errno); + return ERROR_OPEN; + } + } + + g_flag_log_ok = true; + return 0; +} + +static int parse_args(int argc, const char **argv) +{ + if (argc != USER_ARGS) { + (void)printf("Usage: 'sysmonitor --daemon' or 'sysmonitor --normal'\n"); + return ERROR_ARGS_WRONG; + } + + if (strcmp(argv[1], "--daemon") == 0) { + g_log_interface_flag = DAEMON_SYSLOG; + } else if (strcmp(argv[1], "--normal") == 0) { + g_log_interface_flag = NORMAL_WRITE; + } else { + (void)printf("Usage: 'sysmonitor --daemon' or 'sysmonitor --normal'\n"); + return ERROR_ARGS_WRONG; + } + + return 0; +} + +static int 
check_monitor_thread(pthread_t *worker_tid) /* re-run init for monitor threads that have exited; 0 on success, -1 on failure */
+{
+    int i;
+
+    for (i = 0; i < MONITOR_ITEMS_CNT; i++) {
+        if (g_thread_item[i].tid && !pthread_tryjoin_np(g_thread_item[i].tid, NULL)) {
+            g_thread_item[i].tid = 0;
+            if (g_thread_item[i].init != NULL) {
+                g_thread_item[i].init();
+            }
+            if (g_thread_item[i].tid == 0) { /* tid still unset after init(): the thread was not recreated */
+                return -1;
+            }
+        }
+    }
+    if (*worker_tid != 0 && !pthread_tryjoin_np(*worker_tid, NULL) &&
+        worker_thread_init(worker_tid) == false) {
+        return -1;
+    }
+
+    return 0;
+}
+
+static int monitor_struct_init(void) /* zero g_thread_item and init the worker task structure; 0 on success, -1 on failure */
+{
+    int ret;
+
+    ret = memset_s(g_thread_item, sizeof(monitor_thread) * MONITOR_ITEMS_CNT, 0,
+        sizeof(monitor_thread) * MONITOR_ITEMS_CNT);
+    if (ret) {
+        (void)printf("main, memset_s thread_item failed, ret: %d.", ret);
+        return -1;
+    }
+
+    if (worker_task_struct_init() == false) {
+        return -1;
+    }
+
+    return 0;
+}
+
+static bool monitor_thread_start(pthread_t *worker_tid) /* start the worker thread, then the monitors; false if either step fails */
+{
+    if (worker_thread_init(worker_tid) == false || monitor_start() == false) {
+        return false;
+    }
+
+    return true;
+}
+
+int main(int argc, char **argv) /* every path out of main goes through err and exits with EXIT_FAILURE */
+{
+    pthread_t worker_tid = 0;
+    int ret;
+    bool delete_pid_file = false;
+
+    if (monitor_struct_init() == -1) {
+        goto err;
+    }
+
+    ret = parse_args(argc, (const char **)argv);
+    if (ret != 0) {
+        goto err;
+    }
+
+    ret = init_log();
+    if (ret != 0) {
+        goto err;
+    }
+
+    monitor_var_init();
+    if (parse_config(CONF, parse_line) == false) {
+        goto err;
+    }
+
+    /* after parsing /etc/sysconfig/sysmonitor, init the system resources and system event monitor items */
+    sys_resources_item_init();
+    sys_event_item_init();
+
+    /*
+     * Create the daemon after log-init, so we can record errors and info.
+     * Create the daemon after monitor_var_init, because 'systemctl status' returns the parent process status.
+     */
+    if (daemonize() == false) {
+        goto err;
+    }
+
+    delete_pid_file = true; /* from here on the err path also unlinks PIDFILE */
+
+    sig_setup();
+    log_printf(LOG_INFO, "[--------------------------sysmonitor starting up----------------------------]");
+
+    if (thread_status_struct_init() == -1) {
+        goto err;
+    }
+
+    if (monitor_thread_start(&worker_tid) == false) {
+        goto err;
+    }
+
+    for (;;) { /* supervise the threads every SYSMONITOR_PERIOD seconds; the loop only ends through err */
+        if (check_monitor_thread(&worker_tid) == -1) {
+            goto err;
+        }
+        if (check_thread_status() == -1) {
+            goto err;
+        }
+        (void)sleep(SYSMONITOR_PERIOD);
+    }
+err:
+    close_log();
+    if (delete_pid_file) {
+        (void)unlink(PIDFILE);
+    }
+    exit(EXIT_FAILURE);
+}
diff --git a/sysmonitor-1.3.2/src/sysmonitor.h b/sysmonitor-1.3.2/src/sysmonitor.h
new file mode 100644
index 0000000000000000000000000000000000000000..6cd838588f0775b23e125cfad177f79ad57f43cf
--- /dev/null
+++ b/sysmonitor-1.3.2/src/sysmonitor.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2016-2019. All rights reserved.
+ * sysmonitor licensed under the Mulan PSL v2.
+ * You can use this software according to the terms and conditions of the Mulan PSL v2.
+ * You may obtain a copy of Mulan PSL v2 at:
+ * http://license.coscl.org.cn/MulanPSL2
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
+ * PURPOSE.
+ * See the Mulan PSL v2 for more details.
+ * Description: define variable and function for sysmonitor + * Author: xuchunmei + * Create: 2016-1-1 + */ +#ifndef SYSMONITOR_H +#define SYSMONITOR_H + +#define HEARTBEAT_SOCKET "/var/run/heartbeat.socket" +#define CONF "/etc/sysconfig/sysmonitor" + +#define RESTART_ALARM "systemctl restart sysalarm &> /dev/null" + +extern void close_alarm(void); + +#endif diff --git a/sysmonitor-1.3.2/src/zombie.c b/sysmonitor-1.3.2/src/zombie.c new file mode 100644 index 0000000000000000000000000000000000000000..a678b6417d935845d9bf3e9d32afd1cb12418432 --- /dev/null +++ b/sysmonitor-1.3.2/src/zombie.c @@ -0,0 +1,251 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2016-2019. All rights reserved. + * sysmonitor licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. 
+ * Description: Monitor the zombie process number + * Author: Suo Ben + * Create: 2016-5-27 + */ + +#include "zombie.h" + +#include +#include + +#include "common.h" +#include "monitor_thread.h" + +#define ZOMBIE_EXTDES "zombie process count" +#define MAX_MONITORCOMMAND 4096 +#define BUF_LEN 100 + +static unsigned long g_alarm_cnt = 500; +static unsigned long g_resume_cnt = 400; +static int g_thread_start = 1; + +struct item_value_func { + char item[ITEM_LEN]; + bool (*func)(const char *item, const char *value); +}; + +static bool parse_zombie_alarm(const char *item, const char *value) +{ + return parse_value_ulong(item, value, &g_alarm_cnt); +} + +static bool parse_zombie_resume(const char *item, const char *value) +{ + return parse_value_ulong(item, value, &g_resume_cnt); +} + +static bool parse_zombie_period(const char *item, const char *value) +{ + unsigned int period; + bool ret = false; + + ret = parse_value_int(item, value, &period); + if (ret) { + set_thread_item_period(ZOMBIE_ITEM, (int)period); + } + return ret; +} + +static const struct item_value_func g_item_array[] = { + { "ALARM", parse_zombie_alarm }, + { "RESUME", parse_zombie_resume }, + { "PERIOD", parse_zombie_period } +}; + +static bool parse_line(const char *config) +{ + char item[ITEM_LEN] = {0}; + char value[VALUE_LEN] = {0}; + char *ptr = NULL; + unsigned int size; + unsigned int i; + errno_t rc; + + while (*config == ' ' || *config == '\t') { + config++; + } + + if (*config == '#') { + return true; + } + + ptr = strstr(config, "=\""); + if (ptr == NULL) { + return true; + } + + size = (unsigned int)(ptr - config); + size = size < (unsigned int)sizeof(item) ? 
size : (unsigned int)sizeof(item) - 1; + rc = strncpy_s(item, sizeof(item), config, size); + if (rc != EOK) { + log_printf(LOG_ERR, "parse_line strncpy_s error [%d]", rc); + return false; + } + + get_value(config, size, value, sizeof(value)); + if (!strlen(value)) { + return true; + } + + for (i = 0; i < array_size(g_item_array); i++) { + if (strcmp(item, g_item_array[i].item) == 0 && g_item_array[i].func != NULL) { + return g_item_array[i].func(item, value); + } + } + + return true; +} + +static void zombie_get_parent_process(void) +{ + char cmd[MAX_MONITORCOMMAND] = {0}; + char ppid_buf[MAX_TEMPSTR] = {0}; + errno_t rc; + + rc = snprintf_s(cmd, sizeof(cmd), sizeof(cmd) - 1, "/usr/libexec/sysmonitor/getzombieparent.py"); + if (rc < 0) { + log_printf(LOG_ERR, "zombie_get_parent_process snprintf_s error [%d]", rc); + return; + } + + if (monitor_popen(cmd, ppid_buf, sizeof(ppid_buf), POPEN_TIMEOUT, NULL)) { + log_printf(LOG_INFO, "failed to get zombie process info"); + return; + } + + ppid_buf[MAX_TEMPSTR - 1] = '\0'; +} + +static bool get_zombie_process(unsigned long *cnt) +{ + char cmd[MAX_MONITORCOMMAND] = {0}; + char cnt_buf[BUF_LEN] = {0}; + errno_t rc; + + rc = snprintf_s(cmd, sizeof(cmd), sizeof(cmd) - 1, + "ps -A -o stat,ppid,pid,cmd | grep -e '^[Zz]' | awk '{print $0}' | wc -l"); + if (rc < 0) { + log_printf(LOG_ERR, "monitor_zombie snprintf_s error [%d]", rc); + return false; + } + if (monitor_popen(cmd, cnt_buf, sizeof(cnt_buf), POPEN_TIMEOUT, NULL)) { + log_printf(LOG_ERR, "failed to get zombie process count"); + return false; + } + + cnt_buf[BUF_LEN - 1] = '\0'; + *cnt = strtoul(cnt_buf, NULL, 0); + if (errno == EINVAL || errno == ERANGE) { + log_printf(LOG_ERR, "process count is wrong"); + return false; + } + + return true; +} + +static void monitor_zombie(bool *status) +{ + unsigned long cnt; + bool execute_result = false; + + execute_result = get_zombie_process(&cnt); + if (!execute_result) { + return; + } + + if (cnt >= g_alarm_cnt && *status == 
false) { + log_printf(LOG_WARNING, "zombie process count alarm: %lu (alarm: %lu, resume: %lu)", + cnt, g_alarm_cnt, g_resume_cnt); + *status = true; + zombie_get_parent_process(); + } else if ((cnt <= g_resume_cnt && *status == true) || (cnt <= g_resume_cnt && g_thread_start)) { + log_printf(LOG_INFO, "zombie process count resume: %lu (alarm: %lu, resume: %lu)", + cnt, g_alarm_cnt, g_resume_cnt); + *status = false; + } + g_thread_start = 0; + + return; +} + +static int zombie_parse_config(void) +{ + bool ret = false; + int period; + int result; + + ret = parse_config(ZOMBIE_CONF, parse_line); + period = get_thread_item_period(ZOMBIE_ITEM); + set_thread_item_reload_flag(ZOMBIE_ITEM, false); + if ((ret == false) || (g_alarm_cnt <= g_resume_cnt || period <= 0)) { + log_printf(LOG_ERR, + "zombie process monitor: configuration illegal, alarm is %lu, resume is %lu, period is %d", + g_alarm_cnt, g_resume_cnt, period); + ret = false; + result = set_thread_status_check_flag(THREAD_ZOMBIE_ITEM, false); + if (result == -1) { + log_printf(LOG_ERR, "reload zombie monitor set check flag error"); + return RET_BREAK; + } + } + if (ret) { + clear_thread_status(THREAD_ZOMBIE_ITEM); + result = set_thread_check_value(THREAD_ZOMBIE_ITEM, true, (unsigned int)period); + if (result == -1) { + log_printf(LOG_ERR, "zombie monitor set check flag or period error"); + return RET_BREAK; + } + return RET_SUCCESS; + } + + return RET_CONTINUE; +} + +static void *zombie_monitor_start(void *arg) +{ + bool failed = false; + int result = -1; + log_printf(LOG_INFO, "zombie monitor starting up"); + + for (;;) { + if (get_thread_item_reload_flag(ZOMBIE_ITEM)) { + log_printf(LOG_INFO, "zombie monitor, start reload"); + result = zombie_parse_config(); + if (result == RET_BREAK) { + break; + } + } + if (result == RET_SUCCESS) { + monitor_zombie(&failed); + result = feed_thread_status_count(THREAD_ZOMBIE_ITEM); + if (result == -1) { + log_printf(LOG_ERR, "zombie monitor feed error"); + break; + } + } + 
(void)sleep((unsigned int)get_thread_item_period(ZOMBIE_ITEM)); + } + + return NULL; +} + +void zombie_monitor_init(void) +{ + pthread_t tid; + + if (pthread_create(&tid, NULL, zombie_monitor_start, NULL)) { + log_printf(LOG_ERR, "create zombie monitor thread error [%d]", errno); + return; + } + set_thread_item_tid(ZOMBIE_ITEM, tid); +} diff --git a/sysmonitor-1.3.2/src/zombie.h b/sysmonitor-1.3.2/src/zombie.h new file mode 100644 index 0000000000000000000000000000000000000000..c34b9fa06f46947301de1084e3a46be045168373 --- /dev/null +++ b/sysmonitor-1.3.2/src/zombie.h @@ -0,0 +1,24 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2016-2019. All rights reserved. + * sysmonitor licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. + * Description: Monitor the zombie process number + * Author: Suo Ben + * Create: 2016-5-27 + */ + +#ifndef ZOMBIE_H +#define ZOMBIE_H + +#define ZOMBIE_CONF "/etc/sysmonitor/zombie" + +void zombie_monitor_init(void); + +#endif + diff --git a/sysmonitor-1.3.2/test/CMakeLists.txt b/sysmonitor-1.3.2/test/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..2e1a96ad6525ecd0f03451fad1c9d01bee224109 --- /dev/null +++ b/sysmonitor-1.3.2/test/CMakeLists.txt @@ -0,0 +1,20 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2018-2019. All rights reserved. 
+# Description: cmake file of sysmonitor testcase +# Author: xuchunmei +# Create: 2019-9-9 + +project(sysmonitor) + +set(CMAKE_C_FLAGS "-g -Wall -Werror -D_FORTIFY_SOURCE=2 -O2 -fPIE -fstack-protector-strong -s") + +add_subdirectory(common) +add_subdirectory(custom) +add_subdirectory(process) +add_subdirectory(disk) +add_subdirectory(filemonitor) +add_subdirectory(sys_resources) +add_subdirectory(sys_event) +add_subdirectory(sysmonitor) +add_subdirectory(fuzz) +add_subdirectory(zombie) +add_subdirectory(fs) diff --git a/sysmonitor-1.3.2/test/common/CMakeLists.txt b/sysmonitor-1.3.2/test/common/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..a780ff8ad4e928ddc4f84a28c687a68370522d93 --- /dev/null +++ b/sysmonitor-1.3.2/test/common/CMakeLists.txt @@ -0,0 +1,16 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2018-2019. All rights reserved. +# Description: cmake file of common_llt +# Author: xuchunmei +# Create: 2019-9-9 + +project(sysmonitor) + +SET(EXE common_llt) +INCLUDE_DIRECTORIES(../../src) +add_executable(${EXE} common_llt.c ../../src/common.c ../common_interface/common_interface.c) +set_target_properties(${EXE} PROPERTIES LINK_FLAGS "-Wl,-z,relro -Wl,-z,noexecstack -Wl,-z,now -Wtrampolines -pie") +target_link_libraries(${EXE} cunit boundscheck pthread) + +add_executable(process_exit_test process_exit_test.c) +set_target_properties(process_exit_test PROPERTIES LINK_FLAGS "-Wl,-z,relro -Wl,-z,noexecstack -Wl,-z,now -Wtrampolines -pie") +target_link_libraries(process_exit_test boundscheck) diff --git a/sysmonitor-1.3.2/test/common/common_llt.c b/sysmonitor-1.3.2/test/common/common_llt.c new file mode 100644 index 0000000000000000000000000000000000000000..659fbd73a02c9453d2d99bec16ebf580cb301c64 --- /dev/null +++ b/sysmonitor-1.3.2/test/common/common_llt.c @@ -0,0 +1,504 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2019-2019. All rights reserved. + * sysmonitor licensed under the Mulan PSL v2. 
+ * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. + * Description: testcase for common interface + * Author: xuchunmei + * Create: 2019-9-9 + */ + +#include +#include +#include +#include +#include +#include "common.h" +#include "../common_interface/common_interface.h" + +typedef enum { + CUNIT_SCREEN = 0, + CUNIT_XMLFILE, + CUNIT_CONSOLE +} cu_run_mode; + +#define MIN_TEST_TEMP 2 +#define TEST_TMP_LEN 10 +#define MAX_FILE_MODE 0777 +#define KB_SIZE 1024 + +static void test_monitor_popen_001() +{ + int ret; + char buffer[MAX_CONFIG] = {0}; + char temp[MIN_TEST_TEMP] = {0}; + + ret = monitor_popen("ls -al /home", buffer, sizeof(buffer), 0, NULL); + CU_ASSERT(ret == 0); + ret = monitor_popen("sleep 2", buffer, sizeof(buffer), 1, "ls -al /home > /dev/null"); + CU_ASSERT(ret == ERROR_TIMEOUT); + ret = monitor_popen("ls -al /home", temp, sizeof(temp), 0, NULL); + CU_ASSERT(ret == 0); + (void)monitor_cmd(DEFAULT_USER_ID, "cp ./common/process_exit_test /home", 0, NULL, true); + ret = monitor_popen("/home/process_exit_test", temp, sizeof(temp), MIN_TEST_TEMP, NULL); + CU_ASSERT(ret == ERROR_TIMEOUT); +} + +static void test_monitor_cmd_001() +{ + int ret; + + ret = monitor_cmd(DEFAULT_USER_ID, "ls -al /home", 0, NULL, false); + CU_ASSERT(ret == 0); + ret = monitor_cmd(DEFAULT_USER_ID, "ls -al /home > /dev/null", 0, NULL, false); + CU_ASSERT(ret != 0); + ret = monitor_cmd(DEFAULT_USER_ID, "ls -al /home > /dev/null", 0, NULL, true); + CU_ASSERT(ret == 0); + ret = monitor_cmd(0, "ls \"/home\"", 0, NULL, false); + CU_ASSERT(ret == 0); + ret = monitor_cmd(DEFAULT_USER_ID, "sleep 2", 1, NULL, false); + 
CU_ASSERT(ret != 0); + ret = monitor_cmd(DEFAULT_USER_ID, "sleep 2", 1, "ls /home", false); + CU_ASSERT(ret != 0); + ret = monitor_cmd(DEFAULT_USER_ID, "ls \"/home\" \"", 0, NULL, false); + CU_ASSERT(ret != 0); + ret = monitor_cmd(DEFAULT_USER_ID, "ls 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0\ + 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2\ + 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4\ + 5 6 7 8 9 0", 0, NULL, false); + CU_ASSERT(ret != 0); + ret = monitor_cmd(DEFAULT_USER_ID, "ls \"/home\" \"/boot\"", 0, NULL, false); + CU_ASSERT(ret == 0); + ret = monitor_cmd(DEFAULT_USER_ID, "/bin/bash -c \"ls /home\"", 0, NULL, false); + CU_ASSERT(ret == 0); + ret = monitor_cmd(DEFAULT_USER_ID, "ls \"/home\" \" /boot\"", 0, NULL, false); + CU_ASSERT(ret != 0); + ret = monitor_cmd(DEFAULT_USER_ID, "ls \"1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0" + "1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3" + "4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6" + "7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9" + "0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2" + "3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5" + "6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8" + "9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1" + "2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4" + "5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7" + "8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0" + "1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3" + "4 5 6 7 8 9 0\"", 0, NULL, false); + CU_ASSERT(ret != 0); +} + +static void test_get_value_001() +{ + char value[MAX_TEMPSTR] = {0}; + char tmp[TEST_TMP_LEN] = {0}; + + get_value("MONITOR_SWITCH=\"on\"", (unsigned int)strlen("MONITOR_SWITCH"), value, sizeof(value)); + CU_ASSERT(strcmp(value, "on") == 0); + + get_value("MONITOR_COMMAND=\"11111111111111\"", (unsigned int)strlen("MONITOR_COMMAND"), tmp, sizeof(tmp)); + CU_ASSERT(strcmp(tmp, "11111111111111") != 0); + CU_ASSERT(strcmp(tmp, "111111111") == 0); + (void)memset_s(tmp, sizeof(tmp), 0, sizeof(tmp)); + get_value("MONITOR_COMMAND=\"\"", 
(unsigned int)strlen("MONITOR_COMMAND"), tmp, sizeof(tmp));
+    CU_ASSERT(strlen(tmp) == 0);
+}
+
+/* Append msg to the file called name, creating the file with LOG_FILE_PERMISSION if needed. */
+static void create_test_file(const char *name, const char *msg)
+{
+    int fd;
+    ssize_t ret;
+
+    fd = open(name, O_WRONLY | O_APPEND | O_CREAT | O_CLOEXEC, LOG_FILE_PERMISSION);
+    if (fd < 0) {
+        return;
+    }
+
+    lseek(fd, 0, SEEK_END);
+    ret = write(fd, msg, strlen(msg));
+    if (ret == -1) {
+        (void)printf("write to %s failed.\n", name);
+    }
+    (void)close(fd);
+    fd = -1;
+}
+
+/*
+ * create large file for 1G=1024*1024*1024
+ * (KB_SIZE * KB_SIZE writes of a KB_SIZE-byte buffer filled with '@')
+ */
+static void create_large_file(const char *name)
+{
+    int fd = -1;
+    char temp[KB_SIZE] = {0};
+    int i;
+    ssize_t ret;
+
+    fd = open(name, O_WRONLY | O_APPEND | O_CREAT | O_CLOEXEC, LOG_FILE_PERMISSION);
+    if (fd < 0) {
+        (void)printf("create file %s failed.", name);
+        return;
+    }
+
+    (void)memset_s(temp, sizeof(temp), '@', sizeof(temp));
+    for (i = 0; i < KB_SIZE * KB_SIZE; i++) {
+        (void)lseek(fd, 0, SEEK_END);
+        ret = write(fd, temp, sizeof(temp));
+        if (ret < 0 || ret != sizeof(temp))
+            (void)printf("write to %s failed.\n", name);
+    }
+    (void)close(fd);
+    fd = -1;
+}
+
+/* parse_config callback that accepts every line */
+static bool parse_line_ok(const char *line)
+{
+    return true;
+}
+
+/* parse_config callback that rejects every line */
+static bool parse_line_fail(const char *line)
+{
+    return false;
+}
+
+/* parse_config callback that rejects lines refused by check_conf_file_valid() */
+static bool parse_line_check_valid(const char *line)
+{
+    if (check_conf_file_valid(line) == -1)
+        return false;
+
+    return true;
+}
+
+/*
+ * parse_config: missing file, accepting and rejecting callbacks, a large file,
+ * and a file whose content is refused by check_conf_file_valid().
+ */
+static void test_parse_config_001()
+{
+    bool ret = false;
+
+    ret = parse_config("test_config", parse_line_ok);
+    CU_ASSERT(ret == false);
+
+    set_log_interface_flag(NORMAL_WRITE);
+    set_flag_log_ok(false);
+    ret = parse_config("test_config", parse_line_ok);
+    CU_ASSERT(ret == false);
+
+    create_test_file("test_config", "test parse config");
+    ret = parse_config("test_config", parse_line_ok);
+    CU_ASSERT(ret == true);
+    ret = parse_config("test_config", parse_line_fail);
+    CU_ASSERT(ret == false);
+    (void)unlink("test_config");
+    create_large_file("test_large");
+    ret = parse_config("test_large", parse_line_ok);
+    CU_ASSERT(ret == true);
+    (void)unlink("test_large");
+
+    create_test_file("test_valid", "!@$%^&*(");
+    /* parse the file just created; "test_config" no longer exists at this point */
+    ret = parse_config("test_valid", parse_line_check_valid);
+    CU_ASSERT(ret == false);
+    (void)unlink("test_valid");
+}
+
+static void test_open_cfgfile_001()
+{
+    int fd = -1;
+    FILE *file = NULL;
+
+    /* test file not exist */
+    file = open_cfgfile("test_opencfg", &fd);
+    CU_ASSERT(file == NULL);
+
+    /* create test_opencfg and chmod mode to 700 */
+    create_test_file("test_opencfg", "test open cfg");
+
+    /* test open file */
+    file = open_cfgfile("test_opencfg", &fd);
+    CU_ASSERT(file != NULL);
+    if (file != NULL) {
+        (void)fclose(file);
+        file = NULL;
+    }
+    if (fd >= 0) {
+        fd = -1;
+    }
+
+    /* test file mode */
+    (void)chmod("test_opencfg", MAX_FILE_MODE);
+    file = open_cfgfile("test_opencfg", &fd);
+    CU_ASSERT(file == NULL);
+    if (file != NULL) {
+        (void)fclose(file);
+        file = NULL;
+    }
+    (void)unlink("test_opencfg");
+}
+
+static void test_check_int_001()
+{
+    CU_ASSERT(check_int(NULL) == false);
+    CU_ASSERT(check_int("12345") == true);
+    CU_ASSERT(check_int("a") == false);
+    CU_ASSERT(check_int("-12345") == false);
+    CU_ASSERT(check_int("123.45") == false);
+}
+
+static void test_check_decimal_001()
+{
+    CU_ASSERT(check_decimal(NULL) == false);
+    CU_ASSERT(check_decimal("12345") == true);
+    CU_ASSERT(check_decimal("123.45") == true);
+    CU_ASSERT(check_decimal("a") == false);
+    CU_ASSERT(check_decimal("-12345") == false);
+    CU_ASSERT(check_decimal(".123") == true || check_decimal("123.") == true);
+    CU_ASSERT(check_decimal("2.2.2") == true);
+}
+
+static void test_lvos_system_001()
+{
+    int ret;
+    char out[MAX_TEMPSTR] = {0};
+
+    CU_ASSERT(lovs_system(NULL) == -1);
+    /* check if system is in running state, in obs build we cannot restart systemd service */
+    ret = monitor_popen("systemctl is-system-running", out, sizeof(out), 0, NULL);
+    if (ret < 0) {
+        return;
+    }
+    if (strstr(out, "running") || strstr(out, "degraded")) {
+        
CU_ASSERT(lovs_system("systemctl restart crond") == 0); + } else { + CU_ASSERT(lovs_system("systemctl restart crond") != 0); + } + CU_ASSERT(lovs_system("ls /home > /dev/null") == 0); +} + +static void test_check_conf_file_valid_001() +{ + CU_ASSERT(check_conf_file_valid(";") == -1); + CU_ASSERT(check_conf_file_valid("|") == -1); + CU_ASSERT(check_conf_file_valid("&") == -1); + CU_ASSERT(check_conf_file_valid("$") == -1); + CU_ASSERT(check_conf_file_valid(">") == -1); + CU_ASSERT(check_conf_file_valid("<") == -1); + CU_ASSERT(check_conf_file_valid("(") == -1); + CU_ASSERT(check_conf_file_valid(")") == -1); + CU_ASSERT(check_conf_file_valid("./") == -1); + CU_ASSERT(check_conf_file_valid("/.") == -1); + CU_ASSERT(check_conf_file_valid("?") == -1); + CU_ASSERT(check_conf_file_valid("*") == -1); + CU_ASSERT(check_conf_file_valid("`") == -1); + CU_ASSERT(check_conf_file_valid("\\") == -1); + CU_ASSERT(check_conf_file_valid("[") == -1); + CU_ASSERT(check_conf_file_valid("]") == -1); + CU_ASSERT(check_conf_file_valid("'") == -1); + CU_ASSERT(check_conf_file_valid("!") == -1); + CU_ASSERT(check_conf_file_valid("a") == 0); + CU_ASSERT(check_conf_file_valid(".") == 0); + CU_ASSERT(check_conf_file_valid("0") == 0); + CU_ASSERT(check_conf_file_valid("-") == 0); +} + +static void test_check_file_001() +{ + CU_ASSERT(check_file(NULL) == false); + CU_ASSERT(check_file("") == false); + CU_ASSERT(check_file("/var/run/test.pid") == false); + CU_ASSERT(check_file("/bin/ls") == false); + CU_ASSERT(check_file("/etc/profile") == true); +} + +static void test_parse_value_int_001() +{ + unsigned int result; + + CU_ASSERT(parse_value_int("MONITOR_PERIOD", "a", &result) == false); + CU_ASSERT(parse_value_int("MONITOR_PERIOD", "-1", &result) == false); + CU_ASSERT(parse_value_int("MONITOR_PERIOD", "10", &result) == true); +} + +static void test_parse_value_ulong_001() +{ + unsigned long result; + + CU_ASSERT(parse_value_ulong("MONITOR_PERIOD", "a", &result) == false); + 
CU_ASSERT(parse_value_ulong("MONITOR_PERIOD", "-1", &result) == false); + CU_ASSERT(parse_value_ulong("MONITOR_PERIOD", "600", &result) == true && result == 600); +} + +static void test_parse_value_string_001() +{ + char temp[MAX_TEMPSTR] = {0}; + + CU_ASSERT(parse_value_string("MONITOR_COMMAND", "0123456789", temp, MIN_TEST_TEMP) == false); + CU_ASSERT(parse_value_string("MONITOR_COMMAND", "0123456789", temp, sizeof(temp)) == true); +} + +static void test_parse_value_bool_001() +{ + bool result = false; + + CU_ASSERT(parse_value_bool("PROCESS_MONITOR", "ON", &result) == true); + CU_ASSERT(result == true); + CU_ASSERT(parse_value_bool("PROCESS_MONITOR", "on", &result) == true); + CU_ASSERT(result == true); + CU_ASSERT(parse_value_bool("PROCESS_MONITOR", "OFF", &result) == true); + CU_ASSERT(result == false); + CU_ASSERT(parse_value_bool("PROCESS_MONITOR", "off", &result) == true); + CU_ASSERT(result == false); + result = true; + CU_ASSERT(parse_value_bool("PROCESS_MONITOR", "Off", &result) == false); + CU_ASSERT(result == true); +} + +static void test_parse_value_float_001() +{ + float result; + + CU_ASSERT(parse_value_float("ALARM", "11.22", &result) == true); + CU_ASSERT(parse_value_float("ALARM", "a", &result) == false); + CU_ASSERT(parse_value_float("ALARM", "-123", &result) == false); +} + +static void test_check_log_path_001() +{ + CU_ASSERT(check_log_path("/var/log/test.log") == true); + CU_ASSERT(check_log_path("/home/111/222") == false); + CU_ASSERT(check_log_path("/bin/ls") == false); +} + +/*static void set_proc_fdenable(const char *msg, const char *path) +{ + int ret; + long num; + long value; + size_t len; + char buf[MAX_LEN] = {0}; + char cmd[MAX_LEN] = {0}; + + ret = snprintf_s(cmd, sizeof(cmd), sizeof(cmd) - 1, + "cat %s", path); + if (ret == -1) { + (void)printf("snprintf_s cmd failed, ret: %d", ret); + return; + } + + ret = set_value_to_file(msg, path); + CU_ASSERT(ret == 0); + + (void)monitor_popen(cmd, buf, sizeof(buf) - 1, 0, NULL); + 
CU_ASSERT(ret == 0); + len = strlen(buf); + if (len > 0 && buf[len - 1] == '\n') { + buf[len - 1] = '\0'; + } + num = strtol(buf, NULL, STRTOL_NUMBER_BASE); + value = strtol(msg, NULL, STRTOL_NUMBER_BASE); + CU_ASSERT(num == value); +}*/ + +static void test_set_value_to_file() +{ + /*int ret; + + set_proc_fdenable("1", RROC_FDENABLE); + + set_proc_fdenable("0", RROC_FDENABLE); + + ret = set_value_to_file("2", RROC_FDENABLE); + CU_ASSERT(ret != 0); + + ret = set_value_to_file("1", "/proc/fdenable1"); + CU_ASSERT(ret != 0);*/ +} + +static bool add_test_parse_value(CU_pSuite suite) +{ + if (CU_add_test(suite, "test_parse_value_int_001", test_parse_value_int_001) == NULL || + CU_add_test(suite, "test_parse_value_ulong_001", test_parse_value_ulong_001) == NULL || + CU_add_test(suite, "test_parse_value_string_001", test_parse_value_string_001) == NULL || + CU_add_test(suite, "test_parse_value_bool_001", test_parse_value_bool_001) == NULL || + CU_add_test(suite, "test_parse_value_float_001", test_parse_value_float_001) == NULL) { + return false; + } + return true; +} + +static bool add_test_check(CU_pSuite suite) +{ + if (CU_add_test(suite, "test_check_int_001", test_check_int_001) == NULL || + CU_add_test(suite, "test_check_decimal_001", test_check_decimal_001) == NULL || + CU_add_test(suite, "test_check_conf_file_valid_001", test_check_conf_file_valid_001) == NULL || + CU_add_test(suite, "test_check_file_001", test_check_file_001) == NULL || + CU_add_test(suite, "test_check_log_path_001", test_check_log_path_001) == NULL) { + return false; + } + return true; +} + +static bool general_test(CU_pSuite suite) +{ + if (CU_add_test(suite, "test_set_value_to_file", test_set_value_to_file) == NULL) { + return false; + } + return true; +} + +int main(int argc, char **argv) +{ + CU_pSuite suite = NULL; + unsigned int num_failures; + cu_run_mode g_cunit_mode = CUNIT_SCREEN; + + if (argc > 1) { + g_cunit_mode = (cu_run_mode)strtol(argv[1], NULL, STRTOL_NUMBER_BASE); + } + + if 
(CU_initialize_registry() != CUE_SUCCESS) { + return CU_get_error(); + } + + suite = CU_add_suite("common", NULL, NULL); + if (suite == NULL) { + goto err; + } + + if (CU_add_test(suite, "test_monitor_popen_001", test_monitor_popen_001) == NULL || + CU_add_test(suite, "test_monitor_cmd_001", test_monitor_cmd_001) == NULL || + CU_add_test(suite, "test_get_value_001", test_get_value_001) == NULL || + CU_add_test(suite, "test_parse_config_001", test_parse_config_001) == NULL || + CU_add_test(suite, "test_open_cfgfile_001", test_open_cfgfile_001) == NULL || + CU_add_test(suite, "test_lvos_system_001", test_lvos_system_001) == NULL || + !add_test_parse_value(suite) || !add_test_check(suite) || + !general_test(suite)) { + goto err; + } + + switch (g_cunit_mode) { + case CUNIT_SCREEN: + CU_basic_set_mode(CU_BRM_VERBOSE); + CU_basic_run_tests(); + break; + case CUNIT_XMLFILE: + CU_set_output_filename("common"); + CU_list_tests_to_file(); + CU_automated_run_tests(); + break; + case CUNIT_CONSOLE: + CU_console_run_tests(); + break; + default: + (void)printf("not suport cunit mode, only suport: 0 or 1\n"); + goto err; + } + + num_failures = CU_get_number_of_failures(); + CU_cleanup_registry(); + return num_failures; +err: + CU_cleanup_registry(); + return CU_get_error(); +} diff --git a/sysmonitor-1.3.2/test/common/process_exit_test.c b/sysmonitor-1.3.2/test/common/process_exit_test.c new file mode 100644 index 0000000000000000000000000000000000000000..565ee9e729c720c5129ba722cc8c81d2846f5d7c --- /dev/null +++ b/sysmonitor-1.3.2/test/common/process_exit_test.c @@ -0,0 +1,44 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2019-2019. All rights reserved. + * sysmonitor licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. 
+ * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. + * Description: process receive SIGTERM not quit immediatally + * Author: xuchunmei + * Create: 2019-9-9 + */ + +#include +#include +#include +#include +#include +#include + +#define SLEEP_INTERVAL 600 + +static int g_quit = false; +static void quit_handler(int signo) +{ + g_quit = true; +} + +int main() +{ + struct sigaction quit_action; + + (void)memset_s(&quit_action, sizeof(quit_action), 0, sizeof(quit_action)); + quit_action.sa_handler = quit_handler; + (void)sigaction(SIGTERM, &quit_action, NULL); + + while (!g_quit) { + (void)sleep(SLEEP_INTERVAL); + } + (void)sleep(SLEEP_INTERVAL); + return 0; +} diff --git a/sysmonitor-1.3.2/test/common_interface/common_interface.c b/sysmonitor-1.3.2/test/common_interface/common_interface.c new file mode 100644 index 0000000000000000000000000000000000000000..cb239f4eeb55282770a2cb7eb1698518b0cd8d69 --- /dev/null +++ b/sysmonitor-1.3.2/test/common_interface/common_interface.c @@ -0,0 +1,275 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2019-2019. All rights reserved. + * sysmonitor licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. 
+ * Description: common interface for test + * Author: xuchunmei + * Create: 2019-9-28 + */ +#include "common_interface.h" + +#include +#include + +const static pid_t g_monitor_main_pid = 0; +static int g_monitor_log_fd = -1; +static char g_log_path[LOG_FILE_LEN] = {0}; +static int g_log_interface_flag = -1; +static bool g_flag_log_ok = false; +static pthread_mutex_t g_log_fd_mutex = PTHREAD_MUTEX_INITIALIZER; +static monitor_thread g_thread_item[MONITOR_ITEMS_CNT]; + +void init_log_for_test(const char *name) +{ + int ret; + + if (name == NULL) { + return; + } + + g_log_interface_flag = NORMAL_WRITE; + ret = strncpy_s(g_log_path, sizeof(g_log_path), name, sizeof(g_log_path) - 1); + if (ret != 0) { + return; + } + + if (g_monitor_log_fd >= 0) { + (void)close(g_monitor_log_fd); + } + + g_monitor_log_fd = open(g_log_path, O_WRONLY | O_APPEND | O_CREAT | O_CLOEXEC, LOG_FILE_PERMISSION); + if (g_monitor_log_fd < 0) { + return; + } + + g_flag_log_ok = true; +} + +void clear_log_config(const char *name) +{ + if (name == NULL) { + return; + } + + if (g_monitor_log_fd >= 0) { + (void)close(g_monitor_log_fd); + g_monitor_log_fd = -1; + } + + (void)memset_s(g_log_path, sizeof(g_log_path), 0, sizeof(g_log_path)); + g_flag_log_ok = false; + g_log_interface_flag = DAEMON_SYSLOG; + (void)unlink(name); +} + +/* + * write msg to log file + */ +static void write_log(const char *msg) +{ + ssize_t ret; + + (void)pthread_mutex_lock(&g_log_fd_mutex); + ret = faccessat(0, g_log_path, F_OK, 0); + if (ret != 0) { + (void)close(g_monitor_log_fd); + g_monitor_log_fd = open(g_log_path, O_WRONLY | O_APPEND | O_CREAT | O_CLOEXEC, LOG_FILE_PERMISSION); + if (g_monitor_log_fd < 0) { + (void)printf("[sysmonitor]: reopen %s failed,errno[%d].\n", g_log_path, errno); + (void)pthread_mutex_unlock(&g_log_fd_mutex); + return; + } + } + (void)lseek(g_monitor_log_fd, 0, SEEK_END); + ret = write(g_monitor_log_fd, msg, strlen(msg)); + if (ret == -1) { + (void)printf("[sysmonitor]: write to log file failed, 
errno[%d].\n", errno); + } + (void)pthread_mutex_unlock(&g_log_fd_mutex); +} + +static int get_log_time(struct tm *t) +{ + time_t now; + int ret; + struct tm *ret_t = NULL; + + now = time((time_t)0); + ret = memset_s(t, sizeof(struct tm), 0, sizeof(struct tm)); + if (ret != 0) { + return -1; + } + + ret_t = localtime_r(&now, t); + if (ret_t == NULL) { + return -1; + } + + return 0; +} + +static void log_for_daemon(int priority, const char *detail) +{ + int ret; + char msg[MAX_LOG_LEN + MAX_TEMPSTR] = { 0 }; + + ret = snprintf_s(msg, MAX_LOG_LEN + MAX_TEMPSTR, strlen(detail) + MAX_TEMPSTR - 1, + "sysmonitor[%d]: %s", g_monitor_main_pid, detail); + if (ret == -1) { + syslog(priority, "log_it snprintf_s for msg error [%d]", ret); + } + syslog(priority, "%s", msg); +} + +static void log_for_normal(const char *detail, struct tm t) +{ + char msg[MAX_LOG_LEN + MAX_TEMPSTR] = { 0 }; + int ret; + + ret = snprintf_s(msg, MAX_LOG_LEN + MAX_TEMPSTR, MAX_LOG_LEN + MAX_TEMPSTR - 1, + "[LOC %04d-%02d-%02d:%02d:%02d:%02d]sysmonitor[%d]: %s\n", + t.tm_year + TM_YEAR_BEGIN, t.tm_mon + 1, t.tm_mday, + t.tm_hour, t.tm_min, t.tm_sec, g_monitor_main_pid, detail); + if (ret == -1) { + (void)printf("log_it: snprintf_s msg failed"); + return; + } + + if (g_flag_log_ok) { + write_log(msg); + } else { + (void)printf("%s", msg); + } +} + +/* + * write info to log file, use syslog or write interface + */ +static void log_it(int priority, const char *detail) +{ + struct tm t; + + if (get_log_time(&t) != 0) { + return; + } + + if (g_log_interface_flag == DAEMON_SYSLOG) { + log_for_daemon(priority, detail); + } else { + log_for_normal(detail, t); + } +} + +void log_printf(int priority, const char *format, ...) 
+{ + char msg_buffer[MAX_LOG_LEN] = {0}; + int ret; + va_list arg_list; + + va_start(arg_list, format); + ret = vsnprintf_s(msg_buffer, sizeof(msg_buffer), sizeof(msg_buffer) - 1, format, arg_list); + if (ret == -1 && msg_buffer[0] == '\0') { + (void)printf("log_printf: vsnprintf_s aMsgBuffer failed"); + va_end(arg_list); + return; + } + + va_end(arg_list); + log_it(priority, msg_buffer); +} + +int get_log_interface_flag(void) +{ + return g_log_interface_flag; +} + +bool get_flag_log_ok(void) +{ + return g_flag_log_ok; +} + +monitor_thread *get_thread_item_info(int type) +{ + if (type < 0 || type >= MONITOR_ITEMS_CNT) { + return NULL; + } + return &g_thread_item[type]; +} + +void set_log_interface_flag(int flag) +{ + g_log_interface_flag = flag; +} + +void set_flag_log_ok(bool flag) +{ + g_flag_log_ok = flag; +} + +int exec_cmd_test(const char *cmd) +{ + return monitor_cmd(DEFAULT_USER_ID, cmd, 0, NULL, true); +} + +bool get_thread_item_reload_flag(monitor_item_type type) +{ + return g_thread_item[type].reload; +} + +void set_thread_item_reload_flag(monitor_item_type type, bool flag) +{ + g_thread_item[type].reload = flag; +} + +bool get_thread_item_monitor_flag(monitor_item_type type) +{ + return g_thread_item[type].monitor; +} + +void set_thread_item_monitor_flag(monitor_item_type type, bool flag) +{ + g_thread_item[type].monitor = flag; +} + +bool get_thread_item_alarm_flag(monitor_item_type type) +{ + return g_thread_item[type].alarm; +} + +int get_thread_item_period(monitor_item_type type) +{ + return g_thread_item[type].period; +} + +void set_thread_item_period(monitor_item_type type, int period) +{ + g_thread_item[type].period = period; +} + +void set_thread_item_tid(monitor_item_type type, pthread_t tid) +{ + g_thread_item[type].tid = tid; +} + +void recover_sysmonitor(void) +{ + (void)exec_cmd_test("mv /usr/bin/sysmonitor /usr/bin/sysmonitor.del"); + (void)exec_cmd_test("mv /usr/bin/sysmonitor.back /usr/bin/sysmonitor"); + (void)exec_cmd_test("systemctl 
restart sysmonitor"); + (void)exec_cmd_test("rm /usr/bin/sysmonitor.del -rf"); + (void)printf("recover sysmonitor\n"); +} + +void init_sysmonitor(void) +{ + (void)exec_cmd_test("mv /usr/bin/sysmonitor /usr/bin/sysmonitor.back"); + (void)exec_cmd_test("cp ./sysmonitor/sysmonitor_test /usr/bin/sysmonitor"); + (void)exec_cmd_test("systemctl restart sysmonitor"); + (void)printf("init sysmonitor\n"); +} \ No newline at end of file diff --git a/sysmonitor-1.3.2/test/common_interface/common_interface.h b/sysmonitor-1.3.2/test/common_interface/common_interface.h new file mode 100644 index 0000000000000000000000000000000000000000..af7c48ef5b2fc12c551734523561585d6718c7e0 --- /dev/null +++ b/sysmonitor-1.3.2/test/common_interface/common_interface.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2019-2019. All rights reserved. + * sysmonitor licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. 
+ * Description: common function + * Author: xuchunmei + * Create: 2019-9-28 + */ +#ifndef __COMMON_INTERFACE_H +#define __COMMON_INTERFACE_H +#include "common.h" + +#define DEFAULT_FDTHRESHOLD 80 +#define DEFAULT_FDENABLE 1 +#define PROC_FDTHRESHOLD "/proc/fdthreshold" +#define RROC_FDENABLE "/proc/fdenable" +#define SIGCATCHMAK "/sys/module/sysmonitor/parameters/sigcatchmask" +#define MAX_LEN 200 + +void init_log_for_test(const char *name); +void clear_log_config(const char *name); +void set_log_interface_flag(int flag); +void set_flag_log_ok(bool flag); +int exec_cmd_test(const char *cmd); +monitor_thread *get_thread_item_info(int type); +void recover_sysmonitor(void); +void init_sysmonitor(void); +#endif diff --git a/sysmonitor-1.3.2/test/custom/CMakeLists.txt b/sysmonitor-1.3.2/test/custom/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..333ad7ae0ec00cf0a37af148b8669d292fc7e4ed --- /dev/null +++ b/sysmonitor-1.3.2/test/custom/CMakeLists.txt @@ -0,0 +1,10 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2018-2019. All rights reserved. +# Description: cmake file of custom_llt +# Author: xuchunmei +# Create: 2019-9-28 +project(sysmonitor) + +INCLUDE_DIRECTORIES(../../src) +add_executable(custom_llt ../../src/common.c ../../src/custom.c ../common_interface/common_interface.c ../../src/monitor_thread.c custom_llt.c) +set_target_properties(custom_llt PROPERTIES LINK_FLAGS "-Wl,-z,relro -Wl,-z,noexecstack -Wl,-z,now -Wtrampolines -pie -g") +target_link_libraries(custom_llt boundscheck pthread cunit) diff --git a/sysmonitor-1.3.2/test/custom/custom_llt.c b/sysmonitor-1.3.2/test/custom/custom_llt.c new file mode 100644 index 0000000000000000000000000000000000000000..8f01e3d0f3b86a496e8c2fa7032eecfe8d3b3508 --- /dev/null +++ b/sysmonitor-1.3.2/test/custom/custom_llt.c @@ -0,0 +1,558 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2019-2019. All rights reserved. + * sysmonitor licensed under the Mulan PSL v2. 
+ * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. + * Description: testcase for custom daemon and periodic monitor + * Author: xuchunmei + * Create: 2019-9-28 + */ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include "custom.h" +#include "../common_interface/common_interface.h" + +typedef enum { + CUNIT_SCREEN = 0, + CUNIT_XMLFILE, + CUNIT_CONSOLE +} cu_run_mode; + +#define CUSTOM_TEST_LOG "/home/custom.log" +#define TEST_MAX_TEMPSTR 1024 +#define SLEEP_INTERVAL 3 + +static monitor_thread *g_daemon_info = NULL; +static monitor_thread *g_periodic_info = NULL; + +static int init_before_test(void) +{ + init_log_for_test(CUSTOM_TEST_LOG); + (void)exec_cmd_test("mv /etc/sysmonitor.d /etc/sysmonitor.d-bak"); + g_daemon_info = get_thread_item_info(CUSTOM_DAEMON_ITEM); + g_periodic_info = get_thread_item_info(CUSTOM_PERIODIC_ITEM); + if (g_daemon_info == NULL || g_periodic_info == NULL) { + return 1; + } + g_daemon_info->period = 1; + custom_daemon_monitor_init(); + return 0; +} + +static int clean_after_test(void) +{ + (void)exec_cmd_test("rm -rf /etc/sysmonitor.d"); + (void)exec_cmd_test("mv /etc/sysmonitor.d-bak /etc/sysmonitor.d"); + clear_log_config(CUSTOM_TEST_LOG); + return 0; +} + +static void wait_for_reload(void) +{ + g_daemon_info->reload = true; + while (g_daemon_info->reload) { + (void)sleep(1); + } +} + +static void test_custom_load_file_fun_001() +{ + int ret; + char temp[MAX_CFG_NAME_LEN + 1] = {0}; + char str[TEST_MAX_TEMPSTR] = {0}; + + ret = monitor_cmd(DEFAULT_USER_ID, "cat /home/custom.log | grep \"/etc/sysmonitor.d/ not exist\"", 0, NULL, true); + 
CU_ASSERT(ret == 0); + + (void)exec_cmd_test("mkdir /etc/sysmonitor.d"); + (void)memset_s(temp, MAX_CFG_NAME_LEN, 'a', MAX_CFG_NAME_LEN); + (void)snprintf_s(str, TEST_MAX_TEMPSTR, TEST_MAX_TEMPSTR - 1, "touch /etc/sysmonitor.d/%s", temp); + (void)exec_cmd_test(str); + (void)exec_cmd_test("touch /etc/sysmonitor.d/aaa && chmod 777 /etc/sysmonitor.d/aaa"); + wait_for_reload(); + + ret = monitor_cmd(DEFAULT_USER_ID, "cat /home/custom.log | grep \"load_task: config file" + " name should be less than 128, file: \"", 0, NULL, true); + CU_ASSERT(ret == 0); + ret = monitor_cmd(DEFAULT_USER_ID, + "cat /home/custom.log | grep \"/etc/sysmonitor.d/aaa: bad file mode\"", 0, NULL, true); + CU_ASSERT(ret == 0); + (void)exec_cmd_test("rm -rf /etc/sysmonitor.d/aaa*"); +} + +static void check_parse_config_fun_test(void) +{ + int ret; + + ret = exec_cmd_test("cat /home/custom.log | grep \"parse config-test1 error\""); + CU_ASSERT(ret == 0); + ret = exec_cmd_test("cat /home/custom.log | grep " + "'ERROR: \"(MONITOR_SWITCH)=\"on\"\" include nonsecure character!'"); + CU_ASSERT(ret == 0); + ret = exec_cmd_test("cat /home/custom.log | grep \"custom parse_line: item length(60) too long(>50)\""); + CU_ASSERT(ret == 0); + ret = exec_cmd_test("cat /home/custom.log | grep \"custom monitor: size should be less than 160, error!\""); + CU_ASSERT(ret == 0); + ret = exec_cmd_test("cat /home/custom.log | grep \"custom monitor: monitor switch configuration error!\""); + CU_ASSERT(ret == 0); + ret = exec_cmd_test("cat /home/custom.log | grep \"custom monitor: type configuration error!\""); + CU_ASSERT(ret == 0); + ret = exec_cmd_test("cat /home/custom.log | grep \"custom monitor: period configuration error!\""); + CU_ASSERT(ret == 0); + ret = exec_cmd_test("cat /home/custom.log | grep \"custom monitor: enviromentfile configuration error!\""); + CU_ASSERT(ret == 0); + ret = exec_cmd_test("cat /home/custom.log | grep" + " \"custom monitor: enviromentfile path should be less than 128, error!\""); + 
CU_ASSERT(ret == 0); + ret = exec_cmd_test("cat /home/custom.log | grep \"custom monitor: execstart configuration error!\""); + CU_ASSERT(ret == 0); + ret = exec_cmd_test("cat /home/custom.log | grep \"UNKNOWN not support\""); + CU_ASSERT(ret == 0); +} + +static void check_long_name_config(void) +{ + char temp[TEST_MAX_TEMPSTR] = {0}; + char name[MAX_CFG_NAME_LEN + 1] = {0}; + int ret; + + (void)exec_cmd_test("echo 'MONITOR_SWITCH=\"on\"' > /etc/sysmonitor.d/daemon-test1"); + (void)exec_cmd_test("echo 'TYPE=\"daemon\"' >> /etc/sysmonitor.d/daemon-test1"); + (void)exec_cmd_test("echo 'EXECSTART=\"sleep 600\"' >> /etc/sysmonitor.d/daemon-test1"); + wait_for_reload(); + ret = exec_cmd_test("cat /home/custom.log | grep 'is added to monitor list' | grep daemon-test1"); + CU_ASSERT(ret == 0); + + (void)exec_cmd_test("echo 'MONITOR_SWITCH=\"off\"' > /etc/sysmonitor.d/daemon-test1"); + (void)memset_s(temp, TEST_MAX_TEMPSTR, 0, TEST_MAX_TEMPSTR); + (void)memset_s(name, MAX_CFG_NAME_LEN, '1', MAX_CFG_NAME_LEN); + (void)snprintf_s(temp, TEST_MAX_TEMPSTR, TEST_MAX_TEMPSTR - 1, "echo test > /etc/sysmonitor.d/config-test%s", name); + (void)exec_cmd_test(temp); + wait_for_reload(); + ret = exec_cmd_test("cat /home/custom.log | grep " + "'reload_task: config file name should be less than 128, file: config-test111'"); + CU_ASSERT(ret == 0); + (void)exec_cmd_test("rm -rf /etc/sysmonitor.d"); + wait_for_reload(); + ret = exec_cmd_test("cat /home/custom.log | grep 'reload_task: /etc/sysmonitor.d/ not exist'"); + CU_ASSERT(ret == 0); + (void)exec_cmd_test("mkdir -p /etc/sysmonitor.d"); +} + +static void test_custom_parse_config_fun_001() +{ + char temp[TEST_MAX_TEMPSTR] = {0}; + char str[MAX_CUSTOM_CMD_LEN + 1] = {0}; + char name[MAX_CFG_NAME_LEN + 1] = {0}; + int ret; + + (void)exec_cmd_test("touch /etc/sysmonitor.d/config-test1" + " && echo \" #12345\" > /etc/sysmonitor.d/config-test1"); + (void)exec_cmd_test("touch /etc/sysmonitor.d/config-test2" + " && echo 
'(MONITOR_SWITCH)=\"on\"' > /etc/sysmonitor.d/config-test2"); + (void)exec_cmd_test("touch /etc/sysmonitor.d/config-test3" + "&& echo '123456789012345678901234567890123456789012345678901234567890=\"on\"'" + " > /etc/sysmonitor.d/config-test3"); + (void)exec_cmd_test("touch /etc/sysmonitor.d/config-test4"); + (void)memset_s(str, MAX_CUSTOM_CMD_LEN, '1', MAX_CUSTOM_CMD_LEN); + (void)snprintf_s(temp, TEST_MAX_TEMPSTR, TEST_MAX_TEMPSTR - 1, + "echo 'EXECSTART=\"%s\"' > /etc/sysmonitor.d/config-test4", str); + (void)exec_cmd_test(temp); + (void)exec_cmd_test("touch /etc/sysmonitor.d/config-test5" + " && echo 'MONITOR_SWITCH=\"On\"' > /etc/sysmonitor.d/config-test5"); + (void)exec_cmd_test("touch /etc/sysmonitor.d/config-test6" + " && echo 'TYPE=\"unknown\"' > /etc/sysmonitor.d/config-test6"); + (void)exec_cmd_test("touch /etc/sysmonitor.d/config-test7" + " && echo 'PERIOD=\"abc\"' > /etc/sysmonitor.d/config-test7"); + (void)exec_cmd_test("touch /etc/sysmonitor.d/config-test8" + " && echo 'ENVIROMENTFILE=\"\"' > /etc/sysmonitor.d/config-test8"); + (void)exec_cmd_test("touch /etc/sysmonitor.d/config-test9"); + (void)memset_s(temp, TEST_MAX_TEMPSTR, 0, TEST_MAX_TEMPSTR); + (void)memset_s(name, MAX_CFG_NAME_LEN, '1', MAX_CFG_NAME_LEN); + (void)snprintf_s(temp, TEST_MAX_TEMPSTR, TEST_MAX_TEMPSTR - 1, + "echo 'ENVIROMENTFILE=\"%s\"' > /etc/sysmonitor.d/config-test9", name); + (void)exec_cmd_test(temp); + (void)exec_cmd_test("touch /etc/sysmonitor.d/config-test10" + " && echo 'EXECSTART=\"\"' > /etc/sysmonitor.d/config-test10"); + (void)exec_cmd_test("touch /etc/sysmonitor.d/config-test11" + " && echo 'UNKNOWN=\"ON\"' > /etc/sysmonitor.d/config-test11"); + (void)exec_cmd_test("touch /etc/sysmonitor.d/config-test12" + " && echo 'EXECSTARTPRE=\"\"' > /etc/sysmonitor.d/config-test12"); + wait_for_reload(); + check_parse_config_fun_test(); + ret = exec_cmd_test("rm -rf /etc/sysmonitor.d/config-test*"); + CU_ASSERT(ret == 0); + check_long_name_config(); +} + +#define 
MAX_ENVFILE_LINES 260 +static void test_custom_parse_config_fun_002() +{ + int ret; + int i; + + (void)exec_cmd_test("echo 'MONITOR_SWITCH=\"on\"' > /etc/sysmonitor.d/daemon-test1"); + (void)exec_cmd_test("echo 'TYPE=\"daemon\"' >> /etc/sysmonitor.d/daemon-test1"); + (void)exec_cmd_test("echo 'EXECSTART=\"sleep 600\"' >> /etc/sysmonitor.d/daemon-test1"); + (void)exec_cmd_test("echo 'ENVIROMENTFILE=\"/home/envfile\"' >> /etc/sysmonitor.d/daemon-test1"); + wait_for_reload(); + ret = exec_cmd_test("cat /home/custom.log | grep 'access /home/envfile failed'"); + CU_ASSERT(ret == 0); + (void)exec_cmd_test("touch /home/test1 && ln -s /home/test1 /home/envfile"); + wait_for_reload(); + ret = exec_cmd_test("cat /home/custom.log | grep '/home/envfile should be absolute path'"); + CU_ASSERT(ret == 0); + (void)exec_cmd_test("rm -rf /home/test1 && rm -rf /home/envfile"); + (void)exec_cmd_test("echo \" \" > /home/envfile"); + (void)exec_cmd_test("echo >> /home/envfile"); + (void)exec_cmd_test("echo \"#test\" >> /home/envfile"); + for (i = 0; i < MAX_ENVFILE_LINES; i++) { + (void)exec_cmd_test("echo 111 >> /home/envfile"); + } + wait_for_reload(); + ret = exec_cmd_test("cat /home/custom.log | grep \"is added to monitor list\" | grep daemon-test1"); + CU_ASSERT(ret == 0); + (void)exec_cmd_test("rm -rf /home/envfile && rm -rf /etc/sysmonitor.d/daemon-test1"); + wait_for_reload(); +} + +static void wait_for_periodic_reload(void) +{ + g_periodic_info->reload = true; + while (g_periodic_info->reload) { + (void)sleep(1); + } +} + +static void test_custom_parse_config_fun_003() +{ + int ret; + + (void)exec_cmd_test("echo 'MONITOR_SWITCH=\"on\"' > /etc/sysmonitor.d/periodic-test"); + (void)exec_cmd_test("echo 'TYPE=\"periodic\"' >> /etc/sysmonitor.d/periodic-test"); + (void)exec_cmd_test("echo 'EXECSTART=\"sleep 10\"' >> /etc/sysmonitor.d/periodic-test"); + (void)exec_cmd_test("echo 'MONITOR_SWITCH=\"off\"' > /etc/sysmonitor.d/periodic-test1"); + (void)exec_cmd_test("echo 
'TYPE=\"periodic\"' >> /etc/sysmonitor.d/periodic-test1"); + (void)exec_cmd_test("echo 'EXECSTART=\"sleep 10\"' >> /etc/sysmonitor.d/periodic-test1"); + (void)exec_cmd_test("echo 'PERIOD=\"20\"' >> /etc/sysmonitor.d/periodic-test1"); + (void)exec_cmd_test("echo 'MONITOR_SWITCH=\"on\"' > /etc/sysmonitor.d/periodic-test2"); + (void)exec_cmd_test("echo 'TYPE=\"periodic\"' >> /etc/sysmonitor.d/periodic-test2"); + (void)exec_cmd_test("echo 'EXECSTART=\"sleep 10\"' >> /etc/sysmonitor.d/periodic-test2"); + (void)exec_cmd_test("echo 'PERIOD=\"20\"' >> /etc/sysmonitor.d/periodic-test2"); + custom_periodic_monitor_init(); + ret = exec_cmd_test("cat /home/custom.log | grep \"parse periodic-test error\""); + CU_ASSERT(ret == 0); + ret = exec_cmd_test("cat /home/custom.log | grep \"is added to monitor list\" | grep periodic-test1"); + CU_ASSERT(ret != 0); + ret = exec_cmd_test("cat /home/custom.log | grep \"is added to monitor list\" | grep periodic-test2"); + CU_ASSERT(ret == 0); + (void)exec_cmd_test("rm -rf /etc/sysmonitor.d/periodic-test*"); + wait_for_periodic_reload(); +} + +static void test_custom_monitor_daemon_fun_001_3(void) +{ + int ret; + unsigned int period = (unsigned int)g_daemon_info->period; + + (void)exec_cmd_test("echo 'MONITOR_SWITCH=\"on\"' > /etc/sysmonitor.d/daemon6"); + (void)exec_cmd_test("echo 'TYPE=\"daemon\"' >> /etc/sysmonitor.d/daemon6"); + (void)exec_cmd_test("echo 'EXECSTART=\"sleep 1\"' >> /etc/sysmonitor.d/daemon6"); + g_daemon_info->reload = true; + (void)sleep(period + 1); + ret = exec_cmd_test("cat /home/custom.log | grep 'is added to monitor list' | grep daemon6"); + CU_ASSERT(ret == 0); + /* test daemon process exit, and reload single config, parse env file */ + (void)exec_cmd_test("echo '/home;/root' >> /home/env.log"); + (void)exec_cmd_test("echo 'ENVIROMENTFILE=\"/home/env.log\"' >> /etc/sysmonitor.d/daemon6"); + (void)sleep(period + 1); + ret = exec_cmd_test("cat /home/custom.log | grep 'custom daemon monitor: child process' | grep 
daemon6| grep exit"); + CU_ASSERT(ret == 0); + (void)exec_cmd_test("echo 'MONITOR_SWITCH=\"on\"' > /etc/sysmonitor.d/daemon6"); + (void)exec_cmd_test("echo 'TYPE=\"daemon\"' >> /etc/sysmonitor.d/daemon6"); + (void)exec_cmd_test("echo 'EXECSTART=\"sleep 1\"' >> /etc/sysmonitor.d/daemon6"); + (void)sleep(period + 1); + (void)exec_cmd_test("rm -rf /etc/sysmonitor.d/daemon6 && rm -rf /home/env.log"); +} + +static void test_custom_monitor_daemon_fun_001_2(void) +{ + int ret; + unsigned int period = (unsigned int)g_daemon_info->period; + + (void)exec_cmd_test("echo 'MONITOR_SWITCH=\"on\"' > /etc/sysmonitor.d/daemon3"); + (void)exec_cmd_test("echo 'TYPE=\"daemon\"' >> /etc/sysmonitor.d/daemon3"); + (void)exec_cmd_test("echo 'EXECSTART=\" \"' >> /etc/sysmonitor.d/daemon3"); + g_daemon_info->reload = true; + (void)sleep(period + 1); + ret = exec_cmd_test("cat /home/custom.log | grep 'get_exec_and_args, exec and args is empty'"); + CU_ASSERT(ret == 0); + (void)exec_cmd_test("rm -rf /etc/sysmonitor.d/daemon3"); + + (void)exec_cmd_test("echo 'MONITOR_SWITCH=\"on\"' > /etc/sysmonitor.d/daemon4"); + (void)exec_cmd_test("echo 'TYPE=\"daemon\"' >> /etc/sysmonitor.d/daemon4"); + (void)exec_cmd_test("echo 'EXECSTART=\"1 2 3 4 5 6 7 8 9 0 " + "1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 " + "0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 " + "9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 " + "8 9 0\"' >> /etc/sysmonitor.d/daemon4"); + g_daemon_info->reload = true; + (void)sleep(period + 1); + ret = exec_cmd_test("cat /home/custom.log | grep 'save_args: too many args'"); + CU_ASSERT(ret == 0); + (void)exec_cmd_test("rm -rf /etc/sysmonitor.d/daemon4"); + + (void)exec_cmd_test("cp ./common/process_exit_test /home"); + (void)exec_cmd_test("echo 'MONITOR_SWITCH=\"on\"' > /etc/sysmonitor.d/daemon5"); + (void)exec_cmd_test("echo 'TYPE=\"daemon\"' >> /etc/sysmonitor.d/daemon5"); + (void)exec_cmd_test("echo 'EXECSTART=\"/home/process_exit_test\"' >> /etc/sysmonitor.d/daemon5"); + g_daemon_info->reload = true; + 
(void)sleep(period + 1); + ret = exec_cmd_test("cat /home/custom.log | grep 'name daemon5 started'"); + CU_ASSERT(ret == 0); + (void)exec_cmd_test("echo 'MONITOR_SWITCH=\"off\"' > /etc/sysmonitor.d/daemon5"); + (void)exec_cmd_test("echo 'TYPE=\"daemon\"' >> /etc/sysmonitor.d/daemon5"); + (void)exec_cmd_test("echo 'EXECSTART=\"/home/process_exit_test\"' >> /etc/sysmonitor.d/daemon5"); + g_daemon_info->reload = true; + (void)sleep(period + 1); + ret = exec_cmd_test("cat /home/custom.log | grep 'process SIGTERM timeout, use SIGKILL'"); + CU_ASSERT(ret == 0); + (void)exec_cmd_test("rm -rf /home/process_exit_test && rm -rf /etc/sysmonitor.d/daemon5"); +} + +static void test_custom_monitor_daemon_fun_001() +{ + int ret; + unsigned int period = (unsigned int)g_daemon_info->period; + + (void)exec_cmd_test("echo 'MONITOR_SWITCH=\"on\"' > /etc/sysmonitor.d/daemon1"); + (void)exec_cmd_test("echo 'TYPE=\"daemon\"' >> /etc/sysmonitor.d/daemon1"); + (void)exec_cmd_test("echo 'EXECSTART=\"sleep 2\"' >> /etc/sysmonitor.d/daemon1"); + g_daemon_info->reload = true; + (void)sleep(period + 1); + ret = exec_cmd_test("cat /home/custom.log | grep 'is added to monitor list' | grep conf_name | grep daemon1"); + CU_ASSERT(ret == 0); + ret = exec_cmd_test("cat /home/custom.log | grep 'name daemon1 started'"); + CU_ASSERT(ret == 0); + (void)exec_cmd_test("rm -rf /etc/sysmonitor.d/daemon1"); + (void)sleep(SLEEP_INTERVAL); + ret = exec_cmd_test("cat /home/custom.log | grep " + "'custom daemon monitor: child process' | grep 'name daemon1 exit code'"); + CU_ASSERT(ret == 0); + ret = exec_cmd_test("cat /home/custom.log | grep 'open /etc/sysmonitor.d/daemon1 error'"); + CU_ASSERT(ret == 0); + + (void)exec_cmd_test("echo 'MONITOR_SWITCH=\"on\"' > /etc/sysmonitor.d/daemon2"); + (void)exec_cmd_test("echo 'TYPE=\"daemon\"' >> /etc/sysmonitor.d/daemon2"); + (void)exec_cmd_test("echo 'EXECSTART=\"sleep 3\"' >> /etc/sysmonitor.d/daemon2"); + g_daemon_info->reload = true; + (void)sleep(period + 1); + ret = 
exec_cmd_test("cat /home/custom.log | grep 'is added to monitor list' | grep conf_name | grep daemon2"); + CU_ASSERT(ret == 0); + ret = exec_cmd_test("cat /home/custom.log | grep 'name daemon2 started'"); + CU_ASSERT(ret == 0); + (void)exec_cmd_test("echo 'MONITOR_SWITCH=\"off\"' > /etc/sysmonitor.d/daemon2"); + (void)exec_cmd_test("echo 'TYPE=\"periodic\"' >> /etc/sysmonitor.d/daemon2"); + (void)exec_cmd_test("echo 'EXECSTART=\"sleep 3\"' >> /etc/sysmonitor.d/daemon2"); + (void)exec_cmd_test("echo 'PERIOD=\"20\"' >> /etc/sysmonitor.d/daemon2"); + (void)sleep(SLEEP_INTERVAL); + ret = exec_cmd_test("cat /home/custom.log | grep 'reload single config: parse daemon2 error'"); + CU_ASSERT(ret == 0); + ret = exec_cmd_test("cat /home/custom.log | grep 'single custom type is changed, reload sysmonitor'"); + CU_ASSERT(ret == 0); + ret = exec_cmd_test("cat /home/custom.log | grep 'single custom monitor is switched off'"); + CU_ASSERT(ret == 0); + + (void)exec_cmd_test("rm -rf /etc/sysmonitor.d/daemon2"); + test_custom_monitor_daemon_fun_001_2(); + test_custom_monitor_daemon_fun_001_3(); +} + +static void test_check_periodic_monitor_fun_001_2(void) +{ + int ret; + unsigned int period = (unsigned int)g_periodic_info->period; + + (void)exec_cmd_test("rm -rf /home/custom.log"); + (void)exec_cmd_test("echo 'MONITOR_SWITCH=\"on\"' > /etc/sysmonitor.d/periodic"); + (void)exec_cmd_test("echo 'TYPE=\"periodic\"' >> /etc/sysmonitor.d/periodic"); + (void)exec_cmd_test("cp ./common/process_exit_test /home"); + (void)exec_cmd_test("echo 'EXECSTART=\"/home/process_exit_test\"' >> /etc/sysmonitor.d/periodic"); + (void)exec_cmd_test("echo 'PERIOD=\"3\"' >> /etc/sysmonitor.d/periodic"); + g_periodic_info->reload = true; + (void)sleep(period + 1); + (void)sleep(WORKER_TASK_TIMEOUT); + g_periodic_info->reload = true; + (void)sleep(period + 1); + ret = exec_cmd_test("cat /home/custom.log | grep 'process SIGTERM timeout, use SIGKILL'"); + CU_ASSERT(ret == 0); + (void)exec_cmd_test("rm -rf 
/home/process_exit_test && rm -rf /etc/sysmonitor.d/periodic"); + g_periodic_info->reload = true; + (void)sleep(period + 1); + + (void)exec_cmd_test("echo 'MONITOR_SWITCH=\"on\"' > /etc/sysmonitor.d/periodic1"); + (void)exec_cmd_test("echo 'TYPE=\"periodic\"' >> /etc/sysmonitor.d/periodic1"); + (void)exec_cmd_test("echo 'EXECSTART=\"122 \"' >> /etc/sysmonitor.d/periodic1"); + (void)exec_cmd_test("echo 'PERIOD=\"3\"' >> /etc/sysmonitor.d/periodic1"); + set_log_interface_flag(DAEMON_SYSLOG); + g_periodic_info->reload = true; + (void)sleep(period + 2); + ret = exec_cmd_test("cat /var/log/messages | grep -a 'worker_routine: periodic pid' | grep -v 'cat'"); + CU_ASSERT(ret == 0); + (void)exec_cmd_test("rm -rf /etc/sysmonitor.d/periodic1"); + g_periodic_info->reload = true; + (void)sleep(period + 1); + set_log_interface_flag(NORMAL_WRITE); +} + +static void create_periodic_config(int num) +{ + char temp[MAX_TEMPSTR] = {0}; + int i; + + (void)exec_cmd_test("echo 'MONITOR_SWITCH=\"on\"' > /etc/sysmonitor.d/periodic-test1"); + (void)exec_cmd_test("echo 'TYPE=\"periodic\"' >> /etc/sysmonitor.d/periodic-test1"); + (void)exec_cmd_test("echo 'EXECSTART=\"sleep 3\"' >> /etc/sysmonitor.d/periodic-test1"); + (void)exec_cmd_test("echo 'PERIOD=\"3\"' >> /etc/sysmonitor.d/periodic-test1"); + + (void)exec_cmd_test("echo 'MONITOR_SWITCH=\"on\"' > /etc/sysmonitor.d/periodic-test"); + (void)exec_cmd_test("echo 'TYPE=\"periodic\"' >> /etc/sysmonitor.d/periodic-test"); + (void)exec_cmd_test("echo 'EXECSTART=\"sleep 15\"' >> /etc/sysmonitor.d/periodic-test"); + (void)exec_cmd_test("echo 'PERIOD=\"3\"' >> /etc/sysmonitor.d/periodic-test"); + for (i = 0; i < num; i++) { + (void)snprintf_s(temp, MAX_TEMPSTR, MAX_TEMPSTR - 1, + "cp /etc/sysmonitor.d/periodic-test /etc/sysmonitor.d/test%d", i); + (void)exec_cmd_test(temp); + } + (void)exec_cmd_test("rm -rf /etc/sysmonitor.d/periodic-test"); +} + +#define TEST_PERIODIC_NUM 120 +#define TEST_PERIODIC_TIMEOUT 15 +#define TEST_PERIODIC_RELOAD 5 
+static void test_check_periodic_monitor_fun_001_3(void) +{ + int ret; + + create_periodic_config(TEST_PERIODIC_NUM); + g_periodic_info->reload = true; + (void)sleep(TEST_PERIODIC_TIMEOUT); + ret = exec_cmd_test("cat /home/custom.log | grep 'task queue is full! no index!'"); + CU_ASSERT(ret == 0); + ret = exec_cmd_test("cat /home/custom.log | grep 'custom_periodic_monitor_start: index' | grep error"); + CU_ASSERT(ret == 0); + g_periodic_info->reload = true; + (void)sleep(TEST_PERIODIC_RELOAD); + g_periodic_info->reload = true; + (void)sleep(TEST_PERIODIC_RELOAD); + ret = exec_cmd_test("cat /home/custom.log | grep 'process_worker_task: index' | grep error"); + CU_ASSERT(ret == 0); + (void)exec_cmd_test("rm -rf /etc/sysmonitor.d/test*"); + (void)exec_cmd_test("rm -rf /etc/sysmonitor.d/periodic-test1"); + wait_for_periodic_reload(); +} + +static void test_check_periodic_monitor_fun_001() +{ + int ret; + pthread_t worker_tid; + unsigned int period; + + g_periodic_info->period = 1; + period = (unsigned int)g_periodic_info->period; + (void)exec_cmd_test("echo 'MONITOR_SWITCH=\"on\"' > /etc/sysmonitor.d/periodic"); + (void)exec_cmd_test("echo 'TYPE=\"periodic\"' >> /etc/sysmonitor.d/periodic"); + (void)exec_cmd_test("echo 'EXECSTART=\"/bin/bash /home/test.sh\"' >> /etc/sysmonitor.d/periodic"); + (void)exec_cmd_test("echo 'PERIOD=\"1\"' >> /etc/sysmonitor.d/periodic"); + (void)exec_cmd_test("echo '#! 
/bin/bash' > /home/test.sh"); + (void)exec_cmd_test("echo 'echo 11111 >> /home/1.log' >> /home/test.sh"); + g_periodic_info->reload = true; + (void)sleep(period + period + period); + ret = exec_cmd_test("cat /home/1.log | grep 11111"); + CU_ASSERT(ret == 0); + (void)exec_cmd_test("rm -rf /etc/sysmonitor.d/periodic"); + g_periodic_info->reload = true; + (void)sleep(period + 1); + (void)exec_cmd_test("rm -rf /home/test.sh && rm -rf /home/1.log"); + (void)exec_cmd_test("echo 'MONITOR_SWITCH=\"on\"' > /etc/sysmonitor.d/periodic"); + (void)exec_cmd_test("echo 'TYPE=\"periodic\"' >> /etc/sysmonitor.d/periodic"); + (void)exec_cmd_test("cp ./common/process_exit_test /home"); + (void)exec_cmd_test("echo 'EXECSTART=\"/home/process_exit_test\"' >> /etc/sysmonitor.d/periodic"); + (void)exec_cmd_test("echo 'PERIOD=\"1\"' >> /etc/sysmonitor.d/periodic"); + (void)worker_task_struct_init(); + g_periodic_info->monitor = true; + (void)worker_thread_init(&worker_tid); + g_periodic_info->reload = true; + (void)sleep(period + 1); + (void)sleep(WORKER_TASK_TIMEOUT + 2); + ret = exec_cmd_test("cat /home/custom.log | grep 'execute periodic monitoring timeout'"); + CU_ASSERT(ret == 0); + ret = exec_cmd_test("cat /home/custom.log | grep 'process SIGTERM timeout, use SIGKILL'"); + CU_ASSERT(ret == 0); + (void)exec_cmd_test("rm -rf /home/process_exit_test && rm -rf /etc/sysmonitor.d/periodic"); + g_periodic_info->reload = true; + (void)sleep(period + 1); + + test_check_periodic_monitor_fun_001_2(); + test_check_periodic_monitor_fun_001_3(); +} + +int main(int argc, char **argv) /* Runs the custom suite; argv[1] selects the CUnit mode (0 screen, 1 XML file, 2 console). Returns the number of failed assertions, or a non-zero code when setup fails or the mode is not supported. */ +{ + CU_pSuite suite = NULL; + unsigned int num_failures; + cu_run_mode g_cunit_mode = CUNIT_SCREEN; + + if (argc > 1) { + g_cunit_mode = (cu_run_mode)strtol(argv[1], NULL, STRTOL_NUMBER_BASE); + } + + if (CU_initialize_registry() != CUE_SUCCESS) { + return CU_get_error(); + } + + suite = CU_add_suite("custom", init_before_test, clean_after_test); + if (suite == NULL) { + goto err; + } + (void)CU_ADD_TEST(suite, 
test_custom_load_file_fun_001); + (void)CU_ADD_TEST(suite, test_custom_parse_config_fun_001); + (void)CU_ADD_TEST(suite, test_custom_parse_config_fun_002); + (void)CU_ADD_TEST(suite, test_custom_parse_config_fun_003); + (void)CU_ADD_TEST(suite, test_custom_monitor_daemon_fun_001); + (void)CU_ADD_TEST(suite, test_check_periodic_monitor_fun_001); + + switch (g_cunit_mode) { + case CUNIT_SCREEN: + CU_basic_set_mode(CU_BRM_VERBOSE); + CU_basic_run_tests(); + break; + case CUNIT_XMLFILE: + CU_set_output_filename("custom"); + CU_list_tests_to_file(); + CU_automated_run_tests(); + break; + case CUNIT_CONSOLE: + CU_console_run_tests(); + break; + default: + (void)printf("not support cunit mode, only support: 0, 1 or 2\n"); + goto err; + } + num_failures = CU_get_number_of_failures(); + CU_cleanup_registry(); + return num_failures; +err: /* capture the status first: CU_cleanup_registry() resets the CUnit error, and an unsupported mode never sets one */ + num_failures = (CU_get_error() != CUE_SUCCESS) ? (unsigned int)CU_get_error() : 1U; + CU_cleanup_registry(); + return (int)num_failures; +} diff --git a/sysmonitor-1.3.2/test/disk/CMakeLists.txt b/sysmonitor-1.3.2/test/disk/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..777c7d34982235252e9af6a757d675872cab2601 --- /dev/null +++ b/sysmonitor-1.3.2/test/disk/CMakeLists.txt @@ -0,0 +1,10 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2018-2019. All rights reserved. 
+# Description: cmake file of disk_llt +# Author: xuchunmei +# Create: 2019-9-28 +project(sysmonitor) + +INCLUDE_DIRECTORIES(../../src) +add_executable(disk_llt ../../src/common.c ../../src/disk.c ../common_interface/common_interface.c ../../src/monitor_thread.c disk_llt.c) +set_target_properties(disk_llt PROPERTIES LINK_FLAGS "-Wl,-z,relro -Wl,-z,noexecstack -Wl,-z,now -Wtrampolines -pie -g") +target_link_libraries(disk_llt boundscheck pthread cunit) diff --git a/sysmonitor-1.3.2/test/disk/disk_llt.c b/sysmonitor-1.3.2/test/disk/disk_llt.c new file mode 100644 index 0000000000000000000000000000000000000000..78b36737330431553441515ac042141461437b9a --- /dev/null +++ b/sysmonitor-1.3.2/test/disk/disk_llt.c @@ -0,0 +1,187 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2019-2019. All rights reserved. + * sysmonitor licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. 
+ * Description: testcase for disk monitor + * Author: xuchunmei + * Create: 2019-10-10 + */ +#define _GNU_SOURCE +#include +#include +#include +#include +#include "disk.h" +#include "common.h" +#include "../common_interface/common_interface.h" + +#define DISK_TEST_LOG "/home/disk.log" +#define USLEEP_INTERVAL (500 * 1000) +#define WAIT_RELOAD_MAX_TRIES 60 /* 60 polls of USLEEP_INTERVAL (500 ms) = 30 s upper bound */ + +typedef enum { + CUNIT_SCREEN = 0, + CUNIT_XMLFILE, + CUNIT_CONSOLE +} cu_run_mode; + +static monitor_thread *g_disk_info = NULL; + +static int init_before_test(void) /* Suite setup: fetches the DISK_ITEM thread info, moves /etc/sysmonitor/disk aside and calls disk_monitor_init() with period 1 and the reload flag set; returns 1 when the thread info is missing. */ +{ + g_disk_info = get_thread_item_info(DISK_ITEM); + if (g_disk_info == NULL) { + return 1; + } + init_log_for_test(DISK_TEST_LOG); + (void)exec_cmd_test("mv /etc/sysmonitor/disk /etc/sysmonitor/disk-bak"); + g_disk_info->period = 1; + g_disk_info->reload = true; + disk_monitor_init(); + return 0; +} + +static int clean_after_test(void) /* Suite teardown: removes the test config and moves the saved /etc/sysmonitor/disk-bak back. */ +{ + (void)exec_cmd_test("rm -rf /etc/sysmonitor/disk"); + (void)exec_cmd_test("mv /etc/sysmonitor/disk-bak /etc/sysmonitor/disk"); + clear_log_config(DISK_TEST_LOG); + return 0; +} + +static void test_disk_monitor_init_fun_001() +{ + int ret; + + ret = exec_cmd_test("cat /home/disk.log | grep 'open /etc/sysmonitor/disk error'"); + CU_ASSERT(ret == 0); +} + +/* Deletes the test log, sets the reload flag and polls until it is cleared again (presumably by the disk monitor thread); records a CUnit failure instead of hanging forever if that never happens. */ +static void wait_for_reload(void) +{ + int tries; + + (void)exec_cmd_test("rm -rf /home/disk.log"); + g_disk_info->reload = true; + for (tries = 0; g_disk_info->reload && tries < WAIT_RELOAD_MAX_TRIES; tries++) { + (void)usleep(USLEEP_INTERVAL); + } + if (g_disk_info->reload) { + CU_FAIL("disk monitor did not clear the reload flag in time"); + } +} + +static void test_disk_monitor_init_fun_002(void) +{ + int ret; + + (void)exec_cmd_test("echo ' DISK=\"/\" ALARM=\"80\" RESUME=\"70\"' > /etc/sysmonitor/disk"); + (void)exec_cmd_test("echo 'DISK=\"/var/log\"' >> /etc/sysmonitor/disk"); + g_disk_info->period = 1; + disk_monitor_init(); + wait_for_reload(); + ret = exec_cmd_test("cat /home/disk.log | grep 'reload disk monitor configuration failed'"); + CU_ASSERT(ret != 0); +} + +static void test_disk_monitor_reload(void) +{ + int ret; + + (void)exec_cmd_test("echo ' DISK=\"/\" ALARM=\"90\" RESUME=\"70\"' > /etc/sysmonitor/disk"); + 
(void)exec_cmd_test("echo 'DISK=\"/var/log\"' >> /etc/sysmonitor/disk"); + (void)exec_cmd_test("echo 'DISK=\"/dev\"' >> /etc/sysmonitor/disk"); + wait_for_reload(); + ret = exec_cmd_test("cat /home/disk.log | grep 'reload disk monitor configuration failed'"); + CU_ASSERT(ret != 0); +} + +static void test_disk_loadconf_abn_001(void) +{ + int ret; + + (void)exec_cmd_test("echo 'DISK1=\"/var\"' > /etc/sysmonitor/disk"); + wait_for_reload(); + ret = exec_cmd_test("cat /home/disk.log | grep 'parse_diskline error'"); + CU_ASSERT(ret == 0); +} + +static void test_disk_loadconf_abn_002(void) +{ + int ret; + + (void)exec_cmd_test("echo 'DISK=\"/var\" ALARM=\"70\" RESUME=\"80\"' > /etc/sysmonitor/disk"); + wait_for_reload(); + ret = exec_cmd_test("cat /home/disk.log | grep 'alarm:70 or resume:80 invalided'"); + CU_ASSERT(ret == 0); +} + +static void test_disk_loadconf_abn_003(void) +{ + int ret; + + (void)exec_cmd_test("rm -rf /home/test"); + (void)exec_cmd_test("echo 'DISK=\"/home/test\"' > /etc/sysmonitor/disk"); + wait_for_reload(); + ret = exec_cmd_test("cat /home/disk.log | grep 'get_mount:/home/test failed'"); + CU_ASSERT(ret == 0); +} + +int main(int argc, char **argv) +{ + CU_pSuite suite = NULL; + unsigned int num_failures; + cu_run_mode g_cunit_mode = CUNIT_SCREEN; + + if (argc > 1) { + g_cunit_mode = (cu_run_mode)strtol(argv[1], NULL, STRTOL_NUMBER_BASE); + } + + if (CU_initialize_registry() != CUE_SUCCESS) { + return CU_get_error(); + } + + suite = CU_add_suite("disk", init_before_test, clean_after_test); + if (suite == NULL) { + goto err; + } + + (void)CU_ADD_TEST(suite, test_disk_monitor_init_fun_001); + (void)CU_ADD_TEST(suite, test_disk_monitor_init_fun_002); + (void)CU_ADD_TEST(suite, test_disk_monitor_reload); + (void)CU_ADD_TEST(suite, test_disk_loadconf_abn_001); + (void)CU_ADD_TEST(suite, test_disk_loadconf_abn_002); + (void)CU_ADD_TEST(suite, test_disk_loadconf_abn_003); + + switch (g_cunit_mode) { + case CUNIT_SCREEN: + 
CU_basic_set_mode(CU_BRM_VERBOSE); + CU_basic_run_tests(); + break; + case CUNIT_XMLFILE: + CU_set_output_filename("disk"); + CU_list_tests_to_file(); + CU_automated_run_tests(); + break; + case CUNIT_CONSOLE: + CU_console_run_tests(); + break; + default: + (void)printf("not suport cunit mode, only suport: 0 or 1\n"); + goto err; + } + + num_failures = CU_get_number_of_failures(); + CU_cleanup_registry(); + return num_failures; +err: + CU_cleanup_registry(); + return CU_get_error(); +} diff --git a/sysmonitor-1.3.2/test/filemonitor/CMakeLists.txt b/sysmonitor-1.3.2/test/filemonitor/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..51dfc678babf9fadc43a8b5f21bd34d2bd8638a9 --- /dev/null +++ b/sysmonitor-1.3.2/test/filemonitor/CMakeLists.txt @@ -0,0 +1,10 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2018-2019. All rights reserved. +# Description: cmake file of disk_llt +# Author: xuchunmei +# Create: 2019-9-28 +project(sysmonitor) + +INCLUDE_DIRECTORIES(../../src) +add_executable(filemonitor_llt ../../src/common.c ../../src/filemonitor.c ../common_interface/common_interface.c filemonitor_llt.c) +set_target_properties(filemonitor_llt PROPERTIES LINK_FLAGS "-Wl,-z,relro -Wl,-z,noexecstack -Wl,-z,now -Wtrampolines -pie -g") +target_link_libraries(filemonitor_llt boundscheck pthread cunit) diff --git a/sysmonitor-1.3.2/test/filemonitor/filemonitor_llt.c b/sysmonitor-1.3.2/test/filemonitor/filemonitor_llt.c new file mode 100644 index 0000000000000000000000000000000000000000..607ca3e7c98a852d7c2584e15932798268ce6207 --- /dev/null +++ b/sysmonitor-1.3.2/test/filemonitor/filemonitor_llt.c @@ -0,0 +1,267 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2019-2019. All rights reserved. + * sysmonitor licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. 
+ * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. + * Description: testcase for file monitor + * Author: xuchunmei + * Create: 2019-10-10 + */ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include "filemonitor.h" +#include "common.h" +#include "../common_interface/common_interface.h" + +#define FILE_TEST_LOG "/home/file.log" + +typedef enum { + CUNIT_SCREEN = 0, + CUNIT_XMLFILE, + CUNIT_CONSOLE +} cu_run_mode; + +#define FILE_WATCH_SELECT_TIMEOUT 1 /* passed to set_file_monitor_select_timeout(); the tests sleep this value plus one after each reload request */ +static int init_before_test(void) /* Suite setup: initialises test logging, stops the sysmonitor service, moves /etc/sysmonitor/file aside and calls file_monitor_init(). */ +{ + init_log_for_test(FILE_TEST_LOG); + set_file_monitor_select_timeout(FILE_WATCH_SELECT_TIMEOUT); + (void)exec_cmd_test("systemctl stop sysmonitor"); + (void)exec_cmd_test("mv /etc/sysmonitor/file /etc/sysmonitor/file-bak"); + file_monitor_init(); + return 0; +} + +static int clean_after_test(void) /* Suite teardown: removes the test config and moves the saved file-bak back. */ +{ + (void)exec_cmd_test("rm -rf /etc/sysmonitor/file"); + (void)exec_cmd_test("mv /etc/sysmonitor/file-bak /etc/sysmonitor/file"); + clear_log_config(FILE_TEST_LOG); + return 0; +} + +static void check_load_config_result(void) /* Asserts that every diagnostic expected from the config written by test_file_load_config_fun_001() can be grepped from /home/file.log. */ +{ + int ret; + + ret = exec_cmd_test("cat /home/file.log | grep 'Config file line len is invalid'"); + CU_ASSERT(ret == 0); + ret = exec_cmd_test("cat /home/file.log | " + "grep \"The path can't be recognised. The path length should be less than 4096 characters. 
error.\""); + CU_ASSERT(ret == 0); + ret = exec_cmd_test("cat /home/file.log | grep 'Mask is 0x500, it is more than add and delete, error.'"); + CU_ASSERT(ret == 0); + ret = exec_cmd_test("cat /home/file.log | grep '(/proc /sys /dev)file /proc no need to monitor'"); + CU_ASSERT(ret == 0); + ret = exec_cmd_test("cat /home/file.log | grep '(/proc /sys /dev)file /sys no need to monitor'"); + CU_ASSERT(ret == 0); + ret = exec_cmd_test("cat /home/file.log | grep '(/proc /sys /dev)file /dev no need to monitor'"); + CU_ASSERT(ret == 0); + ret = exec_cmd_test("cat /home/file.log | grep '/lib should be absolute path'"); + CU_ASSERT(ret == 0); + ret = exec_cmd_test("cat /home/file.log | grep 'Watch path is in /var/log, watch /var/log for only delete event'"); + CU_ASSERT(ret == 0); + ret = exec_cmd_test("cat /home/file.log | grep 'File path /home/ is already configed, ignore this conf item.'"); + CU_ASSERT(ret == 0); + ret = exec_cmd_test("cat /home/file.log | " + "grep 'File path /home/11.log is already configed, ignore this conf item.'"); + CU_ASSERT(ret == 0); + ret = exec_cmd_test("cat /home/file.log | " + "grep '/run/dbus/system_bus_socket is not a directory or regular file, can not watch it.'"); + CU_ASSERT(ret == 0); + ret = exec_cmd_test("cat /home/file.log | grep 'file monitor:config file name is too long'"); + CU_ASSERT(ret == 0); + ret = exec_cmd_test("cat /home/file.log | grep '/etc/sysmonitor/file.d/test: bad file mode'"); + CU_ASSERT(ret == 0); + ret = exec_cmd_test("cat /home/file.log | grep 'file name is \"/home/22.log\", watch event is 0x200'"); + CU_ASSERT(ret == 0); + ret = exec_cmd_test("cat /home/file.log | " + "grep 'File path /home/22.log is already configed, ignore this conf item.'"); + CU_ASSERT(ret == 0); + ret = exec_cmd_test("cat /home/file.log | grep 'Cannot add watch for \"/home/33.log\" with event mask 0x200'"); + CU_ASSERT(ret == 0); +} + +static void test_file_load_config_fun_001() /* Writes a config full of invalid, duplicate and unsupported entries (plus file.d fragments), requests a reload and checks the resulting log via check_load_config_result(). */ +{ + int ret; + char temp[MAX_LINE_LEN + 1] = {0}; + char 
cmd[MAX_LINE_LEN + MAX_TEMPSTR] = {0}; + + ret = exec_cmd_test("cat /home/file.log | grep '/etc/sysmonitor/file.d/ not exist'"); + CU_ASSERT(ret == 0); + ret = exec_cmd_test("cat /home/file.log | grep 'No watcher add to FD'"); + CU_ASSERT(ret == 0); + + (void)memset_s(temp, sizeof(temp), '1', sizeof(temp) - 1); /* a line of MAX_LINE_LEN characters */ + (void)snprintf_s(cmd, sizeof(cmd), sizeof(cmd) - 1, "echo %s > /etc/sysmonitor/file", temp); + (void)exec_cmd_test(cmd); + (void)exec_cmd_test("cat /etc/sysmonitor/file"); + (void)memset_s(temp, sizeof(temp), 0, sizeof(temp)); + (void)memset_s(temp, sizeof(temp), '1', MAX_PATH_LEN); /* a path of MAX_PATH_LEN characters */ + (void)snprintf_s(cmd, sizeof(cmd), sizeof(cmd) - 1, "echo %s >> /etc/sysmonitor/file", temp); + (void)exec_cmd_test(cmd); + (void)exec_cmd_test("echo '/home 0x500' >> /etc/sysmonitor/file"); + (void)exec_cmd_test("echo '/proc 0x300' >> /etc/sysmonitor/file"); + (void)exec_cmd_test("echo '/sys 0x300' >> /etc/sysmonitor/file"); + (void)exec_cmd_test("echo '/dev 0x300' >> /etc/sysmonitor/file"); + (void)exec_cmd_test("echo '/lib 0x300' >> /etc/sysmonitor/file"); + (void)exec_cmd_test("echo '/var/log 0x300' >> /etc/sysmonitor/file"); + (void)exec_cmd_test("echo '/home' >> /etc/sysmonitor/file"); + (void)exec_cmd_test("echo '/home/' >> /etc/sysmonitor/file"); + (void)exec_cmd_test("touch /home/11.log"); + (void)exec_cmd_test("echo ' //home/11.log' >> /etc/sysmonitor/file"); + (void)exec_cmd_test("echo '/home/11.log' >> /etc/sysmonitor/file"); + (void)exec_cmd_test("echo '/run/dbus/system_bus_socket' >> /etc/sysmonitor/file"); + (void)exec_cmd_test("mkdir -p /etc/sysmonitor/file.d"); + (void)memset_s(temp, sizeof(temp), 0, sizeof(temp)); + (void)memset_s(temp, sizeof(temp), 'a', FM_MAX_CFG_NAME_LEN); /* a file.d entry name of FM_MAX_CFG_NAME_LEN characters */ + (void)snprintf_s(cmd, sizeof(cmd), sizeof(cmd) - 1, "touch /etc/sysmonitor/file.d/%s", temp); + (void)exec_cmd_test(cmd); + (void)snprintf_s(cmd, sizeof(cmd), sizeof(cmd) - 1, "chmod 700 /etc/sysmonitor/file.d/%s", temp); + (void)exec_cmd_test(cmd); + (void)exec_cmd_test("touch 
/etc/sysmonitor/file.d/test && chmod 777 /etc/sysmonitor/file.d/test"); + (void)exec_cmd_test("touch /etc/sysmonitor/file.d/test1 && chmod 700 /etc/sysmonitor/file.d/test1"); + (void)exec_cmd_test("touch /etc/sysmonitor/file.d/test2 && chmod 700 /etc/sysmonitor/file.d/test2"); + (void)exec_cmd_test("touch /home/22.log"); + (void)exec_cmd_test("echo '/home/22.log' > /etc/sysmonitor/file.d/test1"); + (void)exec_cmd_test("echo '/home/22.log' > /etc/sysmonitor/file.d/test2"); + (void)exec_cmd_test("echo '/home/33.log' >> /etc/sysmonitor/file"); + + set_thread_item_reload_flag(FILE_ITEM, true); + (void)sleep(FILE_WATCH_SELECT_TIMEOUT + 1); + check_load_config_result(); + (void)exec_cmd_test("rm -rf /etc/sysmonitor/file.d && rm -rf /home/11.log && rm -rf /home/22.log"); + (void)exec_cmd_test("rm -rf /etc/sysmonitor/file"); +} + +static void test_file_reload_config_fun_001() /* Alternates between a config that watches /home and an empty or missing config, requesting a reload each time and asserting the matching log lines. */ +{ + int ret; + + (void)exec_cmd_test("rm -rf /home/file.log"); + (void)exec_cmd_test("echo /home > /etc/sysmonitor/file"); + set_thread_item_reload_flag(FILE_ITEM, true); + (void)sleep(FILE_WATCH_SELECT_TIMEOUT + 1); + ret = exec_cmd_test("cat /home/file.log | grep 'file name is \"/home\", watch event is 0x200'"); + CU_ASSERT(ret == 0); + (void)exec_cmd_test("echo > /etc/sysmonitor/file"); + set_thread_item_reload_flag(FILE_ITEM, true); + (void)sleep(FILE_WATCH_SELECT_TIMEOUT + 1); + ret = exec_cmd_test("cat /home/file.log | grep 'Conf file is modified, reload conf and watch again.'"); + CU_ASSERT(ret == 0); + ret = exec_cmd_test("cat /home/file.log | grep 'No watcher add to FD.'"); + CU_ASSERT(ret == 0); + (void)exec_cmd_test("rm -rf /home/file.log"); + (void)exec_cmd_test("echo /home > /etc/sysmonitor/file"); + set_thread_item_reload_flag(FILE_ITEM, true); + (void)sleep(FILE_WATCH_SELECT_TIMEOUT + 1); + ret = exec_cmd_test("cat /home/file.log | grep 'file name is \"/home\", watch event is 0x200'"); + CU_ASSERT(ret == 0); + (void)exec_cmd_test("rm -rf /etc/sysmonitor/file"); + 
set_thread_item_reload_flag(FILE_ITEM, true); + (void)sleep(FILE_WATCH_SELECT_TIMEOUT + 1); + ret = exec_cmd_test("cat /home/file.log | grep 'Conf file is modified, reload conf and watch again.'"); + CU_ASSERT(ret == 0); + ret = exec_cmd_test("cat /home/file.log | grep 'No watcher add to FD.'"); + CU_ASSERT(ret == 0); +} + +static void test_file_event_handle_fun_001() /* The whole body is compiled out with #if 0, so this registered test currently performs no checks. */ +{ +#if 0 + int ret; + + (void)exec_cmd_test("touch /home/testfile"); + (void)exec_cmd_test("touch /home/testingore"); + (void)exec_cmd_test("echo '/home 0x300' > /etc/sysmonitor/file"); + (void)exec_cmd_test("echo '/home/testfile 0x100' >> /etc/sysmonitor/file"); + (void)exec_cmd_test("echo '/home/testingore 0x300' >> /etc/sysmonitor/file"); + (void)exec_cmd_test("echo 111 > /home/testingore"); + (void)exec_cmd_test("echo 222 >> /home/testingore"); + (void)exec_cmd_test("echo ggwxjddZZ > /home/test.keys"); + set_thread_item_reload_flag(FILE_ITEM, true); + (void)sleep(FILE_WATCH_SELECT_TIMEOUT + 1); + (void)exec_cmd_test("touch /home/test111"); + (void)exec_cmd_test("rm -rf /home/test111"); + (void)exec_cmd_test("mkdir /home/test222"); + (void)exec_cmd_test("rm -rf /home/test222"); + (void)exec_cmd_test("rm -rf /home/testfile"); + (void)exec_cmd_test("vim -s /home/test.keys /home/testingore"); + ret = exec_cmd_test("cat /home/file.log | grep 'Subfile \"test111\" under \"/home\" was added' | grep 'comm'"); + CU_ASSERT(ret == 0); + ret = exec_cmd_test("cat /home/file.log | grep 'Subfile \"test111\" under \"/home\" was deleted' | grep 'comm'"); + CU_ASSERT(ret == 0); + ret = exec_cmd_test("cat /home/file.log | grep 'Subdir \"test222\" under \"/home\" was added' | grep 'comm'"); + CU_ASSERT(ret == 0); + ret = exec_cmd_test("cat /home/file.log | grep 'Subdir \"test222\" under \"/home\" was deleted' | grep 'comm'"); + CU_ASSERT(ret == 0); + ret = exec_cmd_test("cat /home/file.log | grep 'Subfile \"testfile\" under \"/home\" was deleted' | grep 'comm'"); + CU_ASSERT(ret == 0); + ret = exec_cmd_test("cat 
/home/file.log | grep 'File \"/home/testfile\" was deleted' | grep 'comm'"); + CU_ASSERT(ret == 0); + ret = exec_cmd_test("cat /home/file.log | " + "grep 'File \"/home/testingore\" was deleted.' | grep \"It's maybe changed\" | grep 'comm'"); + CU_ASSERT(ret == 0); + (void)exec_cmd_test("rm -rf /home/test.keys"); + (void)exec_cmd_test("rm -rf /home/testingore"); + (void)exec_cmd_test("rm -rf /etc/sysmonitor/file"); +#endif +} + +int main(int argc, char **argv) +{ + CU_pSuite suite = NULL; + unsigned int num_failures; + cu_run_mode g_cunit_mode = CUNIT_SCREEN; + + if (argc > 1) { + g_cunit_mode = (cu_run_mode)strtol(argv[1], NULL, STRTOL_NUMBER_BASE); + } + + if (CU_initialize_registry() != CUE_SUCCESS) { + return CU_get_error(); + } + + suite = CU_add_suite("filemonitor", init_before_test, clean_after_test); + if (suite == NULL) { + goto err; + } + (void)CU_ADD_TEST(suite, test_file_load_config_fun_001); + (void)CU_ADD_TEST(suite, test_file_reload_config_fun_001); + (void)CU_ADD_TEST(suite, test_file_event_handle_fun_001); + + switch (g_cunit_mode) { + case CUNIT_SCREEN: + CU_basic_set_mode(CU_BRM_VERBOSE); + CU_basic_run_tests(); + break; + case CUNIT_XMLFILE: + CU_set_output_filename("filemonitor"); + CU_list_tests_to_file(); + CU_automated_run_tests(); + break; + case CUNIT_CONSOLE: + CU_console_run_tests(); + break; + default: + (void)printf("not suport cunit mode, only suport: 0 or 1\n"); + goto err; + } + + num_failures = CU_get_number_of_failures(); + CU_cleanup_registry(); + return num_failures; +err: + CU_cleanup_registry(); + return CU_get_error(); +} diff --git a/sysmonitor-1.3.2/test/fs/CMakeLists.txt b/sysmonitor-1.3.2/test/fs/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..823e9bcdebdbeeaa557b022f66e08754a2505d74 --- /dev/null +++ b/sysmonitor-1.3.2/test/fs/CMakeLists.txt @@ -0,0 +1,11 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2021-2021. All rights reserved. 
+# Description: cmake file of fs_llt +# Author: xietangxin +# Create: 2021-12-06 + +project(sysmonitor) + +INCLUDE_DIRECTORIES(../../src) +add_executable(fs_llt fs_llt.c ../../src/fsmonitor.c ../../src/common.c ../common_interface/common_interface.c ../../src/monitor_thread.c) +set_target_properties(fs_llt PROPERTIES LINK_FLAGS "-Wl,-z,relro -Wl,-z,noexecstack -Wl,-z,now -Wtrampolines -pie") +target_link_libraries(fs_llt cunit boundscheck pthread) diff --git a/sysmonitor-1.3.2/test/fs/fs_llt.c b/sysmonitor-1.3.2/test/fs/fs_llt.c new file mode 100644 index 0000000000000000000000000000000000000000..e5f95ed587c27f97131f160d5d3a2e425a145f1e --- /dev/null +++ b/sysmonitor-1.3.2/test/fs/fs_llt.c @@ -0,0 +1,135 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2021-2021. All rights reserved. + * sysmonitor licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. 
+ * Description: testcase for fs monitor + * Author: xietangxin + * Create: 2021-12-06 + */ +#include +#include +#include +#include +#include +#include "fsmonitor.h" +#include "common.h" +#include "../common_interface/common_interface.h" + +typedef enum { + MOUNT_RO = 0, + MOUNT_DEF = 1, +} mount_type; + +typedef enum { + CUNIT_SCREEN = 0, + CUNIT_XMLFILE, + CUNIT_CONSOLE +} cu_run_mode; + +#define FS_TEST_LOG "/home/fs.log" + +static monitor_thread *g_fs_info = NULL; + +static int init_before_test(void) /* Suite setup: initialises test logging, fetches the FS_ITEM thread info and calls fs_monitor_init(); returns 1 when the thread info is missing. */ +{ + init_log_for_test(FS_TEST_LOG); + g_fs_info = get_thread_item_info(FS_ITEM); + if (g_fs_info == NULL) { + return 1; + } + fs_monitor_init(); + return 0; +} + +static int clean_after_test(void) /* Suite teardown: lazily unmounts /home/mnt/mpoint and removes /home/mnt. */ +{ + (void)exec_cmd_test("umount -l /home/mnt/mpoint"); + (void)exec_cmd_test("rm -rf /home/mnt"); + clear_log_config(FS_TEST_LOG); + return 0; +} + +#if 0 +static void create_fs_err(mount_type type) +{ + (void)exec_cmd_test("umount -l /home/mnt/mpoint"); + (void)exec_cmd_test("rm -rf /home/mnt"); + (void)exec_cmd_test("mkdir -p /home/mnt/mpoint"); + (void)exec_cmd_test("dd if=/dev/zero of=/home/mnt/disk bs=1M count=10"); + (void)exec_cmd_test("echo y | mkfs.ext4 /home/mnt/disk"); + (void)exec_cmd_test("mount /home/mnt/disk /home/mnt/mpoint"); + if (type == MOUNT_RO) { + (void)exec_cmd_test("mount -o remount,errors=remount-ro /home/mnt/mpoint"); + } + (void)exec_cmd_test("dd if=/dev/zero of=/home/mnt/disk bs=1M count=1"); + (void)exec_cmd_test("touch /home/mnt/mpoint/file"); +} +#endif + +static void test_fs_monitor_fun(void) /* The whole body is compiled out with #if 0, so this registered test currently performs no checks. */ +{ +#if 0 + int ret; + + (void)exec_cmd_test("rm -rf /home/fs.log"); + create_fs_err(MOUNT_RO); + ret = exec_cmd_test("cat /home/fs.log | grep 'filesystem error. Remount filesystem read-only'"); + CU_ASSERT(ret == 0); + create_fs_err(MOUNT_DEF); + ret = exec_cmd_test("cat /home/fs.log | grep 'filesystem error. 
flag is'"); + CU_ASSERT(ret == 0); +#endif +} + +int main(int argc, char **argv) /* Runs the fs suite; argv[1] selects the CUnit mode (0 screen, 1 XML file, 2 console). Returns the number of failed assertions, or a non-zero code when setup fails or the mode is not supported. */ +{ + CU_pSuite suite = NULL; + unsigned int num_failures; + cu_run_mode g_cunit_mode = CUNIT_SCREEN; + + if (argc > 1) { + g_cunit_mode = (cu_run_mode)strtol(argv[1], NULL, STRTOL_NUMBER_BASE); + } + + if (CU_initialize_registry() != CUE_SUCCESS) { + return CU_get_error(); + } + + suite = CU_add_suite("fs", init_before_test, clean_after_test); + if (suite == NULL) { + num_failures = (unsigned int)CU_get_error(); /* read the error before CU_cleanup_registry() resets it */ + CU_cleanup_registry(); + return (int)num_failures; + } + + (void)CU_ADD_TEST(suite, test_fs_monitor_fun); + + switch (g_cunit_mode) { + case CUNIT_SCREEN: + CU_basic_set_mode(CU_BRM_VERBOSE); + CU_basic_run_tests(); + break; + case CUNIT_XMLFILE: + CU_set_output_filename("fs"); + CU_list_tests_to_file(); + CU_automated_run_tests(); + break; + case CUNIT_CONSOLE: + CU_console_run_tests(); + break; + default: + (void)printf("not support cunit mode, only support: 0, 1 or 2\n"); + CU_cleanup_registry(); + return 1; /* no CUnit error is pending here, so CU_get_error() would report success */ + } + + num_failures = CU_get_number_of_failures(); + CU_cleanup_registry(); + return num_failures; +} diff --git a/sysmonitor-1.3.2/test/fuzz/CMakeLists.txt b/sysmonitor-1.3.2/test/fuzz/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..dacc9db504bcdf7f82be2a6284749535b7edb3e9 --- /dev/null +++ b/sysmonitor-1.3.2/test/fuzz/CMakeLists.txt @@ -0,0 +1,31 @@ +project(sysmonitor) + +execute_process(COMMAND bash "-c" "rpm -qil compiler-rt | grep lib/libclang_rt.fuzzer-*.a" + OUTPUT_VARIABLE LIB_FUZZING_ENGINE + OUTPUT_STRIP_TRAILING_WHITESPACE) + +IF(LIB_FUZZING_ENGINE STREQUAL "") + MESSAGE(WARNING "LIB_FUZZING_ENGINE is NULL, will ignore directory compile") + RETURN() +ENDIF() + +MESSAGE(STATUS "LIB_FUZZING_ENGINE is set to ${LIB_FUZZING_ENGINE}") + +IF(CMAKE_BUILD_TYPE STREQUAL Debug) + SET(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -fsanitize-coverage=trace-pc") + SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fsanitize-coverage=trace-pc") +ENDIF() + 
+INCLUDE_DIRECTORIES(../../src) + +SET(EXE sysmonitor_fuzz) + +add_executable(${EXE} + sysmonitor_fuzz.c + ../../src/common.c + ../common_interface/common_interface.c) + +set_target_properties(${EXE} PROPERTIES LINKER_LANGUAGE CXX) + +target_link_libraries(${EXE} ${LIB_FUZZING_ENGINE} pthread securec) + diff --git a/sysmonitor-1.3.2/test/fuzz/corpus/samples b/sysmonitor-1.3.2/test/fuzz/corpus/samples new file mode 100644 index 0000000000000000000000000000000000000000..40e501493ef6614c189bf6ad5fe40ae39b5b39a2 --- /dev/null +++ b/sysmonitor-1.3.2/test/fuzz/corpus/samples @@ -0,0 +1,2 @@ +samples data + diff --git a/sysmonitor-1.3.2/test/fuzz/dict/sysmonitor_fuzz.dict b/sysmonitor-1.3.2/test/fuzz/dict/sysmonitor_fuzz.dict new file mode 100644 index 0000000000000000000000000000000000000000..6500b2dd0bb81054842c76b1a7242028dc865800 --- /dev/null +++ b/sysmonitor-1.3.2/test/fuzz/dict/sysmonitor_fuzz.dict @@ -0,0 +1 @@ +"key=\"value\"" diff --git a/sysmonitor-1.3.2/test/fuzz/fuzz.sh b/sysmonitor-1.3.2/test/fuzz/fuzz.sh new file mode 100755 index 0000000000000000000000000000000000000000..939fb7372b8b165c92c3d2d0388c7cf501e2d77d --- /dev/null +++ b/sysmonitor-1.3.2/test/fuzz/fuzz.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +FUZZ_OPTION="corpus -dict=./dict/sysmonitor_fuzz.dict -runs=10000000 -max_total_time=3600 -rss_limit_mb=0" + +# compile fuzz +make -j + +# run fuzz +./sysmonitor_fuzz $FUZZ_OPTION -artifact_prefix=sysmonitor_fuzz- + +# find crash file +echo "############# Fuzz Result #############" +crash=`find -name "*-crash-*"` +if [ x"$crash" != x"" ]; then + echo "find bugs while fuzzing, pls check <*-crash-*> file" + find -name "*-crash-*" + exit 1 +else + echo "all fuzz success." 
+fi + diff --git a/sysmonitor-1.3.2/test/fuzz/sysmonitor_fuzz.c b/sysmonitor-1.3.2/test/fuzz/sysmonitor_fuzz.c new file mode 100644 index 0000000000000000000000000000000000000000..291b6257719d97b4519327b0d93b1c6a2b459ca0 --- /dev/null +++ b/sysmonitor-1.3.2/test/fuzz/sysmonitor_fuzz.c @@ -0,0 +1,124 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2019-2019. All rights reserved. + * sysmonitor licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * http://license.coscl.org.cn/MulanPSL2 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR + * PURPOSE. + * See the Mulan PSL v2 for more details. + * Description: fuzz test + * Author: pengyeqing + * Create: 2019-12-24 + */ + +#include +#include +#include +#include "common.h" + +#define MIN_FORMAT_LEN 4 +#define MAX_DATA_SIZE (128 * 1024) + +/* calculate string length */ +static int string_len(const char *data, size_t size) +{ + const char *p = NULL; + + if (data == NULL || size == 0) { + return 0; + } + + p = data; + while (p < data + size && *p) { + p++; + } + + return p - data; +} + +/* key="value" */ +static int is_key_value_format(const char *buf, int len) +{ + char *ptr = NULL; + + if (buf == NULL || len < MIN_FORMAT_LEN) { + return 0; + } + ptr = strchr(buf + 1, '='); + if (ptr == NULL) { + return 0; + } + ptr = strchr(ptr + 1, '"'); + if (ptr == NULL) { + return 0; + } + return 1; +} + +int LLVMFuzzerInitialize(int *argc, char ***argv) +{ + return 0; +} + +/* libFuzzer entry point: copies the input into a NUL-terminated buffer, feeds it to check_int() and, when it looks like key="value", to get_value(); always frees both buffers. */ +int LLVMFuzzerTestOneInput(const unsigned char *data, size_t size) +{ + char *buf = NULL; + int len; + char *value = NULL; + char *ptr = NULL; + char secure_func_test[] = "k=\"v\""; + int res; + + if (data == NULL || size > MAX_DATA_SIZE) { + printf("data=%p, size=%zu\n", (const void *)data, 
size); + return 0; + } + + buf = malloc(size + 1); + if (buf == NULL) { + printf("malloc for buf fail!\n"); + return -1; + } + res = memcpy_s(buf, size + 1, data, size); + if (res != 0) { + printf("memcpy_s for buf fail!\n"); + free(buf); + return -1; + } + /* avoid overflow check */ + buf[size] = '\0'; + + value = malloc(size + 1); /* never request 0 bytes: malloc(0) may return NULL and would be mistaken for an allocation failure */ + if (value == NULL) { + printf("malloc for value fail!\n"); + free(buf); + return -1; + } + + /* test check_int */ + if (size == 0) { + (void)check_int(NULL); + } else { + (void)check_int(buf); + } + + len = string_len((const char *)data, size); + if (is_key_value_format(buf, len)) { + ptr = strchr(buf, '='); + if (ptr != NULL) { + /* test get_value */ + get_value(buf, ptr - buf, value, len); + } + } + if (size == 0) { + get_value(secure_func_test, 1, secure_func_test, 0); + } + + free(buf); + free(value); + + return 0; +} diff --git a/sysmonitor-1.3.2/test/llt.sh b/sysmonitor-1.3.2/test/llt.sh new file mode 100644 index 0000000000000000000000000000000000000000..90b694ae720c9d50a47ab9d72c34b55947e306c9 --- /dev/null +++ b/sysmonitor-1.3.2/test/llt.sh @@ -0,0 +1,267 @@ +####################################################################### +##- @Copyright (C) Huawei Technologies., Ltd. 2019. All rights reserved. +# - iSulad licensed under the Mulan PSL v1. +# - You can use this software according to the terms and conditions of the Mulan PSL v1. +# - You may obtain a copy of Mulan PSL v1 at: +# - http://license.coscl.org.cn/MulanPSL +# - THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR +# - IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR +# - PURPOSE. +# - See the Mulan PSL v1 for more details. +####################################################################### +#! 
/bin/bash + +#set -xe + +usage() +{ + echo "Usage: sh llt.sh [OPTIONS]" + echo "Use llt.sh to control llt operation" + echo + echo "Misc:" + echo " -h, --help Print this help, then exit" + echo + echo "Compile Options:" + echo " -m, --cmake