diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..ff0812fd89cc41fc1b1ed6732a621057d30ed2ad --- /dev/null +++ b/LICENSE @@ -0,0 +1,359 @@ +Valid-License-Identifier: GPL-2.0 +Valid-License-Identifier: GPL-2.0-only +Valid-License-Identifier: GPL-2.0+ +Valid-License-Identifier: GPL-2.0-or-later +SPDX-URL: https://spdx.org/licenses/GPL-2.0.html +Usage-Guide: + To use this license in source code, put one of the following SPDX + tag/value pairs into a comment according to the placement + guidelines in the licensing rules documentation. + For 'GNU General Public License (GPL) version 2 only' use: + SPDX-License-Identifier: GPL-2.0 + or + SPDX-License-Identifier: GPL-2.0-only + For 'GNU General Public License (GPL) version 2 or any later version' use: + SPDX-License-Identifier: GPL-2.0+ + or + SPDX-License-Identifier: GPL-2.0-or-later +License-Text: + + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. 
Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. 
+ + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. 
You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. 
+ +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. 
However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. 
+You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. 
diff --git a/OAT.xml b/OAT.xml new file mode 100644 index 0000000000000000000000000000000000000000..a5956a1b6611c97378059b6bfc99c3956b59d206 --- /dev/null +++ b/OAT.xml @@ -0,0 +1,84 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/newip/README.OpenSource b/newip/README.OpenSource new file mode 100644 index 0000000000000000000000000000000000000000..5d105dacb0c3313298571615382a303c58da66a4 --- /dev/null +++ b/newip/README.OpenSource @@ -0,0 +1,11 @@ +[ + { + "Name": "linux-5.10", + "License": "GPL-2.0+", + "License File": "COPYING", + "Version Number": "5.10.93", + "Owner": "tanyanying@huawei.com", + "Upstream URL": "https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/log/?h=linux-5.10.y", + "Description": "linux kernel 5.10" + } +] diff --git a/newip/README_zh.md b/newip/README_zh.md new file mode 100644 index 0000000000000000000000000000000000000000..3d1d1189d0a07ac3d328dc00455bfaf184622e3b --- /dev/null +++ b/newip/README_zh.md @@ -0,0 +1,90 @@ +# New IP内核协议栈 + +## 简介 + +New IP在现有IP能力的基础上,以灵活轻量级报头和可变长多语义地址为基础,通过二三层协议融合,对协议去冗和压缩,减少冗余字节,实现高能效比,高净吞吐,提升通信效率。打造终端之间高效的横向通信,支撑超级终端的体验,实现异构网络的端到端互联。 + +目前WiFi协议报文,三层报头和地址开销使得报文开销大,传输效率较低。 + +![image-20220915162621809](figures/image-20220915162621809.png) + +``` +IPv4地址长度固定4字节,IPv6地址长度固定16字节。 +IPv4网络层报头长度20~60字节,IPv6网络层报头长度40字节。 +``` + +New IP支持**可变长多语义地址(最短1字节)**,**可变长定制化报头封装(最短5字节)**,通过精简报文头开销,提升数据传输效率。 + +New IP报头开销,相比IPv4节省25.9%,相比IPv6节省44.9%。 + +New IP载荷传输效率,相比IPv4提高最少1%,相比IPv6提高最少2.33%。 + +| 对比场景 | 报头开销 | 载荷传输效率(WiFi MTU=1500B,BT MTU=255B) | +| --------------- | ------------ | ------------------------------------------- | +| IPv4 for WiFi | 30+8+20=58 B | (1500-58)/1500=96.13% | +| IPv6 for WiFi | 30+8+40=78 B | (1500-78)/1500=94.8% | +| New IP for WiFi | 30+8+5=43 B | (1500-43)/1500=97.13% | + +## 系统架构 + +New IP内核协议栈架构图如下,用户态应用程序调用Socket API创建New IP socket,采用New IP极简帧头封装进行收发包。 + +![image-20220901152539801](figures/image-20220901152539801.png) + +## 目录 + +New 
IP内核协议栈主要代码目录结构如下: + +``` +# 代码路径 /kernel/linux/common_modules/newip +├── examples # New IP 用户态样例代码 +├── src +│ ├── common # New IP 通用代码 +│ └── linux-5.10 # New IP Linux-5.10内核代码 +│ ├── include # New IP 头文件 +│ │ ├── linux +│ │ ├── net +│ │ └── uapi +│ └── net +│ └── newip # New IP 功能代码 +├── figures # ReadMe 内嵌图例 +└── tools # New IP 配套工具 +``` + +## 编译构建 + +详见“New IP内核协议栈开发手册-[配置指导](https://gitee.com/openharmony/docs/blob/master/zh-cn/device-dev/kernel/kernel-standard-newip.md#newip配置指导)”章节。 + +## 说明 + +### 可变长报头格式 + +详见“New IP内核协议栈开发手册-[可变报头格式](https://gitee.com/openharmony/docs/blob/master/zh-cn/device-dev/kernel/kernel-standard-newip.md#可变长报头格式)”章节。 + +### 可变长地址格式 + +详见“New IP内核协议栈开发手册-[可变长地址格式](https://gitee.com/openharmony/docs/blob/master/zh-cn/device-dev/kernel/kernel-standard-newip.md#可变长地址格式)”章节。 + +### 接口说明 + +详见“New IP内核协议栈开发手册-[New IP相关接口](https://gitee.com/openharmony/docs/blob/master/zh-cn/device-dev/kernel/kernel-standard-newip.md#newip相关接口)”章节。 + +### New IP收发包代码示例 + +详见“New IP内核协议栈开发手册-[New IP收发包代码示例](https://gitee.com/openharmony/docs/blob/master/zh-cn/device-dev/kernel/kernel-standard-newip.md#newip收发包代码示例)”章节。 + +### selinux规则说明 + +详见“New IP内核协议栈开发手册-[selinux规则说明](https://gitee.com/openharmony/docs/blob/master/zh-cn/device-dev/kernel/kernel-standard-newip.md#selinux规则说明)”章节。 + +## 相关仓 + +[内核子系统](https://gitee.com/openharmony/docs/blob/master/zh-cn/readme/%E5%86%85%E6%A0%B8%E5%AD%90%E7%B3%BB%E7%BB%9F.md) + +[kernel_linux_5.10](https://gitee.com/openharmony/kernel_linux_5.10) + +[kernel_linux_config](https://gitee.com/openharmony/kernel_linux_config) + +[kernel_linux_build](https://gitee.com/openharmony/kernel_linux_build) + +[device_board_hihope](https://gitee.com/openharmony/device_board_hihope) diff --git a/newip/apply_newip.sh b/newip/apply_newip.sh new file mode 100644 index 0000000000000000000000000000000000000000..5cd3732de80b8c166fae2d349be7989c031c2bb8 --- /dev/null +++ b/newip/apply_newip.sh @@ -0,0 +1,42 @@ +#!/bin/bash +# 
SPDX-License-Identifier: GPL-2.0 +# Copyright (c) 2022 Huawei Device Co., Ltd. +# + +set -e + +OHOS_SOURCE_ROOT=$1 +KERNEL_BUILD_ROOT=$2 +PRODUCT_NAME=$3 +KERNEL_VERSION=$4 +NEWIP_SOURCE_ROOT=$OHOS_SOURCE_ROOT/kernel/linux/common_modules/newip + +function main() +{ + pushd . + + cd $KERNEL_BUILD_ROOT/include/linux/ + ln -s -f $(realpath --relative-to=$KERNEL_BUILD_ROOT/include/linux $NEWIP_SOURCE_ROOT/src/linux/include/linux)/*.h ./ + cd $KERNEL_BUILD_ROOT/include/net/netns + ln -s -f $(realpath --relative-to=$KERNEL_BUILD_ROOT/include/net/netns $NEWIP_SOURCE_ROOT/src/linux/include/net/netns)/*.h ./ + cd $KERNEL_BUILD_ROOT/include/net + ln -s -f $(realpath --relative-to=$KERNEL_BUILD_ROOT/include/net $NEWIP_SOURCE_ROOT/src/linux/include/net)/*.h ./ + cd $KERNEL_BUILD_ROOT/include/uapi/linux + ln -s -f $(realpath --relative-to=$KERNEL_BUILD_ROOT/include/uapi/linux $NEWIP_SOURCE_ROOT/src/linux/include/uapi/linux)/*.h ./ + cd $KERNEL_BUILD_ROOT/include/trace/hooks + ln -s -f $(realpath --relative-to=$KERNEL_BUILD_ROOT/include/trace/hooks $NEWIP_SOURCE_ROOT/src/linux/include/trace/hooks)/*.h ./ + + if [ ! -d " $KERNEL_BUILD_ROOT/net/newip" ]; then + mkdir $KERNEL_BUILD_ROOT/net/newip + fi + + cd $KERNEL_BUILD_ROOT/net/newip/ + ln -s -f $(realpath --relative-to=$KERNEL_BUILD_ROOT/net/newip $NEWIP_SOURCE_ROOT/src/linux/net/newip)/* ./ + ln -s -f $(realpath --relative-to=$KERNEL_BUILD_ROOT/net/newip $NEWIP_SOURCE_ROOT/src/common)/* ./ + cd $KERNEL_BUILD_ROOT/include/uapi/linux + ln -s -f $(realpath --relative-to=$KERNEL_BUILD_ROOT/include/uapi/linux $NEWIP_SOURCE_ROOT/src/common)/nip_addr.h nip_addr.h + + popd +} + +main diff --git a/newip/examples/Makefile b/newip/examples/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..4121a74eb10963c72f417c6e8fe50d34df7f14d0 --- /dev/null +++ b/newip/examples/Makefile @@ -0,0 +1,60 @@ +# SPDX-License-Identifier: GPL-2.0-or-later +# +# Copyright (c) 2022 Huawei Device Co., Ltd. 
+# +# +CC=gcc +# CC = aarch64-linux-gnu-gcc +# CC = arm-linux-gnueabi-gcc +CFLAGS=-pthread -static -g + +UT_LIST = nip_addr_cfg_demo nip_route_cfg_demo nip_tcp_server_demo nip_tcp_client_demo nip_udp_server_demo nip_udp_client_demo get_af_ninet check_nip_enable nip_addr nip_route + +all: $(UT_LIST) + +clean: + rm -f $(UT_LIST) + rm -f nip_lib.o + rm -f libnip_lib.a + + +#lib +NIP_LIB = libnip_lib.a +NIP_DEF_LIB = -L. -lnip_lib + +nip_lib.o: nip_lib.c + $(CC) -c nip_lib.c -o nip_lib.o + +libnip_lib.a: nip_lib.o + ar -rv libnip_lib.a nip_lib.o + +#UT func list +nip_addr_cfg_demo: nip_addr_cfg_demo.c $(NIP_LIB) + $(CC) $(CFLAGS) -o nip_addr_cfg_demo nip_addr_cfg_demo.c $(NIP_DEF_LIB) + +nip_route_cfg_demo: nip_route_cfg_demo.c $(NIP_LIB) + $(CC) $(CFLAGS) -o nip_route_cfg_demo nip_route_cfg_demo.c $(NIP_DEF_LIB) + +nip_tcp_server_demo: nip_tcp_server_demo.c $(NIP_LIB) + $(CC) $(CFLAGS) -o nip_tcp_server_demo nip_tcp_server_demo.c $(NIP_DEF_LIB) + +nip_tcp_client_demo: nip_tcp_client_demo.c $(NIP_LIB) + $(CC) $(CFLAGS) -o nip_tcp_client_demo nip_tcp_client_demo.c $(NIP_DEF_LIB) + +nip_udp_server_demo: nip_udp_server_demo.c $(NIP_LIB) + $(CC) $(CFLAGS) -o nip_udp_server_demo nip_udp_server_demo.c $(NIP_DEF_LIB) + +nip_udp_client_demo: nip_udp_client_demo.c $(NIP_LIB) + $(CC) $(CFLAGS) -o nip_udp_client_demo nip_udp_client_demo.c $(NIP_DEF_LIB) + +get_af_ninet: get_af_ninet.c + $(CC) $(CFLAGS) -o get_af_ninet get_af_ninet.c + +check_nip_enable: check_nip_enable.c + $(CC) $(CFLAGS) -o check_nip_enable check_nip_enable.c + +nip_addr: nip_addr.c $(NIP_LIB) + $(CC) $(CFLAGS) -o nip_addr nip_addr.c $(NIP_DEF_LIB) + +nip_route: nip_route.c $(NIP_LIB) + $(CC) $(CFLAGS) -o nip_route nip_route.c $(NIP_DEF_LIB) \ No newline at end of file diff --git a/newip/examples/check_nip_enable.c b/newip/examples/check_nip_enable.c new file mode 100644 index 0000000000000000000000000000000000000000..a319a8a016e4b044c48fe7bec59b7bbecd531cb4 --- /dev/null +++ b/newip/examples/check_nip_enable.c 
@@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. + */ +#include +#include +#include +#include +#include + +#define NIP_DISABLE_PATH ("/sys/module/newip/parameters/disable") +#define NIP_DISABLE_LENTH (5) +#define NIP_ENABLE_INVALID (0xFF) + +int g_nip_enable = NIP_ENABLE_INVALID; + +void _check_nip_enable(void) +{ + char tmp[NIP_DISABLE_LENTH]; + FILE *fn = fopen(NIP_DISABLE_PATH, "r"); + + if (!fn) { + printf("fail to open %s\n\n", NIP_DISABLE_PATH); + return; + } + + if (fgets(tmp, NIP_DISABLE_LENTH, fn) == NULL) { + printf("fail to gets %s\n\n", NIP_DISABLE_PATH); + fclose(fn); + return; + } + + fclose(fn); + g_nip_enable = atoi(tmp) ? 0 : 1; +} + +int check_nip_enable(void) +{ + if (g_nip_enable == NIP_ENABLE_INVALID) { + _check_nip_enable(); + g_nip_enable = (g_nip_enable == 1 ? 1 : 0); + } + + return g_nip_enable; +} + +int main(int argc, char **argv) +{ + int af_ninet = check_nip_enable(); + + if (af_ninet) + printf("Support NewIP\n\n"); + else + printf("Not support NewIP\n\n"); + return 0; +} + diff --git a/newip/examples/get_af_ninet.c b/newip/examples/get_af_ninet.c new file mode 100644 index 0000000000000000000000000000000000000000..da6febb0378dafc976f891750ca61a2d0e3b43bf --- /dev/null +++ b/newip/examples/get_af_ninet.c @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. 
+ */ +#include +#include +#include +#include +#include + +#define AF_NINET_PATH ("/sys/module/newip/parameters/af_ninet") +#define AF_NINET_LENTH (5) + +int g_af_ninet; + +void _get_af_ninet(void) +{ + char tmp[AF_NINET_LENTH]; + FILE *fn = fopen(AF_NINET_PATH, "r"); + + if (!fn) { + printf("fail to open %s\n\n", AF_NINET_PATH); + return; + } + + if (fgets(tmp, AF_NINET_LENTH, fn) == NULL) { + printf("fail to gets %s\n\n", AF_NINET_PATH); + fclose(fn); + return; + } + + fclose(fn); + g_af_ninet = atoi(tmp); +} + +int get_af_ninet(void) +{ + if (g_af_ninet == 0) + _get_af_ninet(); + + return g_af_ninet; +} + +int main(int argc, char **argv) +{ + printf("af_ninet=%d\n\n", get_af_ninet()); + return 0; +} + diff --git a/newip/examples/newip_route.h b/newip/examples/newip_route.h new file mode 100644 index 0000000000000000000000000000000000000000..2337c1e63e3563ca1f056762fa168d5bb2331ca3 --- /dev/null +++ b/newip/examples/newip_route.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. + */ +#ifndef _NEWIP_ROUTE_H +#define _NEWIP_ROUTE_H + +#include "nip.h" + +struct nip_rtmsg { + struct nip_addr rtmsg_dst; + struct nip_addr rtmsg_src; + struct nip_addr rtmsg_gateway; + char dev_name[10]; + unsigned int rtmsg_type; + int rtmsg_ifindex; + unsigned int rtmsg_metric; + unsigned long rtmsg_info; + unsigned int rtmsg_flags; +}; + +#endif /* _NEWIP_ROUTE_H */ diff --git a/newip/examples/nip.h b/newip/examples/nip.h new file mode 100644 index 0000000000000000000000000000000000000000..531d41a9a4bd20810dffa6a7029b31bf313e317a --- /dev/null +++ b/newip/examples/nip.h @@ -0,0 +1,101 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. 
+ */ +#ifndef _NIP_H +#define _NIP_H + +#define NIP_ADDR_LEN_1 1 +#define NIP_ADDR_LEN_2 2 +#define NIP_ADDR_LEN_3 3 +#define NIP_ADDR_LEN_4 4 +#define NIP_ADDR_LEN_5 5 +#define NIP_ADDR_LEN_7 7 +#define NIP_ADDR_LEN_8 8 + +#define NIP_ADDR_BIT_LEN_8 8 +#define NIP_ADDR_BIT_LEN_16 16 +#define NIP_ADDR_BIT_LEN_24 24 +#define NIP_ADDR_BIT_LEN_40 40 +#define NIP_ADDR_BIT_LEN_MAX 64 + +enum nip_addr_check_value { + ADDR_FIRST_DC = 0xDC, + ADDR_FIRST_F0 = 0xF0, + ADDR_FIRST_F1, + ADDR_FIRST_F2, + ADDR_FIRST_F3, + ADDR_FIRST_FE = 0xFE, + ADDR_FIRST_FF = 0xFF, + ADDR_SECOND_MIN_DD = 0xDD, + ADDR_SECOND_MIN_F1 = 0x14, /* f1 14 00 */ + ADDR_THIRD_MIN_F2 = 0x01, /* f2 00 01 00 00 */ + ADDR_THIRD_MIN_F3 = 0x01, /* F3 0001 0000 0000 */ +}; + +enum nip_8bit_addr_index { + NIP_8BIT_ADDR_INDEX_0 = 0, + NIP_8BIT_ADDR_INDEX_1 = 1, + NIP_8BIT_ADDR_INDEX_2 = 2, + NIP_8BIT_ADDR_INDEX_3 = 3, + NIP_8BIT_ADDR_INDEX_4 = 4, + NIP_8BIT_ADDR_INDEX_5 = 5, + NIP_8BIT_ADDR_INDEX_6 = 6, + NIP_8BIT_ADDR_INDEX_7 = 7, + NIP_8BIT_ADDR_INDEX_MAX, +}; + +enum nip_16bit_addr_index { + NIP_16BIT_ADDR_INDEX_0 = 0, + NIP_16BIT_ADDR_INDEX_1 = 1, + NIP_16BIT_ADDR_INDEX_2 = 2, + NIP_16BIT_ADDR_INDEX_3 = 3, + NIP_16BIT_ADDR_INDEX_MAX, +}; + +enum nip_32bit_addr_index { + NIP_32BIT_ADDR_INDEX_0 = 0, + NIP_32BIT_ADDR_INDEX_1 = 1, + NIP_32BIT_ADDR_INDEX_MAX, +}; + +#define nip_addr_field8 v.u.field8 +#define nip_addr_field16 v.u.field16 +#define nip_addr_field32 v.u.field32 + +#pragma pack(1) +struct nip_addr_field { + union { + unsigned char field8[NIP_8BIT_ADDR_INDEX_MAX]; + unsigned short field16[NIP_16BIT_ADDR_INDEX_MAX]; /* big-endian */ + unsigned int field32[NIP_32BIT_ADDR_INDEX_MAX]; /* big-endian */ + } u; +}; + +struct nip_addr { + unsigned char bitlen; /* The address length is in bit (not byte) */ + struct nip_addr_field v; +}; +#pragma pack() + +enum nip_index { + INDEX_0 = 0, + INDEX_1 = 1, + INDEX_2 = 2, + INDEX_3 = 3, + INDEX_4 = 4, + INDEX_5 = 5, + INDEX_6 = 6, + INDEX_7 = 7, + INDEX_8 = 8, + 
INDEX_9 = 9, + INDEX_10 = 10, + INDEX_11 = 11, + INDEX_12 = 12, + INDEX_13 = 13, + INDEX_14 = 14, + INDEX_15 = 15, + INDEX_MAX, +}; + +#endif /* _NIP_H */ diff --git a/newip/examples/nip_addr.c b/newip/examples/nip_addr.c new file mode 100644 index 0000000000000000000000000000000000000000..fe6fd65f2679dbe013f505470d6734bae16e6732 --- /dev/null +++ b/newip/examples/nip_addr.c @@ -0,0 +1,136 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. + */ +#include +#include +#include +#include +#include +#include +#include + +#include "nip_uapi.h" +#include "nip_lib.h" + +/* get ifindex based on the device name + * struct ifreq ifr; + * struct nip_ifreq ifrn; + * ioctl(fd, SIOGIFINDEX, &ifr); + * ifr.ifr_ifindex; ===> ifindex + */ +int nip_add_addr(int ifindex, const struct nip_addr *addr, int opt) +{ + int fd, ret; + struct nip_ifreq ifrn; + + fd = socket(AF_NINET, SOCK_DGRAM, 0); + if (fd < 0) + return -1; + + memset(&ifrn, 0, sizeof(ifrn)); + ifrn.ifrn_addr = *addr; + ifrn.ifrn_ifindex = ifindex; + + ret = ioctl(fd, opt, &ifrn); + if (ret < 0 && errno != EEXIST) { // ignore File Exists error + printf("cfg newip addr fail, ifindex=%d, opt=%d, ret=%d.\n", ifindex, opt, ret); + close(fd); + return -1; + } + + close(fd); + return 0; +} + +void cmd_help(void) +{ + /* nip_addr wlan0 add 01 (在wlan0上配置地址01) */ + /* nip_addr wlan0 del 01 (在wlan0上删除地址01) */ + printf("[cmd example] nip_addr { add | del } \n"); +} + +int parse_name(char **argv, int *ifindex, char *dev) +{ + int len = strlen(*argv); + + memset(dev, 0, ARRAY_LEN); + if (!len || len >= (ARRAY_LEN - 1)) + return -1; + memcpy(dev, *argv, len); + dev[len + 1] = '\0'; + + if (strncmp(dev, NIC_NAME_CHECK, strlen(NIC_NAME_CHECK))) { + printf("unsupport addr cfg cmd-1, cmd=%s\n", dev); + cmd_help(); + return -1; + } + return nip_get_ifindex(dev, ifindex); +} + +int parse_cmd(char **argv, int *opt) +{ + char cmd[ARRAY_LEN]; + int len = strlen(*argv); + + memset(cmd, 0, 
ARRAY_LEN); + if (!len || len >= (ARRAY_LEN - 1)) + return -1; + memcpy(cmd, *argv, len); + cmd[len + 1] = '\0'; + + if (!strncmp(cmd, CMD_ADD, strlen(CMD_ADD))) { + *opt = SIOCSIFADDR; + } else if (!strncmp(cmd, CMD_DEL, strlen(CMD_DEL))) { + *opt = SIOCDIFADDR; + } else { + printf("unsupport addr cfg cmd-2, cmd=%s\n", cmd); + cmd_help(); + return -1; + } + return 0; +} + +int main(int argc, char **argv_input) +{ + char dev[ARRAY_LEN]; + int ret; + int opt; + int ifindex = 0; + char **argv = argv_input; + struct nip_addr addr = {0}; + + if (argc != DEMO_INPUT_3) { + printf("unsupport addr cfg input, argc=%d\n", argc); + cmd_help(); + return -1; + } + + /* 配置参数1解析: */ + argv++; + ret = parse_name(argv, &ifindex, dev); + if (ret != 0) + return -1; + + /* 配置参数2解析: { add | del } */ + argv++; + ret = parse_cmd(argv, &opt); + if (ret != 0) + return -1; + + /* 配置参数3解析: */ + argv++; + if (nip_get_addr(argv, &addr)) { + printf("unsupport addr cfg cmd-3\n"); + cmd_help(); + return 1; + } + + ret = nip_add_addr(ifindex, &addr, opt); + if (ret != 0) + return -1; + + printf("%s (ifindex=%d) cfg addr success\n", dev, ifindex); + return 0; +} + diff --git a/newip/examples/nip_addr_cfg_demo.c b/newip/examples/nip_addr_cfg_demo.c new file mode 100644 index 0000000000000000000000000000000000000000..50398b8d75ea17b41e9c2e4fe9eaaaa278bcb460 --- /dev/null +++ b/newip/examples/nip_addr_cfg_demo.c @@ -0,0 +1,89 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. 
+ */ +#include +#include +#include +#include +#include +#include +#include + +#include "nip_uapi.h" +#include "nip_lib.h" + +/* get ifindex based on the device name + * struct ifreq ifr; + * struct nip_ifreq ifrn; + * ioctl(fd, SIOGIFINDEX, &ifr); + * ifr.ifr_ifindex; ===> ifindex + */ +int nip_add_addr(int ifindex, const unsigned char *addr, unsigned char addr_len) +{ + int fd, ret; + struct nip_ifreq ifrn; + + fd = socket(AF_NINET, SOCK_DGRAM, 0); + if (fd < 0) + return -1; + + memset(&ifrn, 0, sizeof(ifrn)); + ifrn.ifrn_addr.bitlen = addr_len * 8; // Byte length is converted to bit length + memcpy(ifrn.ifrn_addr.nip_addr_field8, addr, addr_len); + ifrn.ifrn_ifindex = ifindex; + + ret = ioctl(fd, SIOCSIFADDR, &ifrn); + if (ret < 0 && errno != EEXIST) { // ignore File Exists error + printf("cfg newip addr fail, ifindex=%d, ret=%d\n", ifindex, ret); + close(fd); + return -1; + } + + close(fd); + return 0; +} + +/* Before executing the use case, run ifconfig XXX up. + * XXX indicates the NIC name, for example, eth0 and wlan0 + */ +int main(int argc, char **argv) +{ + int ifindex = 0; + int ret; + unsigned char client_addr[INDEX_1] = {0x50}; // 1-byte address of the client: 0x50 + unsigned char server_addr[INDEX_2] = {0xDE, 0x00}; // 2-byte address of the server: 0xDE00 + unsigned char *addr; + unsigned char addr_len; + + if (argc == DEMO_INPUT_1) { + if (!strcmp(*(argv + 1), "server")) { + printf("server cfg addr=0x%02x%02x\n", + server_addr[INDEX_0], server_addr[INDEX_1]); + addr = server_addr; + addr_len = sizeof(server_addr); + } else if (!strcmp(*(argv + 1), "client")) { + printf("client cfg addr=0x%02x\n", client_addr[INDEX_0]); + addr = client_addr; + addr_len = sizeof(client_addr); + } else { + printf("invalid addr cfg input\n"); + return -1; + } + } else { + printf("unsupport addr cfg input\n"); + return -1; + } + + ret = nip_get_ifindex(NIC_NAME, &ifindex); + if (ret != 0) + return -1; + + ret = nip_add_addr(ifindex, addr, addr_len); + if (ret != 0) + 
return -1; + + printf("%s %s(ifindex=%d) cfg addr success\n", *argv, NIC_NAME, ifindex); + return 0; +} + diff --git a/newip/examples/nip_lib.c b/newip/examples/nip_lib.c new file mode 100644 index 0000000000000000000000000000000000000000..53aea7753a9df16d45e11f009705cb72e5af6e8a --- /dev/null +++ b/newip/examples/nip_lib.c @@ -0,0 +1,129 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. + */ +#include +#include +#include +#include +#include +#include +#include /* struct ifreq depend */ + +#include "nip_uapi.h" +#include "nip_lib.h" + +#define ADDR_STR_LEN 2 +#define STR_FMT_1 55 +#define STR_FMT_2 87 + +int32_t nip_get_ifindex(const char *ifname, int *ifindex) +{ + int fd; + struct ifreq ifr; + + memset(&ifr, 0, sizeof(ifr)); + strcpy(ifr.ifr_name, ifname); + fd = socket(AF_NINET, SOCK_DGRAM, 0); + if (fd < 0) { + printf("creat socket fail, ifname=%s\n", ifname); + return -1; + } + if ((ioctl(fd, SIOCGIFINDEX, &ifr)) < 0) { + printf("get ifindex fail, ifname=%s\n", ifname); + close(fd); + return -1; + } + close(fd); + + printf("%s ifindex=%u\n", ifname, ifr.ifr_ifindex); + *ifindex = ifr.ifr_ifindex; + return 0; +} + +int nip_addr_fmt(char *addr_str, struct nip_addr *sap, int addrlen_input) +{ + unsigned char first_byte; + int addrlen, i; + + memset(sap, 0, sizeof(struct nip_addr)); + for (i = 0; i < INDEX_MAX; i++) { + if (addr_str[i] == 0) + break; + + /* 0 ~ 9 = 48 ~ 57, '0'构造成 0 = 48 - 48 */ + if (addr_str[i] >= '0' && addr_str[i] <= '9') { + addr_str[i] = addr_str[i] - '0'; + /* A ~ F = 65 ~ 70, 'A'构造成 10 = 65 - 55 */ + } else if (addr_str[i] >= 'A' && addr_str[i] <= 'F') { + addr_str[i] = addr_str[i] - STR_FMT_1; + /* a ~ f = 97 ~ 102, 'a'构造成 10 = 97 - 87 */ + } else if (addr_str[i] >= 'a' && addr_str[i] <= 'f') { + addr_str[i] = addr_str[i] - STR_FMT_2; + } else { + printf("Newip addr error: uaddr[%d]=%c\n", i, addr_str[i]); + return 1; + } + } + + first_byte = addr_str[0] << NIP_ADDR_LEN_4; + first_byte += 
addr_str[1]; + if (first_byte <= ADDR_FIRST_DC) + addrlen = NIP_ADDR_LEN_1; + else if (first_byte <= ADDR_FIRST_F0 || first_byte == ADDR_FIRST_FF) + addrlen = NIP_ADDR_LEN_2; + else if (first_byte == ADDR_FIRST_F1) + addrlen = NIP_ADDR_LEN_3; + else if (first_byte == ADDR_FIRST_F2) + addrlen = NIP_ADDR_LEN_5; + else if (first_byte == ADDR_FIRST_F3) + addrlen = NIP_ADDR_LEN_7; + else if (first_byte == ADDR_FIRST_FE) + addrlen = NIP_ADDR_LEN_8; + else + addrlen = 0; + + if (addrlen_input != addrlen) { + printf("Newip addr error, first_byte=0x%x\n", first_byte); + return 1; + } + + sap->bitlen = addrlen * NIP_ADDR_LEN_8; + printf("*************************************************\n"); + printf("Newip addr len=%d\n", addrlen); + for (i = 0; i < addrlen; i++) { + sap->nip_addr_field8[i] = addr_str[i * INDEX_2] << INDEX_4; + sap->nip_addr_field8[i] += addr_str[i * INDEX_2 + 1]; + printf("%02x ", sap->nip_addr_field8[i]); + } + printf("\n*************************************************\n\n"); + + return 0; +} + +int nip_get_addr(char **args, struct nip_addr *addr) +{ + unsigned int len; + char *sp = *args; + int addrlen_input = 0; + __u8 addr_str[INDEX_MAX] = {0}; + + while (*sp != '\0') { + addrlen_input += 1; + sp++; + } + + if (addrlen_input % ADDR_STR_LEN != 0) { + printf("NewIP addr str-len invalid, addrlen_input=%d\n", addrlen_input); + return -1; + } + + len = strlen(*args); + if (!len || len >= (INDEX_MAX - 1)) + return -1; + memcpy(addr_str, *args, len); + addr_str[len + 1] = '\0'; + + return nip_addr_fmt(addr_str, addr, addrlen_input / ADDR_STR_LEN); +} + diff --git a/newip/examples/nip_lib.h b/newip/examples/nip_lib.h new file mode 100644 index 0000000000000000000000000000000000000000..c30556e9fd09a9273dbbc77712f0341a6c19e1d2 --- /dev/null +++ b/newip/examples/nip_lib.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. 
+ */ +#ifndef _NIP_LIB_H +#define _NIP_LIB_H + +/* AF_NINET by reading/sys/module/newip/parameters/af_ninet file to get the type value */ +#define AF_NINET 45 + +#define DEMO_INPUT_1 2 /* The DEMO program contains one parameter */ +#define DEMO_INPUT_2 3 +#define DEMO_INPUT_3 4 +#define DEMO_INPUT_4 5 + +/* Change the value based on the actual interface */ +#define NIC_NAME "wlan0" +#define NIC_NAME_CHECK "wlan" +#define CMD_ADD "add" +#define CMD_DEL "del" + +#define BUFLEN 1024 +#define LISTEN_MAX 3 +#define PKTCNT 10 /* Number of sent packets */ +#define PKTLEN 1024 /* Length of sent packet */ +#define SLEEP_US 500000 /* Packet sending interval (ms) */ +#define SELECT_TIME 600 +#define TCP_SERVER_PORT 5556 /* TCP Server Port */ +#define UDP_SERVER_PORT 9090 /* UDP Server Port */ + +#define ARRAY_LEN 255 + +int nip_get_ifindex(const char *ifname, int *ifindex); +int nip_get_addr(char **args, struct nip_addr *addr); + +#endif /* _NIP_LIB_H */ diff --git a/newip/examples/nip_route.c b/newip/examples/nip_route.c new file mode 100644 index 0000000000000000000000000000000000000000..ee7c8ac4f9a2b42f70db3cea5bd54d34e94310a0 --- /dev/null +++ b/newip/examples/nip_route.c @@ -0,0 +1,184 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "nip_uapi.h" +#include "nip_lib.h" +#include "newip_route.h" + +/* get ifindex based on the device name + * struct ifreq ifr; + * struct nip_ifreq ifrn; + * ioctl(fd, SIOGIFINDEX, &ifr); + * ifr.ifr_ifindex; ===> ifindex + */ +int nip_route_add(int ifindex, const struct nip_addr *dst_addr, + const struct nip_addr *gateway_addr, __u8 gateway_flag, int opt) +{ + int fd, ret; + struct nip_rtmsg rt; + + fd = socket(AF_NINET, SOCK_DGRAM, 0); + if (fd < 0) + return -1; + + memset(&rt, 0, sizeof(rt)); + rt.rtmsg_ifindex = ifindex; + rt.rtmsg_flags = RTF_UP; + rt.rtmsg_dst = *dst_addr; + + if (gateway_flag) { + rt.rtmsg_gateway = *gateway_addr; + rt.rtmsg_flags |= RTF_GATEWAY; + } + + ret = ioctl(fd, opt, &rt); + if (ret < 0 && errno != EEXIST) { // ignore File Exists error + close(fd); + return -1; + } + + close(fd); + return 0; +} + +void cmd_help(void) +{ + /* nip_route add 02 wlan0 + * (配置目的地址02设备路由,出口是wlan0) + * nip_route add 02 wlan0 03 + * (配置目的地址02设备路由,出口是wlan0,网关地址是03) + * nip_route add ff09 wlan0 03 + * (配置广播默认路由, 出口是wlan0,网关地址是03) + */ + printf("\n[cmd example]\n"); + printf("nip_route { add | del } \n"); + printf("nip_route { add | del } \n"); +} + +int parse_name(char **argv, int *ifindex, char *dev) +{ + int len = strlen(*argv); + + memset(dev, 0, ARRAY_LEN); + if (len >= (ARRAY_LEN - 1) || !len) + return -1; + memcpy(dev, *argv, len); + dev[len + 1] = '\0'; + + if (strncmp(dev, NIC_NAME_CHECK, strlen(NIC_NAME_CHECK))) { + printf("unsupport addr cfg cmd-3, cmd=%s\n", dev); + cmd_help(); + return -1; + } + return nip_get_ifindex(dev, ifindex); +} + +int parse_cmd(char **argv, int *opt) +{ + int len = strlen(*argv); + char cmd[ARRAY_LEN]; + + memset(cmd, 0, ARRAY_LEN); + if (!len || len >= (ARRAY_LEN - 1)) + return -1; + memcpy(cmd, *argv, len); + cmd[len + 1] = '\0'; + + if (!strncmp(cmd, CMD_ADD, strlen(CMD_ADD))) { + *opt = SIOCADDRT; + } else if (!strncmp(cmd, 
CMD_DEL, strlen(CMD_DEL))) { + *opt = SIOCDELRT; + } else { + printf("unsupport route cfg cmd-1, cmd=%s\n", cmd); + cmd_help(); + return -1; + } + return 0; +} + +int parse_args(char **argv, int *opt, __u8 *gateway_flag, int *ifindex, + struct nip_addr *dst_addr, struct nip_addr *gateway_addr, char *dev, int argc) +{ + /* 配置参数1解析: { add | del } */ + int ret; + + argv++; + ret = parse_cmd(argv, opt); + if (ret != 0) + return -1; + + /* 配置参数2解析: */ + argv++; + if (nip_get_addr(argv, dst_addr)) { + printf("unsupport route cfg cmd-2\n"); + cmd_help(); + return -1; + } + + /* 配置参数3解析: */ + argv++; + ret = parse_name(argv, ifindex, dev); + if (ret != 0) + return -1; + + /* 配置参数4解析: */ + if (argc == DEMO_INPUT_4) { + argv++; + if (nip_get_addr(argv, gateway_addr)) { + printf("unsupport route cfg cmd-4\n"); + cmd_help(); + return -1; + } + *gateway_flag = 1; + } + return ret; +} + +int main(int argc, char **argv_input) +{ + int ret; + int opt; + int ifindex = 0; + __u8 gateway_flag = 0; + char **argv = argv_input; + char dev[ARRAY_LEN]; + struct nip_addr dst_addr = {0}; + struct nip_addr gateway_addr = {0}; + + if (argc != DEMO_INPUT_3 && argc != DEMO_INPUT_4) { + printf("unsupport route cfg input, argc=%d\n", argc); + cmd_help(); + return -1; + } + + ret = parse_args(argv, &opt, &gateway_flag, &ifindex, + &dst_addr, &gateway_addr, dev, argc); + if (ret != 0) + return -1; + + ret = nip_get_ifindex(dev, &ifindex); + if (ret != 0) { + printf("get %s ifindex fail, ret=%d\n", dev, ret); + return -1; + } + + ret = nip_route_add(ifindex, &dst_addr, &gateway_addr, gateway_flag, opt); + if (ret != 0) { + printf("get %s ifindex fail, ret=%d\n", dev, ret); + return -1; + } + + printf("%s (ifindex=%d) cfg route success\n", dev, ifindex); + return 0; +} + diff --git a/newip/examples/nip_route_cfg_demo.c b/newip/examples/nip_route_cfg_demo.c new file mode 100644 index 0000000000000000000000000000000000000000..f650919714c86c0552813a3693d72a910bf397ba --- /dev/null +++ 
b/newip/examples/nip_route_cfg_demo.c @@ -0,0 +1,99 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "nip_uapi.h" +#include "nip_lib.h" +#include "newip_route.h" + +/* get ifindex based on the device name + * struct ifreq ifr; + * struct nip_ifreq ifrn; + * ioctl(fd, SIOGIFINDEX, &ifr); + * ifr.ifr_ifindex; ===> ifindex + */ +int nip_route_add(int ifindex, const unsigned char *dst_addr, uint8_t dst_addr_len, + const unsigned char *gateway_addr, uint8_t gateway_addr_len) +{ + int fd, ret; + struct nip_rtmsg rt; + + fd = socket(AF_NINET, SOCK_DGRAM, 0); + if (fd < 0) + return -1; + + memset(&rt, 0, sizeof(rt)); + rt.rtmsg_ifindex = ifindex; + rt.rtmsg_flags = RTF_UP; + rt.rtmsg_dst.bitlen = dst_addr_len * 8; + memcpy(rt.rtmsg_dst.nip_addr_field8, dst_addr, dst_addr_len); + + if (gateway_addr) { + rt.rtmsg_gateway.bitlen = gateway_addr_len * 8; + memcpy(rt.rtmsg_gateway.nip_addr_field8, gateway_addr, gateway_addr_len); + rt.rtmsg_flags |= RTF_GATEWAY; + } + + ret = ioctl(fd, SIOCADDRT, &rt); + if (ret < 0 && errno != EEXIST) { // ignore File Exists error + close(fd); + return -1; + } + + close(fd); + return 0; +} + +int main(int argc, char **argv) +{ + int ret; + int ifindex = 0; + uint8_t client_addr[INDEX_1] = {0x50}; // 1-byte address of the client: 0x50 + uint8_t server_addr[INDEX_2] = {0xDE, 0x00}; // 2-byte address of the server: 0xDE00 + uint8_t *dst_addr; + uint8_t dst_addr_len; + + if (argc == DEMO_INPUT_1) { + if (!strcmp(*(argv + 1), "server")) { + printf("server cfg route, dst-addr=0x%02x\n", client_addr[INDEX_0]); + dst_addr = client_addr; + dst_addr_len = 1; + } else if (!strcmp(*(argv + 1), "client")) { + printf("client cfg route, dst-addr=0x%02x%02x\n", + server_addr[INDEX_0], server_addr[INDEX_1]); + dst_addr = server_addr; + dst_addr_len = 2; + } else { + printf("invalid route cfg input\n"); + return -1; 
+ } + } else { + printf("unsupport route cfg input\n"); + return -1; + } + + ret = nip_get_ifindex(NIC_NAME, &ifindex); + if (ret != 0) { + printf("get %s ifindex fail, ret=%d\n", NIC_NAME, ret); + return -1; + } + + ret = nip_route_add(ifindex, dst_addr, dst_addr_len, NULL, 0); + if (ret != 0) { + printf("get %s ifindex fail, ret=%d\n", NIC_NAME, ret); + return -1; + } + + printf("%s %s(ifindex=%d) cfg route success\n", *argv, NIC_NAME, ifindex); + return 0; +} + diff --git a/newip/examples/nip_tcp_client_demo.c b/newip/examples/nip_tcp_client_demo.c new file mode 100644 index 0000000000000000000000000000000000000000..4a602a9b42aae3ff162ef17dd190e5993295ac38 --- /dev/null +++ b/newip/examples/nip_tcp_client_demo.c @@ -0,0 +1,134 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "nip_uapi.h" +#include "nip_lib.h" +#include "newip_route.h" + +#define __USE_GNU +#include +#include + +int _send(int cfd, int pkt_num) +{ + char buf[BUFLEN] = {0}; + struct timeval sys_time; + + gettimeofday(&sys_time, NULL); + sprintf(buf, "%ld %6ld NIP_TCP # %6d", sys_time.tv_sec, sys_time.tv_usec, pkt_num); + + if (send(cfd, buf, PKTLEN, 0) < 0) { + perror("sendto"); + return -1; + } + + return 0; +} + +int _recv(int cfd, int pkt_num, int *success) +{ + char buf[BUFLEN] = {0}; + fd_set readfds; + int tmp; + struct timeval tv; + + FD_ZERO(&readfds); + FD_SET(cfd, &readfds); + tv.tv_sec = 2; + tv.tv_usec = 0; + if (select(cfd + 1, &readfds, NULL, NULL, &tv) < 0) { + perror("select"); + return -1; + } + + if (FD_ISSET(cfd, &readfds)) { + int ret; + int no = 0; + + ret = recv(cfd, buf, PKTLEN, MSG_WAITALL); + if (ret > 0) { + *success += 1; + ret = sscanf(buf, "%d %d NIP_TCP # %d", &tmp, &tmp, &no); + if (ret <= 0) { + perror("sscanf"); + return -1; + } + printf("Received --%s sock %d success:%6d/%6d/no=%6d\n", + buf, cfd, *success, 
pkt_num + 1, no); + } else { + printf("recv fail, ret=%d\n", ret); + return -1; + } + } + + return 0; +} + +void *send_recv(void *args) +{ + int cfd = ((struct thread_args *)args)->cfd; + int success = 0; + + for (int i = 0; i < PKTCNT; i++) { + if (_send(cfd, i) != 0) + goto END; + + if (_recv(cfd, i, &success) != 0) + goto END; + + usleep(SLEEP_US); + } + +END: return NULL; +} + +int main(int argc, char **argv) +{ + int cfd; + pthread_t th; + struct thread_args th_args; + struct sockaddr_nin si_server; + + cfd = socket(AF_NINET, SOCK_STREAM, IPPROTO_TCP); + if (cfd < 0) { + perror("socket"); + return -1; + } + printf("creat newip socket, fd=%d\n", cfd); + + memset((char *)&si_server, 0, sizeof(si_server)); + si_server.sin_family = AF_NINET; + si_server.sin_port = htons(TCP_SERVER_PORT); + // 2-byte address of the server: 0xDE00 + si_server.sin_addr.nip_addr_field8[INDEX_0] = 0xDE; + si_server.sin_addr.nip_addr_field8[INDEX_1] = 0x00; + si_server.sin_addr.bitlen = NIP_ADDR_BIT_LEN_16; // 2-byte: 16bit + if (connect(cfd, (struct sockaddr *)&si_server, sizeof(si_server)) < 0) { + perror("connect"); + return -1; + } + printf("connect success, addr=0x%02x%02x, port=%d\n", + si_server.sin_addr.nip_addr_field8[INDEX_0], + si_server.sin_addr.nip_addr_field8[INDEX_1], TCP_SERVER_PORT); + + th_args.si_server = si_server; + th_args.si_server.sin_port = htons(TCP_SERVER_PORT); + th_args.cfd = cfd; + pthread_create(&th, NULL, send_recv, &th_args); + /* Wait for the thread to end and synchronize operations between threads */ + pthread_join(th, NULL); + close(cfd); + return 0; +} + diff --git a/newip/examples/nip_tcp_server_demo.c b/newip/examples/nip_tcp_server_demo.c new file mode 100644 index 0000000000000000000000000000000000000000..156f796a534e9c1169699eac26e929a5f300f4ea --- /dev/null +++ b/newip/examples/nip_tcp_server_demo.c @@ -0,0 +1,90 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. 
+ */ +#include +#include +#include +#include +#include + +#define __USE_GNU +#include +#include + +#include "nip_uapi.h" +#include "nip_lib.h" +#include "newip_route.h" + +void *recv_send(void *args) +{ + int cfd, ret; + char buf[BUFLEN] = {0}; + + memcpy(&cfd, args, sizeof(int)); + for (int i = 0; i < PKTCNT; i++) { + int recv_num = recv(cfd, buf, PKTLEN, MSG_WAITALL); + + if (recv_num < 0) { + perror("recv"); + goto END; + } else if (recv_num == 0) { /* no data */ + ; + } else { + printf("Received -- %s --:%d\n", buf, recv_num); + ret = send(cfd, buf, recv_num, 0); + if (ret < 0) { + perror("send"); + goto END; + } + printf("Sending -- %s --:%d\n", buf, recv_num); + } + } +END: close(cfd); + return NULL; +} + +int main(int argc, char **argv) +{ + pthread_t th; + int fd, cfd, addr_len; + struct sockaddr_nin si_local; + struct sockaddr_nin si_remote; + + fd = socket(AF_NINET, SOCK_STREAM, IPPROTO_TCP); + if (fd < 0) { + perror("socket"); + return -1; + } + + memset((char *)&si_local, 0, sizeof(si_local)); + si_local.sin_family = AF_NINET; + si_local.sin_port = htons(TCP_SERVER_PORT); + // 2-byte address of the server: 0xDE00 + si_local.sin_addr.nip_addr_field8[INDEX_0] = 0xDE; + si_local.sin_addr.nip_addr_field8[INDEX_1] = 0x00; + si_local.sin_addr.bitlen = NIP_ADDR_BIT_LEN_16; // 2-byte: 16bit + + if (bind(fd, (const struct sockaddr *)&si_local, sizeof(si_local)) < 0) { + perror("bind"); + goto END; + } + printf("bind success, addr=0x%02x%02x, port=%d\n", + si_local.sin_addr.nip_addr_field8[INDEX_0], + si_local.sin_addr.nip_addr_field8[INDEX_1], TCP_SERVER_PORT); + + if (listen(fd, LISTEN_MAX) < 0) { + perror("listen"); + goto END; + } + + addr_len = sizeof(si_remote); + memset(&si_remote, 0, sizeof(si_remote)); + cfd = accept(fd, (struct sockaddr *)&si_remote, (socklen_t *)&addr_len); + pthread_create(&th, NULL, recv_send, &cfd); + /* Wait for the thread to end and synchronize operations between threads */ + pthread_join(th, NULL); +END: close(fd); + return 0; +} 
+ diff --git a/newip/examples/nip_uapi.h b/newip/examples/nip_uapi.h new file mode 100644 index 0000000000000000000000000000000000000000..c8740b03f127a7a53903891b93413b1ba6640417 --- /dev/null +++ b/newip/examples/nip_uapi.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. + */ +#ifndef _NIP_UAPI_H +#define _NIP_UAPI_H + +#include "nip.h" + +/* The following structure must be larger than V4. System calls use V4. + * If the definition is smaller than V4, the read process will have memory overruns + * v4: include\linux\socket.h --> sockaddr (16Byte) + */ +#define POD_SOCKADDR_SIZE 3 +struct sockaddr_nin { + unsigned short sin_family; /* [2Byte] AF_NINET */ + unsigned short sin_port; /* [2Byte] Transport layer port, big-endian */ + struct nip_addr sin_addr; /* [9Byte] NIP address */ + + unsigned char sin_zero[POD_SOCKADDR_SIZE]; /* [3Byte] Byte alignment */ +}; + +struct nip_ifreq { + struct nip_addr ifrn_addr; + int ifrn_ifindex; +}; + +struct thread_args { + int cfd; + struct sockaddr_nin si_server; +}; + +#endif /* _NIP_UAPI_H */ diff --git a/newip/examples/nip_udp_client_demo.c b/newip/examples/nip_udp_client_demo.c new file mode 100644 index 0000000000000000000000000000000000000000..8f6f728446c470983dafe04b305c624f93ad6282 --- /dev/null +++ b/newip/examples/nip_udp_client_demo.c @@ -0,0 +1,124 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. 
+ */ +#include +#include +#include +#include +#include +#include +#include + +#define __USE_GNU +#include +#include + +#include "nip_uapi.h" +#include "nip_lib.h" +#include "newip_route.h" + +int _sendto(int cfd, struct sockaddr_nin *si_server, int pkt_num) +{ + char buf[BUFLEN] = {0}; + struct timeval sys_time; + socklen_t slen = sizeof(struct sockaddr_nin); + + gettimeofday(&sys_time, NULL); + sprintf(buf, "%ld %6ld NIP_UDP # %6d", sys_time.tv_sec, sys_time.tv_usec, pkt_num); + + if (sendto(cfd, buf, BUFLEN, 0, (struct sockaddr *)si_server, slen) < 0) { + printf("client sendto fail, pkt_num=%d", pkt_num); + return -1; + } + + return 0; +} + +int _recvfrom(int cfd, struct sockaddr_nin *si_server, int pkt_num, int *success) +{ + char buf[BUFLEN] = {0}; + fd_set readfds; + int tmp; + struct timeval tv; + socklen_t slen = sizeof(struct sockaddr_nin); + + FD_ZERO(&readfds); + FD_SET(cfd, &readfds); + tv.tv_sec = 2; + tv.tv_usec = 0; + if (select(cfd + 1, &readfds, NULL, NULL, &tv) < 0) { + printf("client select fail, pkt_num=%d", pkt_num); + return -1; + } + + if (FD_ISSET(cfd, &readfds)) { + int ret; + int no = 0; + + ret = recvfrom(cfd, buf, BUFLEN, 0, (struct sockaddr *)si_server, &slen); + if (ret > 0) { + *success += 1; + ret = sscanf(buf, "%d %d NIP_UDP # %d", &tmp, &tmp, &no); + if (ret) + printf("Received --%s sock %d success:%6d/%6d/no=%6d\n", + buf, cfd, *success, pkt_num + 1, no); + } else { + printf("client recvfrom fail, ret=%d\n", ret); + return -1; + } + } + + return 0; +} + +void *send_recv(void *args) +{ + int success = 0; + int cfd = ((struct thread_args *)args)->cfd; + struct sockaddr_nin si_server = ((struct thread_args *)args)->si_server; + + for (int i = 0; i < PKTCNT; i++) { + if (_sendto(cfd, &si_server, i) != 0) + goto END; + + if (_recvfrom(cfd, &si_server, i, &success) != 0) + goto END; + + usleep(SLEEP_US); + } + +END: return NULL; +} + +int main(int argc, char **argv) +{ + pthread_t th; + int cfd; + struct thread_args th_args; + struct 
sockaddr_nin si_server; + + cfd = socket(AF_NINET, SOCK_DGRAM, IPPROTO_UDP); + if (cfd < 0) { + perror("socket"); + return -1; + } + + memset((char *)&si_server, 0, sizeof(si_server)); + si_server.sin_family = AF_NINET; + si_server.sin_port = htons(UDP_SERVER_PORT); + // 2-byte address of the server: 0xDE00 + si_server.sin_addr.nip_addr_field8[INDEX_0] = 0xDE; + si_server.sin_addr.nip_addr_field8[INDEX_1] = 0x00; + si_server.sin_addr.bitlen = NIP_ADDR_BIT_LEN_16; // 2-byte: 16bit + + th_args.si_server = si_server; + th_args.si_server.sin_port = htons(UDP_SERVER_PORT); + th_args.cfd = cfd; + pthread_create(&th, NULL, send_recv, &th_args); + /* Wait for the thread to end and synchronize operations between threads */ + pthread_join(th, NULL); + close(cfd); + return 0; +} + diff --git a/newip/examples/nip_udp_server_demo.c b/newip/examples/nip_udp_server_demo.c new file mode 100644 index 0000000000000000000000000000000000000000..c4265dd2437fed01e4d118439e86f4a0ac4cb8f6 --- /dev/null +++ b/newip/examples/nip_udp_server_demo.c @@ -0,0 +1,93 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. 
+ */ +#include +#include +#include +#include +#include +#include +#include + +#define __USE_GNU +#include +#include + +#include "nip_uapi.h" +#include "nip_lib.h" +#include "newip_route.h" + +void *recv_send(void *args) +{ + char buf[BUFLEN] = {0}; + int fd, ret, recv_num; + int count = 0; + socklen_t slen; + struct sockaddr_nin si_remote; + + memcpy(&fd, args, sizeof(int)); + while (count < PKTCNT) { + slen = sizeof(si_remote); + memset(buf, 0, sizeof(char) * BUFLEN); + memset(&si_remote, 0, sizeof(si_remote)); + recv_num = recvfrom(fd, buf, BUFLEN, 0, (struct sockaddr *)&si_remote, &slen); + if (recv_num < 0) { + printf("server recvfrom fail, ret=%d\n", ret); + goto END; + } else if (recv_num == 0) { /* no data */ + ; + } else { + printf("Received -- %s -- from 0x%x:%d\n", buf, + si_remote.sin_addr.nip_addr_field16[0], ntohs(si_remote.sin_port)); + slen = sizeof(si_remote); + ret = sendto(fd, buf, BUFLEN, 0, (struct sockaddr *)&si_remote, slen); + if (ret < 0) { + printf("server sendto fail, ret=%d\n", ret); + goto END; + } + printf("Sending -- %s -- to 0x%0x:%d\n", buf, + si_remote.sin_addr.nip_addr_field8[0], ntohs(si_remote.sin_port)); + } + count++; + } +END: return NULL; +} + +int main(int argc, char **argv) +{ + int fd; + pthread_t th; + struct sockaddr_nin si_local; + + fd = socket(AF_NINET, SOCK_DGRAM, IPPROTO_UDP); + if (fd < 0) { + perror("socket"); + return -1; + } + + memset((char *)&si_local, 0, sizeof(si_local)); + si_local.sin_family = AF_NINET; + si_local.sin_port = htons(UDP_SERVER_PORT); + // 2-byte address of the server: 0xDE00 + si_local.sin_addr.nip_addr_field8[INDEX_0] = 0xDE; + si_local.sin_addr.nip_addr_field8[INDEX_1] = 0x00; + si_local.sin_addr.bitlen = NIP_ADDR_BIT_LEN_16; // 2-byte: 16bit + + if (bind(fd, (const struct sockaddr *)&si_local, sizeof(si_local)) < 0) { + perror("bind"); + goto END; + } + + printf("bind success, addr=0x%02x%02x, port=%d\n", + si_local.sin_addr.nip_addr_field8[INDEX_0], + 
si_local.sin_addr.nip_addr_field8[INDEX_1], UDP_SERVER_PORT); + + pthread_create(&th, NULL, recv_send, &fd); + /* Wait for the thread to end and synchronize operations between threads */ + pthread_join(th, NULL); + +END: close(fd); + return 0; +} + diff --git a/newip/figures/image-20220901152539801.png b/newip/figures/image-20220901152539801.png new file mode 100644 index 0000000000000000000000000000000000000000..4adcdeffc67754f3a7da8527c67eca22ddd9bbb4 Binary files /dev/null and b/newip/figures/image-20220901152539801.png differ diff --git a/newip/figures/image-20220915140627223.png b/newip/figures/image-20220915140627223.png new file mode 100644 index 0000000000000000000000000000000000000000..5d27f2620ca837e6b90416e16aa7456919762104 Binary files /dev/null and b/newip/figures/image-20220915140627223.png differ diff --git a/newip/figures/image-20220915162621809.png b/newip/figures/image-20220915162621809.png new file mode 100644 index 0000000000000000000000000000000000000000..76a2d26c0af54daf903d70b8cee350bf34e83c78 Binary files /dev/null and b/newip/figures/image-20220915162621809.png differ diff --git a/newip/figures/image-20220915165414926.png b/newip/figures/image-20220915165414926.png new file mode 100644 index 0000000000000000000000000000000000000000..5635e7de0f5ff8a0f02fe778c5f38803e3f1b05a Binary files /dev/null and b/newip/figures/image-20220915165414926.png differ diff --git a/newip/src/common/nip_addr.c b/newip/src/common/nip_addr.c new file mode 100644 index 0000000000000000000000000000000000000000..f108a2b328ec22308f27d4c54a682da2fa65114a --- /dev/null +++ b/newip/src/common/nip_addr.c @@ -0,0 +1,405 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. + */ +#include "nip_addr.h" + +/* This is similar to 0.0.0.0 in IPv4. 
Does not appear as a real address, + * just a constant used by the native for special processing + */ +const struct nip_addr nip_any_addr = { + .bitlen = NIP_ADDR_BIT_LEN_16, + .nip_addr_field8[0] = 0xFF, /* 0xFF09 addr, big-endian */ + .nip_addr_field8[1] = 0x09, +}; + +const struct nip_addr nip_broadcast_addr_arp = { + .bitlen = NIP_ADDR_BIT_LEN_16, + .nip_addr_field8[0] = 0xFF, /* 0xFF04 addr, big-endian */ + .nip_addr_field8[1] = 0x04, +}; + +enum addr_check_ret { + NOT_CURRENT_ADDR = -1, + CURRENT_ADDR_VALID = 0, + ADDR_2BYTE_INVALID = 1, + ADDR_3BYTE_INVALID = 2, + ADDR_5BYTE_INVALID = 3, + ADDR_7BYTE_INVALID = 4, + ADDR_BITLEN_INVALID = 5, + NIP_ADDR_UNKNOWN, +}; + +#define NIP_TRUE 1 +#define NIP_FALSE 0 + +/* Short address range: + * 【1-byte】0 ~ 220 + * 00 ~ DC + */ +static inline int is_1byte_addr_flag(unsigned char first_byte) +{ + return first_byte <= ADDR_FIRST_DC ? NIP_TRUE : NIP_FALSE; +} + +/* Short address range: + * 【2-byte】221 ~ 5119 + * DD/DE/.../F0 is a 2-byte address descriptor followed by the address value + * DDDD ~ DDFF : 221 ~ 255 + * DE00 ~ DEFF : 256 ~ 511 + * DF00 ~ DFFF : 512 ~ 767 + * ... + * F000 ~ F0FF : 4864 ~ 5119 + */ +static inline int is_2byte_addr_flag(unsigned char first_byte) +{ + return (first_byte > ADDR_FIRST_DC) && (first_byte <= ADDR_FIRST_F0) ? + NIP_TRUE : NIP_FALSE; +} + +/* Short address range: + * 【3-byte】5120 ~ 65535 + * F1 is a 3-byte address descriptor followed by the address value + * F1 1400 ~ F1 FFFF + */ +static inline int is_3byte_addr_flag(unsigned char first_byte) +{ + return first_byte == ADDR_FIRST_F1 ? NIP_TRUE : NIP_FALSE; +} + +/* Short address range: + * 【5-byte】65536 ~ 4,294,967,295 + * F2 is a 5-byte address descriptor followed by the address value + * F2 0001 0000 ~ F2 FFFF FFFF + */ +static inline int is_5byte_addr_flag(unsigned char first_byte) +{ + return first_byte == ADDR_FIRST_F2 ? 
NIP_TRUE : NIP_FALSE; +} + +/* Short address range: + * 【7-byte】4,294,967,296 ~ 281,474,976,710,655 + * F3 is a 7-byte address descriptor followed by the address value + * F3 0001 0000 0000 ~ F3 FFFF FFFF FFFF + */ +static inline int is_7byte_addr_flag(unsigned char first_byte) +{ + return first_byte == ADDR_FIRST_F3 ? NIP_TRUE : NIP_FALSE; +} + +/* Short address range: + * 【8-byte】 + * F4 is a 8-byte address descriptor followed by the address value + * F400 0000 0000 0000 ~ F4FF FFFF FFFF FFFF + */ +static inline int is_8byte_addr_flag(unsigned char first_byte) +{ + return first_byte == ADDR_FIRST_FE ? NIP_TRUE : NIP_FALSE; +} + +/* Short address range: + * 【public addr】 + * 0xFF00 - The loopback address + * 0xFF01 - Public address for access authentication + * 0xFF02 - Public address of access authentication + * 0xFF03 - The neighbor found a public address + * 0xFF04 - Address resolution (ARP) + * 0xFF05 - DHCP public address + * 0xFF06 - Public address for minimalist access authentication + * 0xFF07 - Self-organizing protocol public address + * 0xFF08 - The IEEE EUI - 64 addresses + * 0xFF09 - any_addr + */ +static inline int is_public_addr_flag(unsigned char first_byte) +{ + return first_byte == ADDR_FIRST_FF ? NIP_TRUE : NIP_FALSE; +} + +/* Short address range: + * 【1-byte】0 ~ 220 + * 00 ~ DC + */ +static int nip_addr_1byte_check(unsigned char first_byte, unsigned char second_byte, + unsigned char third_byte, int addr_len) +{ + int ret = NOT_CURRENT_ADDR; + + if (is_1byte_addr_flag(first_byte) && addr_len == NIP_ADDR_LEN_1) + ret = CURRENT_ADDR_VALID; + + return ret; +} + +/* Short address range: + * 【2-byte】221 ~ 5119 + * DD/DE/.../F0 is a 2-byte address descriptor followed by the address value + * DDDD ~ DDFF : 221 ~ 255 + * DE00 ~ DEFF : 256 ~ 511 + * DF00 ~ DFFF : 512 ~ 767 + * ... 
+ * F000 ~ F0FF : 4864 ~ 5119 + */ +static int nip_addr_2byte_check(unsigned char first_byte, unsigned char second_byte, + unsigned char third_byte, int addr_len) +{ + int ret = NOT_CURRENT_ADDR; + + if (is_2byte_addr_flag(first_byte) && addr_len == NIP_ADDR_LEN_2) { + if (first_byte > ADDR_FIRST_DC + 1 || + second_byte >= ADDR_SECOND_MIN_DD) + ret = CURRENT_ADDR_VALID; + else + ret = ADDR_2BYTE_INVALID; + } + + return ret; +} + +/* Short address range: + * 【3-byte】5120 ~ 65535 + * F1 is a 3-byte address descriptor followed by the address value + * F1 1400 ~ F1 FFFF + */ +static int nip_addr_3byte_check(unsigned char first_byte, unsigned char second_byte, + unsigned char third_byte, int addr_len) +{ + int ret = NOT_CURRENT_ADDR; + + if (is_3byte_addr_flag(first_byte) && addr_len == NIP_ADDR_LEN_3) { + if (second_byte >= ADDR_SECOND_MIN_F1) + ret = CURRENT_ADDR_VALID; + else + ret = ADDR_3BYTE_INVALID; + } + + return ret; +} + +/* Short address range: + * 【5-byte】65536 ~ 4,294,967,295 + * F2 is a 5-byte address descriptor followed by the address value + * F2 0001 0000 ~ F2 FFFF FFFF + */ +static int nip_addr_5byte_check(unsigned char first_byte, unsigned char second_byte, + unsigned char third_byte, int addr_len) +{ + int ret = NOT_CURRENT_ADDR; + + if (is_5byte_addr_flag(first_byte) && addr_len == NIP_ADDR_LEN_5) { + if (second_byte > 0 || third_byte >= ADDR_THIRD_MIN_F2) + ret = CURRENT_ADDR_VALID; + else + ret = ADDR_5BYTE_INVALID; + } + + return ret; +} + +/* Short address range: + * 【7-byte】4,294,967,296 ~ 281,474,976,710,655 + * F3 is a 7-byte address descriptor followed by the address value + * F3 0001 0000 0000 ~ F3 FFFF FFFF FFFF + */ +static int nip_addr_7byte_check(unsigned char first_byte, unsigned char second_byte, + unsigned char third_byte, int addr_len) +{ + int ret = NOT_CURRENT_ADDR; + + if (is_7byte_addr_flag(first_byte) && addr_len == NIP_ADDR_LEN_7) { + if (second_byte > 0 || third_byte >= ADDR_THIRD_MIN_F3) + ret = CURRENT_ADDR_VALID; + else + 
ret = ADDR_7BYTE_INVALID; + } + + return ret; +} + +/* Short address range: + * 【8-byte】 + * F4 is a 8-byte address descriptor followed by the address value + * F400 0000 0000 0000 ~ F4FF FFFF FFFF FFFF + */ +static int nip_addr_8byte_check(unsigned char first_byte, unsigned char second_byte, + unsigned char third_byte, int addr_len) +{ + int ret = NOT_CURRENT_ADDR; + + if (is_8byte_addr_flag(first_byte) && addr_len == NIP_ADDR_LEN_8) + ret = CURRENT_ADDR_VALID; + + return ret; +} + +/* Short address range: + * 【public addr】 + * 0xFF00 - The loopback address + * 0xFF01 - Public address for access authentication + * 0xFF02 - Public address of access authentication + * 0xFF03 - The neighbor found a public address + * 0xFF04 - Address resolution (ARP) + * 0xFF05 - DHCP public address + * 0xFF06 - Public address for minimalist access authentication + * 0xFF07 - Self-organizing protocol public address + * 0xFF08 - The IEEE EUI - 64 addresses + * 0xFF09 - any_addr + */ +static int nip_addr_public_check(unsigned char first_byte, unsigned char second_byte, + unsigned char third_byte, int addr_len) +{ + int ret = NOT_CURRENT_ADDR; + + if (is_public_addr_flag(first_byte) && addr_len == NIP_ADDR_LEN_2) + ret = CURRENT_ADDR_VALID; + + return ret; +} + +static int nip_addr_unknown(unsigned char first_byte, unsigned char second_byte, + unsigned char third_byte, int addr_len) +{ + return NIP_ADDR_UNKNOWN; +} + +#define CHECK_FUN_MAX 8 +static int (*nip_addr_check_fun[CHECK_FUN_MAX])(unsigned char first_byte, + unsigned char second_byte, + unsigned char third_byte, + int addr_len) = { + nip_addr_1byte_check, + nip_addr_2byte_check, + nip_addr_3byte_check, + nip_addr_5byte_check, + nip_addr_7byte_check, + nip_addr_8byte_check, + nip_addr_public_check, + nip_addr_unknown, +}; + +int nip_addr_invalid(const struct nip_addr *addr) +{ + int i; + int addr_len; + int ret = NIP_ADDR_UNKNOWN; + unsigned char first_byte, second_byte, third_byte; + + first_byte = 
addr->nip_addr_field8[NIP_8BIT_ADDR_INDEX_0]; + second_byte = addr->nip_addr_field8[NIP_8BIT_ADDR_INDEX_1]; + third_byte = addr->nip_addr_field8[NIP_8BIT_ADDR_INDEX_2]; + addr_len = addr->bitlen / NIP_ADDR_BIT_LEN_8; + + /* The value of the field after the effective length of the short address should be 0 */ + for (i = addr_len; i < NIP_8BIT_ADDR_INDEX_MAX; i++) { + if (addr->nip_addr_field8[i] > 0x00) + return ADDR_BITLEN_INVALID; + } + + for (i = 0; i < CHECK_FUN_MAX; i++) { + ret = nip_addr_check_fun[i](first_byte, second_byte, third_byte, addr_len); + if (ret == CURRENT_ADDR_VALID) + return ret; + else if (ret == NOT_CURRENT_ADDR) + continue; + else + return ret; + } + + return ret; +} + +/* 0xFF00 - The loopback address + * 0xFF01 - Public address for access authentication + * 0xFF02 - Public address of access authentication + * 0xFF03 - The neighbor found a public address + * 0xFF04 - Address resolution (ARP) + * 0xFF05 - DHCP public address + * 0xFF06 - Public address for minimalist access authentication + * 0xFF07 - Self-organizing protocol public address + * 0xFF08 - The IEEE EUI - 64 addresses + * 0xFF09 - any_addr + */ +int nip_addr_public(const struct nip_addr *addr) +{ + if (is_public_addr_flag(addr->nip_addr_field8[NIP_8BIT_ADDR_INDEX_0]) && + addr->bitlen == NIP_ADDR_BIT_LEN_16) + return 1; + else + return 0; +} + +/* judge whether the nip_addr is equal to 0xFF09 */ +int nip_addr_any(const struct nip_addr *ad) +{ + int result = 0; + + if (ad->bitlen == NIP_ADDR_BIT_LEN_16) { + if (ad->nip_addr_field16[0] == nip_any_addr.nip_addr_field16[0] && + ad->nip_addr_field16[1] == nip_any_addr.nip_addr_field16[1]) + result = 1; + } + return result; +} + +int get_nip_addr_len(const struct nip_addr *addr) +{ + int len = 0; + unsigned char first_byte = addr->nip_addr_field8[0]; + + if (is_1byte_addr_flag(first_byte)) + len = NIP_ADDR_LEN_1; + else if (is_2byte_addr_flag(first_byte) || is_public_addr_flag(first_byte)) + len = NIP_ADDR_LEN_2; + else if 
(is_3byte_addr_flag(first_byte)) + len = NIP_ADDR_LEN_3; + else if (is_5byte_addr_flag(first_byte)) + len = NIP_ADDR_LEN_5; + else if (is_7byte_addr_flag(first_byte)) + len = NIP_ADDR_LEN_7; + else if (is_8byte_addr_flag(first_byte)) + len = NIP_ADDR_LEN_8; + + return len; +} + +unsigned char *build_nip_addr(const struct nip_addr *addr, unsigned char *buf) +{ + int i; + unsigned char *p = buf; + int addr_len = get_nip_addr_len(addr); + + if (addr_len == 0) + return 0; + + for (i = 0; i < addr_len; i++) { + *p = addr->nip_addr_field8[i]; + p++; + } + + return p; +} + +unsigned char *decode_nip_addr(unsigned char *buf, struct nip_addr *addr) +{ + int i; + int ret; + int addr_len; + unsigned char *p = buf; + + addr->nip_addr_field8[0] = *p; + addr_len = get_nip_addr_len(addr); + if (addr_len == 0) + return 0; + + for (i = 0; i < addr_len; i++) { + addr->nip_addr_field8[i] = *p; + p++; + } + addr->bitlen = addr_len * NIP_ADDR_BIT_LEN_8; + + ret = nip_addr_invalid(addr); + if (ret) + return 0; + + return p; +} + diff --git a/newip/src/common/nip_addr.h b/newip/src/common/nip_addr.h new file mode 100644 index 0000000000000000000000000000000000000000..abb49ca8cbbff8816bdb1c839ca0d37765d470c3 --- /dev/null +++ b/newip/src/common/nip_addr.h @@ -0,0 +1,113 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. 
+ */ +#ifndef _UAPI_NEWIP_ADDR_H +#define _UAPI_NEWIP_ADDR_H + +#define NIP_ADDR_LEN_1 1 +#define NIP_ADDR_LEN_2 2 +#define NIP_ADDR_LEN_3 3 +#define NIP_ADDR_LEN_4 4 +#define NIP_ADDR_LEN_5 5 +#define NIP_ADDR_LEN_7 7 +#define NIP_ADDR_LEN_8 8 + +#define NIP_ADDR_BIT_LEN_8 8 +#define NIP_ADDR_BIT_LEN_16 16 +#define NIP_ADDR_BIT_LEN_24 24 +#define NIP_ADDR_BIT_LEN_40 40 +#define NIP_ADDR_BIT_LEN_MAX 64 + +enum nip_addr_check_value { + ADDR_FIRST_DC = 0xDC, + ADDR_FIRST_F0 = 0xF0, + ADDR_FIRST_F1, + ADDR_FIRST_F2, + ADDR_FIRST_F3, + ADDR_FIRST_FE = 0xFE, + ADDR_FIRST_FF = 0xFF, + ADDR_SECOND_MIN_DD = 0xDD, + ADDR_SECOND_MIN_F1 = 0x14, /* f1 14 00 */ + ADDR_THIRD_MIN_F2 = 0x01, /* f2 00 01 00 00 */ + ADDR_THIRD_MIN_F3 = 0x01, /* F3 0001 0000 0000 */ +}; + +enum nip_8bit_addr_index { + NIP_8BIT_ADDR_INDEX_0 = 0, + NIP_8BIT_ADDR_INDEX_1 = 1, + NIP_8BIT_ADDR_INDEX_2 = 2, + NIP_8BIT_ADDR_INDEX_3 = 3, + NIP_8BIT_ADDR_INDEX_4 = 4, + NIP_8BIT_ADDR_INDEX_5 = 5, + NIP_8BIT_ADDR_INDEX_6 = 6, + NIP_8BIT_ADDR_INDEX_7 = 7, + NIP_8BIT_ADDR_INDEX_MAX, +}; + +enum nip_16bit_addr_index { + NIP_16BIT_ADDR_INDEX_0 = 0, + NIP_16BIT_ADDR_INDEX_1 = 1, + NIP_16BIT_ADDR_INDEX_2 = 2, + NIP_16BIT_ADDR_INDEX_3 = 3, + NIP_16BIT_ADDR_INDEX_MAX, +}; + +enum nip_32bit_addr_index { + NIP_32BIT_ADDR_INDEX_0 = 0, + NIP_32BIT_ADDR_INDEX_1 = 1, + NIP_32BIT_ADDR_INDEX_MAX, +}; + +#define nip_addr_field8 v.u.field8 +#define nip_addr_field16 v.u.field16 +#define nip_addr_field32 v.u.field32 + +#pragma pack(1) +struct nip_addr_field { + union { + unsigned char field8[NIP_8BIT_ADDR_INDEX_MAX]; + unsigned short field16[NIP_16BIT_ADDR_INDEX_MAX]; /* big-endian */ + unsigned int field32[NIP_32BIT_ADDR_INDEX_MAX]; /* big-endian */ + } u; +}; + +struct nip_addr { + unsigned char bitlen; /* The address length is in bit (not byte) */ + struct nip_addr_field v; +}; + +#pragma pack() + +enum nip_index { + INDEX_0 = 0, + INDEX_1 = 1, + INDEX_2 = 2, + INDEX_3 = 3, + INDEX_4 = 4, + INDEX_5 = 5, + INDEX_6 = 6, + INDEX_7 
= 7, + INDEX_8 = 8, + INDEX_9 = 9, + INDEX_10 = 10, + INDEX_11 = 11, + INDEX_12 = 12, + INDEX_13 = 13, + INDEX_14 = 14, + INDEX_15 = 15, + INDEX_MAX, +}; + +extern const struct nip_addr nip_any_addr; +extern const struct nip_addr nip_broadcast_addr_arp; + +int nip_addr_invalid(const struct nip_addr *addr); +int nip_addr_public(const struct nip_addr *addr); +int nip_addr_any(const struct nip_addr *ad); +int get_nip_addr_len(const struct nip_addr *addr); +unsigned char *build_nip_addr(const struct nip_addr *addr, unsigned char *buf); +unsigned char *decode_nip_addr(unsigned char *buf, struct nip_addr *addr); + +#endif /* _UAPI_NEWIP_ADDR_H */ + diff --git a/newip/src/common/nip_checksum.c b/newip/src/common/nip_checksum.c new file mode 100644 index 0000000000000000000000000000000000000000..a85c1f6a0d1290f5c1f9165af39987305ba4a62c --- /dev/null +++ b/newip/src/common/nip_checksum.c @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. 
+ */ +#include "nip_hdr.h" +#include "nip_checksum.h" + +#define USHORT_PAYLOAD 16 +#define NIP_CHECKSUM_UINT8_PAYLOAD 8 +unsigned int _nip_check_sum(const unsigned char *data, unsigned short data_len) +{ + unsigned int i = 0; + unsigned int sum = 0; + + while (i + 1 < data_len) { + sum += (data[i] << NIP_CHECKSUM_UINT8_PAYLOAD) + data[i + 1]; + i += 2; /* Offset 2 bytes */ + } + + if (i < (unsigned int)data_len) + sum += (data[i] << NIP_CHECKSUM_UINT8_PAYLOAD); + + return sum; +} + +unsigned int _nip_header_chksum(struct nip_pseudo_header *chksum_header) +{ + int i, j; + int addr_len; + unsigned char pseudo_header[NIP_HDR_MAX] = {0}; + unsigned short hdr_len = 0; + + addr_len = chksum_header->saddr.bitlen / NIP_ADDR_BIT_LEN_8; + if (addr_len) { + j = 0; + for (i = 0; i < addr_len; i++, j++) + pseudo_header[j] = chksum_header->saddr.nip_addr_field8[i]; + hdr_len += addr_len; + } + + addr_len = chksum_header->daddr.bitlen / NIP_ADDR_BIT_LEN_8; + if (addr_len) { + j = hdr_len; + for (i = 0; i < addr_len; i++, j++) + pseudo_header[j] = chksum_header->daddr.nip_addr_field8[i]; + hdr_len += addr_len; + } + + /* chksum_header->check_len is network order.(big end) */ + *(unsigned short *)(pseudo_header + hdr_len) = chksum_header->check_len; + hdr_len += sizeof(chksum_header->check_len); + *(pseudo_header + hdr_len) = chksum_header->nexthdr; + hdr_len += sizeof(chksum_header->nexthdr); + + return _nip_check_sum(pseudo_header, hdr_len); +} + +/* The checksum is calculated when the packet is received + * Note: + * 1.chksum_header->check_len is network order.(big end) + * 2.check_len is host order. 
+ */ +unsigned short nip_check_sum_parse(unsigned char *data, + unsigned short check_len, + struct nip_pseudo_header *chksum_header) +{ + unsigned int sum = 0; + + sum = _nip_check_sum(data, check_len); + sum += _nip_header_chksum(chksum_header); + + while (sum >> USHORT_PAYLOAD) + sum = (sum >> USHORT_PAYLOAD) + (sum & 0xffff); + return (unsigned short)sum; +} + +/* The checksum is calculated when the packet is sent + * Note: + * 1.chksum_header->check_len is network order.(big end) + * 2.data_len is host order. + */ +unsigned short nip_check_sum_build(unsigned char *data, + unsigned short data_len, + struct nip_pseudo_header *chksum_header) +{ + unsigned int sum = 0; + + sum = _nip_check_sum(data, data_len); + sum += _nip_header_chksum(chksum_header); + + while (sum >> USHORT_PAYLOAD) + sum = (sum >> USHORT_PAYLOAD) + (sum & 0xffff); + return (unsigned short)(~sum); +} + diff --git a/newip/src/common/nip_checksum.h b/newip/src/common/nip_checksum.h new file mode 100644 index 0000000000000000000000000000000000000000..3ca09a354e127d8f16a0aaa1f31f3072d36914fe --- /dev/null +++ b/newip/src/common/nip_checksum.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. + */ +#ifndef _NIP_CHECKSUM_H +#define _NIP_CHECKSUM_H + +#include "nip_addr.h" + +struct nip_pseudo_header { + struct nip_addr saddr; /* Source address, network order.(big end) */ + struct nip_addr daddr; /* Destination address, network order.(big end) */ + unsigned short check_len; /* network order.(big end) */ + unsigned char nexthdr; /* Upper-layer Protocol Type: IPPROTO_UDP */ +}; + +/* The checksum is calculated when the packet is received + * Note: + * 1.chksum_header->check_len is network order.(big end) + * 2.data_len is host order. 
+ */ +unsigned short nip_check_sum_parse(unsigned char *data, + unsigned short check_len, + struct nip_pseudo_header *chksum_header); + +/* The checksum is calculated when the packet is sent + * Note: + * 1.chksum_header->check_len is network order.(big end) + * 2.data_len is host order. + */ +unsigned short nip_check_sum_build(unsigned char *data, + unsigned short data_len, + struct nip_pseudo_header *chksum_header); + +#endif /* _NIP_CHECKSUM_H */ + diff --git a/newip/src/common/nip_hdr.h b/newip/src/common/nip_hdr.h new file mode 100644 index 0000000000000000000000000000000000000000..d92c9b249265826fc52d4669310a9c8e7117fe73 --- /dev/null +++ b/newip/src/common/nip_hdr.h @@ -0,0 +1,234 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. + */ +#ifndef _NEWIP_HDR_H +#define _NEWIP_HDR_H + +#include "nip_addr.h" + +/* Ethernet head 14B, +2B byte alignment, +66 to avoid + * HMAC driver SKB space expansion caused by Coredum problem + */ +/* This parameter is used only to apply for the length of the packet buffer, + * but not to determine the actual packet header length + */ +#define NIP_ETH_HDR_BASE_LEN 14 +#define NIP_ETH_HDR_LEN (NIP_ETH_HDR_BASE_LEN + 2 + 66) + +/* bitmap1 + bitmap2 + TTL + total len + nexthd + daddr + saddr + * 1B 1B 1B 2B 1B 9B 9B = 24B + * V4 TCP 1448 + * NIP TCP 1430 + 30 = 1460 + */ +/* This interface is only used to define the buffer length. 
+ * To calculate the packet header length, use the "get_nip_hdr_len" func + */ +#define NIP_HDR_MAX 24 +#define NIP_UDP_HDR_LEN 8 +#define NIP_MIN_MTU (NIP_HDR_MAX + NIP_UDP_HDR_LEN) +#define NIP_BYTE_ALIGNMENT 2 + +#define NIP_BITMAP_HAVE_MORE_BIT 0x01 + +/* Bitmap 1st Byte: bit0 - bit7 */ +#define NIP_BITMAP_INVALID_SET 0x80 /* Bit 0 is set */ +#define NIP_BITMAP_INCLUDE_TTL 0x40 /* Bit 1 is set */ +#define NIP_BITMAP_INCLUDE_TOTAL_LEN 0x20 /* Bit 2 is set */ +#define NIP_BITMAP_INCLUDE_NEXT_HDR 0x10 /* Bit 3 is set */ +#define NIP_BITMAP_INCLUDE_RES1 0x08 /* Bit 4 is set */ +#define NIP_BITMAP_INCLUDE_DADDR 0x04 /* Bit 5 is set */ +#define NIP_BITMAP_INCLUDE_SADDR 0x02 /* Bit 6 is set */ +#define NIP_BITMAP_HAVE_BYTE_2 NIP_BITMAP_HAVE_MORE_BIT /* Bit 7 is set */ + +/* Bitmap 2nd Byte: bit0 - bit7 */ +#define NIP_BITMAP_INCLUDE_HDR_LEN 0x80 /* Bit 0 is set */ +#define NIP_BITMAP_INCLUDE_RES2 0x40 /* Bit 1 is set */ +#define NIP_BITMAP_INCLUDE_RES3 0x20 /* Bit 2 is set */ +#define NIP_BITMAP_INCLUDE_RES4 0x10 /* Bit 3 is set */ +#define NIP_BITMAP_INCLUDE_RES5 0x08 /* Bit 4 is set */ +#define NIP_BITMAP_INCLUDE_RES6 0x04 /* Bit 5 is set */ +#define NIP_BITMAP_INCLUDE_RES7 0x02 /* Bit 6 is set */ +#define NIP_BITMAP_HAVE_BYTE_3 NIP_BITMAP_HAVE_MORE_BIT /* Bit 7 is set */ + +/* Bitmap 1st Byte: + * | valid | ttl | total_len | next_hdr | res1 | daddr | saddr | have byte2 | + * | 0 | 1 | 0 | 1 | 0 | 1 | 1 | 0 | + */ +#define NIP_UDP_BITMAP_1 0x56 +#define NIP_UDP_BITMAP_1_INC_2 0x57 + +/* Bitmap 1st Byte: + * | valid | ttl | total_len | next_hdr | res1 | daddr | saddr | have byte2 | + * | 0 | 1 | 1 | 1 | 0 | 1 | 1 | 0 | + */ +#define NIP_NORMAL_BITMAP_1 0x76 +#define NIP_NORMAL_BITMAP_1_INC_2 0x77 + +/* Bitmap 2nd Byte: + * | hdr_len | res2 | res2 | res2 | res2 | res2 | res2 | have byte3 | + * | 0 or 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | + */ +#define NIP_NODATA_BITMAP_2 0x00 +#define NIP_NORMAL_BITMAP_2 0x80 + +/* invalid Bitmap 2nd Byte: + * | hdr_len | res2 | res2 | res2 
| res2 | res2 | res2 | have byte3 | + * | 0 or 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | + */ +#define NIP_INVALID_BITMAP_2 0x7F + +#define NIP_DEFAULT_TTL 128 +#define NIP_ARP_DEFAULT_TTL 64 +#define IPPROTO_NIP_ICMP 0xB1 + +enum NIP_HDR_TYPE { + NIP_HDR_UDP = 0, + NIP_HDR_COMM = 1, + + NIP_HDR_TYPE_MAX, +}; + +enum NIP_HDR_DECAP_ERR { + NIP_HDR_BITMAP_INVALID = 1, + NIP_HDR_BITMAP_NUM_OUT_RANGE = 2, + NIP_HDR_NO_TTL = 3, + NIP_HDR_NO_NEXT_HDR = 4, + NIP_HDR_NO_DADDR = 5, + NIP_HDR_DECAP_DADDR_ERR = 6, + NIP_HDR_DADDR_INVALID = 7, + NIP_HDR_DECAP_SADDR_ERR = 8, + NIP_HDR_SADDR_INVALID = 9, + NIP_HDR_RCV_BUF_READ_OUT_RANGE = 10, + NIP_HDR_UNKNOWN_AND_NO_HDR_LEN = 11, + NIP_HDR_LEN_INVALID = 12, + NIP_HDR_LEN_OUT_RANGE = 13, + + NIP_HDR_DECAP_ERRCODE_MAX, +}; + +/* The newIP header contains variable-length fields. + * The header structure is defined only for function parameter transmission. + * The fields are parsed in the original packet and saved + */ +struct nip_hdr_decap { + struct nip_addr saddr; /* Source address, network order.(big end) */ + struct nip_addr daddr; /* Destination address, network order.(big end) */ + + unsigned char ttl; /* Hop count limit */ + unsigned char nexthdr; /* Upper-layer Protocol Type: IPPROTO_UDP */ + unsigned char hdr_len; /* Indicates the length of the packet header */ + unsigned char hdr_real_len; /* Indicates the actual length of the packet header */ + + unsigned short total_len; /* Packet length (Header + packet), network order.(big end) */ + unsigned short no_hdr_len : 1; /* The header does not contain a header length field */ + unsigned short include_unknown_bit : 1; /* There is no other bitmap field */ + unsigned short include_saddr : 1; + unsigned short include_daddr : 1; + unsigned short include_ttl : 1; + unsigned short include_nexthdr : 1; + unsigned short include_hdr_len : 1; + unsigned short include_total_len : 1; + unsigned short res : 8; + + unsigned int rcv_buf_len; +}; + +/* The newIP packet header function is an incoming 
or outgoing parameter, + * which is not the content encapsulated in the packet + */ +#define BITMAP_MAX 8 +#define RES_NUM 2 +struct nip_hdr_encap { + struct nip_addr daddr; /* Destination address, network order.(big end) */ + struct nip_addr saddr; /* Source address, network order.(big end) */ + + unsigned char ttl; /* Hop count limit */ + unsigned char nexthdr; /* Upper-layer Protocol Type: IPPROTO_UDP */ + unsigned short total_len; /* Packet header length + packet data length */ + + void *usr_data; /* User data pointer */ + unsigned int usr_data_len; /* Length of data sent by the user */ + unsigned int trans_hdr_len; /* Transport layer header length */ + + unsigned short sport; + unsigned short dport; + + /* The following are the output parameters */ + unsigned char bitmap[BITMAP_MAX]; /* Bitmap currently supports a maximum of 8 bytes */ + unsigned int bitmap_num; /* Number of valid elements in the bitmap array */ + + unsigned char *hdr_buf; /* Cache the newIP header */ + unsigned int hdr_buf_pos; /* Buf Buffer writable address offset */ + unsigned short *frag_id_pos; /* Fragment Offset in the original packet */ + unsigned char *hdr_len_pos; /* Indicates the actual length of the packet header */ + unsigned short *total_len_pos; /* Total length position of the packet */ + + /* Whether the bitmap of the packet header carries a flag */ + unsigned char encap_ttl : 1; + unsigned char encap_hdr_len : 1; + unsigned char encap_daddr : 1; + unsigned char encap_saddr : 1; + unsigned char encap_total_len : 1; + unsigned char encap_res : 3; +}; + +/* Packet segment information */ +struct nip_pkt_seg_info { + unsigned int mid_pkt_num; /* Number of intermediate segments */ + unsigned int last_pkt_num; /* Number of last segments */ + + unsigned int mid_usr_pkt_len; /* Middle segment data length (8B aligned) */ + unsigned int last_usr_pkt_len; /* Length of the last data segment */ + + unsigned char *usr_data; /* Holds a pointer to the user's raw data */ + unsigned int 
usr_data_len; /* Length of user data read this time */ +}; + +void nip_calc_pkt_frag_num(unsigned int mtu, + unsigned int nip_hdr_len, + unsigned int usr_data_len, + struct nip_pkt_seg_info *seg_info); + +void nip_hdr_udp_encap(struct nip_hdr_encap *head); + +/* need update total len after this func, call nip_update_total_len */ +void nip_hdr_comm_encap(struct nip_hdr_encap *head); + +/* input must be network order. */ +void nip_update_total_len(struct nip_hdr_encap *head, unsigned short total_len); + +/* Note: a function call requires its own byte order conversion.(niph->total_len) */ +int nip_hdr_parse(unsigned char *rcv_buf, unsigned int buf_len, struct nip_hdr_decap *niph); + +/* The length of the packet header is obtained according to the packet type, + * source ADDRESS, and destination address. + * If the packet does not carry the source address or destination address, fill in the blank + */ +int get_nip_hdr_len(enum NIP_HDR_TYPE hdr_type, + const struct nip_addr *saddr, + const struct nip_addr *daddr); + +struct udp_hdr { + unsigned short sport; + unsigned short dport; + unsigned short len; + unsigned short checksum; +}; + +/* input must be network order. */ +static inline void nip_build_udp_hdr(unsigned short sport, unsigned short dport, + unsigned short len, unsigned char *buf, + unsigned short checksum) +{ + struct udp_hdr *uh; + + uh = (struct udp_hdr *)buf; + uh->sport = sport; + uh->dport = dport; + uh->len = len; + uh->checksum = checksum; +} + +#endif /* _NEWIP_HDR_H */ + diff --git a/newip/src/common/nip_hdr_decap.c b/newip/src/common/nip_hdr_decap.c new file mode 100644 index 0000000000000000000000000000000000000000..d49a834833f63362eb7ef54257514f1355c310a1 --- /dev/null +++ b/newip/src/common/nip_hdr_decap.c @@ -0,0 +1,272 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. 
+ */ +#include "nip_hdr.h" + +/* Must carry the current field */ +static int _get_nip_hdr_bitmap(unsigned char *buf, + unsigned char bitmap[], + unsigned char bitmap_index_max) +{ + int i = 0; + unsigned char *p = buf; + + if (*p & NIP_BITMAP_INVALID_SET) + return -NIP_HDR_BITMAP_INVALID; + + do { + if (i >= bitmap_index_max) + return -NIP_HDR_BITMAP_NUM_OUT_RANGE; + + bitmap[i] = *p; + p++; + } while (bitmap[i++] & NIP_BITMAP_HAVE_MORE_BIT); + + return i; +} + +/* Must carry the current field */ +static int _get_nip_hdr_ttl(const unsigned char *buf, + unsigned char bitmap, + struct nip_hdr_decap *niph) +{ + if (!(bitmap & NIP_BITMAP_INCLUDE_TTL)) + return -NIP_HDR_NO_TTL; + + niph->ttl = *buf; + niph->include_ttl = 1; + + return sizeof(niph->ttl); +} + +/* Optional fields */ +/* Communication between devices of the same version may not carry packet Header length, + * but communication between devices of different versions must carry packet header length + */ +static int _get_nip_hdr_len(const unsigned char *buf, + unsigned char bitmap, + struct nip_hdr_decap *niph) +{ + if (!(bitmap & NIP_BITMAP_INCLUDE_HDR_LEN)) + return 0; + + /* Total_len is a network sequence and cannot be + * compared directly with the local sequence + */ + niph->hdr_len = *buf; + niph->include_hdr_len = 1; + + if (niph->include_total_len && niph->hdr_len >= niph->rcv_buf_len) + return -NIP_HDR_LEN_OUT_RANGE; + + return sizeof(niph->hdr_len); +} + +/* Must carry the current field */ +static int _get_nip_hdr_nexthdr(const unsigned char *buf, + unsigned char bitmap, + struct nip_hdr_decap *niph) +{ + if (!(bitmap & NIP_BITMAP_INCLUDE_NEXT_HDR)) + return -NIP_HDR_NO_NEXT_HDR; + + niph->nexthdr = *buf; + niph->include_nexthdr = 1; + + return sizeof(niph->nexthdr); +} + +/* Must carry the current field */ +/* Note: niph->saddr is network order.(big end) */ +static int _get_nip_hdr_daddr(unsigned char *buf, + unsigned char bitmap, + struct nip_hdr_decap *niph) +{ + unsigned char *p; + + if 
(!(bitmap & NIP_BITMAP_INCLUDE_DADDR)) + return -NIP_HDR_NO_DADDR; + + p = decode_nip_addr(buf, &niph->daddr); + if (!p) + return -NIP_HDR_DECAP_DADDR_ERR; + + if (nip_addr_invalid(&niph->daddr)) + return -NIP_HDR_DADDR_INVALID; + + niph->include_daddr = 1; + return (niph->daddr.bitlen / NIP_ADDR_BIT_LEN_8); +} + +/* Optional fields */ +/* Note: niph->daddr is network order.(big end) */ +static int _get_nip_hdr_saddr(unsigned char *buf, + unsigned char bitmap, + struct nip_hdr_decap *niph) +{ + unsigned char *p; + + if (!(bitmap & NIP_BITMAP_INCLUDE_SADDR)) + return 0; + + p = decode_nip_addr(buf, &niph->saddr); + if (!p) + return -NIP_HDR_DECAP_SADDR_ERR; + + if (nip_addr_invalid(&niph->saddr)) + return -NIP_HDR_SADDR_INVALID; + + niph->include_saddr = 1; + return (niph->saddr.bitlen / NIP_ADDR_BIT_LEN_8); +} + +/* Optional fields: tcp/arp need, udp needless */ +/* Note: niph->total_len is network order.(big end), need change to host order */ +static int _get_nip_total_len(unsigned char *buf, + unsigned char bitmap, + struct nip_hdr_decap *niph) +{ + if (!(bitmap & NIP_BITMAP_INCLUDE_TOTAL_LEN)) + return 0; + + /* Total_len is a network sequence and cannot be + * compared directly with the local sequence + */ + niph->total_len = *((unsigned short *)buf); + niph->include_total_len = 1; + + return sizeof(niph->total_len); +} + +static int _nip_hdr_bitmap0_parse(unsigned char *buf, + unsigned char bitmap, + struct nip_hdr_decap *niph) +{ + int len; + int len_total = 0; + + len = _get_nip_hdr_ttl(buf, bitmap, niph); + if (len < 0) + return len; + len_total += len; + + /* Optional fields */ + len = _get_nip_total_len(buf + len_total, bitmap, niph); + if (len < 0) + return len; + len_total += len; + + len = _get_nip_hdr_nexthdr(buf + len_total, bitmap, niph); + if (len < 0) + return len; + len_total += len; + + len = _get_nip_hdr_daddr(buf + len_total, bitmap, niph); + if (len < 0) + return len; + len_total += len; + + len = _get_nip_hdr_saddr(buf + len_total, bitmap, 
niph); + if (len < 0) + return len; + len_total += len; + + return len_total; +} + +static int _nip_hdr_bitmap1_parse(unsigned char *buf, + unsigned char bitmap, + struct nip_hdr_decap *niph) +{ + int len; + int len_total = 0; + + /* If add new field needs to be modified with the macro definition */ + if (bitmap & NIP_INVALID_BITMAP_2) + niph->include_unknown_bit = 1; + + /* Optional fields */ + len = _get_nip_hdr_len(buf + len_total, bitmap, niph); + if (len < 0) + return len; + len_total += len; + + return len_total; +} + +static int _nip_hdr_unknown_bit_check(unsigned char *buf, + unsigned char bitmap, + struct nip_hdr_decap *niph) +{ + niph->include_unknown_bit = 1; + return 0; +} + +#define FACTORY_NUM_MAX 3 +static int (*hdr_parse_factory[FACTORY_NUM_MAX])(unsigned char *, + unsigned char, + struct nip_hdr_decap *) = { + _nip_hdr_bitmap0_parse, + _nip_hdr_bitmap1_parse, + _nip_hdr_unknown_bit_check, +}; + +static int nip_hdr_check(struct nip_hdr_decap *niph) +{ + if (niph->include_unknown_bit && !niph->include_hdr_len) + /* different ver pkt but no hdr len */ + return -NIP_HDR_UNKNOWN_AND_NO_HDR_LEN; + + if (niph->include_hdr_len) { + if (niph->hdr_len == 0 || + niph->hdr_len < niph->hdr_real_len) + return -NIP_HDR_LEN_INVALID; + } + + return 0; +} + +/* Note: + * 1.niph->total_len is network order.(big end), need change to host order + * 2.niph->saddr/daddr is network order.(big end) + */ +int nip_hdr_parse(unsigned char *rcv_buf, unsigned int buf_len, struct nip_hdr_decap *niph) +{ + int i = 0; + int ret; + unsigned char *buf = rcv_buf; + unsigned char bitmap[BITMAP_MAX] = {0}; + int num = _get_nip_hdr_bitmap(buf, bitmap, BITMAP_MAX); + + if (num <= 0 || !rcv_buf) + return num; + + niph->hdr_real_len = num * sizeof(bitmap[0]); + buf += niph->hdr_real_len; + + niph->rcv_buf_len = buf_len; + while (i < num) { + int len; + + if (i >= FACTORY_NUM_MAX) + break; + len = hdr_parse_factory[i](buf, bitmap[i], niph); + if (len < 0) + return len; + + buf += len; + 
niph->hdr_real_len += len; + if (niph->hdr_real_len >= buf_len) + return -NIP_HDR_RCV_BUF_READ_OUT_RANGE; + i++; + } + + ret = nip_hdr_check(niph); + if (ret < 0) + return ret; + + return niph->hdr_len > niph->hdr_real_len ? + niph->hdr_len : niph->hdr_real_len; +} + diff --git a/newip/src/common/nip_hdr_encap.c b/newip/src/common/nip_hdr_encap.c new file mode 100644 index 0000000000000000000000000000000000000000..7ad87d4668e3b6df997f253e620d2fac9bf3c719 --- /dev/null +++ b/newip/src/common/nip_hdr_encap.c @@ -0,0 +1,202 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. + */ +#include "nip_hdr.h" + +#define INTEGER_MULTIPLE_OF_8 (~7) /* ~7 is an integer multiple of 8 */ +#define FMT_FACTORY_NUM_MAX 1 +#define ENCAP_FACTORY_NUM_MAX 1 + +void nip_calc_pkt_frag_num(unsigned int mtu, + unsigned int nip_hdr_len, + unsigned int usr_data_len, + struct nip_pkt_seg_info *seg_info) +{ + unsigned int mid_usr_pkt_len = (mtu - nip_hdr_len - NIP_UDP_HDR_LEN) & + INTEGER_MULTIPLE_OF_8; + unsigned int mid_pkt_num = usr_data_len / mid_usr_pkt_len; + unsigned int last_usr_pkt_len = 0; + + if (usr_data_len != 0) { + last_usr_pkt_len = usr_data_len % mid_usr_pkt_len; + if (last_usr_pkt_len == 0) { + last_usr_pkt_len = mid_usr_pkt_len; + mid_pkt_num--; + } + } + + seg_info->last_pkt_num = 1; + seg_info->mid_pkt_num = mid_pkt_num; + seg_info->mid_usr_pkt_len = mid_usr_pkt_len; + seg_info->last_usr_pkt_len = last_usr_pkt_len; +} + +static inline void _nip_hdr_ttl_encap(struct nip_hdr_encap *head) +{ + *(head->hdr_buf + head->hdr_buf_pos) = head->ttl; + head->hdr_buf_pos += sizeof(head->ttl); +} + +static inline void _nip_hdr_len_encap(struct nip_hdr_encap *head) +{ + head->hdr_len_pos = head->hdr_buf + head->hdr_buf_pos; + head->hdr_buf_pos += 1; +} + +static inline void _nip_update_hdr_len(struct nip_hdr_encap *head) +{ + *head->hdr_len_pos = head->hdr_buf_pos; +} + +static inline void _nip_hdr_nexthdr_encap(struct nip_hdr_encap *head) 
+{ + *(head->hdr_buf + head->hdr_buf_pos) = head->nexthdr; + head->hdr_buf_pos += sizeof(head->nexthdr); +} + +static inline void _nip_hdr_daddr_encap(struct nip_hdr_encap *head) +{ + (void)build_nip_addr(&head->daddr, (head->hdr_buf + head->hdr_buf_pos)); + head->hdr_buf_pos += (head->daddr.bitlen / NIP_ADDR_BIT_LEN_8); +} + +static inline void _nip_hdr_saddr_encap(struct nip_hdr_encap *head) +{ + (void)build_nip_addr(&head->saddr, (head->hdr_buf + head->hdr_buf_pos)); + head->hdr_buf_pos += (head->saddr.bitlen / NIP_ADDR_BIT_LEN_8); +} + +static inline void _nip_hdr_total_len_encap(struct nip_hdr_encap *head) +{ + head->total_len_pos = (unsigned short *)(head->hdr_buf + head->hdr_buf_pos); + head->hdr_buf_pos += sizeof(head->total_len); +} + +/* total_len must be network order.(big end) */ +void nip_update_total_len(struct nip_hdr_encap *head, unsigned short total_len) +{ + *head->total_len_pos = total_len; +} + +#define BITMAP1_OFFSET 1 +#define BITMAP2_OFFSET 2 +static inline void _nip_hdr_encap_udp_bitmap(struct nip_hdr_encap *head) +{ + /* bitmap(1B) + ttl(1B) + total_len(2B) + nexthdr(1B) + daddr(xB) + saddr(xB) */ + /* If the length of the destination address and the source address is even, + * the length of the packet header must be odd. You need to add 1-byte alignment + * and 1-byte bitmap + */ + if (((head->daddr.bitlen / NIP_ADDR_BIT_LEN_8) + (head->saddr.bitlen / NIP_ADDR_BIT_LEN_8)) + % NIP_BYTE_ALIGNMENT != 0) { + head->hdr_buf[0] = NIP_UDP_BITMAP_1; + head->hdr_buf_pos = BITMAP1_OFFSET; + } else { + head->hdr_buf[0] = NIP_UDP_BITMAP_1_INC_2; + head->hdr_buf[1] = NIP_NODATA_BITMAP_2; + head->hdr_buf_pos = BITMAP2_OFFSET; + } +} + +static inline void _nip_hdr_encap_comm_bitmap(struct nip_hdr_encap *head) +{ + /* bitmap(1B) + ttl(1B) + nexthdr(1B) + daddr(xB) + saddr(xB) */ + /* If the length of the destination address and the source address is even, + * the length of the packet header must be odd. 
You need to add 1-byte alignment + * and 1-byte bitmap + */ + if (((head->daddr.bitlen / NIP_ADDR_BIT_LEN_8) + (head->saddr.bitlen / NIP_ADDR_BIT_LEN_8)) + % NIP_BYTE_ALIGNMENT != 0) { + head->hdr_buf[0] = NIP_NORMAL_BITMAP_1; + head->hdr_buf_pos = BITMAP1_OFFSET; + } else { + head->hdr_buf[0] = NIP_NORMAL_BITMAP_1_INC_2; + head->hdr_buf[1] = NIP_NODATA_BITMAP_2; + head->hdr_buf_pos = BITMAP2_OFFSET; + } +} + +#define NEWIP_BYTE_ALIGNMENT_ENABLE 1 // 0: disable; 1: enable + +void nip_hdr_udp_encap(struct nip_hdr_encap *head) +{ + /* Encapsulate the bitmap into the newIP packet header BUF */ +#if (NEWIP_BYTE_ALIGNMENT_ENABLE == 1) + _nip_hdr_encap_udp_bitmap(head); +#else + head->hdr_buf[0] = NIP_UDP_BITMAP_1; + head->hdr_buf_pos = 1; +#endif + + /* Encapsulate bitmap fields into newIP packet header BUF */ + _nip_hdr_ttl_encap(head); + _nip_hdr_nexthdr_encap(head); + _nip_hdr_daddr_encap(head); + _nip_hdr_saddr_encap(head); +} + +/* need update total len after this func, call nip_update_total_len */ +void nip_hdr_comm_encap(struct nip_hdr_encap *head) +{ + /* Encapsulate the bitmap into the newIP packet header BUF */ +#if (NEWIP_BYTE_ALIGNMENT_ENABLE == 1) + _nip_hdr_encap_comm_bitmap(head); +#else + head->hdr_buf[0] = NIP_NORMAL_BITMAP_1; + head->hdr_buf_pos = 1; +#endif + + /* Encapsulate bitmap fields into newIP packet header BUF */ + _nip_hdr_ttl_encap(head); + _nip_hdr_total_len_encap(head); /* ARP/TCP need include hdr total len */ + _nip_hdr_nexthdr_encap(head); + _nip_hdr_daddr_encap(head); + _nip_hdr_saddr_encap(head); +} + +#if (NEWIP_BYTE_ALIGNMENT_ENABLE == 1) // include bitmap2 +#define NIP_COMM_HDR_LEN_NOINCLUDE_ADDR 6 // include total len +#define NIP_UDP_HDR_LEN_NOINCLUDE_ADDR 4 // not include total len +#else +#define NIP_COMM_HDR_LEN_NOINCLUDE_ADDR 5 // include total len +#define NIP_UDP_HDR_LEN_NOINCLUDE_ADDR 3 // not include total len +#endif +/* bitmap1 + bitmap2 + TTL + total len + nexthd + daddr + saddr + * 1B 1B 1B 2B 1B 7B 7B = 20B + * 
NIP_HDR_MAX 20 + * V4 TCP 1448 + * NIP TCP 1430 + 30 = 1460 + */ +/* The length of the packet header is obtained according to the packet type, + * source ADDRESS, and destination address. + * If the packet does not carry the source address or destination address, fill in the blank + */ +int get_nip_hdr_len(enum NIP_HDR_TYPE hdr_type, + const struct nip_addr *saddr, + const struct nip_addr *daddr) +{ + int saddr_len = 0; + int daddr_len = 0; + enum NIP_HDR_TYPE base_len = hdr_type == NIP_HDR_UDP ? + NIP_UDP_HDR_LEN_NOINCLUDE_ADDR : + NIP_COMM_HDR_LEN_NOINCLUDE_ADDR; + + if (hdr_type >= NIP_HDR_TYPE_MAX) + return 0; + + if (saddr) { + saddr_len = get_nip_addr_len(saddr); + if (saddr_len == 0) + return 0; + } + + if (daddr) { + daddr_len = get_nip_addr_len(daddr); + if (daddr_len == 0) + return 0; + } + + return base_len + saddr_len + daddr_len; +} + diff --git a/newip/src/linux-5.10/drivers/net/bt/Makefile b/newip/src/linux-5.10/drivers/net/bt/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..74ae4156bc4d8c13c7fe3595f8e518678da596f7 --- /dev/null +++ b/newip/src/linux-5.10/drivers/net/bt/Makefile @@ -0,0 +1,3 @@ +# enable btdev: +# kernel/linux/linux-5.10/drivers/net/Makefile add obj-y += bt/ +obj-y += btdev.o diff --git a/newip/src/linux-5.10/drivers/net/bt/btdev.c b/newip/src/linux-5.10/drivers/net/bt/btdev.c new file mode 100644 index 0000000000000000000000000000000000000000..ce4292974044e5ebfbe90fefd4274f4538c4651f --- /dev/null +++ b/newip/src/linux-5.10/drivers/net/bt/btdev.c @@ -0,0 +1,1186 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. 
+ */ + +#include "btdev.h" + +static struct bt_drv *bt_drv; + +static int bt_seq_show(struct seq_file *m, void *v) +{ + struct bt_virnet *vnet = NULL; + + pr_devel("bt seq_show"); + seq_printf(m, "Total device: %d (bitmap: 0x%X) Ring size: %d\n", + bt_get_total_device(bt_drv), bt_drv->bitmap, + BT_RING_BUFFER_SIZE); + + list_for_each_entry(vnet, &bt_drv->devices_table->head, virnet_entry) { + seq_printf(m, "dev: %12s, interface: %5s, state: %12s, MTU: %4d\n", + bt_virnet_get_cdev_name(vnet), bt_virnet_get_ndev_name(vnet), + bt_virnet_get_state_rep(vnet), vnet->ndev->mtu); + seq_printf(m, "ring head: %4d, ring tail: %4d, packets num: %4d\n", + vnet->tx_ring->head, vnet->tx_ring->tail, + bt_virnet_get_ring_packets(vnet)); + } + + return OK; +} + +static int bt_proc_open(struct inode *inode, struct file *file) +{ + pr_devel("bt proc_open"); + return single_open(file, bt_seq_show, PDE_DATA(inode)); +} + +static struct proc_ops bt_proc_fops = { + .proc_open = bt_proc_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_release = single_release}; + +static int bt_io_file_open(struct inode *node, struct file *filp) +{ + struct bt_virnet *vnet = NULL; + int ret = OK; + + pr_devel("bt io file open called"); + + list_for_each_entry(vnet, &bt_drv->devices_table->head, virnet_entry) { + if (bt_virnet_get_cdev(vnet) == node->i_cdev) { + struct net_device *ndev; + + if ((filp->f_flags & O_ACCMODE) == O_RDONLY) { + if (unlikely(!atomic_dec_and_test(&vnet->io_file + ->read_open_limit))) { + atomic_inc(&vnet->io_file->read_open_limit); + pr_err("file %s has been opened for read twice already", + bt_virnet_get_cdev_name(vnet)); + return -EBUSY; + } + } else if ((filp->f_flags & O_ACCMODE) == O_WRONLY) { + if (unlikely(!atomic_dec_and_test(&vnet->io_file + ->write_open_limit))) { + atomic_inc(&vnet->io_file->write_open_limit); + pr_err("file %s has been opened for write twice already", + bt_virnet_get_cdev_name(vnet)); + return -EBUSY; + } + } else if ((filp->f_flags & 
O_ACCMODE) == O_RDWR) { + if (unlikely(!atomic_dec_and_test(&vnet->io_file + ->read_open_limit))) { + atomic_inc(&vnet->io_file->read_open_limit); + pr_err("file %s has been opened for read twice already", + bt_virnet_get_cdev_name(vnet)); + return -EBUSY; + } + + if (unlikely(!atomic_dec_and_test(&vnet->io_file + ->write_open_limit))) { + atomic_inc(&vnet->io_file->write_open_limit); + pr_err("file %s has been opened for write twice already", + bt_virnet_get_cdev_name(vnet)); + return -EBUSY; + } + } + + rtnl_lock(); + ndev = vnet->ndev; + if (unlikely(!(ndev->flags & IFF_UP))) { + ret = dev_change_flags(ndev, ndev->flags | IFF_UP, NULL); + if (unlikely(ret < 0)) { + rtnl_unlock(); + pr_err("bt dev_change_flags error: ret=%d", ret); + return -EBUSY; + } + } + rtnl_unlock(); + + SET_STATE(vnet, BT_VIRNET_STATE_CONNECTED); + filp->private_data = vnet; + return OK; + } + } + + return -EIO; +} + +static int bt_io_file_release(struct inode *node, struct file *filp) +{ + struct bt_virnet *vnet = filp->private_data; + + pr_devel("bt io file release called"); + + if ((filp->f_flags & O_ACCMODE) == O_RDONLY) { + atomic_inc(&vnet->io_file->read_open_limit); + } else if ((filp->f_flags & O_ACCMODE) == O_WRONLY) { + atomic_inc(&vnet->io_file->write_open_limit); + } else if ((filp->f_flags & O_ACCMODE) == O_RDWR) { + atomic_inc(&vnet->io_file->read_open_limit); + atomic_inc(&vnet->io_file->write_open_limit); + } + + SET_STATE(vnet, BT_VIRNET_STATE_DISCONNECTED); + + return OK; +} + +static ssize_t bt_io_file_read(struct file *filp, + char __user *buffer, + size_t size, loff_t *off) +{ + struct bt_virnet *vnet = filp->private_data; + ssize_t out_sz; + struct sk_buff *skb = NULL; + + pr_devel("bt io file read called"); + + while (unlikely(bt_ring_is_empty(vnet->tx_ring))) { + if (filp->f_flags & O_NONBLOCK) + return -EAGAIN; + + if (wait_event_interruptible(vnet->rx_queue, + !bt_ring_is_empty(vnet->tx_ring))) + return -ERESTARTSYS; + } + + skb = bt_ring_current(vnet->tx_ring); + 
out_sz = skb->len - MACADDR_LEN; + if (unlikely(out_sz > size)) { + pr_err("io file read: buffer too small: skb's len=%ld buffer's len=%ld", + (long)out_sz, (long)size); + return -EINVAL; + } + + bt_ring_consume(vnet->tx_ring); + if (copy_to_user(buffer, skb->data + MACADDR_LEN, out_sz)) { + pr_err("io file read: copy_to_user failed"); + return -EIO; + } + + dev_kfree_skb(skb); + skb = NULL; + + if (unlikely(netif_queue_stopped(vnet->ndev))) { + pr_devel("consume data: wake the queue"); + netif_wake_queue(vnet->ndev); + } + + return out_sz; +} + +static ssize_t bt_io_file_write(struct file *filp, + const char __user *buffer, + size_t size, loff_t *off) +{ + struct bt_virnet *vnet = filp->private_data; + struct sk_buff *skb = NULL; + int ret; + int len; + ssize_t in_sz; + + pr_devel("bt io file write called: %lu bytes", size); + in_sz = size + MACADDR_LEN; + + skb = netdev_alloc_skb(bt_virnet_get_ndev(vnet), in_sz + 2); + if (unlikely(!skb)) + return -ENOMEM; + + skb_reserve(skb, 2); + skb_put(skb, in_sz); + + memset(skb->data, 0, MACADDR_LEN); + if (copy_from_user(skb->data + MACADDR_LEN, buffer, size)) + return -EIO; + + len = skb->len; + skb->dev = bt_virnet_get_ndev(vnet); + skb->protocol = eth_type_trans(skb, bt_virnet_get_ndev(vnet)); + ret = netif_rx_ni(skb); + + if (ret == NET_RX_SUCCESS) { + vnet->ndev->stats.rx_packets++; + vnet->ndev->stats.rx_bytes += len; + } else { + vnet->ndev->stats.rx_errors++; + vnet->ndev->stats.rx_dropped++; + } + + return size; +} + +static int bt_virnet_change_mtu(struct net_device *dev, int mtu) +{ + pr_devel("bt virnet change mtu called"); + dev->mtu = mtu; + return OK; +} + +static int bt_set_mtu(struct net_device *dev, int mtu) +{ + int err = OK; + + pr_devel("bt set_mtu called"); + rtnl_lock(); + err = dev_set_mtu(dev, mtu); + if (err < 0) + pr_err("bt set_mtu failed to changed MTU to %d, err:%d", mtu, err); + + rtnl_unlock(); + + return err; +} + +static int bt_cmd_enable_virnet(struct bt_virnet *vnet, unsigned long arg) 
+{ + int ret; + + WARN_ON(!vnet); + + if (unlikely(vnet->state != BT_VIRNET_STATE_DISABLED)) { + pr_err("bt enable can only be set at DISABLED state"); + return -EINVAL; // enable failed + } + + rtnl_lock(); + ret = dev_change_flags(vnet->ndev, vnet->ndev->flags | IFF_UP, NULL); + if (unlikely(ret < 0)) { + rtnl_unlock(); + pr_err("bt cmd enable virnet: dev_change_flags error: ret=%d", ret); + return -EIO; + } + rtnl_unlock(); + + SET_STATE(vnet, BT_VIRNET_STATE_CONNECTED); + return OK; +} + +static int bt_cmd_disable_virnet(struct bt_virnet *vnet, unsigned long arg) +{ + int ret; + + WARN_ON(!vnet); + if (unlikely(vnet->state != BT_VIRNET_STATE_CONNECTED)) { + pr_err("bt disable can only be set at CONNECTED state"); + return -EINVAL; + } + + rtnl_lock(); + ret = dev_change_flags(vnet->ndev, vnet->ndev->flags & ~IFF_UP, NULL); + if (unlikely(ret < 0)) { + rtnl_unlock(); + pr_err("bt cmd disable virnet: dev_change_flags error: ret=%d", ret); + return -EIO; + } + rtnl_unlock(); + + SET_STATE(vnet, BT_VIRNET_STATE_DISABLED); + return OK; +} + +static int bt_cmd_change_mtu(struct bt_virnet *vnet, unsigned long arg) +{ + int mtu; + int ret; + + WARN_ON(!vnet); + + if (unlikely(get_user(mtu, (int __user *)arg))) { + pr_err("get_user failed"); + return -EIO; + } + + ret = bt_set_mtu(vnet->ndev, mtu); + + if (unlikely(ret < 0)) { + pr_err("bt_dev_ioctl: changed mtu failed"); + return -EIO; + } + return OK; +} + +static int bt_cmd_peek_packet(struct bt_virnet *vnet, unsigned long arg) +{ + struct sk_buff *skb = NULL; + + pr_devel("bt peek packet called"); + + if (unlikely(bt_ring_is_empty(vnet->tx_ring))) { + pr_err("bt peek packet ring is empty"); + return -EAGAIN; + } + + skb = bt_ring_current(vnet->tx_ring); + if (unlikely(put_user(skb->len - MACADDR_LEN, (int __user *)arg))) { + pr_err("put_user failed"); + return -EIO; + } + + return OK; +} + +static long bt_io_file_ioctl(struct file *filep, + unsigned int cmd, + unsigned long arg) +{ + long ret; + + struct bt_virnet 
*vnet = filep->private_data; + + pr_devel("bt io file ioctl called"); + switch (cmd) { + case BT_IOC_CHANGE_MTU: + ret = bt_cmd_change_mtu(vnet, arg); + break; + case BT_IOC_ENABLE: + ret = bt_cmd_enable_virnet(vnet, arg); + break; + case BT_IOC_DISABLE: + ret = bt_cmd_disable_virnet(vnet, arg); + break; + case BT_IOC_PEEK_PACKET: + ret = bt_cmd_peek_packet(vnet, arg); + break; + default: + pr_err("not a valid cmd"); + return -ENOIOCTLCMD; + } + + return ret; +} + +static unsigned int bt_io_file_poll(struct file *filp, poll_table *wait) +{ + struct bt_virnet *vnet = filp->private_data; + unsigned int mask = 0; + + poll_wait(filp, &vnet->rx_queue, wait); + poll_wait(filp, &vnet->tx_queue, wait); + + if (!bt_ring_is_empty(vnet->tx_ring)) // readable + mask |= POLLIN | POLLRDNORM; + + if (!bt_ring_is_full(vnet->tx_ring)) // writable + mask |= POLLOUT | POLLWRNORM; + + return mask; +} + +static const struct file_operations bt_io_file_ops = { + .owner = THIS_MODULE, + .open = bt_io_file_open, + .release = bt_io_file_release, + .read = bt_io_file_read, + .write = bt_io_file_write, + .poll = bt_io_file_poll, + .unlocked_ioctl = bt_io_file_ioctl, + .compat_ioctl = bt_io_file_ioctl}; + +static int bt_mng_file_open(struct inode *node, struct file *filp) +{ + pr_devel("bt mng file open called"); + + if (unlikely(!atomic_dec_and_test(&bt_drv->mng_file->open_limit))) { + atomic_inc(&bt_drv->mng_file->open_limit); + pr_err("file %s has been opened already", + bt_drv->mng_file->bt_cdev->dev_filename); + return -EBUSY; + } + filp->private_data = bt_drv; + return OK; +} + +static int bt_mng_file_release(struct inode *node, struct file *filp) +{ + struct bt_drv *drv = filp->private_data; + + pr_devel("bt mng file release called"); + + atomic_inc(&drv->mng_file->open_limit); + return OK; +} + +static int bt_cmd_create_virnet(struct bt_drv *bt_mng, unsigned long arg) +{ + int id; + int ret; + struct bt_virnet *vnet = NULL; + struct bt_uioc_args vp; + unsigned long size; + + 
WARN_ON(!bt_mng); + + mutex_lock(&bt_mng->bitmap_lock); + id = bt_get_unused_id(&bt_mng->bitmap); + pr_devel("create io_file: get unused bit: %d", id); + + if (unlikely(bt_mng->devices_table->num == BT_VIRNET_MAX_NUM)) { + pr_err("reach the limit of max virnets"); + mutex_unlock(&bt_mng->bitmap_lock); + return -EIO; + } + vnet = bt_virnet_create(bt_mng, id); + if (unlikely(!vnet)) { + pr_err("bt virnet create failed"); + mutex_unlock(&bt_mng->bitmap_lock); + return -EIO; + } + + vnet->bt_table_head = bt_mng->devices_table; + ret = bt_table_add_device(bt_mng->devices_table, vnet); + if (unlikely(ret < 0)) { + pr_err("bt table add device failed: ret=%d", ret); + bt_virnet_destroy(vnet); + mutex_unlock(&bt_mng->bitmap_lock); + return -EIO; // failed to create + } + + bt_set_bit(&bt_mng->bitmap, id); + mutex_unlock(&bt_mng->bitmap_lock); + + memcpy(vp.ifa_name, bt_virnet_get_ndev_name(vnet), + sizeof(vp.ifa_name)); + memcpy(vp.cfile_name, bt_virnet_get_cdev_name(vnet), + sizeof(vp.cfile_name)); + + mdelay(DELAY_100_MS); + + size = copy_to_user((void __user *)arg, &vp, sizeof(struct bt_uioc_args)); + if (unlikely(size)) { + pr_err("copy_to_user failed: left size=%lu", size); + return -EIO; + } + return OK; +} + +static int bt_cmd_delete_virnet(struct bt_drv *bt_mng, unsigned long arg) +{ + int id; + struct bt_virnet *vnet = NULL; + struct bt_uioc_args vp; + unsigned long size; + + WARN_ON(!bt_mng); + + size = copy_from_user(&vp, (void __user *)arg, + sizeof(struct bt_uioc_args)); + if (unlikely(size)) { + pr_err("copy_from_user failed: left size=%lu", size); + return -EIO; + } + + vnet = bt_table_find(bt_mng->devices_table, vp.ifa_name); + if (unlikely(!vnet)) { + pr_err("virnet: %s cannot be found in bt table", vp.ifa_name); + return -EIO; // not found + } + + mutex_lock(&bt_mng->bitmap_lock); + id = MINOR(bt_virnet_get_cdev_number(vnet)); + bt_table_remove_device(bt_mng->devices_table, vnet); + bt_virnet_destroy(vnet); + bt_clear_bit(&bt_mng->bitmap, id); + 
mutex_unlock(&bt_mng->bitmap_lock); + return OK; +} + +static int bt_cmd_query_all_virnets(struct bt_drv *bt_mng, unsigned long arg) +{ + WARN_ON(!bt_mng); + if (unlikely(put_user(bt_mng->bitmap, (u32 *)arg))) { + pr_err("put_user failed"); + return -EIO; + } + return OK; +} + +static int bt_cmd_delete_all_virnets(struct bt_drv *bt_mng, unsigned long arg) +{ + WARN_ON(!bt_mng); + bt_table_delete_all(bt_mng); + return OK; +} + +static long bt_mng_file_ioctl(struct file *filep, + unsigned int cmd, + unsigned long arg) +{ + int ret; + + struct bt_drv *bt_mng = filep->private_data; + + pr_devel("bt mng file ioctl called"); + switch (cmd) { + case BT_IOC_CREATE: + ret = bt_cmd_create_virnet(bt_mng, arg); + break; + case BT_IOC_DELETE: + ret = bt_cmd_delete_virnet(bt_mng, arg); + break; + case BT_IOC_QUERY_ALL: + ret = bt_cmd_query_all_virnets(bt_mng, arg); + break; + case BT_IOC_DELETE_ALL: + ret = bt_cmd_delete_all_virnets(bt_mng, arg); + break; + default: + pr_err("not a valid command"); + return -ENOIOCTLCMD; + } + return ret; +} + +static const struct file_operations bt_mng_file_ops = { + .owner = THIS_MODULE, + .open = bt_mng_file_open, + .release = bt_mng_file_release, + .unlocked_ioctl = bt_mng_file_ioctl, + .compat_ioctl = bt_mng_file_ioctl}; + +static netdev_tx_t bt_virnet_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + int ret; + struct bt_virnet *vnet = NULL; + int len = skb->len; + + pr_alert("alert: bt virnet_xmit: called"); + vnet = bt_table_find(bt_drv->devices_table, dev->name); + WARN_ON(!vnet); + + ret = bt_virnet_produce_data(vnet, (void *)skb); + + if (unlikely(ret < 0)) { + pr_devel("virnet xmit: produce data failed: ring is full, need to stop queue"); + netif_stop_queue(vnet->ndev); + return NETDEV_TX_BUSY; + } + + vnet->ndev->stats.tx_packets++; + vnet->ndev->stats.tx_bytes += len; + + return NETDEV_TX_OK; +} + +static const struct net_device_ops bt_virnet_ops = { + .ndo_start_xmit = bt_virnet_xmit, + .ndo_change_mtu = 
bt_virnet_change_mtu}; + +static struct bt_table *bt_table_init(void) +{ + struct bt_table *tbl = kmalloc(sizeof(*tbl), GFP_KERNEL); + + if (unlikely(!tbl)) { + pr_err("alloc struct bt_table failed: oom"); + return NULL; + } + + INIT_LIST_HEAD(&tbl->head); + mutex_init(&tbl->tbl_lock); + tbl->num = 0; + return tbl; +} + +static int bt_table_add_device(struct bt_table *tbl, struct bt_virnet *vn) +{ + struct bt_virnet *vnet = NULL; + + WARN_ON(!tbl); + WARN_ON(!vn); + + vnet = bt_table_find(tbl, bt_virnet_get_ndev_name(vn)); + if (unlikely(vnet)) { + pr_err("found duplicated device"); + return -ENOIOCTLCMD; // duplicated + } + + mutex_lock(&tbl->tbl_lock); + list_add_tail(&vn->virnet_entry, &tbl->head); + ++tbl->num; + mutex_unlock(&tbl->tbl_lock); + + return OK; +} + +static void bt_table_remove_device(struct bt_table *tbl, struct bt_virnet *vn) +{ + WARN_ON(!tbl); + WARN_ON(!vn); + mutex_lock(&tbl->tbl_lock); + list_del(&vn->virnet_entry); + --tbl->num; + mutex_unlock(&tbl->tbl_lock); +} + +static struct bt_virnet *bt_table_find(struct bt_table *tbl, const char *ifa_name) +{ + struct bt_virnet *vnet = NULL; + + WARN_ON(!tbl); + + if (unlikely(!ifa_name)) + return NULL; + + list_for_each_entry(vnet, &tbl->head, virnet_entry) { + if (!strcmp(bt_virnet_get_ndev_name(vnet), ifa_name)) + return vnet; + } + return NULL; +} + +static void __bt_table_delete_all(struct bt_drv *drv) +{ + u32 id; + struct bt_virnet *vnet = NULL, *tmp_vnet = NULL; + + WARN_ON(!drv); + list_for_each_entry_safe(vnet, + tmp_vnet, + &drv->devices_table->head, + virnet_entry) { + id = MINOR(bt_virnet_get_cdev_number(vnet)); + list_del(&vnet->virnet_entry); + bt_clear_bit(&drv->bitmap, id); + bt_virnet_destroy(vnet); + } + drv->devices_table->num = 0; +} + +static void bt_table_delete_all(struct bt_drv *bt_drv) +{ + WARN_ON(!bt_drv); + mutex_lock(&bt_drv->bitmap_lock); + mutex_lock(&bt_drv->devices_table->tbl_lock); + + __bt_table_delete_all(bt_drv); + + 
mutex_unlock(&bt_drv->devices_table->tbl_lock); + mutex_unlock(&bt_drv->bitmap_lock); +} + +static void bt_table_destroy(struct bt_drv *bt_drv) +{ + WARN_ON(!bt_drv); + __bt_table_delete_all(bt_drv); + kfree(bt_drv->devices_table); + bt_drv->devices_table = NULL; +} + +static struct bt_ring *__bt_ring_create(int size) +{ + struct bt_ring *ring = kmalloc(sizeof(*ring), GFP_KERNEL); + + if (unlikely(!ring)) { + pr_err("ring create alloc failed: oom"); + return NULL; + } + + if (unlikely(size < 0)) + return NULL; + + ring->head = 0; + ring->tail = 0; + ring->data = kmalloc_array(size, sizeof(void *), GFP_KERNEL); + if (unlikely(!ring->data)) { + pr_err("ring create alloc data failed: oom"); + kfree(ring); + return NULL; + } + ring->size = size; + + return ring; +} + +static struct bt_ring *bt_ring_create(void) +{ + return __bt_ring_create(BT_RING_BUFFER_SIZE); +} + +static int bt_ring_is_empty(const struct bt_ring *ring) +{ + WARN_ON(!ring); + return ring->head == ring->tail; +} + +static int bt_ring_is_full(const struct bt_ring *ring) +{ + WARN_ON(!ring); + return (ring->head + 1) % ring->size == ring->tail; +} + +static void bt_ring_produce(struct bt_ring *ring, void *data) +{ + WARN_ON(!ring); + WARN_ON(!data); + + smp_mb(); // Make sure the read and write order is correct + ring->data[ring->head] = data; + ring->head = (ring->head + 1) % ring->size; + smp_wmb(); // Make sure the write order is correct +} + +static void *bt_ring_current(struct bt_ring *ring) +{ + void *data = NULL; + + WARN_ON(!ring); + data = ring->data[ring->tail]; + return data; +} + +static void bt_ring_consume(struct bt_ring *ring) +{ + WARN_ON(!ring); + + smp_rmb(); // Make sure the read order is correct + ring->tail = (ring->tail + 1) % ring->size; + smp_mb(); // Make sure the read and write order is correct +} + +static void bt_ring_destroy(struct bt_ring *ring) +{ + WARN_ON(!ring); + kfree(ring->data); + kfree(ring); +} + +static int bt_virnet_produce_data(struct bt_virnet *dev, void 
*data) +{ + WARN_ON(!dev); + WARN_ON(!data); + if (unlikely(bt_ring_is_full(dev->tx_ring))) { + pr_devel("ring is full"); + return -ENFILE; + } + + smp_wmb(); // Make sure the write order is correct + bt_ring_produce(dev->tx_ring, data); + smp_wmb(); // Make sure twrite order is correct + + wake_up(&dev->rx_queue); + return OK; +} + +/** + * register all the region + */ +static int bt_cdev_region_init(int major, int count) +{ + return register_chrdev_region(MKDEV(major, 0), count, "bt"); +} + +static struct class *bt_dev_class_create(void) +{ + struct class *cls = class_create(THIS_MODULE, "bt"); + + if (IS_ERR(cls)) { + pr_err("create struct class failed"); + return NULL; + } + return cls; +} + +static void bt_dev_class_destroy(struct class *cls) +{ + WARN_ON(!cls); + class_destroy(cls); +} + +static int bt_cdev_device_create(struct bt_cdev *dev, + struct class *cls, + u32 id) +{ + struct device *device = NULL; + dev_t devno = MKDEV(BT_DEV_MAJOR, id); + + WARN_ON(!dev); + WARN_ON(!cls); + + pr_devel("bt cdev_device_create: id=%d", id); + + dev->bt_class = cls; + + device = device_create(cls, NULL, devno, NULL, "%s%u", BT_DEV_NAME_PREFIX, id); + if (IS_ERR(device)) { + pr_err("create device failed"); + return -EIO; + } + snprintf(dev->dev_filename, sizeof(dev->dev_filename), "%s%u", BT_DEV_PATH_PREFIX, id); + return OK; +} + +static void bt_cdev_device_destroy(struct bt_cdev *dev) +{ + WARN_ON(!dev); + device_destroy(dev->bt_class, dev->cdev->dev); +} + +static struct bt_cdev *bt_cdev_create(const struct file_operations *ops, + u32 id) +{ + int ret; + int minor = id; + struct bt_cdev *dev = NULL; + struct cdev *chrdev = NULL; + + WARN_ON(!ops); + + pr_devel("bt cdev create called"); + + dev = kmalloc(sizeof(*dev), GFP_KERNEL); + if (unlikely(!dev)) { + pr_err("bt cdev_create alloc failed: oom"); + goto err1; + } + + chrdev = cdev_alloc(); + if (unlikely(!chrdev)) { + pr_err("bt cdev_create: cdev_alloc() failed: oom"); + goto err2; + } + + cdev_init(chrdev, ops); + 
dev->cdev = chrdev; + + ret = cdev_add(chrdev, MKDEV(BT_DEV_MAJOR, minor), 1); + if (unlikely(ret < 0)) { + pr_err("cdev add failed"); + goto err3; + } + + if (unlikely(bt_cdev_device_create(dev, bt_drv->bt_class, minor) < 0)) { + pr_err("bt cdev_device_create failed"); + goto err3; + } + return dev; + +err3: + cdev_del(chrdev); + +err2: + kfree(dev); + +err1: + return NULL; +} + +/** + * delete one char device + */ +static void bt_cdev_delete(struct bt_cdev *bt_cdev) +{ + dev_t devno; + + WARN_ON(!bt_cdev); + if (unlikely(bt_cdev)) { + devno = bt_cdev->cdev->dev; + + unregister_chrdev(MAJOR(devno), bt_cdev->dev_filename + 5); + bt_cdev_device_destroy(bt_cdev); + + cdev_del(bt_cdev->cdev); + } else { + pr_err("bt cdev_delete: cdev is null"); + return; + } +} + +/** + * create and add data char device + */ +static struct bt_io_file *bt_create_io_file(u32 id) +{ + struct bt_io_file *file = kmalloc(sizeof(*file), GFP_KERNEL); + + if (unlikely(!file)) { + pr_err("bt create_io_file alloc failed: oom"); + return NULL; + } + file->bt_cdev = bt_cdev_create(&bt_io_file_ops, id); + if (unlikely(!file->bt_cdev)) { + pr_err("bt create_io_file: create cdev failed"); + kfree(file); + return NULL; + } + atomic_set(&file->read_open_limit, 1); + atomic_set(&file->write_open_limit, 1); + return file; +} + +static struct bt_io_file **bt_create_io_files(void) +{ + int i; + struct bt_io_file **all_files = kmalloc(BT_VIRNET_MAX_NUM * sizeof(struct bt_io_file *), + GFP_KERNEL); + + if (unlikely(!all_files)) { + pr_err("bt create_io_files alloc failed: oom"); + return NULL; + } + for (i = 0; i < BT_VIRNET_MAX_NUM; ++i) + all_files[i] = bt_create_io_file(i + 1); + + return all_files; +} + +static void bt_delete_io_file(struct bt_io_file *file) +{ + if (unlikely(!file)) + return; + + bt_cdev_delete(file->bt_cdev); + kfree(file); +} + +static void bt_delete_io_files(struct bt_drv *bt_mng) +{ + int i; + + for (i = 0; i < BT_VIRNET_MAX_NUM; ++i) + bt_delete_io_file(bt_mng->io_files[i]); + + 
kfree(bt_mng->io_files); + bt_mng->io_files = NULL; +} + +/** + * create and add management char device + */ +static struct bt_mng_file *bt_create_mng_file(int id) +{ + struct bt_mng_file *file = kmalloc(sizeof(*file), GFP_KERNEL); + + if (unlikely(!file)) { + pr_err("bt create_mng_file: oom"); + return NULL; + } + + file->bt_cdev = bt_cdev_create(&bt_mng_file_ops, id); + if (unlikely(!file->bt_cdev)) { + pr_err("bt create_mng_file: create cdev failed"); + kfree(file); + return NULL; + } + + atomic_set(&file->open_limit, 1); + + return file; +} + +static void bt_delete_mng_file(struct bt_mng_file *file) +{ + if (unlikely(!file)) + return; + + bt_cdev_delete(file->bt_cdev); + kfree(file); +} + +/** + * unregister the region + */ +static void bt_cdev_region_destroy(int major, int count) +{ + return unregister_chrdev_region(MKDEV(major, 0), count); +} + +/** + * create one net device + */ +static struct net_device *bt_net_device_create(u32 id) +{ + struct net_device *ndev = NULL; + int err; + char ifa_name[IFNAMSIZ]; + + snprintf(ifa_name, sizeof(ifa_name), "%s%d", BT_VIRNET_NAME_PREFIX, id); + ndev = alloc_netdev(0, ifa_name, NET_NAME_UNKNOWN, ether_setup); + if (unlikely(!ndev)) { + pr_err("alloc_netdev failed"); + return NULL; + } + + ndev->netdev_ops = &bt_virnet_ops; + ndev->flags |= IFF_NOARP; + ndev->flags &= ~IFF_BROADCAST & ~IFF_MULTICAST; + ndev->min_mtu = 1; + ndev->max_mtu = ETH_MAX_MTU; + + err = register_netdev(ndev); + if (unlikely(err)) { + pr_err("create net_device failed"); + free_netdev(ndev); + return NULL; + } + + return ndev; +} + +/** + * destroy one net device + */ +static void bt_net_device_destroy(struct net_device *dev) +{ + WARN_ON(!dev); + unregister_netdev(dev); + free_netdev(dev); +} + +static struct bt_io_file *bt_get_io_file(struct bt_drv *drv, int id) +{ + WARN_ON(id < 1); + WARN_ON(id > BT_VIRNET_MAX_NUM); + return drv->io_files[id - 1]; +} + +/** + * create an virtual net_device + */ +static struct bt_virnet *bt_virnet_create(struct 
bt_drv *bt_mng, u32 id) +{ + struct bt_virnet *vnet = kmalloc(sizeof(*vnet), GFP_KERNEL); + + if (unlikely(!vnet)) { + pr_err("error: bt_virnet init failed"); + goto failure1; + } + + vnet->tx_ring = bt_ring_create(); + if (unlikely(!vnet->tx_ring)) { + pr_err("create ring failed"); + goto failure2; + } + + vnet->ndev = bt_net_device_create(id); + if (unlikely(!vnet->ndev)) { + pr_err("create net device failed"); + goto failure3; + } + + vnet->io_file = bt_get_io_file(bt_mng, id); + if (unlikely(!vnet->io_file)) { + pr_err("create cdev failed"); + goto failure4; + } + + init_waitqueue_head(&vnet->rx_queue); + init_waitqueue_head(&vnet->tx_queue); + + SET_STATE(vnet, BT_VIRNET_STATE_CREATED); + return vnet; + +failure4: + bt_net_device_destroy(vnet->ndev); + +failure3: + bt_ring_destroy(vnet->tx_ring); + +failure2: + kfree(vnet); + +failure1: + return NULL; +} + +static void bt_virnet_destroy(struct bt_virnet *vnet) +{ + WARN_ON(!vnet); + bt_ring_destroy(vnet->tx_ring); + bt_net_device_destroy(vnet->ndev); + + SET_STATE(vnet, BT_VIRNET_STATE_DELETED); + + kfree(vnet); +} + +static void bt_module_release(void) +{ + bt_table_destroy(bt_drv); + bt_delete_io_files(bt_drv); + bt_delete_mng_file(bt_drv->mng_file); + bt_dev_class_destroy(bt_drv->bt_class); + bt_cdev_region_destroy(BT_DEV_MAJOR, BT_VIRNET_MAX_NUM); + + kfree(bt_drv); + bt_drv = NULL; + remove_proc_entry("bt_info_proc", NULL); +} + +/** + * module init function + */ +static int __init bt_module_init(void) +{ + int mid; + struct proc_dir_entry *entry = NULL; + + pr_devel("bt module_init called"); + bt_drv = kmalloc(sizeof(*bt_drv), GFP_KERNEL); + if (unlikely(!bt_drv)) { + pr_err("module init: alloc struct bt_drv failed: oom"); + goto failure1; + } + + if (unlikely(bt_cdev_region_init(BT_DEV_MAJOR, BT_VIRNET_MAX_NUM) < 0)) { + pr_err("bt_cdev_region_init: failed"); + goto failure2; + } + + bt_drv->devices_table = bt_table_init(); + if (unlikely(!bt_drv->devices_table)) { + pr_err("bt_table_init(): failed"); + 
goto failure2; + } + + bt_drv->bt_class = bt_dev_class_create(); + if (IS_ERR(bt_drv->bt_class)) { + pr_err("class create failed"); + goto failure3; + } + + bt_drv->io_files = bt_create_io_files(); + + mutex_init(&bt_drv->bitmap_lock); + bt_drv->bitmap = 0; + + mutex_lock(&bt_drv->bitmap_lock); + mid = bt_get_unused_id(&bt_drv->bitmap); + pr_devel("create mng_file: get unused bit: %d", mid); + + bt_drv->mng_file = bt_create_mng_file(mid); + if (unlikely(!bt_drv->mng_file)) { + pr_err("bt_ctrl_cdev_init failed"); + mutex_unlock(&bt_drv->bitmap_lock); + goto failure4; + } + bt_set_bit(&bt_drv->bitmap, mid); + mutex_unlock(&bt_drv->bitmap_lock); + + entry = proc_create_data("bt_info_proc", 0, NULL, &bt_proc_fops, NULL); + if (unlikely(!entry)) { + pr_err("create proc data failed"); + goto failure5; + } + + return OK; + +failure5: + bt_delete_mng_file(bt_drv->mng_file); + +failure4: + bt_dev_class_destroy(bt_drv->bt_class); + +failure3: + bt_table_destroy(bt_drv); + +failure2: + kfree(bt_drv); + +failure1: + return -1; +} + +module_init(bt_module_init); +module_exit(bt_module_release); +MODULE_LICENSE("GPL"); diff --git a/newip/src/linux-5.10/drivers/net/bt/btdev.h b/newip/src/linux-5.10/drivers/net/bt/btdev.h new file mode 100644 index 0000000000000000000000000000000000000000..4c4d9654d42f261b52f907071f9a62607f5487d5 --- /dev/null +++ b/newip/src/linux-5.10/drivers/net/bt/btdev.h @@ -0,0 +1,252 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. 
+ */ + +#ifndef _BTDEV_H_ +#define _BTDEV_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* must include btdev_user.h first before any macro definition */ +#include "btdev_user.h" + +#define OK 0 +#define DELAY_100_MS 100 +#define MACADDR_LEN (2 * ETH_ALEN) + +#define BT_DEV_MAJOR 125 +#define BT_DEV_MINOR 0 +#define BT_RING_BUFFER_SIZE 4096 +#define STRTOLL_BASE 10 +#define BT_DEV_ID_OFFSET (sizeof(BT_DEV_PATH_PREFIX) - 1) +#define BT_STATISTIC_KTIME_MAX ULONG_MAX + +/** + * for debug + */ +#define DEBUG + +/** + * ring buffer + */ +struct bt_ring { + u32 head; + u32 tail; + u32 size; + void **data; +}; + +/** + * one char device + */ +struct bt_cdev { + struct cdev *cdev; + struct class *bt_class; + char dev_filename[BT_PATHNAME_MAX]; +}; + +struct bt_mng_file { + struct bt_cdev *bt_cdev; + atomic_t open_limit; +}; + +struct bt_io_file { + struct bt_cdev *bt_cdev; + atomic_t read_open_limit; + atomic_t write_open_limit; +}; + +/** + * virnet list + */ +struct bt_table { + struct list_head head; + struct mutex tbl_lock; // lock for table + u32 num; +}; + +/** + * bt virnet state + */ +enum bt_virnet_state { + BT_VIRNET_STATE_CREATED, + BT_VIRNET_STATE_CONNECTED, + BT_VIRNET_STATE_DISCONNECTED, + BT_VIRNET_STATE_DISABLED, + BT_VIRNET_STATE_DELETED, + BT_VIRNET_STAET_NUM +}; + +/** + * one virnet device + */ +struct bt_virnet { + struct bt_ring *tx_ring; + struct bt_io_file *io_file; + struct net_device *ndev; + struct list_head virnet_entry; + struct bt_table *bt_table_head; + enum bt_virnet_state state; + struct semaphore sem; + wait_queue_head_t rx_queue, tx_queue; +}; + +/** + * instance of the module + */ +struct bt_drv { + struct bt_table *devices_table; + struct bt_mng_file *mng_file; + struct bt_io_file **io_files; + u32 bitmap; + struct 
mutex bitmap_lock; // lock for bitmap + struct class *bt_class; +}; + +/** + * state to string + */ +static const char *bt_virnet_state_rep[BT_VIRNET_STAET_NUM] = { + "CREATED", + "CONNECTED", + "DISCONNECTED", + "DISABLED", + "ENABLED"}; + +/** + * inline functions + */ +static inline int bt_get_unused_id(const u32 *bitmap) +{ + int i; + + WARN_ON(!bitmap); + for (i = 0; i < BT_VIRNET_MAX_NUM + 1; ++i) { + if (!(*bitmap & (1 << i))) + return i; + } + return -1; // all used +} + +static inline void bt_set_bit(u32 *bitmap, u32 idx) +{ + WARN_ON(!bitmap); + *bitmap |= (1 << idx); +} + +static inline void bt_clear_bit(u32 *bitmap, u32 idx) +{ + WARN_ON(!bitmap); + *bitmap &= ~(1 << idx); +} + +#define SET_STATE(vn, st) bt_virnet_set_state(vn, st) +static inline void bt_virnet_set_state(struct bt_virnet *vn, + enum bt_virnet_state state) +{ + WARN_ON(!vn); + vn->state = state; +} + +static inline const struct cdev *bt_virnet_get_cdev(const struct bt_virnet *vn) +{ + WARN_ON(!vn); + return vn->io_file->bt_cdev->cdev; +} + +static inline const dev_t bt_virnet_get_cdev_number(const struct bt_virnet *vn) +{ + WARN_ON(!vn); + return vn->io_file->bt_cdev->cdev->dev; +} + +static inline const char *bt_virnet_get_cdev_name(const struct bt_virnet *vn) +{ + WARN_ON(!vn); + return vn->io_file->bt_cdev->dev_filename; +} + +static inline struct net_device *bt_virnet_get_ndev(const struct bt_virnet *vn) +{ + WARN_ON(!vn); + return vn->ndev; +} + +static inline const char *bt_virnet_get_ndev_name(const struct bt_virnet *vn) +{ + WARN_ON(!vn); + return vn->ndev->name; +} + +static inline const char *bt_virnet_get_state_rep(const struct bt_virnet *vn) +{ + WARN_ON(!vn); + return bt_virnet_state_rep[vn->state]; +} + +static inline int bt_get_total_device(const struct bt_drv *bt_drv) +{ + WARN_ON(!bt_drv); + return bt_drv->devices_table->num; +} + +static inline int bt_virnet_get_ring_packets(const struct bt_virnet *vn) +{ + int packets = 0; + + WARN_ON(!vn); + packets = 
vn->tx_ring->head - vn->tx_ring->tail; + if (unlikely(packets < 0)) + packets += BT_RING_BUFFER_SIZE; + + return packets; +} + +static struct bt_table *bt_table_init(void); +static int bt_table_add_device(struct bt_table *tbl, struct bt_virnet *vn); +static void bt_table_remove_device(struct bt_table *tbl, struct bt_virnet *vn); +static void bt_table_delete_all(struct bt_drv *bt_drv); +static struct bt_virnet *bt_table_find(struct bt_table *tbl, const char *ifa_name); +static void bt_table_destroy(struct bt_drv *bt_drv); +static void bt_delete_io_files(struct bt_drv *bt_mng); +static struct bt_io_file **bt_create_io_files(void); + +static struct bt_ring *bt_ring_create(void); +static int bt_ring_is_empty(const struct bt_ring *ring); +static int bt_ring_is_full(const struct bt_ring *ring); +static void *bt_ring_current(struct bt_ring *ring); +static void bt_ring_produce(struct bt_ring *ring, void *data); +static void bt_ring_consume(struct bt_ring *ring); +static void bt_ring_destroy(struct bt_ring *ring); + +static int bt_virnet_produce_data(struct bt_virnet *dev, void *data); +static struct bt_virnet *bt_virnet_create(struct bt_drv *bt_mng, u32 id); +static void bt_virnet_destroy(struct bt_virnet *vnet); + +#endif diff --git a/newip/src/linux-5.10/drivers/net/bt/btdev_user.h b/newip/src/linux-5.10/drivers/net/bt/btdev_user.h new file mode 100644 index 0000000000000000000000000000000000000000..22630db7b1327884279db1d47a926676cbc66d88 --- /dev/null +++ b/newip/src/linux-5.10/drivers/net/bt/btdev_user.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. 
+ */ + +#ifndef _BTDEV_USER_H_ +#define _BTDEV_USER_H_ + +#include +#include + +#define BT_VIRNET_NAME_PREFIX "btn" +#define BT_DEV_NAME_PREFIX "btdev" +#define BT_DEV_PATH_PREFIX "/dev/" BT_DEV_NAME_PREFIX + +#define BT_DEV_PATH(idx) (BT_DEV_PATH_PREFIX#idx) +#define BT_DEV_NAME(idx) (BT_DEV_NAME_PREFIX#idx) + +#define BT_DEV_NAME_MNG_FILE BT_DEV_NAME(0) +#define BT_DEV_PATH_MNG_FILE BT_DEV_PATH(0) +#define BT_DEV_NAME_IO_FILE(idx) BT_DEV_NAME(idx) +#define BT_DEV_PATH_IO_FILE(idx) BT_DEV_PATH(idx) +#define BT_VIRNET_NAME(idx) (BT_VIRNET_NAME_PREFIX#idx) + +#define BT_PATHNAME_MAX 256 +#define BT_VIRNET_MAX_NUM 16 +#define BT_VIRNET_DATA_HEAD_LEN 2 + +/** + * ioctl cmd + */ +#define BT_IOC_CREATE _IO('b', 1) +#define BT_IOC_DELETE _IO('b', 2) +#define BT_IOC_CHANGE_MTU _IO('b', 3) +#define BT_IOC_QUERY_ALL _IO('b', 4) +#define BT_IOC_DELETE_ALL _IO('b', 5) +#define BT_IOC_ENABLE _IO('b', 6) +#define BT_IOC_DISABLE _IO('b', 7) +#define BT_IOC_PEEK_PACKET _IO('b', 8) + +/** + * user space ioctl arguments + */ +struct bt_uioc_args { + char ifa_name[IFNAMSIZ]; + char cfile_name[BT_PATHNAME_MAX]; +}; + +#endif diff --git a/newip/src/linux-5.10/include/linux/newip_route.h b/newip/src/linux-5.10/include/linux/newip_route.h new file mode 100644 index 0000000000000000000000000000000000000000..d38c087369299a45afcd44a3bcbf6f8ae98ff10a --- /dev/null +++ b/newip/src/linux-5.10/include/linux/newip_route.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. + * + * Linux NewIP INET implementation + * + * Based on include/uapi/linux/ipv6_route.h + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ +#ifndef _LINUX_NEWIP_ROUTE_H +#define _LINUX_NEWIP_ROUTE_H + +#include + +#endif + diff --git a/newip/src/linux-5.10/include/linux/nip.h b/newip/src/linux-5.10/include/linux/nip.h new file mode 100644 index 0000000000000000000000000000000000000000..65e9d9b891ba47757d126fba5edd506fe5cf7f13 --- /dev/null +++ b/newip/src/linux-5.10/include/linux/nip.h @@ -0,0 +1,86 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. + * + * Based on include/linux/ipv6.h + * Based on include/net/sock.h + */ +#ifndef _NIP_H +#define _NIP_H + +#include +#include +#include +#include + +#define ETH_P_NEWIP 0xEADD /* NIP */ + +/* struct sock_common __sk_common */ +#define sk_nip_daddr __sk_common.nip_daddr +#define sk_nip_rcv_saddr __sk_common.nip_rcv_saddr + +/* struct request_sock req */ +#define ir_nip_rmt_addr req.__req_common.nip_daddr +#define ir_nip_loc_addr req.__req_common.nip_rcv_saddr + +struct nip_devconf { + __s32 forwarding; + __s32 mtu; + __s32 ignore_routes_with_linkdown; + + __s32 disable_nip; + __s32 nndisc_notify; + __s32 use_oif_addrs_only; + __s32 keep_addr_on_down; + + struct ctl_table_header *sysctl_header; +}; + +/* This structure contains results of exthdrs parsing + * The common CB structure: struct sk_buff->char cb[48] + * TCP CB structure : struct tcp_skb_cb + * struct tcp_skb_cb->header is union, include IPv4/IPv6/NewIP xx_skb_parm, max size is 24 + * sizeof(struct ninet_skb_parm)=19 + * sizeof(struct inet_skb_parm)=24 + * sizeof(struct inet6_skb_parm)=20 + * sizeof(struct tcp_skb_cb->exclude skb_parm)=24 |__ total size is 48, struct sk_buff->char cb[48] + * sizeof(struct tcp_skb_cb->include skb_parm)=24 | + */ +#pragma pack(1) +struct ninet_skb_parm { + struct nip_addr dstaddr; + struct nip_addr srcaddr; + u8 nexthdr; +}; +#pragma pack() + +struct tcp_nip_common { + u32 ack_retrans_num; + u32 ack_retrans_seq; + u32 nip_ssthresh; + u32 nip_ssthresh_reset; + bool nip_keepalive_enable; + u32 idle_ka_probes_out; + u32 
nip_keepalive_out; + u32 last_rcv_nxt; + u32 dup_ack_cnt; + u32 keepalive_time_bak; + u32 keepalive_probes_bak; + u32 keepalive_intvl_bak; +}; + +struct tcp_nip_request_sock { + struct tcp_request_sock tcp_nip_rsk_tcp; + struct tcp_nip_common common; +}; + +struct nip_udp_sock { + struct udp_sock udp; +}; + +struct tcp_nip_sock { + struct tcp_sock tcp; + struct tcp_nip_common common; +}; + +#endif /* _NIP_H */ diff --git a/newip/src/linux-5.10/include/linux/nip_icmp.h b/newip/src/linux-5.10/include/linux/nip_icmp.h new file mode 100644 index 0000000000000000000000000000000000000000..bb67221e2be007f64c3b9e4a27481e1a765cef94 --- /dev/null +++ b/newip/src/linux-5.10/include/linux/nip_icmp.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. + * + * NewIP INET + * An implementation of the TCP/IP protocol suite for the LINUX + * operating system. NewIP INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Definitions for the NewIP ICMP protocol. + * + * Based on include/linux/icmp.h + */ +#ifndef _LINUX_NIP_ICMP_H +#define _LINUX_NIP_ICMP_H + +#include +#include +#include + +static inline struct nip_icmp_hdr *nip_icmp_header(const struct sk_buff *skb) +{ + return (struct nip_icmp_hdr *)skb_transport_header(skb); +} + +int nip_icmp_init(void); + +#endif diff --git a/newip/src/linux-5.10/include/net/flow_nip.h b/newip/src/linux-5.10/include/net/flow_nip.h new file mode 100644 index 0000000000000000000000000000000000000000..fe625d0b63d570fc418016d21e1aad0239d94a91 --- /dev/null +++ b/newip/src/linux-5.10/include/net/flow_nip.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. + * + * NewIP Generic internet FLOW. 
+ * + * Based on include/net/flow.h + */ +#ifndef _NET_FLOW_NIP_H +#define _NET_FLOW_NIP_H + +#include + +struct flow_nip { + struct flowi_common __fl_common; +#define flowin_oif __fl_common.flowic_oif +#define flowin_iif __fl_common.flowic_iif + struct nip_addr daddr; + struct nip_addr saddr; + union flowi_uli uli; +#define fln_sport uli.ports.sport +#define fln_dport uli.ports.dport +} __attribute__((__aligned__(BITS_PER_LONG / 8))); + +#endif diff --git a/newip/src/linux-5.10/include/net/if_ninet.h b/newip/src/linux-5.10/include/net/if_ninet.h new file mode 100644 index 0000000000000000000000000000000000000000..347099cb38dcc846382b56ea9b828b5ad3d47c40 --- /dev/null +++ b/newip/src/linux-5.10/include/net/if_ninet.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. + * + * NewIP inet interface/address list definitions + * Linux NewIP INET implementation + * + * Based on include/net/if_inet6.h + */ +#ifndef _NET_IF_NINET_H +#define _NET_IF_NINET_H + +#include + +enum { + NINET_IFADDR_STATE_NEW, + NINET_IFADDR_STATE_DEAD, +}; + +struct ninet_ifaddr { + struct nip_addr addr; + + /* In seconds, relative to tstamp. Expiry is at tstamp + HZ * lft. 
*/ + __u32 valid_lft; + __u32 preferred_lft; + refcount_t refcnt; + + /* protect one ifaddr itself */ + spinlock_t lock; + + int state; + + __u32 flags; + + unsigned long cstamp; /* created timestamp */ + unsigned long tstamp; /* updated timestamp */ + + struct ninet_dev *idev; + struct nip_rt_info *rt; + + struct hlist_node addr_lst; + struct list_head if_list; + + struct rcu_head rcu; +}; + +struct ninet_dev { + struct net_device *dev; + + struct list_head addr_list; + + rwlock_t lock; + refcount_t refcnt; + __u32 if_flags; + int dead; + + struct neigh_parms *nd_parms; + struct nip_devconf cnf; + + unsigned long tstamp; /* newip InterfaceTable update timestamp */ + struct rcu_head rcu; +}; + +int ninet_gifconf(struct net_device *dev, char __user *buf, int len, int size); + +#endif diff --git a/newip/src/linux-5.10/include/net/netns/nip.h b/newip/src/linux-5.10/include/net/netns/nip.h new file mode 100644 index 0000000000000000000000000000000000000000..ed9ceb2e2806d12e28e1fdd0a64c0881dc674559 --- /dev/null +++ b/newip/src/linux-5.10/include/net/netns/nip.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. 
+ * + * NewIP in net namespaces + * + * Based on include/net/netns/ipv6.h + */ +#ifndef __NETNS_NEWIP_H__ +#define __NETNS_NEWIP_H__ + +#include +#include + +struct ctl_table_header; + +struct netns_sysctl_newip { + int nip_rt_gc_interval; +}; +struct netns_newip { + uint32_t resv; + struct netns_sysctl_newip sysctl; + struct nip_devconf *devconf_dflt; + + struct nip_rt_info *nip_null_entry; + struct nip_rt_info *nip_broadcast_entry; + + struct dst_ops nip_dst_ops; + struct nip_fib_table *nip_fib_main_tbl; + struct nip_fib_table *nip_fib_local_tbl; +}; + +#endif + diff --git a/newip/src/linux-5.10/include/net/ninet_connection_sock.h b/newip/src/linux-5.10/include/net/ninet_connection_sock.h new file mode 100644 index 0000000000000000000000000000000000000000..31dc6066df8a1eff5408983ae0993fc0f22ecb52 --- /dev/null +++ b/newip/src/linux-5.10/include/net/ninet_connection_sock.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. + * + * NewIP NET + * Generic infrastructure for NewIP INET connection oriented protocols. + * + * Based on include/net/inet_connection_sock.h + */ +#ifndef _NINET_CONNECTION_SOCK_H +#define _NINET_CONNECTION_SOCK_H + +#include +#include +#include + +struct inet_bind_bucket; +struct request_sock; +struct sk_buff; +struct sock; +struct sockaddr; + +int ninet_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl); +void ninet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, + unsigned long timeout); + +#endif /* _NINET_CONNECTION_SOCK_H */ diff --git a/newip/src/linux-5.10/include/net/ninet_hashtables.h b/newip/src/linux-5.10/include/net/ninet_hashtables.h new file mode 100644 index 0000000000000000000000000000000000000000..8171e0153af4a900d33553767349aae6b5c9aeae --- /dev/null +++ b/newip/src/linux-5.10/include/net/ninet_hashtables.h @@ -0,0 +1,121 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. 
+ * + * NewIP INET + * An implementation of the TCP/IP protocol suite for the LINUX + * operating system. NewIP INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Based on include/net/inet6_hashtables.h + */ +#ifndef NINET_HASHTABLES_H +#define NINET_HASHTABLES_H + +#if IS_ENABLED(CONFIG_NEWIP) +#include +#include +#include + +#include + +#include +#include + +struct inet_hashinfo; + +int ninet_hash(struct sock *sk); +void ninet_unhash(struct sock *sk); +int ninet_hash_connect(struct inet_timewait_death_row *death_row, + struct sock *sk); + +int __ninet_hash(struct sock *sk, struct sock *osk); + + +static inline unsigned int __ninet_ehashfn(const u32 lhash, + const u16 lport, + const u32 fhash, + const __be16 fport, + const u32 initval) +{ + const u32 ports = (((u32) lport) << 16) | (__force u32) fport; + + return jhash_3words(lhash, fhash, ports, initval); +} + +struct sock *__ninet_lookup_established(struct net *net, + struct inet_hashinfo *hashinfo, + const struct nip_addr *saddr, + const __be16 sport, + const struct nip_addr *daddr, + const u16 hnum, const int dif); + +struct sock *ninet_lookup_listener(struct net *net, + struct inet_hashinfo *hashinfo, + struct sk_buff *skb, int doff, + const struct nip_addr *saddr, + const __be16 sport, + const struct nip_addr *daddr, + const unsigned short hnum, const int dif, const int sdif); + +static inline struct sock *__ninet_lookup(struct net *net, + struct inet_hashinfo *hashinfo, + struct sk_buff *skb, int doff, + const struct nip_addr *saddr, + const __be16 sport, + const struct nip_addr *daddr, + const u16 hnum, + const int dif, bool *refcounted) +{ + struct sock *sk = __ninet_lookup_established(net, hashinfo, saddr, + sport, daddr, hnum, dif); + *refcounted = true; + if (sk) + return sk; + *refcounted = false; + return ninet_lookup_listener(net, hashinfo, skb, doff, saddr, sport, + daddr, hnum, dif, 0); +} + +static inline struct sock 
*__ninet_lookup_skb(struct inet_hashinfo *hashinfo, + struct sk_buff *skb, int doff, + const __be16 sport, + const __be16 dport, + int iif, bool *refcounted) +{ + struct sock *sk; + + *refcounted = true; + sk = skb_steal_sock(skb, refcounted); + if (sk) + return sk; + + return __ninet_lookup(dev_net(skb->dev), hashinfo, skb, + doff, &(NIPCB(skb)->srcaddr), sport, + &(NIPCB(skb)->dstaddr), ntohs(dport), + iif, refcounted); +} + +#define NINET_MATCH(__sk, __net, __saddr, __daddr, __ports, __dif) \ + (((__sk)->sk_portpair == (__ports)) && \ + ((__sk)->sk_family == AF_NINET) && \ + nip_addr_eq(&(__sk)->sk_nip_daddr, (__saddr)) && \ + nip_addr_eq(&(__sk)->sk_nip_rcv_saddr, (__daddr)) && \ + (!(__sk)->sk_bound_dev_if || \ + ((__sk)->sk_bound_dev_if == (__dif))) && \ + net_eq(sock_net(__sk), (__net))) + +int ninet_hash_connect(struct inet_timewait_death_row *death_row, + struct sock *sk); + +u64 secure_newip_port_ephemeral(const __be32 *saddr, const __be32 *daddr, + __be16 dport); +__u32 secure_tcp_nip_sequence_number(const __be32 *saddr, const __be32 *daddr, + __be16 sport, __be16 dport); + +u32 ninet_ehashfn(const struct net *net, + const struct nip_addr *laddr, const u16 lport, + const struct nip_addr *faddr, const __be16 fport); + +#endif /* IS_ENABLED(CONFIG_NEWIP) */ +#endif /* _NINET_HASHTABLES_H */ diff --git a/newip/src/linux-5.10/include/net/nip.h b/newip/src/linux-5.10/include/net/nip.h new file mode 100644 index 0000000000000000000000000000000000000000..6343aa7a20afc0eaf5cee9af2a0986ed14ea1ebe --- /dev/null +++ b/newip/src/linux-5.10/include/net/nip.h @@ -0,0 +1,128 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. + * + * NewIP INET + * An implementation of the TCP/IP protocol suite for the LINUX + * operating system. NewIP INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Definitions for the NewIP module. 
+ * + * Based on include/net/ip.h + * Based on include/net/protocol.h + */ +#ifndef _NET_NEWIP_H +#define _NET_NEWIP_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include "if_ninet.h" +#include "flow_nip.h" + +#define NIP_MAX_SOCKET_NUM 1024 + +struct ninet_protocol { + void (*early_demux)(struct sk_buff *skb); + + int (*handler)(struct sk_buff *skb); + + void (*err_handler)(struct sk_buff *skb, + struct ninet_skb_parm *opt, + u8 type, u8 code, int offset, __be32 info); + unsigned int flags; +}; + +#define NIPCB(skb) ((struct ninet_skb_parm *)&(TCP_SKB_CB(skb)->header.hnip)) + +extern const struct ninet_protocol __rcu *ninet_protos[MAX_INET_PROTOS]; +extern const struct proto_ops ninet_dgram_ops; +extern const struct proto_ops ninet_stream_ops; +extern struct neigh_table nnd_tbl; + +int tcp_nip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl); +void tcp_nip_actual_send_reset(struct sock *sk, struct sk_buff *skb, u32 seq, + u32 ack_seq, u32 win, int rst, u32 priority); +int nip_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev); +struct nip_rt_info *nip_dst_alloc(struct net *net, struct net_device *dev, + int flags); + +static inline bool nip_addr_and_ifindex_eq(const struct nip_addr *a1, + const struct nip_addr *a2, int ifindex1, int ifindex2) +{ + return (a1->bitlen == a2->bitlen) && (a1->bitlen <= NIP_ADDR_BIT_LEN_MAX) && + (memcmp(&a1->v.u, &a2->v.u, a1->bitlen >> 3) == 0) && (ifindex1 == ifindex2); +}; + +static inline bool nip_addr_eq(const struct nip_addr *a1, + const struct nip_addr *a2) +{ + return (a1->bitlen == a2->bitlen) && (a1->bitlen <= NIP_ADDR_BIT_LEN_MAX) && + (memcmp(&a1->v.u, &a2->v.u, a1->bitlen >> 3) == 0); +}; + +static inline u32 nip_addr_hash(const struct nip_addr *a) +{ + u32 tmp[4]; + u8 len = a->bitlen >> 3; + + /* set unused bit to 0 */ + memset(tmp, 0, NIP_ADDR_BIT_LEN_16); + 
memcpy(tmp, &a->v.u, + len > NIP_ADDR_BIT_LEN_16 ? NIP_ADDR_BIT_LEN_16 : len); + + return (__force u32)(tmp[0] ^ tmp[1] ^ tmp[2] ^ tmp[3]); +} + +int nip_send_skb(struct sk_buff *skb); + +void ninet_destroy_sock(struct sock *sk); +int nip_datagram_dst_update(struct sock *sk, bool fix_sk_saddr); +int ninet_add_protocol(const struct ninet_protocol *prot, unsigned char protocol); +int ninet_del_protocol(const struct ninet_protocol *prot, unsigned char protocol); +int ninet_register_protosw(struct inet_protosw *p); +void ninet_unregister_protosw(struct inet_protosw *p); +int nip_input(struct sk_buff *skb); +int nip_output(struct net *net, struct sock *sk, struct sk_buff *skb); +int nip_forward(struct sk_buff *skb); + +unsigned int tcp_nip_sync_mss(struct sock *sk, u32 pmtu); +unsigned int tcp_nip_current_mss(struct sock *sk); +int tcp_nip_send_mss(struct sock *sk, int *size_goal, int flags); + +struct nip_addr *nip_nexthop(struct nip_rt_info *rt, struct nip_addr *daddr); +struct dst_entry *nip_sk_dst_lookup_flow(struct sock *sk, struct flow_nip *fln); +struct dst_entry *nip_dst_lookup_flow(struct net *net, const struct sock *sk, + struct flow_nip *fln, + const struct nip_addr *final_dst); +u_char *nip_get_mac(struct nip_addr *nipaddr, struct net_device *dev); +struct net_device *nip_get_defaultdev(void); +int nip_init_dev(void); + +int _nip_udp_output(struct sock *sk, void *from, int datalen, + int transhdrlen, const struct nip_addr *saddr, + ushort sport, const struct nip_addr *daddr, + ushort dport, struct dst_entry *dst); + +/* functions defined in nip_sockglue.c */ +int nip_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, + unsigned int optlen); +int nip_getsockopt(struct sock *sk, int level, + int optname, char __user *optval, int __user *optlen); + +/* functions defined in nip_addrconf.c */ +int nip_addrconf_get_ifaddr(struct net *net, unsigned int cmd, void __user *arg); + +#endif diff --git a/newip/src/linux-5.10/include/net/nip_addrconf.h 
b/newip/src/linux-5.10/include/net/nip_addrconf.h new file mode 100644 index 0000000000000000000000000000000000000000..84552723acbab9e2c449700f34a741562bec6c72 --- /dev/null +++ b/newip/src/linux-5.10/include/net/nip_addrconf.h @@ -0,0 +1,115 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. + * + * Based on include/net/addrconf.h + */ +#ifndef _NIP_ADDRCONF_H +#define _NIP_ADDRCONF_H + +#include +#include + +#include +#include +#include + +#define ADDRCONF_NOTIFY_PRIORITY 0 +#define NIN_ADDR_HSIZE_SHIFT (4) +#define NIN_ADDR_HSIZE (1 << NIN_ADDR_HSIZE_SHIFT) + +#define DST_HOST 0x0001 /* NIP */ + +int nip_addrconf_add_ifaddr(struct net *net, void __user *arg); +int nip_addrconf_del_ifaddr(struct net *net, void __user *arg); + +int nip_dev_get_saddr(struct net *net, const struct net_device *dev, + const struct nip_addr *daddr, struct nip_addr *saddr); + +int nip_addrconf_init(void); +void nip_addrconf_cleanup(void); +void nip_addr_to_str(const struct nip_addr *addr, unsigned char *buf, int buf_len); + +/** + * __nin_dev_get - get ninet_dev pointer from netdevice + * @dev: network device + * + * Caller must hold rcu_read_lock or RTNL, because this function + * does not take a reference on the ninet_dev. + */ +static inline struct ninet_dev *__nin_dev_get(const struct net_device *dev) +{ + return rcu_dereference_rtnl(dev->nip_ptr); +} + +/** + * nin_dev_get - get ninet_dev pointer from netdevice + * @dev: network device + */ +static inline struct ninet_dev *nin_dev_get(const struct net_device *dev) +{ + struct ninet_dev *idev; + + rcu_read_lock(); + idev = rcu_dereference(dev->nip_ptr); + if (idev) + refcount_inc(&idev->refcnt); + rcu_read_unlock(); + return idev; +} + +static inline struct neigh_parms *__nin_dev_nd_parms_get_rcu( + const struct net_device *dev) +{ + struct ninet_dev *idev = __nin_dev_get(dev); + + return idev ? 
idev->nd_parms : NULL; +} + +void nin_dev_finish_destroy(struct ninet_dev *idev); + +static inline void nin_dev_put(struct ninet_dev *idev) +{ + if (refcount_dec_and_test(&idev->refcnt)) + nin_dev_finish_destroy(idev); +} + +static inline void nin_dev_put_clear(struct ninet_dev **pidev) +{ + struct ninet_dev *idev = *pidev; + + if (idev) { + nin_dev_put(idev); + *pidev = NULL; + } +} + +static inline void __nin_dev_put(struct ninet_dev *idev) +{ + refcount_dec(&idev->refcnt); +} + +static inline void nin_dev_hold(struct ninet_dev *idev) +{ + refcount_inc(&idev->refcnt); +} + +void ninet_ifa_finish_destroy(struct ninet_ifaddr *ifp); + +static inline void nin_ifa_put(struct ninet_ifaddr *ifp) +{ + if (refcount_dec_and_test(&ifp->refcnt)) + ninet_ifa_finish_destroy(ifp); +} + +static inline void __nin_ifa_put(struct ninet_ifaddr *ifp) +{ + refcount_dec(&ifp->refcnt); +} + +static inline void nin_ifa_hold(struct ninet_ifaddr *ifp) +{ + refcount_inc(&ifp->refcnt); +} + +#endif diff --git a/newip/src/linux-5.10/include/net/nip_fib.h b/newip/src/linux-5.10/include/net/nip_fib.h new file mode 100644 index 0000000000000000000000000000000000000000..0d24d932d0717ba44e5cf74a45f46d684889b900 --- /dev/null +++ b/newip/src/linux-5.10/include/net/nip_fib.h @@ -0,0 +1,153 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. 
+ * + * Linux NewIP INET implementation + * + * Based on include/net/ip6_fib.h + */ +#ifndef _NET_NEWIP_FIB_H +#define _NET_NEWIP_FIB_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "nip.h" +#include "flow_nip.h" + +#define NIN_ROUTE_HSIZE_SHIFT 4 +#define NIN_ROUTE_HSIZE (1 << NIN_ROUTE_HSIZE_SHIFT) + +struct nip_fib_config { + u32 fc_table; + u32 fc_metric; + int fc_ifindex; + u32 fc_flags; + u32 fc_protocol; + u32 fc_type; /* only 8 bits are used */ + + struct nip_addr fc_dst; + struct nip_addr fc_src; + struct nip_addr fc_gateway; + + struct nl_info fc_nlinfo; + unsigned long fc_expires; +}; + +struct nip_fib_node { + struct hlist_node fib_hlist; + struct nip_rt_info *nip_route_info; + struct rcu_head rcu; +}; + +struct nip_fib_table; + +struct nip_rt_info { + struct dst_entry dst; + struct dst_entry *from; + struct nip_fib_table *rt_table; + struct nip_fib_node __rcu *rt_node; + struct ninet_dev *rt_idev; + struct nip_rt_info *__percpu *rt_pcpu; + + atomic_t rt_ref; + + uint32_t rt_flags; + struct nip_addr gateway; + struct nip_addr rt_dst; + struct nip_addr rt_src; + + u32 rt_metric; + u32 rt_pmtu; + u8 rt_protocol; +}; + +static inline struct ninet_dev *nip_dst_idev(struct dst_entry *dst) +{ + return ((struct nip_rt_info *)dst)->rt_idev; +} + +struct nip_fib_table { + u32 nip_tb_id; + spinlock_t nip_tb_lock; + struct hlist_head nip_tb_head[NIN_ROUTE_HSIZE]; + unsigned int flags; +}; + +#define NIP_RT_TABLE_MAIN RT_TABLE_MAIN +#define NIP_RT_TABLE_LOCAL RT_TABLE_LOCAL + +typedef struct nip_rt_info *(*nip_pol_lookup_t) (struct net *, + struct nip_fib_table *, + struct flow_nip *, int); + +struct nip_fib_table *nip_fib_get_table(struct net *net, u32 id); + +struct dst_entry *nip_fib_rule_lookup(struct net *net, struct flow_nip *fln, + int flags, int *tbl_type, nip_pol_lookup_t lookup); + +#define NIP_RT_EXPIRES_FLAGS 12 +static inline void nip_rt_set_expires(struct nip_rt_info *rt, + unsigned 
long expires) +{ + rt->dst.expires = expires; + + rt->rt_flags |= NIP_RT_EXPIRES_FLAGS; +} + +static inline void nip_rt_clean_expires(struct nip_rt_info *rt) +{ + rt->rt_flags &= ~NIP_RT_EXPIRES_FLAGS; + rt->dst.expires = 0; +} + +static inline void nip_rt_put(struct nip_rt_info *rt) +{ + BUILD_BUG_ON(offsetof(struct nip_rt_info, dst) != 0); + dst_release(&rt->dst); +} + +void nip_rt_free_pcpu(struct nip_rt_info *non_pcpu_rt); + +static inline void nip_rt_hold(struct nip_rt_info *rt) +{ + atomic_inc(&rt->rt_ref); +} + +static inline void nip_rt_release(struct nip_rt_info *rt) +{ + if (atomic_dec_and_test(&rt->rt_ref)) { + nip_rt_free_pcpu(rt); + dst_dev_put(&rt->dst); + + dst_release(&rt->dst); + } +} + +int nip_fib_init(void); + +void nip_fib_gc_cleanup(void); + +struct nip_fib_node *nip_fib_locate(struct hlist_head *nip_tb_head, + const struct nip_addr *daddr); + +void nip_fib_clean_all(struct net *net, + int (*func)(struct nip_rt_info *, void *arg), void *arg); + +int nip_fib_add(struct nip_fib_table *table, struct nip_rt_info *rt); + +int nip_fib_del(struct nip_rt_info *rt_info, struct nl_info *info); + +int nip_set_route_netlink(struct net *net, struct nip_rtmsg *rtmsg); + +int nip_del_route_netlink(struct net *net, struct nip_rtmsg *rtmsg); + +#endif /* _NET_NEWIP_FIB_H */ diff --git a/newip/src/linux-5.10/include/net/nip_route.h b/newip/src/linux-5.10/include/net/nip_route.h new file mode 100644 index 0000000000000000000000000000000000000000..870c093cf786cf762c803e193b8b7057aad93698 --- /dev/null +++ b/newip/src/linux-5.10/include/net/nip_route.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. 
+ * + * Based on include/net/ip6_route.h + */ +#ifndef _NET_NIP_ROUTE_H +#define _NET_NIP_ROUTE_H + +#include +#include "nip_fib.h" +#include "nip_addrconf.h" + +#define NIP_RT_PRIO_USER 1024 + +struct nip_rt_info *nip_addrconf_dst_alloc(struct ninet_dev *idev, + const struct nip_addr *addr); + + +int nip_route_input(struct sk_buff *skb); +struct dst_entry *nip_route_input_lookup(struct net *net, + struct net_device *dev, + struct flow_nip *fln, int flags, int *tbl_type); + +struct dst_entry *nip_route_output_flags(struct net *net, const struct sock *sk, + struct flow_nip *fln, int flags); + + +static inline struct dst_entry *nip_route_output(struct net *net, + const struct sock *sk, + struct flow_nip *fln) +{ + return nip_route_output_flags(net, sk, fln, 0); +} + +struct nip_rt_info *nip_pol_route(struct net *net, struct nip_fib_table *table, + int oif, struct flow_nip *fln, int flags); + +bool nip_bind_addr_check(struct net *net, + struct nip_addr *addr); + +int nip_ins_rt(struct nip_rt_info *rt); +int nip_del_rt(struct nip_rt_info *rt); + +static inline int nip_route_get_saddr(struct net *net, struct nip_rt_info *rt, + const struct nip_addr *daddr, + struct nip_addr *saddr) +{ + struct ninet_dev *idev = + rt ? nip_dst_idev((struct dst_entry *)rt) : NULL; + int err = 0; + + err = nip_dev_get_saddr(net, idev ? 
idev->dev : NULL, daddr, saddr); + + return err; +} + +void nip_rt_ifdown(struct net *net, struct net_device *dev); + +int nip_route_ioctl(struct net *net, unsigned int cmd, struct nip_rtmsg *rtmsg); + +int nip_route_init(void); + +void nip_route_cleanup(void); + +#endif /* _NET_NIP_ROUTE_H */ diff --git a/newip/src/linux-5.10/include/net/nip_udp.h b/newip/src/linux-5.10/include/net/nip_udp.h new file mode 100644 index 0000000000000000000000000000000000000000..571de5a45ffb0ef5e4f639c1d8aff0c7a56383f9 --- /dev/null +++ b/newip/src/linux-5.10/include/net/nip_udp.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. + * + * NewIP INET + * An implementation of the TCP/IP protocol suite for the LINUX + * operating system. NewIP INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Definitions for the NewIP UDP module. + * + * Based on include/net/udp.h + */ +#ifndef _NET_NEWIP_UDP_H +#define _NET_NEWIP_UDP_H + +#include +#include +#include +#include +#include + +#define NIP_UDP_HSLOT_COUNT 10 + +int nip_udp_init(void); + +int nip_udp_output(struct sock *sk, struct msghdr *msg, size_t len); + +int nip_udp_input(struct sk_buff *skb); +int nip_udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + int noblock, int flags, int *addr_len); + +#endif diff --git a/newip/src/linux-5.10/include/net/nndisc.h b/newip/src/linux-5.10/include/net/nndisc.h new file mode 100644 index 0000000000000000000000000000000000000000..d8c776f8377293db8f3d3ed140ffd941a3f34716 --- /dev/null +++ b/newip/src/linux-5.10/include/net/nndisc.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. 
+ * + * Based on include/net/ndisc.h + */ +#ifndef _NNDISC_H +#define _NNDISC_H + +#include +#include +#include +#include +#include +#include +#include + +#define NEWIP_NEIGH_BUCKET_MAX 8 +extern struct neigh_table nnd_tbl; + +#define NIP_ARP_NS 0x01 /* ARP request */ +#define NIP_ARP_NA 0x02 /* ARP response */ + +struct nnd_msg { + struct nip_icmp_hdr icmph; + __u8 data[0]; +}; + +static inline bool neigh_key_eq800(const struct neighbour *n, const void *pkey) +{ + struct nip_addr *a1, *a2; + + a1 = (struct nip_addr *)(pkey); + a2 = (struct nip_addr *)(n->primary_key); + +#define RIGHT_POS_3 3 + return a1->bitlen == a2->bitlen && a1->bitlen <= NIP_ADDR_BIT_LEN_MAX && + memcmp(&a1->v.u, &a2->v.u, a1->bitlen >> RIGHT_POS_3) == 0; +} + +static inline u32 nndisc_hashfn(const void *pkey, const struct net_device *dev, + __u32 *hash_rnd) +{ + return (*(int *)pkey % NEWIP_NEIGH_BUCKET_MAX); +} + +static inline struct neighbour *__nip_neigh_lookup_noref(struct net_device *dev, + const void *pkey) +{ + return ___neigh_lookup_noref(&nnd_tbl, neigh_key_eq800, nndisc_hashfn, + pkey, dev); +} + +static inline struct neighbour *__nip_neigh_lookup(struct net_device *dev, + const void *pkey) +{ + struct neighbour *n; + + rcu_read_lock_bh(); + n = __nip_neigh_lookup_noref(dev, pkey); + if (n && !refcount_inc_not_zero(&n->refcnt)) + n = NULL; + rcu_read_unlock_bh(); + + return n; +} + +int nndisc_rcv(struct sk_buff *skb); + +int nndisc_init(void); + +#endif diff --git a/newip/src/linux-5.10/include/net/tcp_nip.h b/newip/src/linux-5.10/include/net/tcp_nip.h new file mode 100644 index 0000000000000000000000000000000000000000..8d112abb300e52e1d8c2d672237dbe5f3be7567c --- /dev/null +++ b/newip/src/linux-5.10/include/net/tcp_nip.h @@ -0,0 +1,189 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. + * + * NewIP INET + * An implementation of the TCP/IP protocol suite for the LINUX + * operating system. 
NewIP INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Definitions for the NewIP TCP module. + * + * Based on include/net/tcp.h + * Based on include/linux/tcp.h + */ +#ifndef _TCP_NIP_H +#define _TCP_NIP_H + +#define FASTRETRANS_DEBUG 1 + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +extern struct proto tcp_nip_prot; + +#define TCP_HDR_LEN_OFFSET 6 +#define TCP_HDR_LEN_POS_PAYLOAD 12 +#define TCP_NIP_4BYTE_PAYLOAD 2 + +#define TCP_OPT_MSS_PAYLOAD 24 +#define TCP_OLEN_MSS_PAYLOAD 16 + +#define TCP_NUM_2 2 +#define TCP_NUM_4 4 + +#define TCP_ARRAY_INDEX_2 2 + +#define TCP_NIP_KEEPALIVE_CYCLE_MS_DIVISOR 20 /* 1 HZ = 1 seconds */ +#define TCP_NIP_CSK_KEEPALIVE_CYCLE 10 /* 1 HZ = 1 seconds */ + +#define TCP_NIP_WINDOW_MAX 65535U + +#define TCP_NIP_WRITE_TIMER_DEFERRED (TCP_MTU_REDUCED_DEFERRED + 1) +#define TCP_NIP_DELACK_TIMER_DEFERRED (TCP_NIP_WRITE_TIMER_DEFERRED + 1) + +/* init */ +int tcp_nip_init(void); +void tcp_nip_exit(void); + +void tcp_nip_done(struct sock *sk); +int tcp_direct_connect(struct sock *sk, void __user *arg); +void tcp_nip_rcv_established( + struct sock *sk, + struct sk_buff *skb, + const struct tcphdr *th, + unsigned int len); + +void __tcp_nip_push_pending_frames( + struct sock *sk, + unsigned int cur_mss, + int nonagle); + +u32 __nip_tcp_select_window(struct sock *sk); +unsigned short nip_get_output_checksum_tcp(struct sk_buff *skb, struct nip_addr src_addr, + struct nip_addr dst_addr); +void tcp_nip_rearm_rto(struct sock *sk); + +int tcp_nip_rcv_state_process(struct sock *sk, struct sk_buff *skb); + +/* tcp_nip_output */ +int tcp_nip_transmit_skb( + struct sock *sk, + struct sk_buff *skb, + int clone_it, + gfp_t gfp_mask); +int __tcp_nip_retransmit_skb(struct sock *sk, 
struct sk_buff *skb, int segs); +int tcp_nip_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs); +void tcp_nip_send_fin(struct sock *sk); +void tcp_nip_send_active_reset(struct sock *sk, gfp_t priority); +void tcp_nip_send_probe0(struct sock *sk); +int tcp_nip_write_wakeup(struct sock *sk, int mib); + +/* tcp_nip_timer */ +void tcp_nip_init_xmit_timers(struct sock *sk); +void tcp_nip_clear_xmit_timers(struct sock *sk); +void tcp_nip_delack_timer_handler(struct sock *sk); +void tcp_nip_write_timer_handler(struct sock *sk); + +static inline struct sk_buff *tcp_nip_send_head(const struct sock *sk) +{ + return sk->sk_send_head; +} + +static inline void tcp_nip_add_write_queue_tail( + struct sock *sk, + struct sk_buff *skb) +{ + __skb_queue_tail(&sk->sk_write_queue, skb); + + if (sk->sk_send_head == NULL) + sk->sk_send_head = skb; +} + +static inline void tcp_nip_write_queue_purge(struct sock *sk) +{ + struct sk_buff *skb; + + while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) { + tcp_skb_tsorted_anchor_cleanup(skb); + sk_wmem_free_skb(sk, skb); + } + + tcp_clear_all_retrans_hints(tcp_sk(sk)); + sk->sk_send_head = NULL; + tcp_sk(sk)->packets_out = 0; + inet_csk(sk)->icsk_backoff = 0; +} + +static inline bool tcp_nip_write_queue_empty(struct sock *sk) +{ + return skb_queue_empty(&sk->sk_write_queue); +} + +static inline struct tcp_nip_sock *tcp_nip_sk(const struct sock *sk) +{ + return (struct tcp_nip_sock *)sk; +} + +static inline struct tcp_nip_request_sock *tcp_nip_rsk(const struct request_sock *req) +{ + return (struct tcp_nip_request_sock *)req; +} + +/* connect */ +int __tcp_nip_connect(struct sock *sk); +int _tcp_nip_conn_request(struct request_sock_ops *rsk_ops, + const struct tcp_request_sock_ops *af_ops, + struct sock *sk, struct sk_buff *skb); +struct sk_buff *tcp_nip_make_synack( + const struct sock *sk, + struct dst_entry *dst, + struct request_sock *req, + struct tcp_fastopen_cookie *foc, + enum tcp_synack_type synack_type); +int 
nip_send_synack(struct request_sock *req, struct sk_buff *skb); +struct sock *tcp_nip_check_req(struct sock *sk, struct sk_buff *skb, + struct request_sock *req); +int tcp_nip_child_process(struct sock *parent, struct sock *child, + struct sk_buff *skb); +int tcp_nip_rtx_synack(const struct sock *sk, struct request_sock *req); + +/* client send ack */ +void tcp_nip_send_ack(struct sock *sk); +struct sock *tcp_nip_create_openreq_child(const struct sock *sk, + struct request_sock *req, + struct sk_buff *skb); +void tcp_nip_initialize_rcv_mss(struct sock *sk); + +/* release */ +void tcp_nip_release_cb(struct sock *sk); + +void tcp_nip_keepalive_enable(struct sock *sk); +void tcp_nip_keepalive_disable(struct sock *sk); + +#endif /* _NIP_TCP_H */ diff --git a/newip/src/linux-5.10/include/net/transp_nip.h b/newip/src/linux-5.10/include/net/transp_nip.h new file mode 100644 index 0000000000000000000000000000000000000000..35fe52c298aa33a1567bd8f121c322bc41234ab2 --- /dev/null +++ b/newip/src/linux-5.10/include/net/transp_nip.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. 
+ * + * Based on include/net/transp_v6.h + */ +#ifndef _TRANSP_NIP_H +#define _TRANSP_NIP_H + +extern struct proto nip_udp_prot; + +int nip_udp_init(void); +void nip_udp_exit(void); + +int nip_udp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); + +void nip_datagram_recv_ctl(struct sock *sk, struct msghdr *msg, + struct sk_buff *skb); +void nip_datagram_recv_common_ctl(struct sock *sk, struct msghdr *msg, + struct sk_buff *skb); +void nip_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg, + struct sk_buff *skb); + +void nip_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp, __u16 srcp, + __u16 destp, int bucket); + +void ninet_destroy_sock(struct sock *sk); + +#endif diff --git a/newip/src/linux-5.10/include/uapi/linux/newip_route.h b/newip/src/linux-5.10/include/uapi/linux/newip_route.h new file mode 100644 index 0000000000000000000000000000000000000000..15495b3a9a8292401f7a28051e8059c220abb7e0 --- /dev/null +++ b/newip/src/linux-5.10/include/uapi/linux/newip_route.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. + * + * Linux NewIP INET implementation + * + * Based on include/uapi/linux/ipv6_route.h + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ +#ifndef _UAPI_LINUX_NEWIP_ROUTE_H +#define _UAPI_LINUX_NEWIP_ROUTE_H + +#include "nip_addr.h" + +struct nip_rtmsg { + struct nip_addr rtmsg_dst; + struct nip_addr rtmsg_src; + struct nip_addr rtmsg_gateway; + char dev_name[10]; + unsigned int rtmsg_type; + int rtmsg_ifindex; + unsigned int rtmsg_metric; + unsigned long rtmsg_info; + unsigned int rtmsg_flags; +}; +#endif /* _UAPI_LINUX_NEWIP_ROUTE_H */ diff --git a/newip/src/linux-5.10/include/uapi/linux/nip.h b/newip/src/linux-5.10/include/uapi/linux/nip.h new file mode 100644 index 0000000000000000000000000000000000000000..cd96325682c05fa5d1abb7530e685e3d78f14b1e --- /dev/null +++ b/newip/src/linux-5.10/include/uapi/linux/nip.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. + * + * Based on include/uapi/linux/ipv6.h + * Based on include/uapi/linux/in6.h + */ +#ifndef _UAPI_NEWIP_H +#define _UAPI_NEWIP_H + +#include +#include +#include +#include "nip_addr.h" +#include + +struct nip_ifreq { + struct nip_addr ifrn_addr; + int ifrn_ifindex; +}; + +/* The following structure must be larger than V4. System calls use V4. + * If the definition is smaller than V4, the read process will have memory overruns + * v4: include\linux\socket.h --> sockaddr (16Byte) + */ +#define POD_SOCKADDR_SIZE 3 +struct sockaddr_nin { + unsigned short sin_family; /* [2Byte] AF_NINET */ + unsigned short sin_port; /* [2Byte] Transport layer port, big-endian */ + struct nip_addr sin_addr; /* [9Byte] NIP address */ + + unsigned char sin_zero[POD_SOCKADDR_SIZE]; /* [3Byte] Byte alignment */ +}; + +struct nip_devreq { + char nip_ifr_name[IFNAMSIZ]; /* if name, e.g. 
"eth0", "wlan0" */ + + union { + struct sockaddr_nin addr; + short flags; + } devreq; +}; + +#define nip_dev_addr devreq.addr /* nip address */ +#define nip_dev_flags devreq.flags /* net device flags */ + +#endif /* _UAPI_NEWIP_H */ diff --git a/newip/src/linux-5.10/include/uapi/linux/nip_icmp.h b/newip/src/linux-5.10/include/uapi/linux/nip_icmp.h new file mode 100644 index 0000000000000000000000000000000000000000..7970fe14624e77d48cec6a726d5481efd1555e01 --- /dev/null +++ b/newip/src/linux-5.10/include/uapi/linux/nip_icmp.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. + * + * NewIP INET + * An implementation of the TCP/IP protocol suite for the LINUX + * operating system. NewIP INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Definitions for the NewIP ICMP protocol. + * + * Based on include/uapi/linux/icmp.h + */ +#ifndef _UAPI_LINUX_NIP_ICMP_H +#define _UAPI_LINUX_NIP_ICMP_H + +#include +#include + +struct nip_icmp_hdr { + __u8 nip_icmp_type; + __u8 nip_icmp_code; + __sum16 nip_icmp_cksum; +}; + +#endif diff --git a/newip/src/linux-5.10/net/newip/Kconfig b/newip/src/linux-5.10/net/newip/Kconfig new file mode 100644 index 0000000000000000000000000000000000000000..603a4208ddc60b01bb0e2d1243ef765ab020c9c5 --- /dev/null +++ b/newip/src/linux-5.10/net/newip/Kconfig @@ -0,0 +1,24 @@ +# SPDX-License-Identifier: GPL-2.0 +# Copyright (c) 2022 Huawei Device Co., Ltd. +# + +# net/newip/Kconfig +config NEWIP + def_bool $(success,$(srctree)/scripts/ohos-check-dir.sh $(srctree)/net/newip) + depends on INET && NET + help + Support for NewIP. + +config NEWIP_FAST_KEEPALIVE + bool "NewIP fast keepalive" + default n + depends on NEWIP + help + Support for NewIP fast keepalive. 
+ +config NEWIP_HOOKS + def_bool NEWIP && VENDOR_HOOKS + help + Enable NewIP hooks implemented as tracepoints + Allow NewIP modules to attach to tracepoint "hooks" defined via + DECLARE_TRACE or DECLARE_HOOK \ No newline at end of file diff --git a/newip/src/linux-5.10/net/newip/Makefile b/newip/src/linux-5.10/net/newip/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..7d55621c599c7c49b6fab1a1a9803c6d83cd8f02 --- /dev/null +++ b/newip/src/linux-5.10/net/newip/Makefile @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (c) 2022 Huawei Device Co., Ltd. +# +# Makefile for the Linux newip layer +# + +# net/newip/Makefile +obj-$(CONFIG_NEWIP) += newip.o + +newip-objs := nip_addr.o nip_hdr_encap.o nip_hdr_decap.o nip_checksum.o af_ninet.o nip_input.o udp.o protocol.o nip_output.o nip_addrconf.o nip_addrconf_core.o route.o nip_fib.o nip_fib_rules.o nndisc.o icmp.o tcp_nip_parameter.o devninet.o +newip-objs += tcp_nip.o ninet_connection_sock.o ninet_hashtables.o tcp_nip_output.o tcp_nip_input.o tcp_nip_timer.o nip_sockglue.o + +newip-objs += nip_hooks_register.o + diff --git a/newip/src/linux-5.10/net/newip/af_ninet.c b/newip/src/linux-5.10/net/newip/af_ninet.c new file mode 100644 index 0000000000000000000000000000000000000000..f35ea70f4adfe59119a9d1c9b50642ce0e7cccfe --- /dev/null +++ b/newip/src/linux-5.10/net/newip/af_ninet.c @@ -0,0 +1,773 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. 
+ * + * NewIP INET socket protocol family + * Linux NewIP INET implementation + * + * Based on linux/net/ipv6/af_inet6.c + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": [%s:%d] " fmt, __func__, __LINE__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include /* for signal_pending() */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#ifdef CONFIG_NEWIP_HOOKS +#include +#include "nip_hooks_register.h" +#endif +#include "tcp_nip_parameter.h" + +MODULE_DESCRIPTION("NewIP protocol stack"); + +/* The inetsw_nip table contains everything that ninet_create needs to + * build a new socket + */ +static struct list_head inetsw_nip[SOCK_MAX]; +static DEFINE_SPINLOCK(inetsw_nip_lock); +/* count the socket number */ +atomic_t g_nip_socket_number = ATOMIC_INIT(0); + +static int disable_nip_mod; +module_param_named(disable, disable_nip_mod, int, 0444); +MODULE_PARM_DESC(disable, "Disable NewIP module such that it is non_functional"); + +bool newip_mod_enabled(void) +{ + return disable_nip_mod == 0; +} +EXPORT_SYMBOL_GPL(newip_mod_enabled); + +static int ninet_create(struct net *net, struct socket *sock, int protocol, + int kern) +{ + struct inet_sock *inet; + struct sock *sk; + struct inet_protosw *answer; + struct proto *answer_prot; + unsigned char answer_flags; + int err; + int num; + + if (protocol < 0 || + protocol >= IPPROTO_MAX || + sock->type >= SOCK_MAX) + return -EINVAL; + + num = atomic_add_return(1, &g_nip_socket_number); + if (num > NIP_MAX_SOCKET_NUM) { + nip_dbg("The number of socket is biger than %u", NIP_MAX_SOCKET_NUM); + err = -EPERM; + goto number_sub; + } + + sock->state = SS_UNCONNECTED; + /* look for the requested type/protocol pair. 
*/ + err = -ESOCKTNOSUPPORT; + rcu_read_lock(); + list_for_each_entry_rcu(answer, &inetsw_nip[sock->type], list) { + err = 0; + /* Check the non-wild matcg */ + if (protocol == answer->protocol) { + if (protocol != IPPROTO_IP) + break; + } else { + /* check for the two wild case. */ + if (protocol == IPPROTO_IP) { + protocol = answer->protocol; + break; + } + if (answer->protocol == IPPROTO_IP) + break; + } + err = -EPROTONOSUPPORT; + } + + if (err) + goto out_rcu_unlock; + + sock->ops = answer->ops; + answer_prot = answer->prot; + answer_flags = answer->flags; + rcu_read_unlock(); + + WARN_ON(!answer_prot->slab); + + err = -ENOBUFS; + sk = sk_alloc(net, PF_NINET, GFP_KERNEL, answer_prot, kern); + if (!sk) + goto number_sub; + + sock_init_data(sock, sk); + + err = 0; + if (answer_flags & INET_PROTOSW_REUSE) + sk->sk_reuse = SK_CAN_REUSE; + inet = inet_sk(sk); + inet->is_icsk = (answer_flags & INET_PROTOSW_ICSK) != 0; + inet->nodefrag = 0; + + if (sock->type == SOCK_RAW) { + inet->inet_num = protocol; + if (protocol == IPPROTO_RAW) + inet->hdrincl = 1; + } + + sk->sk_destruct = inet_sock_destruct; + sk->sk_family = PF_NINET; + sk->sk_protocol = protocol; + sk->sk_backlog_rcv = answer->prot->backlog_rcv; + sk->sk_nip_daddr = nip_any_addr; + sk->sk_nip_rcv_saddr = nip_any_addr; + + inet->uc_ttl = -1; + inet->mc_loop = 1; + inet->mc_ttl = 1; + inet->mc_all = 1; + inet->mc_index = 0; + inet->mc_list = NULL; + inet->rcv_tos = 0; + sk_refcnt_debug_inc(sk); + + if (inet->inet_num) { + inet->inet_sport = htons(inet->inet_num); + err = sk->sk_prot->hash(sk); + if (err) { + sk_common_release(sk); + goto number_sub; + } + } + if (sk->sk_prot->init) { + err = sk->sk_prot->init(sk); + if (err) { + sk_common_release(sk); + goto number_sub; + } + } +out: + nip_dbg("The final number of socket is: %d", num); + return err; +out_rcu_unlock: + rcu_read_unlock(); +number_sub: + atomic_dec_if_positive(&g_nip_socket_number); + num = atomic_read(&g_nip_socket_number); + nip_dbg("[error] 
The final number of socket is: %d (after dec)", num); + goto out; +} + +int ninet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) +{ + struct sockaddr_nin *addr = (struct sockaddr_nin *)uaddr; + struct sock *sk = sock->sk; + struct inet_sock *inet = inet_sk(sk); + struct net *net = sock_net(sk); + u_short snum; + int err = 0; + + /* If the socket has its own bind function then use it */ + if (sk->sk_prot->bind) + return sk->sk_prot->bind(sk, uaddr, addr_len); + + if (addr_len < sizeof(struct sockaddr_nin)) + return -EINVAL; + + snum = ntohs(addr->sin_port); + if (snum && snum < PROT_SOCK) + return -EACCES; + + if (nip_bind_addr_check(net, &addr->sin_addr) == false) { + nip_dbg("binding-addr invalid, bitlen=%u", addr->sin_addr.bitlen); + return -EADDRNOTAVAIL; + } + lock_sock(sk); + + /* check these errors (active socket, double bind) */ + if (sk->sk_state != TCP_CLOSE || inet->inet_num) { + err = -EINVAL; + goto out; + } + + sk->sk_nip_rcv_saddr = addr->sin_addr; + + /* make sure we are allowed to bind here */ + if ((snum || !inet->bind_address_no_port) && + sk->sk_prot->get_port(sk, snum)) { + inet->inet_saddr = 0; + err = -EADDRINUSE; + goto out; + } + inet->inet_sport = htons(inet->inet_num); + inet->inet_daddr = 0; + inet->inet_dport = 0; + sk_dst_reset(sk); + +out: + release_sock(sk); + return err; +} + +/* Function + * Move a socket into listening state. 
+ * Parameter + * sock: The socket + * backlog: Specifies the number of clients that use a three-way handshake + * to establish a TCP connection + */ +int ninet_listen(struct socket *sock, int backlog) +{ + struct sock *sk = sock->sk; + unsigned char old_state; + int err; + + lock_sock(sk); + + err = -EINVAL; + if (sock->state != SS_UNCONNECTED || sock->type != SOCK_STREAM) + goto out; + + old_state = sk->sk_state; + if (!((1 << old_state) & (TCPF_CLOSE | TCPF_LISTEN))) + goto out; + + WRITE_ONCE(sk->sk_max_ack_backlog, backlog); + /* Really, if the socket is already in listen state + * we can only allow the backlog to be adjusted. + */ + if (old_state != TCP_LISTEN) { + err = inet_csk_listen_start(sk, backlog); + if (err) + goto out; + } + err = 0; + +out: + release_sock(sk); + return err; +} + +int ninet_release(struct socket *sock) +{ + struct sock *sk = sock->sk; + + if (!sk) + return -EINVAL; + + atomic_dec_if_positive(&g_nip_socket_number); + return inet_release(sock); +} + +void ninet_destroy_sock(struct sock *sk) +{ + ; +} + +int ninet_getname(struct socket *sock, struct sockaddr *uaddr, + int peer) +{ + struct sock *sk = sock->sk; + struct inet_sock *inet = inet_sk(sk); + DECLARE_SOCKADDR(struct sockaddr_nin *, sin, uaddr); + + sin->sin_family = AF_NINET; + if (peer) { + if (!inet->inet_dport) + return -ENOTCONN; + if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)) && + peer == 1) + return -ENOTCONN; + sin->sin_port = inet->inet_dport; + sin->sin_addr = sk->sk_nip_daddr; + } else { + sin->sin_port = inet->inet_sport; + sin->sin_addr = sk->sk_nip_rcv_saddr; + } + return sizeof(*sin); +} + +static long ninet_wait_for_connect(struct sock *sk, long timeo, int writebias) +{ + DEFINE_WAIT_FUNC(wait, woken_wake_function); + + add_wait_queue(sk_sleep(sk), &wait); + sk->sk_write_pending += writebias; + + /* Basic assumption: if someone sets sk->sk_err, he _must_ + * change state of the socket from TCP_SYN_*. 
+ * Connect() does not allow to get error notifications + * without closing the socket. + */ + while ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { + release_sock(sk); + timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo); + lock_sock(sk); + if (signal_pending(current) || !timeo) + break; + } + remove_wait_queue(sk_sleep(sk), &wait); + sk->sk_write_pending -= writebias; + return timeo; +} + +/* Function + * The client socket layer is used to establish connection requests + * Parameter + * sock: The socket + * uaddr:The destination address + */ +int __ninet_stream_connect(struct socket *sock, struct sockaddr *uaddr, + int addr_len, int flags) +{ + struct sock *sk = sock->sk; + int err; + long timeo; + + if (uaddr) { + if (addr_len < sizeof(uaddr->sa_family)) + return -EINVAL; + } + + switch (sock->state) { + default: + err = -EINVAL; + goto out; + case SS_CONNECTED: + err = -EISCONN; + goto out; + case SS_CONNECTING: + err = -EALREADY; + break; + case SS_UNCONNECTED: + err = -EISCONN; + if (sk->sk_state != TCP_CLOSE) + goto out; + /* Call the tcp_nip_connect function */ + err = sk->sk_prot->connect(sk, uaddr, addr_len); + if (err < 0) + goto out; + /* Switch to connecting, and then perform subsequent operations */ + sock->state = SS_CONNECTING; + err = -EINPROGRESS; + break; + } + + /* Get blocking time */ + timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); + if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { + int writebias = 0; + /* Error code is set above */ + if (!timeo || !ninet_wait_for_connect(sk, timeo, writebias)) + goto out; + + err = sock_intr_errno(timeo); + if (signal_pending(current)) + goto out; + } + + if (sk->sk_state == TCP_CLOSE) + goto sock_error; + sock->state = SS_CONNECTED; + err = 0; + +out: + return err; +sock_error: + err = sock_error(sk) ? 
: -ECONNABORTED; + sock->state = SS_DISCONNECTING; + goto out; +} + +int ninet_stream_connect(struct socket *sock, struct sockaddr *uaddr, + int addr_len, int flags) +{ + int err; + + lock_sock(sock->sk); + err = __ninet_stream_connect(sock, uaddr, addr_len, flags); + release_sock(sock->sk); + return err; +} + +int ninet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) +{ + struct sock *sk = sock->sk; + struct net *net = sock_net(sk); + + nip_dbg("cmd=0x%x", cmd); + switch (cmd) { + case SIOCADDRT: + case SIOCDELRT: { + struct nip_rtmsg rtmsg; + + if (copy_from_user(&rtmsg, (void __user *)arg, sizeof(rtmsg))) { + nip_dbg("fail to copy route cfg data"); + return -EFAULT; + } + return nip_route_ioctl(net, cmd, &rtmsg); + } + case SIOCSIFADDR: + return nip_addrconf_add_ifaddr(net, (void __user *)arg); + case SIOCDIFADDR: + return nip_addrconf_del_ifaddr(net, (void __user *)arg); + case SIOCGIFADDR: + return nip_addrconf_get_ifaddr(net, cmd, (void __user *)arg); + + default: + if (!sk->sk_prot->ioctl) { + nip_dbg("sock sk_prot ioctl is null, cmd=0x%x", cmd); + return -ENOIOCTLCMD; + } + return sk->sk_prot->ioctl(sk, cmd, arg); + } +} + +#ifdef CONFIG_COMPAT +struct compat_nip_rtmsg { + struct nip_addr rtmsg_dst; + struct nip_addr rtmsg_src; + struct nip_addr rtmsg_gateway; + char dev_name[10]; + unsigned int rtmsg_type; + int rtmsg_ifindex; + unsigned int rtmsg_metric; + unsigned int rtmsg_info; /* long convert to int */ + unsigned int rtmsg_flags; +}; + +static int ninet_compat_routing_ioctl(struct sock *sk, unsigned int cmd, + struct compat_nip_rtmsg __user *ur) +{ + struct nip_rtmsg rt; + + if (copy_from_user(&rt.rtmsg_dst, &ur->rtmsg_dst, 3 * sizeof(struct nip_addr)) || + copy_from_user(&rt.dev_name, &ur->dev_name, sizeof(rt.dev_name)) || + get_user(rt.rtmsg_type, &ur->rtmsg_type) || + get_user(rt.rtmsg_ifindex, &ur->rtmsg_ifindex) || + get_user(rt.rtmsg_metric, &ur->rtmsg_metric) || + get_user(rt.rtmsg_info, &ur->rtmsg_info) || + 
get_user(rt.rtmsg_flags, &ur->rtmsg_flags)) { + nip_dbg("fail to convert input para, cmd=0x%x", cmd); + return -EFAULT; + } + + nip_dbg("cmd=0x%x", cmd); + return nip_route_ioctl(sock_net(sk), cmd, &rt); +} + +int ninet_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) +{ + void __user *argp = compat_ptr(arg); + struct sock *sk = sock->sk; + + switch (cmd) { + case SIOCADDRT: + case SIOCDELRT: + return ninet_compat_routing_ioctl(sk, cmd, argp); + default: + return -ENOIOCTLCMD; + } +} +EXPORT_SYMBOL_GPL(ninet_compat_ioctl); +#endif /* CONFIG_COMPAT */ + +/* register new IP socket */ +const struct proto_ops ninet_dgram_ops = { + .family = PF_NINET, + .owner = THIS_MODULE, + .release = ninet_release, + .bind = ninet_bind, + .connect = inet_dgram_connect, + .socketpair = sock_no_socketpair, + .accept = sock_no_accept, + .getname = ninet_getname, + .poll = datagram_poll, + .ioctl = ninet_ioctl, + .gettstamp = sock_gettstamp, + .listen = sock_no_listen, + .shutdown = inet_shutdown, + .setsockopt = sock_common_setsockopt, + .getsockopt = sock_common_getsockopt, + .sendmsg = inet_sendmsg, + .recvmsg = inet_recvmsg, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, + .set_peek_off = sk_set_peek_off, +#ifdef CONFIG_COMPAT + .compat_ioctl = ninet_compat_ioctl, +#endif +}; + +const struct proto_ops ninet_stream_ops = { + .family = PF_NINET, + .owner = THIS_MODULE, + .release = ninet_release, + .bind = ninet_bind, + .connect = ninet_stream_connect, + .socketpair = sock_no_socketpair, + .accept = inet_accept, + .getname = ninet_getname, + .poll = tcp_poll, + .ioctl = ninet_ioctl, + .listen = ninet_listen, + .shutdown = inet_shutdown, + .setsockopt = sock_common_setsockopt, + .getsockopt = sock_common_getsockopt, + .sendmsg = inet_sendmsg, + .recvmsg = inet_recvmsg, + .mmap = sock_no_mmap, + .sendpage = inet_sendpage, +#ifdef CONFIG_COMPAT + .compat_ioctl = ninet_compat_ioctl, +#endif +}; + +static const struct net_proto_family ninet_family_ops = { + 
.family = PF_NINET, + .create = ninet_create, + .owner = THIS_MODULE, +}; + +int ninet_register_protosw(struct inet_protosw *p) +{ + struct list_head *lh; + struct inet_protosw *answer; + struct list_head *last_perm; + int protocol = p->protocol; + int ret; + + spin_lock_bh(&inetsw_nip_lock); + + ret = -EINVAL; + if (p->type >= SOCK_MAX) + goto out_illegal; + + /* If we are trying to override a permanent protocol, bail. */ + answer = NULL; + ret = -EPERM; + last_perm = &inetsw_nip[p->type]; + list_for_each(lh, &inetsw_nip[p->type]) { + answer = list_entry(lh, struct inet_protosw, list); + + /* Check only the non-wild match. */ + if (answer->flags & INET_PROTOSW_PERMANENT) { + if (protocol == answer->protocol) + break; + last_perm = lh; + } + + answer = NULL; + } + if (answer) + goto out_permanent; + + list_add_rcu(&p->list, last_perm); + ret = 0; +out: + spin_unlock_bh(&inetsw_nip_lock); + return ret; + +out_permanent: + nip_dbg("Attempt to override permanent protocol %d", protocol); + goto out; + +out_illegal: + nip_dbg("Ignoring attempt to register invalid socket type %d", p->type); + goto out; +} + +void ninet_unregister_protosw(struct inet_protosw *p) +{ + if (INET_PROTOSW_PERMANENT & p->flags) { + nip_dbg("Attempt to unregister permanent protocol %d", p->protocol); + } else { + spin_lock_bh(&inetsw_nip_lock); + list_del_rcu(&p->list); + spin_unlock_bh(&inetsw_nip_lock); + + synchronize_net(); + } +} + +int ninet_sk_rebuild_header(struct sock *sk) +{ + return 0; +} + +/* register to data link layer */ +static struct packet_type nip_packet_type __read_mostly = { + .type = cpu_to_be16(ETH_P_NEWIP), + .func = nip_rcv, +}; + +static int __init nip_packet_init(void) +{ + dev_add_pack(&nip_packet_type); + return 0; +} + +static int __net_init ninet_net_init(struct net *net) +{ + int err = 0; + return err; +} + +static void __net_exit ninet_net_exit(struct net *net) +{ + ; +} + +static struct pernet_operations ninet_net_ops = { + .init = ninet_net_init, + .exit = 
ninet_net_exit, +}; + +static int __init ninet_init(void) +{ + struct list_head *r; + int err = 0; + + sock_skb_cb_check_size(sizeof(struct ninet_skb_parm)); + + nip_dbg("NET: start to init nip network"); + /* register the socket-side information for ninet_create */ + for (r = &inetsw_nip[0]; r < &inetsw_nip[SOCK_MAX]; ++r) + INIT_LIST_HEAD(r); + + if (!newip_mod_enabled()) { + nip_dbg("Loaded, but administratively disabled, reboot required to enable"); + goto out; + } + + err = proto_register(&tcp_nip_prot, 1); + if (err) + goto out; + + err = proto_register(&nip_udp_prot, 1); + if (err) { + nip_dbg("failed to register udp proto"); + goto out_udp_register_fail; + } + + err = sock_register(&ninet_family_ops); + if (err) { + nip_dbg("failed to register newip_family_ops"); + goto out_sock_register_fail; + } + + err = register_pernet_subsys(&ninet_net_ops); + if (err) { + nip_dbg("failed to register ninet_net_ops"); + goto register_pernet_fail; + } + + err = nip_icmp_init(); + if (err) { + nip_dbg("nip_icmp_init failed"); + goto nip_icmp_fail; + } + + err = nndisc_init(); + if (err) { + nip_dbg("nndisc_init failed"); + goto nndisc_fail; + } + + err = nip_route_init(); + if (err) + goto nip_route_fail; + + err = nip_addrconf_init(); + if (err) + goto nip_addr_fail; + + err = nip_udp_init(); + if (err) { + nip_dbg("failed to init udp layer"); + goto udp_fail; + } + + err = tcp_nip_init(); + if (err) { + nip_dbg("failed to init tcp layer"); + goto tcp_fail; + } else { + nip_dbg("nip_tcp_init ok"); + } + + err = nip_packet_init(); + if (err) { + nip_dbg("failed to register to l2 layer"); + goto nip_packet_fail; + } + +#ifdef CONFIG_NEWIP_HOOKS + err = ninet_hooks_register(); + if (err) { + nip_dbg("failed to register to nip hooks"); + goto nip_packet_fail; + } +#endif + nip_dbg("init newip address family ok"); + +out: + return err; + +nip_packet_fail: + tcp_nip_exit(); +tcp_fail: + nip_udp_exit(); +udp_fail: + nip_addrconf_cleanup(); +nip_addr_fail: + nip_route_cleanup(); 
+nip_route_fail: +nndisc_fail: +nip_icmp_fail: + unregister_pernet_subsys(&ninet_net_ops); +register_pernet_fail: + sock_unregister(PF_NINET); +out_sock_register_fail: + proto_unregister(&nip_udp_prot); +out_udp_register_fail: + nip_dbg("newip family init failed"); + goto out; +} + +module_init(ninet_init); + +MODULE_ALIAS_NETPROTO(PF_NINET); + diff --git a/newip/src/linux-5.10/net/newip/devninet.c b/newip/src/linux-5.10/net/newip/devninet.c new file mode 100644 index 0000000000000000000000000000000000000000..272cf0522526e85e13f43428b714a1aac682d220 --- /dev/null +++ b/newip/src/linux-5.10/net/newip/devninet.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. + * + * NewIP INET + * An implementation of the TCP/IP protocol suite for the LINUX + * operating system. NewIP INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * DEVICE - NEWIP device support. + * Based on net/ipv4/devinet.c + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": [%s:%d] " fmt, __func__, __LINE__ + +#include +#include +#include "tcp_nip_parameter.h" + +int ninet_gifconf(struct net_device *dev, char __user *buf, int len, int size) +{ + struct ninet_dev *nin_dev = __nin_dev_get(dev); + const struct ninet_ifaddr *ifa; + struct ifreq ifr; + int done = 0; + + if (WARN_ON(size > sizeof(struct ifreq))) + goto out; + if (!nin_dev) + goto out; + + list_for_each_entry(ifa, &nin_dev->addr_list, if_list) { + ifa = rcu_dereference_protected(ifa, lockdep_is_held(&ifa->lock)); + if (!ifa) { + done = -EFAULT; + break; + } + if (!buf) { + done += size; + continue; + } + if (len < size) + break; + memset(&ifr, 0, sizeof(struct ifreq)); + strcpy(ifr.ifr_name, ifa->rt->dst.dev->name); + + (*(struct sockaddr_nin *)&ifr.ifr_addr).sin_family = AF_NINET; + memcpy(&((struct sockaddr_nin *)&ifr.ifr_addr)->sin_addr, &ifa->addr, + sizeof(struct nip_addr)); + + if (copy_to_user(buf + done, &ifr, size)) { + 
done = -EFAULT; + break; + } + len -= size; + done += size; + } +out: + return done; +} + diff --git a/newip/src/linux-5.10/net/newip/icmp.c b/newip/src/linux-5.10/net/newip/icmp.c new file mode 100644 index 0000000000000000000000000000000000000000..0b388effd725d0726f52c6fd239ecf9959f525f1 --- /dev/null +++ b/newip/src/linux-5.10/net/newip/icmp.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. + * + * Internet Control Message Protocol (NewIP ICMP) + * Linux NewIP INET implementation + * + * Based on net/ipv6/icmp.c + * Based on net/ipv4/af_inet.c + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": [%s:%d] " fmt, __func__, __LINE__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "nip_hdr.h" +#include "tcp_nip_parameter.h" + +int nip_icmp_rcv(struct sk_buff *skb) +{ + int ret = 0; + struct nip_icmp_hdr *hdr = nip_icmp_header(skb); + u8 type = hdr->nip_icmp_type; + + nip_dbg("rcv newip icmp packet. type=%u", type); + switch (type) { + case NIP_ARP_NS: + case NIP_ARP_NA: + ret = nndisc_rcv(skb); + break; + default: + nip_dbg("nip icmp packet type error"); + } + return ret; +} + +static const struct ninet_protocol nip_icmp_protocol = { + .handler = nip_icmp_rcv, + .flags = 0, +}; + +int __init nip_icmp_init(void) +{ + int ret; + + ret = ninet_add_protocol(&nip_icmp_protocol, IPPROTO_NIP_ICMP); + return ret; +} diff --git a/newip/src/linux-5.10/net/newip/ninet_connection_sock.c b/newip/src/linux-5.10/net/newip/ninet_connection_sock.c new file mode 100644 index 0000000000000000000000000000000000000000..c0dd907f11eac49b115ce753f4c2940341ff0640 --- /dev/null +++ b/newip/src/linux-5.10/net/newip/ninet_connection_sock.c @@ -0,0 +1,93 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. 
+ * + * NewIP INET + * An implementation of the TCP/IP protocol suite for the LINUX + * operating system. NewIP INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Support for NewIP INET connection oriented protocols. + * + * Based on net/ipv4/inet_connection_sock.c + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": [%s:%d] " fmt, __func__, __LINE__ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include "tcp_nip_parameter.h" + +/* Function + * Timeout handler for request processing, used to retransmit SYN+ACK + * Parameter + * t: Request control block + */ +static void ninet_reqsk_timer_handler(struct timer_list *t) +{ + struct request_sock *req = from_timer(req, t, rsk_timer); + struct sock *sk_listener = req->rsk_listener; + struct net *net = sock_net(sk_listener); + struct inet_connection_sock *icsk = inet_csk(sk_listener); + struct request_sock_queue *queue = &icsk->icsk_accept_queue; + int max_retries, thresh; + + /* Defines the maximum number of retransmissions. Thresh defaults to 5 */ + max_retries = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_synack_retries; + thresh = max_retries; + + /* Check timeout times. 
SYN+ACK retransmission times +1 */ + if (req->num_timeout <= thresh) { + unsigned long timeo; + + req->rsk_ops->rtx_syn_ack(sk_listener, req); + req->num_retrans++; + /* If the number of times out is still 0, the number is increased by 1 + * to determine whether it is the first time out + */ + if (req->num_timeout++ == 0) + atomic_dec(&queue->young); + timeo = min(TCP_TIMEOUT_INIT, TCP_RTO_MAX); + mod_timer(&req->rsk_timer, jiffies + timeo); + return; + } + + inet_csk_reqsk_queue_drop_and_put(sk_listener, req); +} + +/* Function + * Add request_SOCK to the connection queue and ehash table, + * and set the SYNACK timeout retransmission timer + * Parameter + * sk: Transmission control block + * req: Connection request block + * timeout: The initial timeout period + */ +void ninet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, + unsigned long timeout) +{ + req->num_retrans = 0; + req->num_timeout = 0; + req->sk = NULL; + + timer_setup(&req->rsk_timer, ninet_reqsk_timer_handler, + TIMER_PINNED); + mod_timer(&req->rsk_timer, jiffies + timeout); + + inet_ehash_insert(req_to_sk(req), NULL, NULL); + + smp_wmb(); /* memory barrier */ + refcount_set(&req->rsk_refcnt, TCP_NUM_2 + 1); + + inet_csk_reqsk_queue_added(sk); +} + diff --git a/newip/src/linux-5.10/net/newip/ninet_hashtables.c b/newip/src/linux-5.10/net/newip/ninet_hashtables.c new file mode 100644 index 0000000000000000000000000000000000000000..908aecbfde6e38ca7c88c1c666126afdde6ba308 --- /dev/null +++ b/newip/src/linux-5.10/net/newip/ninet_hashtables.c @@ -0,0 +1,487 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. + * + * NewIP INET + * An implementation of the TCP/IP protocol suite for the LINUX + * operating system. NewIP INET is implemented using the BSD Socket + * interface as the means of communication with the user level. 
+ * + * Generic NewIP INET transport hashtables + * + * Based on net/ipv4/inet_hashtables.c + * Based on net/ipv6/inet6_hashtables.c + * Based on include/net/ip.h + * Based on include/net/ipv6.h + * Based on net/core/secure_seq.c + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": [%s:%d] " fmt, __func__, __LINE__ + +#include +#include + +#include +#include +#include +#include +#include +#include "tcp_nip_parameter.h" + +static siphash_key_t net_secret __read_mostly; + +static __always_inline void net_secret_init(void) +{ + net_get_random_once(&net_secret, sizeof(net_secret)); +} + +#ifdef CONFIG_INET +static u32 seq_scale(u32 seq) +{ + /* As close as possible to RFC 793, which + * suggests using a 250 kHz clock. + * Further reading shows this assumes 2 Mb/s networks. + * or 10 Mb/s Ethernet, a 1 MHz clock is appropriate. + * For 10 Gb/s Ethernet, a 1 GHz clock should be ok, but + * we also need to limit the resolution so that the u32 seq + * overlaps less than one time per MSL (2 minutes). + * Choosing a clock of 64 ns period is OK. 
(period of 274 s) + */ + return seq + (ktime_get_real_ns() >> 6); +} +#endif + +__u32 secure_tcp_nip_sequence_number(const __be32 *saddr, const __be32 *daddr, + __be16 sport, __be16 dport) +{ + const struct { + struct nip_addr saddr; + struct nip_addr daddr; + __be16 sport; + __be16 dport; + } __aligned(SIPHASH_ALIGNMENT) combined = { + .saddr = *(struct nip_addr *)saddr, + .daddr = *(struct nip_addr *)daddr, + .sport = sport, + .dport = dport, + }; + u32 hash; + + net_secret_init(); + hash = siphash(&combined, offsetofend(typeof(combined), dport), + &net_secret); + return seq_scale(hash); +} +EXPORT_SYMBOL_GPL(secure_tcp_nip_sequence_number); + +u64 secure_newip_port_ephemeral(const __be32 *saddr, const __be32 *daddr, + __be16 dport) +{ + const struct { + struct nip_addr saddr; + struct nip_addr daddr; + __be16 dport; + } __aligned(SIPHASH_ALIGNMENT) combined = { + .saddr = *(struct nip_addr *)saddr, + .daddr = *(struct nip_addr *)daddr, + .dport = dport, + }; + net_secret_init(); + return siphash(&combined, offsetofend(typeof(combined), dport), + &net_secret); +} +EXPORT_SYMBOL_GPL(secure_newip_port_ephemeral); + +static inline u32 nip_portaddr_hash(const struct net *net, + const struct nip_addr *saddr, + unsigned int port) +{ + u32 v = (__force u32)saddr->nip_addr_field32[0] ^ (__force u32)saddr->nip_addr_field32[1]; + + return jhash_1word(v, net_hash_mix(net)) ^ port; +} + +static u32 __nip_addr_jhash(const struct nip_addr *a, const u32 initval) +{ + u32 v = (__force u32)a->nip_addr_field32[0] ^ (__force u32)a->nip_addr_field32[1]; + + return jhash_3words(v, + (__force u32)a->nip_addr_field32[0], + (__force u32)a->nip_addr_field32[1], + initval); +} + +static struct inet_listen_hashbucket * +ninet_lhash2_bucket_sk(struct inet_hashinfo *h, struct sock *sk) +{ + u32 hash = nip_portaddr_hash(sock_net(sk), + &sk->sk_nip_rcv_saddr, + inet_sk(sk)->inet_num); + return inet_lhash2_bucket(h, hash); +} + +static void ninet_hash2(struct inet_hashinfo *h, struct sock *sk) 
+{ + struct inet_listen_hashbucket *ilb2; + + if (!h->lhash2) + return; + + ilb2 = ninet_lhash2_bucket_sk(h, sk); + + spin_lock(&ilb2->lock); + hlist_add_head_rcu(&inet_csk(sk)->icsk_listen_portaddr_node, &ilb2->head); + + ilb2->count++; + spin_unlock(&ilb2->lock); +} + +/* Function + * Returns the hash value based on the passed argument + * Parameter + * net: The namespace + * laddr: The destination address + * lport: Destination port + * faddr: Source address + * fport: Source port + */ +u32 ninet_ehashfn(const struct net *net, + const struct nip_addr *laddr, const u16 lport, + const struct nip_addr *faddr, const __be16 fport) +{ + static u32 ninet_ehash_secret __read_mostly; + static u32 ninet_hash_secret __read_mostly; + + u32 lhash, fhash; + + net_get_random_once(&ninet_ehash_secret, sizeof(ninet_ehash_secret)); + net_get_random_once(&ninet_hash_secret, sizeof(ninet_hash_secret)); + + /* Ipv6 uses S6_ADdr32 [3], the last 32bits of the address */ + lhash = (__force u32)laddr->nip_addr_field32[0]; + fhash = __nip_addr_jhash(faddr, ninet_hash_secret); + + return __ninet_ehashfn(lhash, lport, fhash, fport, + ninet_ehash_secret + net_hash_mix(net)); +} + +/* Function + * The socket is put into the Listen hash in case the server finds + * the socket in the second handshake + * Parameter + * sk: Transmission control block + * osk: old socket + */ +int __ninet_hash(struct sock *sk, struct sock *osk) +{ + struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; + struct inet_listen_hashbucket *ilb; + int err = 0; + + if (sk->sk_state != TCP_LISTEN) { + inet_ehash_nolisten(sk, osk, NULL); + return 0; + } + WARN_ON(!sk_unhashed(sk)); + ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; + + spin_lock(&ilb->lock); + + __sk_nulls_add_node_rcu(sk, &ilb->nulls_head); + + ninet_hash2(hashinfo, sk); + ilb->count++; + sock_set_flag(sk, SOCK_RCU_FREE); + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); + + spin_unlock(&ilb->lock); + + return err; +} + +int 
ninet_hash(struct sock *sk) +{ + int err = 0; + + if (sk->sk_state != TCP_CLOSE) { + local_bh_disable(); + err = __ninet_hash(sk, NULL); + local_bh_enable(); + } + + return err; +} + +static void ninet_unhash2(struct inet_hashinfo *h, struct sock *sk) +{ + struct inet_listen_hashbucket *ilb2; + + if (!h->lhash2 || + WARN_ON_ONCE(hlist_unhashed(&inet_csk(sk)->icsk_listen_portaddr_node))) + return; + + ilb2 = ninet_lhash2_bucket_sk(h, sk); + + spin_lock(&ilb2->lock); + hlist_del_init_rcu(&inet_csk(sk)->icsk_listen_portaddr_node); + ilb2->count--; + spin_unlock(&ilb2->lock); +} + +void ninet_unhash(struct sock *sk) +{ + struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; + struct inet_listen_hashbucket *ilb = NULL; + spinlock_t *lock; /* Spin lock (note deleted alarm) */ + + if (sk_unhashed(sk)) + return; + + if (sk->sk_state == TCP_LISTEN) { + ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; + lock = &ilb->lock; + } else { + lock = inet_ehash_lockp(hashinfo, sk->sk_hash); + } + spin_lock_bh(lock); + if (sk_unhashed(sk)) + goto unlock; + + if (ilb) { + ninet_unhash2(hashinfo, sk); + ilb->count--; + } + __sk_nulls_del_node_init_rcu(sk); + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); + +unlock: + spin_unlock_bh(lock); +} + +/* Function + * Find transport control blocks based on address and port in the ehash table. + * If found, three handshakes have been made and a connection has been established, + * and normal communication can proceed. + * Parameter + * net: The namespace + * hashinfo: A global scalar of type tcp_hashinfo that stores tcp_SOCK(including ESTABLISHED, + * listen, and bind) for various states of the current system. 
+ * saddr: Source address + * sport: Source port + * daddr: The destination address + * hnum: Destination port + */ +struct sock *__ninet_lookup_established(struct net *net, + struct inet_hashinfo *hashinfo, + const struct nip_addr *saddr, + const __be16 sport, + const struct nip_addr *daddr, + const u16 hnum, + const int dif) +{ + struct sock *sk; + const struct hlist_nulls_node *node; + + const __portpair ports = INET_COMBINED_PORTS(sport, hnum); + + unsigned int hash = ninet_ehashfn(net, daddr, hnum, saddr, sport); + unsigned int slot = hash & hashinfo->ehash_mask; + + struct inet_ehash_bucket *head = &hashinfo->ehash[slot]; + +begin: + sk_nulls_for_each_rcu(sk, node, &head->chain) { + if (sk->sk_hash != hash) + continue; + if (!NINET_MATCH(sk, net, saddr, daddr, ports, dif)) + continue; + if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt))) { + nip_dbg("sk->sk_refcnt == 0"); + goto out; + } + + if (unlikely(!NINET_MATCH(sk, net, saddr, daddr, ports, dif))) { + sock_gen_put(sk); + goto begin; + } + goto found; + } + if (get_nulls_value(node) != slot) + goto begin; +out: + sk = NULL; +found: + return sk; +} + +static inline int nip_tcp_compute_score(struct sock *sk, struct net *net, + const unsigned short hnum, + const struct nip_addr *daddr, + const int dif, int sdif) +{ + int score = -1; + + if (inet_sk(sk)->inet_num == hnum && sk->sk_family == PF_NINET && + net_eq(sock_net(sk), net)) { + score = 1; + if (!nip_addr_eq(&sk->sk_nip_rcv_saddr, &nip_any_addr)) { + if (!nip_addr_eq(&sk->sk_nip_rcv_saddr, daddr)) + return -1; + score++; + } + if (!inet_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif)) + return -1; + score++; + if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id()) + score++; + } + + return score; +} + +/* nip reuseport */ +static struct sock *ninet_lhash2_lookup(struct net *net, + struct inet_listen_hashbucket *ilb2, + struct sk_buff *skb, int doff, + const struct nip_addr *saddr, __be16 sport, + const struct nip_addr *daddr, const 
unsigned short hnum, + const int dif, const int sdif) +{ + struct inet_connection_sock *icsk; + struct sock *sk; + struct sock *result = NULL; + int hiscore = 0; + int matches = 0; + int reuseport = 0; + u32 phash = 0; + + inet_lhash2_for_each_icsk_rcu(icsk, &ilb2->head) { + int score; + + sk = (struct sock *)icsk; + score = nip_tcp_compute_score(sk, net, hnum, daddr, dif, sdif); + if (score > hiscore) { + nip_dbg("find sock in lhash table"); + result = sk; + hiscore = score; + reuseport = sk->sk_reuseport; + if (reuseport) { + nip_dbg("find reuseport sock in lhash table"); + phash = ninet_ehashfn(net, daddr, hnum, saddr, sport); + matches = 1; + } + } else if (score == hiscore && reuseport) { + matches++; + if (reciprocal_scale(phash, matches) == 0) + result = sk; + phash = next_pseudo_random32(phash); + } + } + return result; +} + +struct sock *ninet_lookup_listener(struct net *net, + struct inet_hashinfo *hashinfo, + struct sk_buff *skb, int doff, + const struct nip_addr *saddr, + const __be16 sport, const struct nip_addr *daddr, + const unsigned short hnum, const int dif, const int sdif) +{ + struct inet_listen_hashbucket *ilb2; + struct sock *result = NULL; + unsigned int hash2 = nip_portaddr_hash(net, daddr, hnum); + + ilb2 = inet_lhash2_bucket(hashinfo, hash2); + + result = ninet_lhash2_lookup(net, ilb2, skb, doff, + saddr, sport, daddr, hnum, + dif, sdif); + if (result) + goto done; + + hash2 = nip_portaddr_hash(net, &nip_any_addr, hnum); + ilb2 = inet_lhash2_bucket(hashinfo, hash2); + + result = ninet_lhash2_lookup(net, ilb2, skb, doff, + saddr, sport, &nip_any_addr, hnum, + dif, sdif); +done: + if (IS_ERR(result)) + return NULL; + return result; +} + +/* Check whether the quad information in sock is bound by ehash. 
If not, + * the SK is inserted into the ehash and 0 is returned + */ +static int __ninet_check_established(struct inet_timewait_death_row *death_row, + struct sock *sk, const __u16 lport, + struct inet_timewait_sock **twp) +{ + struct inet_hashinfo *hinfo = death_row->hashinfo; + struct inet_sock *inet = inet_sk(sk); + struct nip_addr *daddr = &sk->sk_nip_rcv_saddr; + struct nip_addr *saddr = &sk->sk_nip_daddr; + int dif = sk->sk_bound_dev_if; + struct net *net = sock_net(sk); + const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport); + unsigned int hash = ninet_ehashfn(net, daddr, lport, + saddr, inet->inet_dport); + struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); + spinlock_t *lock = inet_ehash_lockp(hinfo, hash); + struct sock *sk2; + const struct hlist_nulls_node *node; + + spin_lock(lock); + + sk_nulls_for_each(sk2, node, &head->chain) { + if (sk2->sk_hash != hash) + continue; + + if (likely(NINET_MATCH(sk2, net, + saddr, daddr, ports, dif))) { + nip_dbg("found same sk in ehash"); + goto not_unique; + } + } + + /* Must record num and sport now. Otherwise we will see + * in hash table socket with a funny identity. + */ + nip_dbg("add tcp sock into ehash table. 
sport=%u", lport); + inet->inet_num = lport; + inet->inet_sport = htons(lport); + sk->sk_hash = hash; + WARN_ON(!sk_unhashed(sk)); + __sk_nulls_add_node_rcu(sk, &head->chain); + + spin_unlock(lock); + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); + return 0; + +not_unique: + spin_unlock(lock); + return -EADDRNOTAVAIL; +} + +static u64 ninet_sk_port_offset(const struct sock *sk) +{ + const struct inet_sock *inet = inet_sk(sk); + + return secure_newip_port_ephemeral(sk->sk_nip_rcv_saddr.nip_addr_field32, + sk->sk_nip_daddr.nip_addr_field32, + inet->inet_dport); +} + +/* Bind local ports randomly */ +int ninet_hash_connect(struct inet_timewait_death_row *death_row, + struct sock *sk) +{ + u64 port_offset = 0; + + if (!inet_sk(sk)->inet_num) + port_offset = ninet_sk_port_offset(sk); + + return __inet_hash_connect(death_row, sk, port_offset, + __ninet_check_established); +} + diff --git a/newip/src/linux-5.10/net/newip/nip_addrconf.c b/newip/src/linux-5.10/net/newip/nip_addrconf.c new file mode 100644 index 0000000000000000000000000000000000000000..64d953e2db386e5769b7b989ae76901f3c0d9c61 --- /dev/null +++ b/newip/src/linux-5.10/net/newip/nip_addrconf.c @@ -0,0 +1,906 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. 
+ * + * NewIP Address [auto]configuration + * Linux NewIP INET implementation + * + * Based on net/ipv6/addrconf.c + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": [%s:%d] " fmt, __func__, __LINE__ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "nip_hdr.h" +#include "tcp_nip_parameter.h" + +#define INFINITY_LIFE_TIME 0xFFFFFFFF + +/* Configured unicast address hash table */ +static struct hlist_head ninet_addr_lst[NIN_ADDR_HSIZE]; +static DEFINE_SPINLOCK(addrconf_hash_lock); + +static bool nip_chk_same_addr(struct net *net, const struct nip_addr *addr, + const struct net_device *dev); +static int nip_get_firstaddr(const struct net_device *dev, + struct nip_addr *addr); +static int nip_addrconf_ifdown(struct net_device *dev, bool unregister); + +static struct nip_devconf newip_devconf_dflt __read_mostly = { + .forwarding = 0, + .mtu = NIP_MIN_MTU, + .disable_nip = 0, + .ignore_routes_with_linkdown = 0, +}; + +/* Check if link is ready: is it up and is a valid qdisc available */ +static inline bool nip_addrconf_link_ready(const struct net_device *dev) +{ + return netif_oper_up(dev) && !qdisc_tx_is_noop(dev); +} + +static void nip_link_dev_addr(struct ninet_dev *idev, struct ninet_ifaddr *ifp) +{ + list_add_tail(&ifp->if_list, &idev->addr_list); +} + +static u32 ninet_addr_hash(const struct nip_addr *addr) +{ + return hash_32(nip_addr_hash(addr), NIN_ADDR_HSIZE_SHIFT); +} + +static struct ninet_ifaddr *nip_add_addr(struct ninet_dev *idev, + const struct nip_addr *addr, + u32 flags, u32 valid_lft, + u32 preferred_lft) +{ + struct ninet_ifaddr *ifa = NULL; + struct nip_rt_info *rt = NULL; + unsigned int hash; + int err = 0; + + rcu_read_lock_bh(); + + nin_dev_hold(idev); + + if (idev->dead) { + err = -ENODEV; + goto rcu_lock_out; + } + + 
if (idev->cnf.disable_nip) { + err = -EACCES; + goto rcu_lock_out; + } + + spin_lock(&addrconf_hash_lock); + + /* Do not configure two same addresses in a netdevice */ + if (nip_chk_same_addr(dev_net(idev->dev), addr, idev->dev)) { + nip_dbg("already assigned"); + err = -EEXIST; + goto spin_lock_out; + } + + ifa = kzalloc(sizeof(*ifa), GFP_ATOMIC); + if (!ifa) { + /* If you add log here, there will be an alarm: + * WARNING: Possible unnecessary 'out of memory' message + */ + err = -ENOBUFS; + goto spin_lock_out; + } + + rt = nip_addrconf_dst_alloc(idev, addr); + if (IS_ERR(rt)) { + err = PTR_ERR(rt); + goto spin_lock_out; + } + + neigh_parms_data_state_setall(idev->nd_parms); + + ifa->addr = *addr; + + spin_lock_init(&ifa->lock); + INIT_HLIST_NODE(&ifa->addr_lst); + ifa->flags = flags; + ifa->valid_lft = valid_lft; + ifa->preferred_lft = preferred_lft; + ifa->tstamp = jiffies; + ifa->cstamp = ifa->tstamp; + + ifa->rt = rt; + + ifa->idev = idev; + refcount_set(&ifa->refcnt, 1); + + /* Add to big hash table */ + hash = ninet_addr_hash(addr); + + hlist_add_head_rcu(&ifa->addr_lst, &ninet_addr_lst[hash]); + spin_unlock(&addrconf_hash_lock); + + write_lock(&idev->lock); + /* Add to ninet_dev unicast addr list. 
*/ + nip_link_dev_addr(idev, ifa); + + nin_ifa_hold(ifa); + write_unlock(&idev->lock); + +rcu_lock_out: + rcu_read_unlock_bh(); + + if (likely(err == 0)) { + char add_addr[NIP_ADDR_BIT_LEN_MAX] = {0}; + + nip_addr_to_str(addr, add_addr, NIP_ADDR_BIT_LEN_MAX); + nip_dbg("success, %s ifindex=%u (addr=%s, idev->refcnt=%u, ifa->refcnt=%u)", + idev->dev->name, idev->dev->ifindex, add_addr, + refcount_read(&idev->refcnt), refcount_read(&ifa->refcnt)); + } else { + kfree(ifa); + nin_dev_put(idev); + ifa = ERR_PTR(err); + } + + return ifa; +spin_lock_out: + spin_unlock(&addrconf_hash_lock); + goto rcu_lock_out; +} + +static struct ninet_dev *nip_add_dev(struct net_device *dev) +{ + struct ninet_dev *ndev; + int err = -ENOMEM; + + ASSERT_RTNL(); + + if (dev->mtu < NIP_MIN_MTU) + return ERR_PTR(-EINVAL); + + ndev = kzalloc(sizeof(*ndev), GFP_KERNEL); + if (!ndev) + return ERR_PTR(err); + + rwlock_init(&ndev->lock); + ndev->dev = dev; + INIT_LIST_HEAD(&ndev->addr_list); + memcpy(&ndev->cnf, dev_net(dev)->newip.devconf_dflt, sizeof(ndev->cnf)); + + ndev->cnf.mtu = dev->mtu; + ndev->nd_parms = neigh_parms_alloc(dev, &nnd_tbl); + if (!ndev->nd_parms) { + kfree(ndev); + return ERR_PTR(err); + } + + /* We refer to the device */ + dev_hold(dev); + + refcount_set(&ndev->refcnt, 1); + + nip_dbg("init ninet_dev success, set ndev->refcnt=1"); + + if (netif_running(dev) && nip_addrconf_link_ready(dev)) + ndev->if_flags |= IF_READY; + + /* protected by rtnl_lock */ + rcu_assign_pointer(dev->nip_ptr, ndev); + return ndev; +} + +static struct ninet_dev *nip_find_idev(struct net_device *dev) +{ + struct ninet_dev *idev; + + ASSERT_RTNL(); + + idev = __nin_dev_get(dev); + if (!idev) { + idev = nip_add_dev(dev); + if (IS_ERR(idev)) + return NULL; + } + return idev; +} + +static struct ninet_dev *nip_addrconf_add_dev(struct net_device *dev) +{ + struct ninet_dev *idev; + + ASSERT_RTNL(); + + idev = nip_find_idev(dev); + if (!idev) + return ERR_PTR(-ENOBUFS); + + if (idev->cnf.disable_nip) + 
return ERR_PTR(-EACCES); + + return idev; +} + +/* Manual configuration of address on an interface */ +static int ninet_addr_add(struct net *net, int ifindex, + const struct nip_addr *pfx, + __u32 ifa_flags, __u32 preferred_lft, __u32 valid_lft) +{ + struct ninet_ifaddr *ifp; + struct ninet_dev *idev; + struct net_device *dev; + unsigned long timeout; + __u32 ifa_flags_tmp = ifa_flags; + __u32 valid_lft_tmp = valid_lft; + + ASSERT_RTNL(); + + /* check the lifetime */ + if (!valid_lft_tmp || preferred_lft > valid_lft_tmp) + return -EINVAL; + + dev = __dev_get_by_index(net, ifindex); + if (!dev) + return -ENODEV; + + idev = nip_addrconf_add_dev(dev); + if (IS_ERR(idev)) + return PTR_ERR(idev); + + timeout = addrconf_timeout_fixup(valid_lft_tmp, HZ); + if (addrconf_finite_timeout(timeout)) + valid_lft_tmp = timeout; + else + ifa_flags_tmp |= IFA_F_PERMANENT; + + timeout = addrconf_timeout_fixup(preferred_lft, HZ); + if (addrconf_finite_timeout(timeout)) { + if (timeout == 0) + ifa_flags_tmp |= IFA_F_DEPRECATED; + preferred_lft = timeout; + } + + ifp = nip_add_addr(idev, pfx, ifa_flags_tmp, + valid_lft_tmp, + preferred_lft); + if (!IS_ERR(ifp)) { + nin_ifa_put(ifp); + nip_ins_rt(ifp->rt); + nip_dbg("success, ifp->refcnt=%u", refcount_read(&ifp->refcnt)); + return 0; + } + + return PTR_ERR(ifp); +} + +/* Nobody refers to this ifaddr, destroy it */ +void ninet_ifa_finish_destroy(struct ninet_ifaddr *ifp) +{ + WARN_ON(!hlist_unhashed(&ifp->addr_lst)); + + nip_dbg("before idev put. 
idev->refcnt=%u", refcount_read(&ifp->idev->refcnt)); + nin_dev_put(ifp->idev); + nip_rt_put(ifp->rt); + kfree_rcu(ifp, rcu); +} + +static void nip_del_addr(struct ninet_ifaddr *ifp) +{ + int state; + + ASSERT_RTNL(); + + spin_lock_bh(&ifp->lock); + state = ifp->state; + ifp->state = NINET_IFADDR_STATE_DEAD; + spin_unlock_bh(&ifp->lock); + + if (state == NINET_IFADDR_STATE_DEAD) + goto out; + + spin_lock_bh(&addrconf_hash_lock); + hlist_del_init_rcu(&ifp->addr_lst); + spin_unlock_bh(&addrconf_hash_lock); + + write_lock_bh(&ifp->idev->lock); + + list_del_init(&ifp->if_list); + __nin_ifa_put(ifp); + + write_unlock_bh(&ifp->idev->lock); + + if (ifp->rt) { + /* If the ifp - & gt; Rt does not belong to any NIP_FIB_node. + * The DST reference count does not change + */ + if (dst_hold_safe(&ifp->rt->dst)) + nip_del_rt(ifp->rt); + } + +out: + nin_ifa_put(ifp); +} + +static int ninet_addr_del(struct net *net, int ifindex, u32 ifa_flags, + const struct nip_addr *pfx) +{ + struct ninet_ifaddr *ifp; + struct ninet_dev *idev; + struct net_device *dev; + + dev = __dev_get_by_index(net, ifindex); + if (!dev) + return -ENODEV; + + idev = __nin_dev_get(dev); + if (!idev) + return -ENXIO; + + read_lock_bh(&idev->lock); + list_for_each_entry(ifp, &idev->addr_list, if_list) { + if (nip_addr_eq(pfx, &ifp->addr)) { + char addr[NIP_ADDR_BIT_LEN_MAX] = {0}; + + nin_ifa_hold(ifp); + read_unlock_bh(&idev->lock); + + nip_addr_to_str(&ifp->addr, addr, NIP_ADDR_BIT_LEN_MAX); + nip_del_addr(ifp); + nip_dbg("success, %s ifindex=%u (addr=%s, ifp->refcnt=%u, idev->refcnt=%u)", + idev->dev->name, ifindex, addr, refcount_read(&ifp->refcnt), + refcount_read(&idev->refcnt)); + return 0; + } + } + read_unlock_bh(&idev->lock); + return -EADDRNOTAVAIL; +} + +int nip_addrconf_ifaddr_check(struct net *net, void __user *arg, struct nip_ifreq *ireq) +{ + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) { + nip_dbg("not admin can`t cfg"); + return -EPERM; + } + + if (copy_from_user(ireq, arg, sizeof(struct 
nip_ifreq))) { + nip_dbg("fail to copy cfg data"); + return -EFAULT; + } + + if (nip_addr_invalid(&ireq->ifrn_addr)) { + nip_dbg("nip addr invalid, bitlen=%u", ireq->ifrn_addr.bitlen); + return -EFAULT; + } + + if (nip_addr_public(&ireq->ifrn_addr)) { + nip_dbg("The public address cannot be configured"); + return -EFAULT; + } + return 0; +} + +int nip_addrconf_add_ifaddr(struct net *net, void __user *arg) +{ + struct nip_ifreq ireq; + int err; + + err = nip_addrconf_ifaddr_check(net, arg, &ireq); + if (err < 0) { + nip_dbg("The ifaddr check failed"); + return err; + } + + rtnl_lock(); + err = ninet_addr_add(net, ireq.ifrn_ifindex, &ireq.ifrn_addr, + IFA_F_PERMANENT, INFINITY_LIFE_TIME, + INFINITY_LIFE_TIME); + rtnl_unlock(); + return err; +} + +int nip_addrconf_del_ifaddr(struct net *net, void __user *arg) +{ + struct nip_ifreq ireq; + int err; + + err = nip_addrconf_ifaddr_check(net, arg, &ireq); + if (err < 0) { + nip_dbg("The ifaddr check failed"); + return err; + } + + rtnl_lock(); + err = ninet_addr_del(net, ireq.ifrn_ifindex, 0, &ireq.ifrn_addr); + rtnl_unlock(); + return err; +} + +static bool nip_chk_same_addr(struct net *net, const struct nip_addr *addr, + const struct net_device *dev) +{ + unsigned int hash = ninet_addr_hash(addr); + struct ninet_ifaddr *ifp; + + hlist_for_each_entry(ifp, &ninet_addr_lst[hash], addr_lst) { + if (!net_eq(dev_net(ifp->idev->dev), net)) + continue; + if (nip_addr_eq(&ifp->addr, addr)) { + if (!dev || ifp->idev->dev == dev) + return true; + } + } + return false; +} + +static int __nip_get_firstaddr(struct ninet_dev *idev, struct nip_addr *addr) +{ + struct ninet_ifaddr *ifp; + int err = -EADDRNOTAVAIL; + + list_for_each_entry(ifp, &idev->addr_list, if_list) { + *addr = ifp->addr; + err = 0; + break; + } + return err; +} + +static int nip_get_firstaddr(const struct net_device *dev, + struct nip_addr *addr) +{ + struct ninet_dev *idev; + int err = -EADDRNOTAVAIL; + + rcu_read_lock(); + idev = __nin_dev_get(dev); + if (idev) { + 
read_lock_bh(&idev->lock); + err = __nip_get_firstaddr(idev, addr); + read_unlock_bh(&idev->lock); + } + rcu_read_unlock(); + return err; +} + +int nip_dev_get_saddr(struct net *net, const struct net_device *dev, + const struct nip_addr *daddr, struct nip_addr *saddr) +{ + if (!dev || !saddr) + return -EADDRNOTAVAIL; + + return nip_get_firstaddr(dev, saddr); +} + +static int nip_addrconf_notify(struct notifier_block *this, unsigned long event, + void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct ninet_dev *idev = __nin_dev_get(dev); + struct net *net = dev_net(dev); + + switch (event) { + case NETDEV_REGISTER: + if (!idev && dev->mtu >= NIP_MIN_MTU) { + nip_dbg("NIP_ADDRCONF(NETDEV_REGISTER): "); + idev = nip_add_dev(dev); + if (IS_ERR(idev)) + return notifier_from_errno(PTR_ERR(idev)); + } + break; + + case NETDEV_CHANGEMTU: + /* if MTU under NIP_MIN_MTU stop New IP on this interface. */ + if (dev->mtu < NIP_MIN_MTU) { + nip_addrconf_ifdown(dev, dev != net->loopback_dev); + break; + } + + if (idev) { + idev->cnf.mtu = dev->mtu; + break; + } + + /* allocate new idev */ + idev = nip_add_dev(dev); + if (IS_ERR(idev)) + break; + + /* device is still not ready */ + if (!(idev->if_flags & IF_READY)) + break; + + fallthrough; + case NETDEV_UP: + case NETDEV_CHANGE: + if (dev->flags & IFF_SLAVE) + break; + + if (idev && idev->cnf.disable_nip) + break; + + if (event == NETDEV_UP) { + if (!nip_addrconf_link_ready(dev)) { + /* device is not ready yet. */ + nip_dbg("NIP_ADDRCONF(NETDEV_UP)"); + nip_dbg("%s:link is not ready", dev->name); + break; + } + + if (!idev && dev->mtu >= NIP_MIN_MTU) + idev = nip_add_dev(dev); + + if (!IS_ERR_OR_NULL(idev)) + idev->if_flags |= IF_READY; + } else if (event == NETDEV_CHANGE) { + if (!nip_addrconf_link_ready(dev)) + /* device is still not ready. 
*/ + break; + + if (idev) + idev->if_flags |= IF_READY; + + nip_dbg("NIP_ADDRCONF(NETDEV_CHANGE)"); + nip_dbg("%s:link becomes ready", dev->name); + } + + if (!IS_ERR_OR_NULL(idev)) { + /* If the MTU changed during the interface down, + * when the interface up, the changed MTU must be + * reflected in the idev as well as routers. + */ + if (idev->cnf.mtu != dev->mtu && dev->mtu >= NIP_MIN_MTU) + idev->cnf.mtu = dev->mtu; + idev->tstamp = jiffies; + + /* If the changed mtu during down is lower than + * NIP_MIN_MTU stop New IP on this interface. + */ + if (dev->mtu < NIP_MIN_MTU) + nip_addrconf_ifdown(dev, dev != net->loopback_dev); + } + break; + + case NETDEV_DOWN: + case NETDEV_UNREGISTER: + /* Remove all addresses from this interface. */ + nip_addrconf_ifdown(dev, event != NETDEV_DOWN); + break; + default: + break; + } + + return NOTIFY_OK; +} + +static int nip_addrconf_ifdown(struct net_device *dev, bool unregister) +{ + struct net *net = dev_net(dev); + struct ninet_dev *idev = __nin_dev_get(dev); + struct ninet_ifaddr *ifa, *tmp; + struct list_head del_list; + int i; + + ASSERT_RTNL(); + + nip_dbg("%s ifindex=%u, unregister=%u (unregister:1, down:0)", + dev->name, dev->ifindex, unregister); + + nip_rt_ifdown(net, dev); + neigh_ifdown(&nnd_tbl, dev); + if (!idev) + return -ENODEV; + + /* Step 1: remove reference to newip device from parent device. + * Do not dev_put! 
+ */ + if (unregister) { + idev->dead = 1; + + /* protected by rtnl_lock */ + RCU_INIT_POINTER(dev->nip_ptr, NULL); + } + + /* Step 2: clear hash table */ + for (i = 0; i < NIN_ADDR_HSIZE; i++) { + struct hlist_head *h = &ninet_addr_lst[i]; + + spin_lock_bh(&addrconf_hash_lock); + hlist_for_each_entry_rcu(ifa, h, addr_lst) { + if (ifa->idev == idev) { + char addr[NIP_ADDR_BIT_LEN_MAX] = {0}; + + nip_addr_to_str(&ifa->addr, addr, NIP_ADDR_BIT_LEN_MAX); + nip_dbg("clear addr hash table.(addr=%s)", addr); + hlist_del_init_rcu(&ifa->addr_lst); + } + } + spin_unlock_bh(&addrconf_hash_lock); + } + + write_lock_bh(&idev->lock); + + /* Step 2: clear flags for stateless addrconf */ + if (!unregister) + idev->if_flags &= ~(IF_RS_SENT | IF_RA_RCVD | IF_READY); + + /* Step 3: Remove address node from ifa->if_list + * and insert it into the list to be del_list + */ + INIT_LIST_HEAD(&del_list); + list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) { + list_move(&ifa->if_list, &del_list); + + write_unlock_bh(&idev->lock); + spin_lock_bh(&ifa->lock); + ifa->state = NINET_IFADDR_STATE_DEAD; + spin_unlock_bh(&ifa->lock); + write_lock_bh(&idev->lock); + } + write_unlock_bh(&idev->lock); + + /* Step 4: Unchain the node to be deleted and release IFA */ + while (!list_empty(&del_list)) { + ifa = list_first_entry(&del_list, struct ninet_ifaddr, if_list); + list_del(&ifa->if_list); + nin_ifa_put(ifa); + } + + /* Last: Shot the device (if unregistered) */ + if (unregister) { + neigh_parms_release(&nnd_tbl, idev->nd_parms); + neigh_ifdown(&nnd_tbl, dev); + nip_dbg("%s (ifindex=%u) before idev put. 
idev->refcnt=%u", + dev->name, dev->ifindex, refcount_read(&idev->refcnt)); + nin_dev_put(idev); + } + return 0; +} + +static int nip_addr_proc_show(struct seq_file *seq, void *v) +{ + struct net *net = seq->private; + struct ninet_ifaddr *ifp; + int i, j; + + rcu_read_lock(); + for (i = 0; i < NIN_ADDR_HSIZE; i++) + hlist_for_each_entry_rcu(ifp, &ninet_addr_lst[i], addr_lst) { + if (!net_eq(dev_net(ifp->idev->dev), net)) + continue; + + for (j = 0; j < ifp->addr.bitlen / NIP_ADDR_BIT_LEN_8; j++) + seq_printf(seq, "%02x", ifp->addr.nip_addr_field8[j]); + seq_printf(seq, "\t%8s\n", ifp->idev->dev ? ifp->idev->dev->name : ""); + } + + rcu_read_unlock(); + return 0; +} + +static int __net_init nip_addr_net_init(struct net *net) +{ + int err = -ENOMEM; + struct nip_devconf *dflt; + + dflt = kmemdup(&newip_devconf_dflt, + sizeof(newip_devconf_dflt), + GFP_KERNEL); + if (!dflt) + goto err_alloc_dflt; + + net->newip.devconf_dflt = dflt; + + if (!proc_create_net_single("nip_addr", 0444, net->proc_net, + nip_addr_proc_show, NULL)) { + goto err_addr_proc; + } + + return 0; + +err_addr_proc: + kfree(dflt); +err_alloc_dflt: + return err; +} + +static void __net_exit nip_addr_net_exit(struct net *net) +{ + kfree(net->newip.devconf_dflt); + remove_proc_entry("nip_addr", net->proc_net); +} + +static struct pernet_operations nip_route_proc_net_ops = { + .init = nip_addr_net_init, + .exit = nip_addr_net_exit, +}; + +/* addrconf module should be notified of a device going up + */ +static struct notifier_block nip_dev_notf = { + .notifier_call = nip_addrconf_notify, + .priority = ADDRCONF_NOTIFY_PRIORITY, +}; + +int __init nip_addrconf_init(void) +{ + int err; + + err = register_pernet_subsys(&nip_route_proc_net_ops); + if (err < 0) { + nip_dbg("register_pernet_subsys failed"); + goto out; + } + + register_netdevice_notifier(&nip_dev_notf); + +out: + return err; +} + +void nip_addrconf_cleanup(void) +{ + struct net_device *dev; + int i; + + 
unregister_netdevice_notifier(&nip_dev_notf); + unregister_pernet_subsys(&nip_route_proc_net_ops); + + rtnl_lock(); + + /* clean dev list */ + for_each_netdev(&init_net, dev) { + if (!__nin_dev_get(dev)) + continue; + nip_addrconf_ifdown(dev, 1); + } + + /* Check hash table. */ + spin_lock_bh(&addrconf_hash_lock); + for (i = 0; i < NIN_ADDR_HSIZE; i++) + WARN_ON(!hlist_empty(&ninet_addr_lst[i])); + spin_unlock_bh(&addrconf_hash_lock); + rtnl_unlock(); +} + +static int ninet_addr_get(const struct net_device *dev, struct ninet_ifaddr *ifa) +{ + int err; + struct nip_addr addr; + + err = nip_get_firstaddr(dev, &addr); + if (!err) + ifa->addr = addr; + + return err; +} + +int nip_addrconf_get_ifaddr(struct net *net, unsigned int cmd, void __user *arg) +{ + struct nip_devreq ifr; + struct sockaddr_nin *snin; + struct ninet_ifaddr ifa; + struct net_device *dev; + void __user *p = (void __user *)arg; + int ret = -EFAULT; + + if (copy_from_user(&ifr, p, sizeof(struct nip_ifreq))) + goto out; + + ifr.nip_ifr_name[IFNAMSIZ - 1] = 0; + snin = (struct sockaddr_nin *)&ifr.nip_dev_addr; + + nip_dbg("dev name is %s", ifr.nip_ifr_name); + dev_load(net, ifr.nip_ifr_name); + + if (cmd == SIOCGIFADDR) { + memset(snin, 0, sizeof(*snin)); + snin->sin_family = AF_NINET; + } else { + goto out; + } + + rtnl_lock(); + + dev = __dev_get_by_name(net, ifr.nip_ifr_name); + if (!dev) + goto done; + + ret = ninet_addr_get(dev, &ifa); + if (ret) + goto done; + /* Get interface address */ + snin->sin_addr = ifa.addr; + + if (copy_to_user(p, &ifr, sizeof(struct nip_devreq))) + ret = -EFAULT; + +done: + rtnl_unlock(); +out: + return ret; +} + +void nip_addr_to_str(const struct nip_addr *addr, unsigned char *buf, int buf_len) +{ + int i; + int total_len = 0; + int addr_num = addr->bitlen / NIP_ADDR_BIT_LEN_8; + + if (!buf) + return; + + total_len = sprintf(buf, "%s", "0x"); + for (i = 0; (i < addr_num) && (total_len < buf_len); i++) { + int len = sprintf(buf + total_len, "%02x", 
addr->nip_addr_field8[i]); + + if (len <= 0) + break; + total_len += len; + } + + switch (addr_num) { + case NIP_ADDR_LEN_1: + buf[INDEX_2] = '*'; /* 0x*0 ~ 0x*C */ + break; + case NIP_ADDR_LEN_2: + buf[INDEX_2] = '*'; /* 0x**DD ~ 0x**FF */ + buf[INDEX_3] = '*'; + break; + case NIP_ADDR_LEN_3: + buf[INDEX_4] = '*'; /* 0xF1**00 ~ 0xF1**FF */ + buf[INDEX_5] = '*'; + break; + case NIP_ADDR_LEN_5: + buf[INDEX_4] = '*'; /* 0xF2 **** 0000 ~ 0xF2 **** FFFF */ + buf[INDEX_5] = '*'; + buf[INDEX_6] = '*'; + buf[INDEX_7] = '*'; + break; + case NIP_ADDR_LEN_7: + buf[INDEX_4] = '*'; /* 0xF3 **** 0000 0000 ~ 0xF3 **** FFFF FFFF */ + buf[INDEX_5] = '*'; + buf[INDEX_6] = '*'; + buf[INDEX_7] = '*'; + break; + case NIP_ADDR_LEN_8: + buf[INDEX_4] = '*'; /* 0xF4** **** 0000 0000 ~ 0xF4** **** FFFF FFFF */ + buf[INDEX_5] = '*'; + buf[INDEX_6] = '*'; + buf[INDEX_7] = '*'; + buf[INDEX_8] = '*'; + buf[INDEX_9] = '*'; + break; + default: + break; + } +} + diff --git a/newip/src/linux-5.10/net/newip/nip_addrconf_core.c b/newip/src/linux-5.10/net/newip/nip_addrconf_core.c new file mode 100644 index 0000000000000000000000000000000000000000..914886b2f2ce3ed5cef8bd2082ecc9ad6dc41921 --- /dev/null +++ b/newip/src/linux-5.10/net/newip/nip_addrconf_core.c @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. + * + * NewIP library code, needed by static components when full NewIP support is + * not configured or static. 
+ * + * Based on net/ipv6/addrconf_core.c + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": [%s:%d] " fmt, __func__, __LINE__ + +#include +#include +#include +#include +#include "tcp_nip_parameter.h" + +static void nin_dev_finish_destroy_rcu(struct rcu_head *head) +{ + struct ninet_dev *idev = container_of(head, struct ninet_dev, rcu); + + kfree(idev); +} + +void nin_dev_finish_destroy(struct ninet_dev *idev) +{ + struct net_device *dev = idev->dev; + + WARN_ON(!list_empty(&idev->addr_list)); + + dev_put(dev); + if (idev->dead) + call_rcu(&idev->rcu, nin_dev_finish_destroy_rcu); +} + diff --git a/newip/src/linux-5.10/net/newip/nip_fib.c b/newip/src/linux-5.10/net/newip/nip_fib.c new file mode 100644 index 0000000000000000000000000000000000000000..fbf25d2eddcc2cb2ab7786a00633c8b08220dd04 --- /dev/null +++ b/newip/src/linux-5.10/net/newip/nip_fib.c @@ -0,0 +1,325 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. + * + * Linux NewIP INET implementation + * Forwarding Information Database + * + * Based on net/ipv6/ip6_fib.c + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": [%s:%d] " fmt, __func__, __LINE__ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include "tcp_nip_parameter.h" + +static struct kmem_cache *nip_fib_node_kmem __read_mostly; + +struct nip_fib_table *nip_fib_get_table(struct net *net, u32 id) +{ + if (id == NIP_RT_TABLE_MAIN) + return net->newip.nip_fib_main_tbl; + else if (id == NIP_RT_TABLE_LOCAL) + return net->newip.nip_fib_local_tbl; + else + return NULL; +} + +static struct nip_fib_node *nip_node_alloc(void) +{ + struct nip_fib_node *fn; + + fn = kmem_cache_zalloc(nip_fib_node_kmem, GFP_ATOMIC); + + return fn; +} + +void nip_rt_free_pcpu(struct nip_rt_info *non_pcpu_rt) +{ + int cpu; + + if (!non_pcpu_rt->rt_pcpu) + return; + + for_each_possible_cpu(cpu) { + struct nip_rt_info **ppcpu_rt; + struct nip_rt_info *pcpu_rt; + + 
ppcpu_rt = per_cpu_ptr(non_pcpu_rt->rt_pcpu, cpu); + pcpu_rt = *ppcpu_rt; + if (pcpu_rt) { + dst_dev_put(&pcpu_rt->dst); + dst_release(&pcpu_rt->dst); + *ppcpu_rt = NULL; + } + } + + free_percpu(non_pcpu_rt->rt_pcpu); + non_pcpu_rt->rt_pcpu = NULL; +} + +static u32 ninet_route_hash(const struct nip_addr *addr) +{ + return hash_32(nip_addr_hash(addr), NIN_ROUTE_HSIZE_SHIFT); +} + +struct nip_fib_node *nip_fib_locate(struct hlist_head *nip_tb_head, + const struct nip_addr *daddr) +{ + struct nip_fib_node *fib_node; + struct hlist_head *h; + unsigned int hash; + + hash = ninet_route_hash(daddr); + h = &nip_tb_head[hash]; + + hlist_for_each_entry_rcu(fib_node, h, fib_hlist) { + if (nip_addr_eq(&fib_node->nip_route_info->rt_dst, daddr)) + return fib_node; + } + + /* find default route */ + hash = ninet_route_hash(&nip_any_addr); + h = &nip_tb_head[hash]; + + hlist_for_each_entry_rcu(fib_node, h, fib_hlist) { + if (nip_addr_eq(&fib_node->nip_route_info->rt_dst, &nip_any_addr)) + return fib_node; + } + + return NULL; +} + +/* nip_tb_lock must be taken to avoid racing */ +int nip_fib_add(struct nip_fib_table *table, struct nip_rt_info *rt) +{ + struct nip_fib_node *fib_node, *new_node; + int err = 0; + struct hlist_head *h; + unsigned int hash; + char dst[NIP_ADDR_BIT_LEN_MAX] = {0}; + char gateway[NIP_ADDR_BIT_LEN_MAX] = {0}; + + hash = ninet_route_hash(&rt->rt_dst); + h = &table->nip_tb_head[hash]; + + hlist_for_each_entry(fib_node, h, fib_hlist) { + if (table->nip_tb_id == NIP_RT_TABLE_MAIN) { + if (nip_addr_eq(&fib_node->nip_route_info->rt_dst, + &rt->rt_dst)) { + err = -EEXIST; + goto fail; + } + } else if (table->nip_tb_id == NIP_RT_TABLE_LOCAL) { + if (nip_addr_and_ifindex_eq + (&fib_node->nip_route_info->rt_dst, &rt->rt_dst, + fib_node->nip_route_info->rt_idev->dev->ifindex, + rt->rt_idev->dev->ifindex)) { + err = -EEXIST; + goto fail; + } + } + } + + new_node = nip_node_alloc(); + if (!new_node) { + nip_dbg("fail to alloc mem"); + err = -ENOMEM; + goto fail; + } + 
new_node->nip_route_info = rt; + rcu_assign_pointer(rt->rt_node, new_node); + atomic_inc(&rt->rt_ref); + hlist_add_tail_rcu(&new_node->fib_hlist, h); + nip_addr_to_str(&rt->rt_dst, dst, NIP_ADDR_BIT_LEN_MAX); + nip_addr_to_str(&rt->gateway, gateway, NIP_ADDR_BIT_LEN_MAX); + nip_dbg("%s ifindex=%u (addr=%s, gateway=%s, rt_idev->refcnt=%u)", + rt->rt_idev->dev->name, rt->rt_idev->dev->ifindex, + dst, gateway, refcount_read(&rt->rt_idev->refcnt)); + +out: + return err; + +fail: + dst_release_immediate(&rt->dst); + goto out; +} + +static void nip_fib_destroy_rcu(struct rcu_head *head) +{ + struct nip_fib_node *fn = container_of(head, struct nip_fib_node, rcu); + + nip_rt_release(fn->nip_route_info); + kfree(fn); +} + +/* nip_tb_lock must be taken to avoid racing */ +int nip_fib_del(struct nip_rt_info *rt, struct nl_info *info) +{ + struct nip_fib_node *fn; + struct net *net = info->nl_net; + + fn = rcu_dereference_protected(rt->rt_node, + lockdep_is_held(&rt->rt_table->nip_tb_lock)); + if (!fn || rt == net->newip.nip_null_entry) + return -ENOENT; + + hlist_del_init_rcu(&fn->fib_hlist); + + /* route_info directed by the fib_node can be released + * only after the fib_node is released + */ + RCU_INIT_POINTER(rt->rt_node, NULL); + call_rcu(&fn->rcu, nip_fib_destroy_rcu); + + return 0; +} + +static void nip_fib_free_table(struct nip_fib_table *table) +{ + kfree(table); +} + +/* caller must hold nip_tb_lock */ +static void nip_fib_clean_hash(struct net *net, struct hlist_head *nip_tb_head, + int (*func)(struct nip_rt_info *, void *arg), + void *arg) +{ + int i; + int err; + struct nip_fib_node *fn; + struct hlist_node *tmp; + struct nl_info info = { + .nl_net = net, + }; + + for (i = 0; i < NIN_ROUTE_HSIZE; i++) { + struct hlist_head *h = &nip_tb_head[i]; + + hlist_for_each_entry_safe(fn, tmp, h, fib_hlist) { + if (func(fn->nip_route_info, arg) < 0) { + char dst[NIP_ADDR_BIT_LEN_MAX] = {0}; + char gateway[NIP_ADDR_BIT_LEN_MAX] = {0}; + + 
nip_addr_to_str(&fn->nip_route_info->rt_dst, dst, + NIP_ADDR_BIT_LEN_MAX); + nip_addr_to_str(&fn->nip_route_info->gateway, gateway, + NIP_ADDR_BIT_LEN_MAX); + + nip_dbg("try to del rt_info, rt_dst=%s, gateway=%s", dst, gateway); + err = nip_fib_del(fn->nip_route_info, &info); + if (err) + nip_dbg("nip_fib_del failed"); + } + } + } +} + +void nip_fib_clean_all(struct net *net, + int (*func)(struct nip_rt_info *, void *arg), void *arg) +{ + struct nip_fib_table *main_tbl = net->newip.nip_fib_main_tbl; + struct nip_fib_table *local_tbl = net->newip.nip_fib_local_tbl; + + spin_lock_bh(&main_tbl->nip_tb_lock); + nip_fib_clean_hash(net, main_tbl->nip_tb_head, func, arg); + spin_unlock_bh(&main_tbl->nip_tb_lock); + + spin_lock_bh(&local_tbl->nip_tb_lock); + nip_fib_clean_hash(net, local_tbl->nip_tb_head, func, arg); + spin_unlock_bh(&local_tbl->nip_tb_lock); +} + +static void nip_fib_link_table(struct nip_fib_table *tb) +{ + /* You need to initialize multiple routing tables */ + spin_lock_init(&tb->nip_tb_lock); +} + +static void __net_init nip_fib_tables_init(struct net *net) +{ + nip_fib_link_table(net->newip.nip_fib_main_tbl); + nip_fib_link_table(net->newip.nip_fib_local_tbl); +} + +static int __net_init nip_fib_net_init(struct net *net) +{ + net->newip.nip_fib_main_tbl = + kzalloc(sizeof(*net->newip.nip_fib_main_tbl), GFP_KERNEL); + if (!net->newip.nip_fib_main_tbl) + goto out_fib_table_hash; + + net->newip.nip_fib_main_tbl->nip_tb_id = NIP_RT_TABLE_MAIN; + net->newip.nip_fib_main_tbl->flags = 1; + + net->newip.nip_fib_local_tbl = + kzalloc(sizeof(*net->newip.nip_fib_local_tbl), GFP_KERNEL); + if (!net->newip.nip_fib_local_tbl) + goto out_main_tbl; + + net->newip.nip_fib_local_tbl->nip_tb_id = NIP_RT_TABLE_LOCAL; + + nip_fib_tables_init(net); + + return 0; + +out_main_tbl: + kfree(net->newip.nip_fib_main_tbl); +out_fib_table_hash: + return -ENOMEM; +} + +static void nip_fib_net_exit(struct net *net) +{ + nip_fib_free_table(net->newip.nip_fib_main_tbl); + 
nip_fib_free_table(net->newip.nip_fib_local_tbl); +} + +static struct pernet_operations nip_fib_net_ops = { + .init = nip_fib_net_init, + .exit = nip_fib_net_exit, +}; + +int __init nip_fib_init(void) +{ + int ret = -ENOMEM; + + nip_fib_node_kmem = kmem_cache_create("nip_fib_nodes", + sizeof(struct nip_fib_node), + 0, SLAB_HWCACHE_ALIGN, NULL); + if (!nip_fib_node_kmem) + goto out; + + nip_dbg("nip_fib_node size is %lu", + sizeof(struct nip_fib_node) + sizeof(struct nip_rt_info)); + + ret = register_pernet_subsys(&nip_fib_net_ops); + if (ret) + goto out_kmem_cache_create; + +out: + return ret; + +out_kmem_cache_create: + kmem_cache_destroy(nip_fib_node_kmem); + goto out; +} + +void nip_fib_gc_cleanup(void) +{ + unregister_pernet_subsys(&nip_fib_net_ops); + kmem_cache_destroy(nip_fib_node_kmem); +} + diff --git a/newip/src/linux-5.10/net/newip/nip_fib_rules.c b/newip/src/linux-5.10/net/newip/nip_fib_rules.c new file mode 100644 index 0000000000000000000000000000000000000000..03fcd5c5a14fda288c2757b6beba7ae273a78da2 --- /dev/null +++ b/newip/src/linux-5.10/net/newip/nip_fib_rules.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. 
+ * + * NewIP Routing Policy Rules + * + * Based on net/ipv6/fib_rules.c + * Based on net/ipv6/fib6_rules.c + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": [%s:%d] " fmt, __func__, __LINE__ + +#include +#include +#include +#include +#include "tcp_nip_parameter.h" + +struct dst_entry *nip_fib_rule_lookup(struct net *net, struct flow_nip *fln, + int flags, int *tbl_type, nip_pol_lookup_t lookup) +{ + struct nip_rt_info *rt; + + rt = lookup(net, net->newip.nip_fib_local_tbl, fln, flags); + if (rt != net->newip.nip_null_entry) { + *tbl_type = (int)RT_TABLE_LOCAL; + return &rt->dst; + } + nip_rt_put(rt); + rt = lookup(net, net->newip.nip_fib_main_tbl, fln, flags); + if (rt != net->newip.nip_null_entry) { + *tbl_type = (int)RT_TABLE_MAIN; + return &rt->dst; + } + nip_rt_put(rt); + + dst_hold(&net->newip.nip_null_entry->dst); + *tbl_type = (int)RT_TABLE_MAX; + return &net->newip.nip_null_entry->dst; +} diff --git a/newip/src/linux-5.10/net/newip/nip_hooks_register.c b/newip/src/linux-5.10/net/newip/nip_hooks_register.c new file mode 100644 index 0000000000000000000000000000000000000000..8cc6dd58629f28939f7c1fe285231830db67386b --- /dev/null +++ b/newip/src/linux-5.10/net/newip/nip_hooks_register.c @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. + * + * Definitions for the NewIP Hooks Register module. 
+ */ +#ifdef CONFIG_NEWIP_HOOKS +#define pr_fmt(fmt) KBUILD_MODNAME ": [%s:%d] " fmt, __func__, __LINE__ + +#include /* ninet_ehashfn */ +#include +#include +#include "tcp_nip_parameter.h" + +void vendor_ninet_ehashfn(void *data, const struct sock *sk, u32 *ret) +{ + *ret = ninet_ehashfn(sock_net(sk), &sk->sk_nip_rcv_saddr, + sk->sk_num, &sk->sk_nip_daddr, sk->sk_dport); +} + +void vendor_ninet_gifconf(void *data, struct net_device *dev, + char __user *buf, int len, int size, int *ret) +{ + if (*ret >= 0) { + int done = ninet_gifconf(dev, buf + *ret, len - *ret, size); + + if (done < 0) + *ret = done; + else + *ret += done; + } +} + +int ninet_hooks_register(void) +{ + int ret; + + ret = register_trace_vendor_ninet_ehashfn(&vendor_ninet_ehashfn, NULL); + if (ret) { + nip_dbg("failed to register to vendor_ninet_ehashfn"); + return -1; + } + + ret = register_trace_vendor_ninet_gifconf(&vendor_ninet_gifconf, NULL); + if (ret) { + nip_dbg("failed to register to vendor_ninet_gifconf"); + return -1; + } + + return 0; +} +#endif /* CONFIG_NEWIP_HOOKS */ + diff --git a/newip/src/linux-5.10/net/newip/nip_hooks_register.h b/newip/src/linux-5.10/net/newip/nip_hooks_register.h new file mode 100644 index 0000000000000000000000000000000000000000..f5109ce8d59e5ea7d410fc81b128c9021cb9383a --- /dev/null +++ b/newip/src/linux-5.10/net/newip/nip_hooks_register.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. + * + * Definitions for the NewIP Hooks Register module. 
+ */ +#ifndef _NIP_HOOKS_REGISTER_H +#define _NIP_HOOKS_REGISTER_H + +#ifdef CONFIG_NEWIP_HOOKS +int ninet_hooks_register(void); +#endif + +#endif /* _NIP_HOOKS_REGISTER_H */ diff --git a/newip/src/linux-5.10/net/newip/nip_input.c b/newip/src/linux-5.10/net/newip/nip_input.c new file mode 100644 index 0000000000000000000000000000000000000000..9941b405735faed2a0494fc1d2d1aefabe436c65 --- /dev/null +++ b/newip/src/linux-5.10/net/newip/nip_input.c @@ -0,0 +1,160 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. + * + * NewIP input + * Linux NewIP INET implementation + * + * Based on net/ipv6/ip6_input.c + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": [%s:%d] " fmt, __func__, __LINE__ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "nip_hdr.h" +#include "tcp_nip_parameter.h" + +static int _nip_update_recv_skb_len(struct sk_buff *skb, + struct nip_hdr_decap *niph) +{ + if (!niph->include_total_len) + return 0; + + if (niph->total_len > skb->len) { + nip_dbg("total_len(%u) is bigger than skb_len(%u), Drop a packet", + niph->total_len, skb->len); + return NET_RX_DROP; + } + + /* At present, NewIP only uses linear regions, uses skb_trim to remove end from a buffer; + * If the nonlinear region is also used later, use pskb_trim to remove end from a buffer; + */ + skb_trim(skb, niph->total_len); + return 0; +} + +static int nip_rcv_finish(struct sk_buff *skb) +{ + struct net *net = dev_net(skb->dev); + void (*edemux)(struct sk_buff *skb) = NULL; + int err = 0; + + /* set /proc/sys/net/ipv4/ip_early_demux to change sysctl_ip_early_demux, + * which is used by ipv4, ipv6 and newip + */ + if (net->ipv4.sysctl_ip_early_demux && !skb_dst(skb) && !skb->sk) { + const struct ninet_protocol *ipprot; + + nip_dbg("try to early demux skb, nexthdr=0x%x", NIPCB(skb)->nexthdr); + ipprot = rcu_dereference(ninet_protos[NIPCB(skb)->nexthdr]); + if (ipprot) + 
edemux = READ_ONCE(ipprot->early_demux); + if (edemux) + edemux(skb); + } + + /* nip_route_input will set nip_null_entry + * instead of NULL in skb when looking up failed. + */ + if (!skb_valid_dst(skb)) + err = nip_route_input(skb); + if (err) { + nip_dbg("nip_route_input lookup route exception, release skb"); + kfree_skb(skb); + return 0; + } + return dst_input(skb); +} + +int nip_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) +{ + int offset = 0; + struct nip_hdr_decap niph = {0}; + + if (skb->pkt_type == PACKET_OTHERHOST) { + kfree_skb(skb); + return NET_RX_DROP; + } + + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) + goto out; + + memset(NIPCB(skb), 0, sizeof(struct ninet_skb_parm)); + offset = nip_hdr_parse(skb->data, skb->len, &niph); + if (offset <= 0) { + nip_dbg("check in failure, errcode=%d, Drop a packet (nexthdr=%u, hdr_len=%u)", + offset, niph.nexthdr, niph.hdr_len); + goto drop; + } + + if (niph.nexthdr != IPPROTO_UDP && niph.nexthdr != IPPROTO_TCP && + niph.nexthdr != IPPROTO_NIP_ICMP) { + nip_dbg("nexthdr(%u) invalid, Drop a packet", niph.nexthdr); + goto drop; + } + + niph.total_len = ntohs(niph.total_len); + NIPCB(skb)->dstaddr = niph.daddr; + NIPCB(skb)->srcaddr = niph.saddr; + NIPCB(skb)->nexthdr = niph.nexthdr; + skb->transport_header = skb->network_header + offset; + skb_orphan(skb); + + /* SKB refreshes the length after replication */ + if (_nip_update_recv_skb_len(skb, &niph)) + goto drop; + + return nip_rcv_finish(skb); +drop: + kfree_skb(skb); +out: + return NET_RX_DROP; +} + +/* Deliver the packet to transport layer, + * including TCP, UDP and ICMP. + * Caller must hold rcu. 
+ */ +void nip_protocol_deliver_rcu(struct sk_buff *skb) +{ + const struct ninet_protocol *ipprot; + + if (!pskb_pull(skb, skb_transport_offset(skb))) + goto discard; + + ipprot = rcu_dereference(ninet_protos[NIPCB(skb)->nexthdr]); + if (ipprot) { + ipprot->handler(skb); + } else { + kfree_skb(skb); + nip_dbg("not found transport protol, drop this packet"); + } + return; + +discard: + kfree_skb(skb); +} + +/* Generally called by dst_input */ +int nip_input(struct sk_buff *skb) +{ + rcu_read_lock(); + nip_protocol_deliver_rcu(skb); + rcu_read_unlock(); + + return 0; +} diff --git a/newip/src/linux-5.10/net/newip/nip_output.c b/newip/src/linux-5.10/net/newip/nip_output.c new file mode 100644 index 0000000000000000000000000000000000000000..bbea85db00625b8caa5365c304816c68a1636fc8 --- /dev/null +++ b/newip/src/linux-5.10/net/newip/nip_output.c @@ -0,0 +1,523 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. + * + * NewIP output functions + * Linux NewIP INET implementation + * + * Based on net/ipv6/ip6_output.c + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": [%s:%d] " fmt, __func__, __LINE__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "nip_hdr.h" +#include "nip_checksum.h" +#include "tcp_nip_parameter.h" + +#define NIP_BIT_TO_BYTE 1024 +void update_memory_rate(const char *upper_fun) +{ + struct sysinfo mem_info; + unsigned long total; + unsigned long free; + unsigned long used; + unsigned int uint_kb; + + si_meminfo(&mem_info); + uint_kb = mem_info.mem_unit / NIP_BIT_TO_BYTE; + total = (unsigned long)mem_info.totalram * uint_kb; + free = (unsigned long)mem_info.freeram * uint_kb; + used = total - free; + nip_dbg("%s call cur-func mem total: %ld KB, mem used: %ld KB", upper_fun, total, used); +} + +int nip_output(struct net *net, struct sock *sk, struct 
sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + struct nip_addr *nexthop; + struct neighbour *neigh; + int ret = 0; + struct net_device *dev = skb_dst(skb)->dev; + + skb->protocol = htons(ETH_P_NEWIP); + skb->dev = dev; + + /* prepare to build ethernet header */ + nexthop = nip_nexthop((struct nip_rt_info *)dst, &NIPCB(skb)->dstaddr); + + rcu_read_lock_bh(); + + neigh = __nip_neigh_lookup_noref(dev, nexthop); + if (unlikely(!neigh)) + neigh = __neigh_create(&nnd_tbl, nexthop, dev, false); + if (!IS_ERR(neigh)) { + int res = neigh_output(neigh, skb, false); + + rcu_read_unlock_bh(); + return res; + } + nip_dbg("find neigh and create neigh failed"); + + rcu_read_unlock_bh(); + kfree_skb(skb); + return ret; +} + +int nip_forward(struct sk_buff *skb) +{ + return nip_output(NULL, NULL, skb); +} + +static int nip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + int err; + + err = dst_output(net, sk, skb); + return err; +} + +int nip_send_skb(struct sk_buff *skb) +{ + struct net *net; + int err = 0; + + net = skb->sk ? 
sock_net(skb->sk) : dev_net(skb_dst(skb)->dev); + err = nip_local_out(net, skb->sk, skb); + if (err) { + if (err > 0) + err = net_xmit_errno(err); + nip_dbg("failed to out skb, err = %d", err); + } + + return err; +} + +unsigned short nip_get_output_checksum(struct sk_buff *skb, + struct nip_hdr_encap *head) +{ + struct nip_pseudo_header nph = {0}; + u8 *udp_hdr = skb_transport_header(skb); + unsigned short check_len = head->trans_hdr_len + head->usr_data_len; + + nph.nexthdr = IPPROTO_UDP; + nph.saddr = NIPCB(skb)->srcaddr; + nph.daddr = NIPCB(skb)->dstaddr; + nph.check_len = htons(check_len); + return nip_check_sum_build(udp_hdr, check_len, &nph); +} + +static struct sk_buff *_nip_alloc_skb(struct sock *sk, + struct nip_hdr_encap *head, + struct nip_pkt_seg_info *seg_info, + struct dst_entry *dst) +{ + int len; + int nip_hdr_len = get_nip_hdr_len(NIP_HDR_UDP, &head->saddr, &head->daddr); + struct sk_buff *skb; + + nip_hdr_len = nip_hdr_len == 0 ? NIP_HDR_MAX : nip_hdr_len; + len = NIP_ETH_HDR_LEN + nip_hdr_len + head->trans_hdr_len + seg_info->mid_usr_pkt_len; + skb = alloc_skb(len, 0); + if (!skb) { + nip_dbg("no space for skb"); + return NULL; + } + + skb->protocol = htons(ETH_P_NEWIP); + skb->ip_summed = CHECKSUM_NONE; + skb->csum = 0; + skb->sk = sk; + + dst_hold(dst); + nip_dbg("malloc_len=%d, dst->__refcnt=%u", len, atomic_read(&dst->__refcnt)); + skb_dst_set(skb, dst); + memset(NIPCB(skb), 0, sizeof(struct ninet_skb_parm)); + + return skb; +} + +static int _nip_udp_single_output(struct sock *sk, + struct nip_hdr_encap *head, + struct nip_pkt_seg_info *seg_info, + struct dst_entry *dst) +{ + int len; + int ret; + struct msghdr *from = (struct msghdr *)head->usr_data; + struct sk_buff *skb = _nip_alloc_skb(sk, head, seg_info, dst); + unsigned short check = 0; + + if (IS_ERR_OR_NULL(skb)) { + nip_dbg("skb alloc fail"); + return -ENOMEM; + } + + /* Reserved Position of the Ethernet header (to be filled after the + * Ethernet header is delivered to the link 
layer) + */ + skb_reserve(skb, NIP_ETH_HDR_LEN); + + /* Fill in the Network-layer Header (newIP) */ + skb_reset_network_header(skb); + head->hdr_buf = skb->data; + nip_hdr_udp_encap(head); + skb_reserve(skb, head->hdr_buf_pos); + NIPCB(skb)->dstaddr = head->daddr; + NIPCB(skb)->srcaddr = head->saddr; + NIPCB(skb)->nexthdr = IPPROTO_UDP; + + /* Fill in the Transport Layer Header (UDP) */ + skb_reset_transport_header(skb); + nip_build_udp_hdr(head->sport, head->dport, + htons(head->trans_hdr_len + head->usr_data_len), + skb->data, htons(0)); + skb_reserve(skb, head->trans_hdr_len); + len = copy_from_iter(skb->data, head->usr_data_len, &from->msg_iter); + if (len < 0) { + /* The DST has been set to the SKB. When the SKB is released, + * the DST is automatically released + */ + nip_dbg("copy from iter fail (datalen=%u)", head->usr_data_len); + kfree_skb(skb); + return -EFBIG; + } + + /* insert check sum */ + check = nip_get_output_checksum(skb, head); + nip_build_udp_hdr(head->sport, head->dport, + htons(head->trans_hdr_len + head->usr_data_len), + skb->data - head->trans_hdr_len, htons(check)); + + /* Refresh the data/tail of the SKB after the packet copy is complete */ + skb_put(skb, head->usr_data_len); + skb->data = skb_network_header(skb); + skb->len = head->hdr_buf_pos + head->trans_hdr_len + + head->usr_data_len; + + /* Add the actual size of the current SKB to the SOCK send cache count + * and set destructor to __sock_wfree to reduce the SOCK send cache size + * when the SKB is released. 
+ */ + skb->destructor = __sock_wfree; + refcount_add(skb->truesize, &sk->sk_wmem_alloc); + skb->priority = sk->sk_priority; + + ret = nip_send_skb(skb); + nip_dbg("output finish (ret=%d, datalen=%u)", ret, head->usr_data_len); + update_memory_rate(__func__); + return ret; +} + +int _nip_udp_output(struct sock *sk, void *from, int datalen, + int transhdrlen, const struct nip_addr *saddr, + ushort sport, const struct nip_addr *daddr, + ushort dport, struct dst_entry *dst) +{ + int i; + u32 ret = 0; + u32 mtu = dst_mtu(dst); + struct nip_pkt_seg_info seg_info = {0}; + struct nip_hdr_encap head = {0}; + int nip_hdr_len = get_nip_hdr_len(NIP_HDR_UDP, saddr, daddr); + + head.saddr = *saddr; + head.daddr = *daddr; + head.sport = sport; + head.dport = dport; + head.usr_data = from; + head.ttl = NIP_DEFAULT_TTL; + head.nexthdr = IPPROTO_UDP; + head.trans_hdr_len = transhdrlen; + + nip_hdr_len = nip_hdr_len == 0 ? NIP_HDR_MAX : nip_hdr_len; + nip_calc_pkt_frag_num(mtu, nip_hdr_len, datalen, &seg_info); + + /* Send intermediate data segments */ + for (i = 0; i < seg_info.mid_pkt_num; i++) { + head.usr_data_len = seg_info.mid_usr_pkt_len; + ret = _nip_udp_single_output(sk, &head, &seg_info, dst); + if (ret) + goto end; + } + + /* Send the last data segment */ + if (seg_info.last_pkt_num) { + head.usr_data_len = seg_info.last_usr_pkt_len; + ret = _nip_udp_single_output(sk, &head, &seg_info, dst); + } + +end: + return ret; +} + +static int nip_sk_dst_check(struct dst_entry *dst, + struct flow_nip *fln) +{ + int err = 0; + + if (!dst) + goto out; + + if (fln->flowin_oif && fln->flowin_oif != dst->dev->ifindex) + err = -EPERM; + +out: + return err; +} + +/* 1. Based on FLN, the routing table is searched to obtain the corresponding DST + * 2. 
The newIP address of the source end is obtained based on the routing table + * search result and stored in the fln->saddr + */ +static int nip_dst_lookup_tail(struct net *net, const struct sock *sk, + struct dst_entry **dst, struct flow_nip *fln) +{ + int err; + struct nip_rt_info *rt; + + if (!(*dst)) + *dst = nip_route_output(net, sk, fln); + + err = (*dst)->error; + if (err) { + rt = NULL; + nip_dbg("route output search error"); + goto out_err_release; + } + + err = nip_sk_dst_check(*dst, fln); + if (err) + goto out_err_release; + + rt = (struct nip_rt_info *)*dst; + if (*dst == &net->newip.nip_broadcast_entry->dst) { + fln->saddr = fln->daddr; + err = 0; + } else { + err = nip_route_get_saddr(net, rt, &fln->daddr, &fln->saddr); + } + + if (err) + goto out_err_release; + + return 0; + +out_err_release: + dst_release(*dst); + *dst = NULL; + + return err; +} + +struct dst_entry *nip_dst_lookup_flow(struct net *net, const struct sock *sk, + struct flow_nip *fln, + const struct nip_addr *final_dst) +{ + struct dst_entry *dst = NULL; + int err; + + err = nip_dst_lookup_tail(net, sk, &dst, fln); + if (err) + return ERR_PTR(err); + if (final_dst) + fln->daddr = *final_dst; + + return dst; +} + +struct dst_entry *nip_sk_dst_lookup_flow(struct sock *sk, struct flow_nip *fln) +{ + struct dst_entry *dst = NULL; + int err; + + err = nip_dst_lookup_tail(sock_net(sk), sk, &dst, fln); + if (err) + return ERR_PTR(err); + + return dst; +} + +int tcp_nip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl) +{ + int err = -EHOSTUNREACH; + struct net *net = sock_net(sk); + struct nip_addr *saddr, *daddr; + struct dst_entry *dst; + struct flow_nip fln; + struct nip_hdr_encap head = {0}; + unsigned char hdr_buf[NIP_HDR_MAX]; /* Cache the newIP header */ + + rcu_read_lock(); + skb->protocol = htons(ETH_P_NEWIP); + skb->ip_summed = CHECKSUM_NONE; + skb->csum = 0; + saddr = &sk->sk_nip_rcv_saddr; + daddr = &sk->sk_nip_daddr; + + head.saddr = *saddr; + head.daddr = *daddr; 
+ head.ttl = NIP_DEFAULT_TTL; + head.nexthdr = IPPROTO_TCP; + head.hdr_buf = hdr_buf; + nip_hdr_comm_encap(&head); + head.total_len = head.hdr_buf_pos + skb->len; + nip_update_total_len(&head, htons(head.total_len)); + + fln.daddr = sk->sk_nip_daddr; + dst = __sk_dst_check(sk, 0); + if (!dst) { + nip_dbg("no dst cache for sk, search newip rt"); + dst = nip_route_output(net, sk, &fln); + if (!dst) { + nip_dbg("cannot find dst"); + goto out; + } + if (dst->error) + goto out_err_release; + sk_dst_set(sk, dst); + } + skb_dst_set_noref(skb, dst); + + /* build nwk header */ + skb_push(skb, head.hdr_buf_pos); + memcpy(skb->data, head.hdr_buf, head.hdr_buf_pos); + + skb_reset_network_header(skb); + NIPCB(skb)->srcaddr = *saddr; + NIPCB(skb)->dstaddr = *daddr; + NIPCB(skb)->nexthdr = head.nexthdr; + + skb->priority = sk->sk_priority; + head.total_len = skb->len; + err = nip_send_skb(skb); + if (err) + nip_dbg("failed to send skb, skb->len=%u", head.total_len); + else + nip_dbg("send skb ok, skb->len=%u", head.total_len); + +out: + rcu_read_unlock(); + return err; + +out_err_release: + dst_release(dst); + dst = NULL; + sk->sk_err_soft = -err; + sk->sk_route_caps = 0; + kfree_skb(skb); + return err; +} + +void tcp_nip_actual_send_reset(struct sock *sk, struct sk_buff *skb, u32 seq, + u32 ack_seq, u32 win, int rst, u32 priority) +{ + const struct tcphdr *th = tcp_hdr(skb); + struct tcphdr *t1; + struct sk_buff *buff; + struct flow_nip fln; + struct net *net; + struct nip_addr *saddr, *daddr; + unsigned int tot_len = sizeof(struct tcphdr); + struct nip_hdr_encap head = {0}; + unsigned char hdr_buf[NIP_HDR_MAX]; + struct dst_entry *dst; + int err; + + net = sk ? 
sock_net(sk) : dev_net(skb_dst(skb)->dev); + + /* alloc skb */ + buff = alloc_skb(MAX_TCP_HEADER, priority); + if (!buff) + /* If you add log here, there will be an alarm: + * WARNING: Possible unnecessary 'out of memory' message + */ + return; + + skb_reserve(buff, MAX_TCP_HEADER); + + buff->sk = sk; // sk could be NULL + saddr = &(NIPCB(skb)->dstaddr); + daddr = &(NIPCB(skb)->srcaddr); + + /* Fill in tcp header */ + t1 = skb_push(buff, sizeof(struct tcphdr)); + skb_reset_transport_header(buff); + memset(t1, 0, sizeof(*t1)); + t1->dest = th->source; + t1->source = th->dest; + t1->doff = tot_len / TCP_NUM_4; + t1->seq = htonl(seq); + t1->ack_seq = htonl(ack_seq); + t1->ack = !rst || !th->ack; + t1->rst = rst; + t1->window = htons(win); + t1->check = htons(nip_get_output_checksum_tcp(buff, *saddr, *daddr)); + nip_dbg("host dport=%u, net dport=0x%x, host sport=%u, net sport=0x%x", + ntohs(t1->dest), t1->dest, ntohs(t1->source), t1->source); + nip_dbg("host seq=%u, net seq=0x%x, host ack_seq=%u, net ack_seq=0x%x", + seq, t1->seq, ack_seq, t1->ack_seq); + + buff->protocol = htons(ETH_P_NEWIP); + buff->ip_summed = CHECKSUM_NONE; + buff->csum = 0; + + /* Fill in nip header */ + head.saddr = *saddr; + head.daddr = *daddr; + head.ttl = NIP_DEFAULT_TTL; + head.nexthdr = IPPROTO_TCP; + head.hdr_buf = hdr_buf; + nip_hdr_comm_encap(&head); + head.total_len = head.hdr_buf_pos + buff->len; + nip_update_total_len(&head, htons(head.total_len)); + + /* Check routine */ + fln.daddr = *daddr; + dst = nip_route_output(net, sk, &fln); // here, sk not used. 
+ if (!dst) { + nip_dbg("cannot find dst"); + goto out; + } + skb_dst_set_noref(buff, dst); + + /* Build newip header */ + skb_push(buff, head.hdr_buf_pos); + memcpy(buff->data, head.hdr_buf, head.hdr_buf_pos); + + skb_reset_network_header(buff); + NIPCB(buff)->srcaddr = *saddr; + NIPCB(buff)->dstaddr = *daddr; + NIPCB(buff)->nexthdr = head.nexthdr; + + buff->priority = priority; + head.total_len = buff->len; + err = nip_send_skb(buff); + if (err) + nip_dbg("failed to send skb, skb->len=%u", head.total_len); + else + nip_dbg("send skb ok, skb->len=%u", head.total_len); + +out: + return; +} diff --git a/newip/src/linux-5.10/net/newip/nip_sockglue.c b/newip/src/linux-5.10/net/newip/nip_sockglue.c new file mode 100644 index 0000000000000000000000000000000000000000..1ae978e0a24bd8cc9acffc79271f7c30de5d2542 --- /dev/null +++ b/newip/src/linux-5.10/net/newip/nip_sockglue.c @@ -0,0 +1,171 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. + * + * NewIP INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. NewIP is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * The NewIP to API glue. 
+ * + * Based on net/ipv4/ip_sockglue.c + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": [%s:%d] " fmt, __func__, __LINE__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "tcp_nip_parameter.h" + +#define NIP_OPTNAME_MAX 255 + +static void __nip_set_sock_tos(struct sock *sk, int val) +{ + sk->sk_priority = rt_tos2priority(val); + sk_dst_reset(sk); +} + +static bool nip_setsockopt_needs_rtnl(int optname) +{ + switch (optname) { + case IP_MSFILTER: + return true; + } + return false; +} + +static bool nip_getsockopt_needs_rtnl(int optname) +{ + switch (optname) { + case IP_MSFILTER: + return true; + } + return false; +} + +static int do_nip_setsockopt(struct sock *sk, int level, int optname, + sockptr_t optval, unsigned int optlen) +{ + struct inet_sock *inet = inet_sk(sk); + int val = 0; + int err = 0; + bool needs_rtnl = nip_setsockopt_needs_rtnl(optname); + + if (optlen >= sizeof(int)) { + if (copy_from_sockptr(&val, optval, sizeof(val))) + return -EFAULT; + } else if (optlen >= sizeof(char)) { + unsigned char ucval; + + if (copy_from_sockptr(&ucval, optval, sizeof(ucval))) + return -EFAULT; + val = (int)ucval; + } + + if (needs_rtnl) + rtnl_lock(); + lock_sock(sk); + + switch (optname) { + case IP_TOS: + inet->tos = val; + __nip_set_sock_tos(sk, val); + break; + default: + err = -ENOPROTOOPT; + break; + } + + release_sock(sk); + if (needs_rtnl) + rtnl_unlock(); + + return err; +} + +int nip_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, + unsigned int optlen) +{ + int err; + + if (level != SOL_IP) + return -ENOPROTOOPT; + + err = do_nip_setsockopt(sk, level, optname, optval, optlen); + + return err; +} + +static int do_nip_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen) +{ + struct inet_sock *inet = inet_sk(sk); + bool needs_rtnl = nip_getsockopt_needs_rtnl(optname); + int val, err = 0; + int len; + + if (level != SOL_IP) + 
return -EOPNOTSUPP; + if (get_user(len, optlen)) + return -EFAULT; + if (len < 0) + return -EINVAL; + + if (needs_rtnl) + rtnl_lock(); + lock_sock(sk); + + switch (optname) { + case IP_TOS: + val = inet->tos; + break; + default: + err = -ENOPROTOOPT; + goto out; + } + + if (len < sizeof(int) && len > 0 && val >= 0 && val <= NIP_OPTNAME_MAX) { + unsigned char ucval = (unsigned char)val; + + len = 1; + if (put_user(len, optlen)) { + err = -EFAULT; + goto out; + } + if (copy_to_user(optval, &ucval, 1)) { + err = -EFAULT; + goto out; + } + } else { + len = min_t(unsigned int, sizeof(int), len); + if (put_user(len, optlen)) { + err = -EFAULT; + goto out; + } + if (copy_to_user(optval, &val, len)) { + err = -EFAULT; + goto out; + } + } +out: + release_sock(sk); + if (needs_rtnl) + rtnl_unlock(); + + return err; +} + +int nip_getsockopt(struct sock *sk, int level, + int optname, char __user *optval, int __user *optlen) +{ + return do_nip_getsockopt(sk, level, optname, optval, optlen); +} + diff --git a/newip/src/linux-5.10/net/newip/nndisc.c b/newip/src/linux-5.10/net/newip/nndisc.c new file mode 100644 index 0000000000000000000000000000000000000000..59d1a2a4b69db83cc2e3ff99feb4e409770486c9 --- /dev/null +++ b/newip/src/linux-5.10/net/newip/nndisc.c @@ -0,0 +1,579 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. 
+ * + * Neighbour Discovery for NewIP + * Linux NewIP INET implementation + * + * Based on net/ipv6/ndisc.c + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": [%s:%d] " fmt, __func__, __LINE__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "nip_hdr.h" +#include "nip_checksum.h" +#include "tcp_nip_parameter.h" + +/* NUD_INCOMPLETE + * The neighbor request packet has been sent but no response has been received + * NUD_REACHABLE + * Reachable: Indicates that the neighbor is reachable + * NUD_STAL + * Idle state, which has not been confirmed for a long time, + * and the idle time exceeds the rated time + * NUD_DELAY + * If the acknowledgment time expires but the idle time does not exceed the rated time, + * you need to obtain the acknowledgment packet + * NUD_PROBE + * After NUD_DELAY does not receive confirmation for a long time, ARP request messages are sent + * NUD_FAILED + * The neighbor is unreachable + * NUD_NOARP + * Indicates the status of the neighbor that does not need the ARP status change + * NUD_PERMANENT + * Indicates that the status of the neighbor item is permanent and does not need to change + * NUD_NONE + * Initialization status of the neighbor item + */ +static void nndisc_solicit(struct neighbour *neigh, struct sk_buff *skb); + +static u32 nndisc_hash(const void *pkey, + const struct net_device *dev, __u32 *hash_rnd); +static bool nndisc_key_eq(const struct neighbour *neigh, const void *pkey); +static int nndisc_constructor(struct neighbour *neigh); + +static void nndisc_error_report(struct neighbour *neigh, struct sk_buff *skb) +{ + kfree_skb(skb); +} + +static const struct neigh_ops nndisc_generic_ops = { + .family = AF_NINET, + .solicit = nndisc_solicit, + .output = neigh_resolve_output, + .connected_output = 
neigh_connected_output, +}; + +static const struct neigh_ops nndisc_hh_ops = { + .family = AF_NINET, + .solicit = nndisc_solicit, + .error_report = nndisc_error_report, + .output = neigh_resolve_output, + .connected_output = neigh_resolve_output, +}; + +static const struct neigh_ops nndisc_direct_ops = { + .family = AF_NINET, + .output = neigh_direct_output, + .connected_output = neigh_direct_output, +}; + +#define NIP_NEIGH_MCAST_PROBES 4 +#define NIP_NEIGH_UCAST_PROBES 4 +#define NIP_NEIGH_DELAY_PROBE_TIME (5 * HZ) +#define NIP_NEIGH_GC_STALETIME (60 * HZ) +#define NIP_NEIGH_QUEUE_LEN_BYTES (64 * 1024) +#define NIP_NEIGH_PROXY_QLEN 64 +#define NIP_NEIGH_ANYCAST_DELAY (1 * HZ) +#define NIP_NEIGH_PROXY_DELAY ((8 * HZ) / 10) +#define NIP_NEIGH_GC_INTERVAL (30 * HZ) +#define NIP_NEIGH_GC_THRESH_1 128 +#define NIP_NEIGH_GC_THRESH_2 512 +#define NIP_NEIGH_GC_THRESH_3 1024 + +struct neigh_table nnd_tbl = { + .family = AF_NINET, + .key_len = sizeof(struct nip_addr), + .protocol = cpu_to_be16(ETH_P_NEWIP), + .hash = nndisc_hash, + .key_eq = nndisc_key_eq, + .constructor = nndisc_constructor, + .id = "nndisc_cache", + .parms = { + .tbl = &nnd_tbl, + .reachable_time = ND_REACHABLE_TIME, + .data = { + [NEIGH_VAR_MCAST_PROBES] = NIP_NEIGH_MCAST_PROBES, + [NEIGH_VAR_UCAST_PROBES] = NIP_NEIGH_UCAST_PROBES, + [NEIGH_VAR_RETRANS_TIME] = ND_RETRANS_TIMER, + [NEIGH_VAR_BASE_REACHABLE_TIME] = ND_REACHABLE_TIME, + [NEIGH_VAR_DELAY_PROBE_TIME] = NIP_NEIGH_DELAY_PROBE_TIME, + [NEIGH_VAR_GC_STALETIME] = NIP_NEIGH_GC_STALETIME, + [NEIGH_VAR_QUEUE_LEN_BYTES] = NIP_NEIGH_QUEUE_LEN_BYTES, + [NEIGH_VAR_PROXY_QLEN] = NIP_NEIGH_PROXY_QLEN, + [NEIGH_VAR_ANYCAST_DELAY] = NIP_NEIGH_ANYCAST_DELAY, + [NEIGH_VAR_PROXY_DELAY] = NIP_NEIGH_PROXY_DELAY, + }, + }, + .gc_interval = NIP_NEIGH_GC_INTERVAL, + .gc_thresh1 = NIP_NEIGH_GC_THRESH_1, + .gc_thresh2 = NIP_NEIGH_GC_THRESH_2, + .gc_thresh3 = NIP_NEIGH_GC_THRESH_3, +}; + +static u32 nndisc_hash(const void *pkey, + const struct net_device *dev, __u32 
*hash_rnd) +{ + return nndisc_hashfn(pkey, dev, hash_rnd); +} + +static bool nndisc_key_eq(const struct neighbour *neigh, const void *pkey) +{ + return neigh_key_eq800(neigh, pkey); +} + +static int nndisc_constructor(struct neighbour *neigh) +{ + struct nip_addr *addr = (struct nip_addr *)&neigh->primary_key; + struct net_device *dev = neigh->dev; + struct ninet_dev *nin_dev; + struct neigh_parms *parms; + bool is_broadcast = (bool)nip_addr_eq(addr, &nip_broadcast_addr_arp); + + nin_dev = nin_dev_get(dev); + if (!nin_dev) + return -EINVAL; + + parms = nin_dev->nd_parms; + __neigh_parms_put(neigh->parms); + neigh->parms = neigh_parms_clone(parms); + neigh->type = RTN_UNICAST; + if (!dev->header_ops) { + neigh->nud_state = NUD_NOARP; + neigh->ops = &nndisc_direct_ops; + neigh->output = neigh_direct_output; + } else { + if (is_broadcast || + (dev->flags & IFF_POINTOPOINT)) { + neigh->nud_state = NUD_NOARP; + memcpy(neigh->ha, dev->broadcast, dev->addr_len); + } else if (dev->flags & (IFF_NOARP | IFF_LOOPBACK)) { + neigh->nud_state = NUD_NOARP; + memcpy(neigh->ha, dev->dev_addr, dev->addr_len); + if (dev->flags & IFF_LOOPBACK) + neigh->type = RTN_LOCAL; + } + + if (dev->header_ops->cache) + neigh->ops = &nndisc_hh_ops; + else + neigh->ops = &nndisc_generic_ops; + + if (neigh->nud_state & NUD_VALID) + neigh->output = neigh->ops->connected_output; + else + neigh->output = neigh->ops->output; + } + + nin_dev_put(nin_dev); + + return 0; +} + +void nip_insert_nndisc_send_checksum(struct sk_buff *skb, u_short checksum) +{ +#define NNDISC_CHECKSUM_BIAS 2 + *(__u16 *)(skb_transport_header(skb) + NNDISC_CHECKSUM_BIAS) = + htons(checksum); +} + +unsigned short nip_get_nndisc_send_checksum(struct sk_buff *skb, + struct nip_hdr_encap *head, + int payload_len) +{ + struct nip_pseudo_header nph = {0}; + + nph.nexthdr = head->nexthdr; + nph.saddr = head->saddr; + nph.daddr = head->daddr; + nph.check_len = htons(payload_len); + return nip_check_sum_build(skb_transport_header(skb), + 
payload_len, &nph); +} + +bool nip_get_nndisc_rcv_checksum(struct sk_buff *skb, + const u_char *transport_tail) +{ + struct nip_pseudo_header nph = {0}; + unsigned short check_len = (unsigned short)(transport_tail - (skb_transport_header(skb))); + + nph.nexthdr = NIPCB(skb)->nexthdr; + nph.saddr = NIPCB(skb)->srcaddr; + nph.daddr = NIPCB(skb)->dstaddr; + nph.check_len = htons(check_len); + + return nip_check_sum_parse(skb_transport_header(skb), check_len, &nph) + == 0xffff ? true : false; +} + +static void nndisc_payload_ns_pack(const struct nip_addr *solicit, + struct sk_buff *skb) +{ + struct nnd_msg *msg = (struct nnd_msg *)skb->data; + u_char *p = msg->data; + + memset(&msg->icmph, 0, sizeof(msg->icmph)); + msg->icmph.nip_icmp_type = NIP_ARP_NS; + msg->icmph.nip_icmp_cksum = 0; + p = build_nip_addr(solicit, p); +} + +static struct dst_entry *nndisc_dst_alloc(struct net_device *dev) +{ + struct nip_rt_info *rt; + struct net *net = dev_net(dev); + + rt = nip_dst_alloc(net, dev, 0); + if (!rt) + return NULL; + + rt->dst.flags |= DST_HOST; + rt->dst.input = nip_input; + rt->dst.output = nip_output; + atomic_set(&rt->dst.__refcnt, 1); + + return &rt->dst; +} + +static int get_ns_payload_len(const struct nip_addr *solicit) +{ + return sizeof(struct nip_icmp_hdr) + get_nip_addr_len(solicit); +} + +static int nndisc_send_skb(struct net_device *dev, + struct sk_buff *skb, struct nip_hdr_encap *head, + const int payload_len) +{ + int ret = 0; + struct sock *sk = NULL; + struct dst_entry *dst = NULL; + u_short checksum = 0; + + /* skip transport hdr */ + skb_reserve(skb, payload_len); + + /* set skb->data to point network header */ + skb->data = skb_network_header(skb); + skb->len = head->hdr_buf_pos + payload_len; + + dst = nndisc_dst_alloc(dev); + if (!dst) { + kfree_skb(skb); + return -ENOMEM; + } + /* add check sum */ + checksum = nip_get_nndisc_send_checksum(skb, head, payload_len); + nip_insert_nndisc_send_checksum(skb, checksum); + + skb_dst_set(skb, dst); + ret = 
dst_output(dev_net(skb->dev), sk, skb); + return ret; +} + +static struct sk_buff *nndisc_alloc_skb(struct net_device *dev, + struct nip_hdr_encap *head, int payload_len) +{ + struct sk_buff *skb = NULL; + int len = NIP_ETH_HDR_LEN + NIP_HDR_MAX + payload_len; + + skb = alloc_skb(len, 0); + if (!skb) + /* If you add log here, there will be an alarm: + * WARNING: Possible unnecessary 'out of memory' message + */ + return skb; + + skb->protocol = htons(ETH_P_NEWIP); + skb->ip_summed = CHECKSUM_NONE; + skb->csum = 0; + skb->dev = dev; + memset(NIPCB(skb), 0, sizeof(struct ninet_skb_parm)); + + NIPCB(skb)->dstaddr = head->daddr; + NIPCB(skb)->srcaddr = head->saddr; + NIPCB(skb)->nexthdr = head->nexthdr; + /* reserve space for hardware header */ + skb_reserve(skb, NIP_ETH_HDR_LEN); + skb_reset_network_header(skb); + + /* build nwk header */ + head->hdr_buf = (unsigned char *)skb->data; + nip_hdr_comm_encap(head); + head->total_len = head->hdr_buf_pos + payload_len; + nip_update_total_len(head, htons(head->total_len)); + skb_reserve(skb, head->hdr_buf_pos); + skb_reset_transport_header(skb); + return skb; +} + +static void nndisc_send_ns(struct net_device *dev, + const struct nip_addr *solicit, + const struct nip_addr *daddr, + const struct nip_addr *saddr) +{ + int ret; + struct sk_buff *skb; + int payload_len = get_ns_payload_len(solicit); + struct nip_hdr_encap head = {0}; + + head.saddr = *saddr; + head.daddr = *daddr; + head.ttl = NIP_ARP_DEFAULT_TTL; + head.nexthdr = IPPROTO_NIP_ICMP; + + skb = nndisc_alloc_skb(dev, &head, payload_len); + if (!skb) + /* If you add log here, there will be an alarm: + * WARNING: Possible unnecessary 'out of memory' message + */ + return; + /* build ns header */ + nndisc_payload_ns_pack(solicit, skb); + + ret = nndisc_send_skb(dev, skb, &head, payload_len); + if (ret) + nip_dbg("dst output fail"); +} + +static void nndisc_solicit(struct neighbour *neigh, struct sk_buff *skb) +{ + struct net_device *dev = neigh->dev; + struct nip_addr 
*target = (struct nip_addr *)&neigh->primary_key; + struct nip_addr *saddr = NULL; + struct ninet_dev *idev; + + /* Obtain the NewIP address from the current dev as + * the source address of the request packet + */ + rcu_read_lock(); + idev = __nin_dev_get(dev); + if (idev) { + read_lock_bh(&idev->lock); + if (!list_empty(&idev->addr_list)) { + struct ninet_ifaddr *ifp; + + list_for_each_entry(ifp, &idev->addr_list, if_list) { + saddr = &ifp->addr; + nndisc_send_ns(dev, target, + &nip_broadcast_addr_arp, + saddr); + } + } + read_unlock_bh(&idev->lock); + } else { + nip_dbg("idev don't exist"); + } + rcu_read_unlock(); +} + +static void build_na_hdr(u_char *smac, u_char mac_len, struct sk_buff *skb) +{ + struct nnd_msg *msg = (struct nnd_msg *)skb->data; + u_char *p = msg->data; + + memset(&msg->icmph, 0, sizeof(msg->icmph)); + msg->icmph.nip_icmp_type = NIP_ARP_NA; + msg->icmph.nip_icmp_cksum = 0; + *p = mac_len; + p++; + memcpy(p, smac, mac_len); +} + +static int get_na_payload_len(struct net_device *dev) +{ + /* Icmp Header Length + * Number of bytes in the MAC address length field + * MAC Address Length + */ + return sizeof(struct nip_icmp_hdr) + 1 + dev->addr_len; +} + +static void nndisc_send_na(struct net_device *dev, + const struct nip_addr *daddr, + const struct nip_addr *saddr) +{ + int ret; + struct sk_buff *skb = NULL; + int payload_len = get_na_payload_len(dev); + u_char *smac = dev->dev_addr; + struct nip_hdr_encap head = {0}; + + head.saddr = *saddr; + head.daddr = *daddr; + head.ttl = NIP_ARP_DEFAULT_TTL; + head.nexthdr = IPPROTO_NIP_ICMP; + + skb = nndisc_alloc_skb(dev, &head, payload_len); + if (!skb) + /* If you add log here, there will be an alarm: + * WARNING: Possible unnecessary 'out of memory' message + */ + return; + /* build na header */ + build_na_hdr(smac, dev->addr_len, skb); + + ret = nndisc_send_skb(dev, skb, &head, payload_len); + if (ret) + nip_dbg("dst output fail"); +} + +bool nip_addr_local(struct net_device *dev, struct nip_addr 
*addr) +{ + struct ninet_dev *idev; + bool ret = false; + + rcu_read_lock(); + idev = __nin_dev_get(dev); + if (!idev) + goto out; + + read_lock_bh(&idev->lock); + if (!list_empty(&idev->addr_list)) { + struct ninet_ifaddr *ifp; + + list_for_each_entry(ifp, &idev->addr_list, if_list) { + if (nip_addr_eq(addr, &ifp->addr)) { + ret = true; + break; + } + } + } + read_unlock_bh(&idev->lock); +out: + rcu_read_unlock(); + return ret; +} + +int nndisc_rcv_ns(struct sk_buff *skb) +{ + struct nnd_msg *msg = (struct nnd_msg *)skb_transport_header(skb); + u_char *p = msg->data; + u_char *lladdr; + struct nip_addr addr = {0}; + struct neighbour *neigh; + struct ethhdr *eth; + struct net_device *dev = skb->dev; + int err = 0; + + p = decode_nip_addr(p, &addr); + if (!p) { + nip_dbg("failure when decode source address"); + err = -EFAULT; + goto out; + } + + if (nip_addr_invalid(&addr)) { + nip_dbg("icmp hdr addr invalid, bitlen=%u", addr.bitlen); + err = -EFAULT; + goto out; + } + + if (!nip_addr_local(dev, &addr)) { + err = -ENXIO; + goto out; + } + + eth = (struct ethhdr *)skb_mac_header(skb); + lladdr = eth->h_source; + + /* checksum parse */ + if (!nip_get_nndisc_rcv_checksum(skb, p)) { + nip_dbg("ns ICMP checksum failed, drop the packet"); + err = -EINVAL; + goto out; + } + + neigh = __neigh_lookup(&nnd_tbl, &NIPCB(skb)->srcaddr, dev, lladdr || !dev->addr_len); + if (neigh) { + neigh_update(neigh, lladdr, NUD_STALE, NEIGH_UPDATE_F_OVERRIDE, 0); + neigh_release(neigh); + } + + nndisc_send_na(dev, &NIPCB(skb)->srcaddr, &addr); +out: + kfree_skb(skb); + return err; +} + +int nndisc_rcv_na(struct sk_buff *skb) +{ + struct nnd_msg *msg = (struct nnd_msg *)skb_transport_header(skb); + u_char *p = msg->data; + u_char len; + u8 lladdr[ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))]; + struct net_device *dev = skb->dev; + struct neighbour *neigh; + + len = *p; + p++; + memset(lladdr, 0, ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))); + memcpy(lladdr, p, len); + + if 
(!nip_get_nndisc_rcv_checksum(skb, p + len)) { + nip_dbg("na ICMP checksum failed, drop the packet"); + kfree_skb(skb); + return 0; + } + + neigh = neigh_lookup(&nnd_tbl, &NIPCB(skb)->srcaddr, dev); + if (neigh) { + neigh_update(neigh, lladdr, NUD_REACHABLE, NEIGH_UPDATE_F_OVERRIDE, 0); + neigh_release(neigh); + kfree_skb(skb); + return 0; + } + kfree_skb(skb); + return -EFAULT; +} + +int nndisc_rcv(struct sk_buff *skb) +{ + int ret = 0; + struct nip_icmp_hdr *hdr = nip_icmp_header(skb); + u8 type = hdr->nip_icmp_type; + + switch (type) { + case NIP_ARP_NS: + ret = nndisc_rcv_ns(skb); + break; + case NIP_ARP_NA: + ret = nndisc_rcv_na(skb); + break; + default: + nip_dbg("nd packet type error"); + } + + return ret; +} + +int __init nndisc_init(void) +{ + neigh_table_init(NEIGH_NND_TABLE, &nnd_tbl); + return 0; +} diff --git a/newip/src/linux-5.10/net/newip/protocol.c b/newip/src/linux-5.10/net/newip/protocol.c new file mode 100644 index 0000000000000000000000000000000000000000..928917aafc99b161cf56eba70d86546a35e613d2 --- /dev/null +++ b/newip/src/linux-5.10/net/newip/protocol.c @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. + * + * NewIP INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. NewIP INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * NewIP INET protocol dispatch tables. + * + * Based on net/ipv6/protocol.c + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": [%s:%d] " fmt, __func__, __LINE__ + +#include +#include +#include +#include +#include "tcp_nip_parameter.h" + +const struct ninet_protocol __rcu *ninet_protos[MAX_INET_PROTOS] __read_mostly; + +int ninet_add_protocol(const struct ninet_protocol *prot, + unsigned char protocol) +{ + return !cmpxchg((const struct ninet_protocol **)&ninet_protos[protocol], + NULL, prot) ? 
0 : -1; +} + +int ninet_del_protocol(const struct ninet_protocol *prot, + unsigned char protocol) +{ + int ret; + + ret = (cmpxchg((const struct ninet_protocol **)&ninet_protos[protocol], + prot, NULL) == prot) ? 0 : -1; + + synchronize_net(); + + return ret; +} + diff --git a/newip/src/linux-5.10/net/newip/route.c b/newip/src/linux-5.10/net/newip/route.c new file mode 100644 index 0000000000000000000000000000000000000000..0ede62a441f6fa3e8cb7ccc992386b7fea3ca0fc --- /dev/null +++ b/newip/src/linux-5.10/net/newip/route.c @@ -0,0 +1,1007 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. + * + * NewIP INET + * An implementation of the TCP/IP protocol suite for the LINUX + * operating system. NewIP INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * ROUTE - implementation of the NewIP router. + * + * Based on net/ipv4/route.c + * Based on net/ipv6/route.c + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": [%s:%d] " fmt, __func__, __LINE__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include /* copy_from_user() */ +#include /* rtnl_lock() */ +#include + +#include +#include +#include +#include +#include + +#include +#include "nip_hdr.h" +#include "tcp_nip_parameter.h" + +static int nip_pkt_discard(struct sk_buff *skb); +static int nip_pkt_discard_out(struct net *net, struct sock *sk, + struct sk_buff *skb); +static unsigned int nip_mtu(const struct dst_entry *dst); + +static const struct nip_rt_info nip_null_entry_template = { + .dst = { + .__refcnt = ATOMIC_INIT(1), + .__use = 1, + .obsolete = DST_OBSOLETE_FORCE_CHK, + .error = -ENETUNREACH, + .input = nip_pkt_discard, + .output = nip_pkt_discard_out, + }, + .rt_ref = ATOMIC_INIT(1), +}; + +static const struct nip_rt_info nip_broadcast_entry_template = { + .dst = { + .__refcnt = ATOMIC_INIT(1), + 
.__use = 1, + .obsolete = DST_OBSOLETE_FORCE_CHK, + .input = nip_input, + .output = nip_output, + }, + .rt_ref = ATOMIC_INIT(1), +}; + +struct nip_addr *nip_nexthop(struct nip_rt_info *rt, struct nip_addr *daddr) +{ + if (rt->rt_flags & RTF_GATEWAY) + return &rt->gateway; + else + return daddr; +} + +static void rtmsg_to_fibni_config(struct net *net, struct nip_rtmsg *rtmsg, + struct nip_fib_config *cfg) +{ + memset(cfg, 0, sizeof(*cfg)); + + cfg->fc_table = NIP_RT_TABLE_MAIN; + cfg->fc_ifindex = rtmsg->rtmsg_ifindex; + cfg->fc_metric = rtmsg->rtmsg_metric; + cfg->fc_expires = rtmsg->rtmsg_info; + + cfg->fc_flags = rtmsg->rtmsg_flags; + + cfg->fc_nlinfo.nl_net = net; + + cfg->fc_dst = rtmsg->rtmsg_dst; + cfg->fc_src = rtmsg->rtmsg_src; + cfg->fc_gateway = rtmsg->rtmsg_gateway; +} + +static void nip_rt_info_init(struct nip_rt_info *rt) +{ + struct dst_entry *dst = &rt->dst; + + memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst)); + rt->from = NULL; +} + +static struct nip_rt_info *__nip_dst_alloc(struct net *net, + struct net_device *dev, int flags) +{ + struct nip_rt_info *rt = + dst_alloc(&net->newip.nip_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK, + flags); + + if (rt) + nip_rt_info_init(rt); + + return rt; +} + +struct nip_rt_info *nip_dst_alloc(struct net *net, struct net_device *dev, + int flags) +{ + struct nip_rt_info *rt = __nip_dst_alloc(net, dev, flags); + + if (rt) { + rt->rt_pcpu = + alloc_percpu_gfp(struct nip_rt_info *, GFP_ATOMIC); + if (rt->rt_pcpu) { + int cpu; + + for_each_possible_cpu(cpu) { + struct nip_rt_info **p; + + p = per_cpu_ptr(rt->rt_pcpu, cpu); + /* no one shares rt */ + *p = NULL; + } + } else { + dst_destroy((struct dst_entry *)rt); + return NULL; + } + } + + return rt; +} + +static void nip_rt_dst_from_metrics_check(struct nip_rt_info *rt) +{ + if (rt->from && + dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->from)) + dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->from), true); +} + +static struct nip_rt_info *nip_rt_get_pcpu_route(struct 
nip_rt_info *rt) +{ + struct nip_rt_info *pcpu_rt, **p; + + p = this_cpu_ptr(rt->rt_pcpu); + pcpu_rt = *p; + + if (pcpu_rt) { + dst_hold(&pcpu_rt->dst); + nip_rt_dst_from_metrics_check(pcpu_rt); + } + return pcpu_rt; +} + +static void nip_rt_set_from(struct nip_rt_info *rt, struct nip_rt_info *from) +{ + WARN_ON(from->from); + + rt->rt_flags &= ~RTF_EXPIRES; + dst_hold(&from->dst); + rt->from = &from->dst; + dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true); +} + +static void nip_rt_copy_init(struct nip_rt_info *rt, struct nip_rt_info *ort) +{ + rt->dst.input = ort->dst.input; + rt->dst.output = ort->dst.output; + rt->rt_dst = ort->rt_dst; + rt->dst.error = ort->dst.error; + rt->rt_idev = ort->rt_idev; + if (rt->rt_idev) + nin_dev_hold(rt->rt_idev); + + rt->dst.lastuse = jiffies; + rt->gateway = ort->gateway; + rt->rt_flags = ort->rt_flags; + nip_rt_set_from(rt, ort); + rt->rt_metric = ort->rt_metric; + rt->rt_table = ort->rt_table; + rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate); +} + +static struct nip_rt_info *nip_rt_pcpu_alloc(struct nip_rt_info *rt) +{ + struct nip_rt_info *pcpu_rt; + + pcpu_rt = __nip_dst_alloc(dev_net(rt->dst.dev), + rt->dst.dev, rt->dst.flags); + if (!pcpu_rt) + return NULL; + nip_rt_copy_init(pcpu_rt, rt); + pcpu_rt->rt_protocol = rt->rt_protocol; + pcpu_rt->rt_flags |= RTF_PCPU; + return pcpu_rt; +} + +static struct nip_rt_info *nip_rt_make_pcpu_route(struct nip_rt_info *rt) +{ + struct nip_rt_info *pcpu_rt, *prev; + + pcpu_rt = nip_rt_pcpu_alloc(rt); + if (!pcpu_rt) { + struct net *net = dev_net(rt->dst.dev); + + dst_hold(&net->newip.nip_null_entry->dst); + return net->newip.nip_null_entry; + } + + rcu_read_lock_bh(); + if (rt->rt_pcpu) { + struct nip_rt_info **p = this_cpu_ptr(rt->rt_pcpu); + + prev = cmpxchg(p, NULL, pcpu_rt); + if (prev) { + /* If someone did it before us, return prev instead */ + dst_destroy(&pcpu_rt->dst); + pcpu_rt = prev; + } + } else { + dst_destroy(&pcpu_rt->dst); + pcpu_rt = rt; + } + 
dst_hold(&pcpu_rt->dst); + nip_rt_dst_from_metrics_check(pcpu_rt); + rcu_read_unlock_bh(); + return pcpu_rt; +} + +static struct nip_rt_info *nip_pol_route_input(struct net *net, + struct nip_fib_table *table, + struct flow_nip *fln, int flags) +{ + return nip_pol_route(net, table, fln->flowin_iif, fln, flags); +} + +struct dst_entry *nip_route_input_lookup(struct net *net, + struct net_device *dev, + struct flow_nip *fln, int flags, int *tbl_type) +{ + return nip_fib_rule_lookup(net, fln, flags, tbl_type, nip_pol_route_input); +} + +int nip_route_input(struct sk_buff *skb) +{ + struct net *net = dev_net(skb->dev); + int flags = 0; + struct flow_nip fln = { + .flowin_iif = skb->skb_iif, + .daddr = NIPCB(skb)->dstaddr, + .saddr = NIPCB(skb)->srcaddr, + }; + struct dst_entry *out_dst; + int tbl_type = 0; + + if (nip_addr_eq(&fln.daddr, &nip_broadcast_addr_arp)) { + nip_dbg("recv broadcast packet"); + dst_hold(&net->newip.nip_broadcast_entry->dst); + skb_dst_set(skb, + (struct dst_entry *)net->newip.nip_broadcast_entry); + return 0; + } + + out_dst = nip_route_input_lookup(net, skb->dev, &fln, flags, &tbl_type); + skb_dst_set(skb, out_dst); + + if (tbl_type == RT_TABLE_MAIN) { + struct ninet_dev *nin_dev = rcu_dereference(skb->dev->nip_ptr); + struct ninet_dev *nout_dev = rcu_dereference(out_dst->dev->nip_ptr); + + /* When global variable ipv4 all/send_redirects or + * corresponding network/send_redirects is 1, + * IN_DEV_TX_REDIRECTS() conditions are valid. + * send_redirects default is 1. 
+ */ + if (nin_dev == nout_dev && + IN_DEV_TX_REDIRECTS(rcu_dereference(out_dst->dev->ip_ptr))) { + nip_dbg("The inlet and outlet are the same"); + return 1; + } + } + return 0; +} + +static struct nip_rt_info *nip_pol_route_output(struct net *net, + struct nip_fib_table *table, + struct flow_nip *fln, int flags) +{ + return nip_pol_route(net, table, fln->flowin_oif, fln, flags); +} + +struct dst_entry *nip_route_output_flags(struct net *net, const struct sock *sk, + struct flow_nip *fln, int flags) +{ + struct dst_entry *dst; + struct nip_rt_info *rt; + int tbl_type = 0; + + dst = nip_fib_rule_lookup(net, fln, flags, &tbl_type, nip_pol_route_output); + rt = (struct nip_rt_info *)dst; + + if (!(rt->rt_flags & RTF_LOCAL)) + return dst; + + rcu_read_lock(); + if (rt->rt_idev) { + read_lock_bh(&rt->rt_idev->lock); + /* search saddr in idev->addr */ + if (!list_empty(&rt->rt_idev->addr_list)) { + struct ninet_ifaddr *ifp; + + list_for_each_entry(ifp, &rt->rt_idev->addr_list, if_list) { + fln->saddr = ifp->addr; + break; + } + } + read_unlock_bh(&rt->rt_idev->lock); + } + rcu_read_unlock(); + + dst_release(dst); + dst_hold(&net->newip.nip_broadcast_entry->dst); + return &net->newip.nip_broadcast_entry->dst; +} + +struct nip_rt_info *nip_pol_route(struct net *net, struct nip_fib_table *table, + int oif, struct flow_nip *fln, int flags) +{ + struct nip_fib_node *fn; + struct nip_rt_info *rt, *pcpu_rt; + + rcu_read_lock_bh(); + fn = nip_fib_locate(table->nip_tb_head, &fln->daddr); + if (!fn) { + rcu_read_unlock_bh(); + nip_dbg("search fail"); + rt = net->newip.nip_null_entry; + dst_hold_and_use(&rt->dst, jiffies); + return rt; + } + rt = fn->nip_route_info; + + /* Get a percpu copy */ + rt->dst.lastuse = jiffies; + rt->dst.__use++; + pcpu_rt = nip_rt_get_pcpu_route(rt); + nip_dbg("cpu id=%d", smp_processor_id()); + if (pcpu_rt) { + rcu_read_unlock_bh(); + nip_dbg("pcpu found"); + } else { + dst_hold(&rt->dst); + rcu_read_unlock_bh(); + pcpu_rt = nip_rt_make_pcpu_route(rt); 
+ dst_release(&rt->dst); + } + + nip_dbg("rt dst.__refcnt=%d, pcpu dst.__refcnt=%d", + atomic_read(&rt->dst.__refcnt), + atomic_read(&pcpu_rt->dst.__refcnt)); + return pcpu_rt; +} + +bool nip_bind_addr_check(struct net *net, + struct nip_addr *addr) +{ + struct nip_fib_node *fn; + struct nip_fib_table *fib_tbl = net->newip.nip_fib_local_tbl; + + if (nip_addr_invalid(addr)) { + nip_dbg("binding-addr invalid, bitlen=%u", addr->bitlen); + return false; + } + + if (nip_addr_eq(addr, &nip_any_addr)) { + nip_dbg("binding-addr is any addr"); + return true; + } + + rcu_read_lock_bh(); + fn = nip_fib_locate(fib_tbl->nip_tb_head, addr); + rcu_read_unlock_bh(); + if (!fn) { + nip_dbg("binding-addr is not local addr"); + return false; + } + + nip_dbg("binding-addr is local addr"); + return true; +} + +static struct nip_rt_info *nip_route_info_create(struct nip_fib_config *cfg) +{ + struct net *net = cfg->fc_nlinfo.nl_net; + struct nip_rt_info *rt = NULL; + struct net_device *dev = NULL; + struct ninet_dev *idev = NULL; + struct nip_fib_table *table; + int err = -ENODEV; + + /* find net_device */ + dev = dev_get_by_index(net, cfg->fc_ifindex); + if (!dev) { + nip_dbg("fail to get dev by ifindex(%u)", cfg->fc_ifindex); + goto out; + } + + /* find ninet_dev,which has the newip address list */ + idev = nin_dev_get(dev); + if (!idev) { + nip_dbg("fail to get ninet dev (ifindex=%u)", cfg->fc_ifindex); + goto out; + } + + if (cfg->fc_metric == 0) + cfg->fc_metric = NIP_RT_PRIO_USER; + + err = -ENOBUFS; + table = nip_fib_get_table(net, cfg->fc_table); + if (!table) { + nip_dbg("fail to get fib table (fc_table=%u)", cfg->fc_table); + goto out; + } + + rt = nip_dst_alloc(net, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 
0 : DST_NOCOUNT); + if (!rt) { + nip_dbg("fail to alloc dst mem"); + err = -ENOMEM; + goto out; + } + + nip_rt_clean_expires(rt); + + if (cfg->fc_protocol == RTPROT_UNSPEC) + cfg->fc_protocol = RTPROT_BOOT; + rt->rt_protocol = cfg->fc_protocol; + + if (cfg->fc_flags & RTF_LOCAL) { + rt->dst.input = nip_input; + nip_dbg("rt->dst.input=nip_input, ifindex=%u", cfg->fc_ifindex); + } else { + rt->dst.input = nip_forward; + nip_dbg("rt->dst.input=nip_forward, ifindex=%u", cfg->fc_ifindex); + } + + rt->dst.output = nip_output; + rt->rt_dst = cfg->fc_dst; + rt->rt_src = cfg->fc_src; + rt->rt_metric = cfg->fc_metric; + + if (cfg->fc_flags & RTF_GATEWAY) + rt->gateway = cfg->fc_gateway; + else + rt->gateway = nip_any_addr; + + rt->rt_flags = cfg->fc_flags; + rt->dst.dev = dev; + rt->rt_idev = idev; + rt->rt_table = table; + + return rt; +out: + if (dev) + dev_put(dev); + if (idev) + nin_dev_put(idev); + return ERR_PTR(err); +} + +/* __nip_ins_rt is called with FREE table->nip_tb_lock. + * It takes new route entry, the addition fails by any reason the + * route is released. 
+ */ +static int __nip_ins_rt(struct nip_rt_info *rt) +{ + int err; + struct nip_fib_table *table; + + table = rt->rt_table; + + spin_lock_bh(&table->nip_tb_lock); + err = nip_fib_add(table, rt); + spin_unlock_bh(&table->nip_tb_lock); + + return err; +} + +int nip_ins_rt(struct nip_rt_info *rt) +{ + /* Hold dst to account for the reference from the nip fib hash */ + dst_hold(&rt->dst); + return __nip_ins_rt(rt); +} + +int nip_route_add(struct nip_fib_config *cfg) +{ + struct nip_rt_info *rt; + int err; + + rt = nip_route_info_create(cfg); + if (IS_ERR(rt)) { + nip_dbg("fail to creat route info"); + err = PTR_ERR(rt); + rt = NULL; + goto out; + } + + err = __nip_ins_rt(rt); +out: + return err; +} + +static int __nip_del_rt(struct nip_rt_info *rt, struct nl_info *info) +{ + int err; + struct nip_fib_table *table; + struct net *net = dev_net(rt->dst.dev); + + if (rt == net->newip.nip_null_entry) { + err = -ENOENT; + goto out; + } + + table = rt->rt_table; + spin_lock_bh(&table->nip_tb_lock); + err = nip_fib_del(rt, info); + spin_unlock_bh(&table->nip_tb_lock); + +out: + nip_rt_put(rt); + return err; +} + +int nip_del_rt(struct nip_rt_info *rt) +{ + struct nl_info info = { + .nl_net = dev_net(rt->dst.dev), + }; + return __nip_del_rt(rt, &info); +} + +static int nip_route_del(struct nip_fib_config *cfg) +{ + struct net *net = cfg->fc_nlinfo.nl_net; + struct nip_fib_table *table; + struct nip_fib_node *fn; + struct nip_rt_info *rt; + int err = -ESRCH; + + table = nip_fib_get_table(net, cfg->fc_table); + if (!table) + return err; + + rcu_read_lock_bh(); + fn = nip_fib_locate(table->nip_tb_head, &cfg->fc_dst); + if (fn) { + rt = fn->nip_route_info; + dst_hold(&rt->dst); + rcu_read_unlock_bh(); + + return __nip_del_rt(rt, &cfg->fc_nlinfo); + } + rcu_read_unlock_bh(); + + return err; +} + +int nip_route_ioctl(struct net *net, unsigned int cmd, struct nip_rtmsg *rtmsg) +{ + struct nip_fib_config cfg; + int err; + + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) { + 
nip_dbg("not admin can`t cfg"); + return -EPERM; + } + + rtmsg_to_fibni_config(net, rtmsg, &cfg); + if (nip_addr_invalid(&cfg.fc_dst)) { + nip_dbg("nip daddr invalid, bitlen=%u", cfg.fc_dst.bitlen); + return -EFAULT; + } + + if (cfg.fc_flags & RTF_GATEWAY) { + if (nip_addr_invalid(&cfg.fc_gateway)) { + nip_dbg("nip gateway daddr invalid, bitlen=%u", + cfg.fc_gateway.bitlen); + return -EFAULT; + } + } + + rtnl_lock(); + switch (cmd) { + case SIOCADDRT: /* Add a route */ + err = nip_route_add(&cfg); + break; + case SIOCDELRT: /* Delete a route */ + err = nip_route_del(&cfg); + break; + default: + err = -EINVAL; + } + rtnl_unlock(); + + return err; +} + +static void nip_dst_destroy(struct dst_entry *dst) +{ + struct nip_rt_info *rt = (struct nip_rt_info *)dst; + struct dst_entry *from = rt->from; + struct ninet_dev *idev; + + dst_destroy_metrics_generic(dst); + free_percpu(rt->rt_pcpu); + + idev = rt->rt_idev; + if (idev) { + rt->rt_idev = NULL; + nip_dbg("idev->refcnt=%u", refcount_read(&idev->refcnt)); + nin_dev_put(idev); + } + + if (from) + nip_dbg("from->__refcnt=%d", atomic_read(&from->__refcnt)); + rt->from = NULL; + dst_release(from); +} + +static inline const void *nip_choose_neigh_daddr(struct nip_rt_info *rt, + struct sk_buff *skb, + const void *daddr) +{ + struct nip_addr *p = &rt->gateway; + + if (rt->rt_flags & RTF_GATEWAY) + return (const void *)p; + else if (skb) + return &NIPCB(skb)->dstaddr; + return daddr; +} + +static struct neighbour *nip_neigh_lookup(const struct dst_entry *dst, + struct sk_buff *skb, + const void *daddr) +{ + struct nip_rt_info *rt = (struct nip_rt_info *)dst; + struct neighbour *n; + + daddr = nip_choose_neigh_daddr(rt, skb, daddr); + n = __nip_neigh_lookup(dst->dev, daddr); + if (n) + return n; + return neigh_create(&nnd_tbl, daddr, dst->dev); +} + +static struct dst_entry *nip_dst_check(struct dst_entry *dst, u32 cookie) +{ + if (dst->obsolete != DST_OBSOLETE_FORCE_CHK) + return NULL; + return dst; +} + +/* Used to calculate 
the MSS value required by TCP + * Because there is no MSS in the TCP of NewIP, + * the value is calculated based on the MTU of the network port + */ +static unsigned int nip_default_advmss(const struct dst_entry *dst) +{ + unsigned int mtu = dst_mtu(dst); + + mtu -= NIP_HDR_MAX + sizeof(struct tcphdr); + + return mtu; +} + +static unsigned int nip_mtu(const struct dst_entry *dst) +{ + unsigned int mtu; + struct ninet_dev *idev; + + mtu = NIP_MIN_MTU; + + rcu_read_lock(); + idev = __nin_dev_get(dst->dev); + if (idev) + mtu = idev->cnf.mtu; + rcu_read_unlock(); + + return mtu; +} + +static void nip_dst_ifdown(struct dst_entry *dst, struct net_device *dev, + int how) +{ + struct nip_rt_info *rt = (struct nip_rt_info *)dst; + struct ninet_dev *idev = rt->rt_idev; + struct net_device *loopback_dev = + dev_net(dev)->loopback_dev; + + if (idev && idev->dev != loopback_dev) { + struct ninet_dev *loopback_idev = nin_dev_get(loopback_dev); + + if (loopback_idev) { + rt->rt_idev = loopback_idev; + nin_dev_put(idev); + } + } +} + +static struct dst_ops nip_dst_ops_template = { + .family = AF_NINET, + .destroy = nip_dst_destroy, + .ifdown = nip_dst_ifdown, + .neigh_lookup = nip_neigh_lookup, + .check = nip_dst_check, + .default_advmss = nip_default_advmss, + .mtu = nip_mtu, +}; + +static int nip_pkt_discard(struct sk_buff *skb) +{ + kfree_skb(skb); + return 0; +} + +static int nip_pkt_discard_out(struct net *net, struct sock *sk, + struct sk_buff *skb) +{ + kfree_skb(skb); + return 0; +} + +struct nip_rt_info *nip_addrconf_dst_alloc(struct ninet_dev *idev, + const struct nip_addr *addr) +{ + u32 tb_id; + struct net *net = dev_net(idev->dev); + struct net_device *dev = idev->dev; + struct nip_rt_info *rt; + + rt = nip_dst_alloc(net, dev, DST_NOCOUNT); + if (!rt) + return ERR_PTR(-ENOMEM); + + nin_dev_hold(idev); + + rt->dst.flags |= DST_HOST; + rt->dst.input = nip_input; + rt->dst.output = nip_output; + rt->rt_idev = idev; + + rt->rt_protocol = RTPROT_KERNEL; + rt->rt_flags = 
RTF_UP | RTF_NONEXTHOP; + rt->rt_flags |= RTF_LOCAL; + + rt->gateway = *addr; + rt->rt_dst = *addr; + tb_id = NIP_RT_TABLE_LOCAL; + rt->rt_table = nip_fib_get_table(net, tb_id); + + return rt; +} + +struct arg_dev_net { + struct net_device *dev; + struct net *net; +}; + +/* Determine whether an RT should be deleted along with ifDown + * called with nip_tb_lock held for table with rt + */ +static int nip_fib_ifdown(struct nip_rt_info *rt, void *arg) +{ + const struct arg_dev_net *adn = arg; + const struct net_device *dev = adn->dev; + bool not_same_dev = (rt->dst.dev == dev || !dev); + bool not_null_entry = (rt != adn->net->newip.nip_null_entry); + bool not_broadcast_entry = (rt != adn->net->newip.nip_broadcast_entry); + bool dev_unregister = (dev && netdev_unregistering(dev)); + bool ignore_route_ifdown = (!rt->rt_idev->cnf.ignore_routes_with_linkdown); + + if (not_same_dev && not_null_entry && not_broadcast_entry && + (dev_unregister || ignore_route_ifdown)) + return -1; + + nip_dbg("don`t del route with %s down, ifindex=%u, not_same_dev=%u, not_null_entry=%u", + dev->name, dev->ifindex, not_same_dev, not_null_entry); + nip_dbg("not_broadcast_entry=%u, dev_unregister=%u, ignore_route_ifdown=%u", + not_broadcast_entry, dev_unregister, ignore_route_ifdown); + return 0; +} + +void nip_rt_ifdown(struct net *net, struct net_device *dev) +{ + struct arg_dev_net adn = { + .dev = dev, + .net = net, + }; + + nip_fib_clean_all(net, nip_fib_ifdown, &adn); +} + +static int __net_init nip_route_net_init(struct net *net) +{ + int ret = -ENOMEM; + + memcpy(&net->newip.nip_dst_ops, &nip_dst_ops_template, + sizeof(net->newip.nip_dst_ops)); + + if (dst_entries_init(&net->newip.nip_dst_ops) < 0) + goto out; + + net->newip.nip_null_entry = kmemdup(&nip_null_entry_template, + sizeof(*net->newip.nip_null_entry), + GFP_KERNEL); + if (!net->newip.nip_null_entry) + goto out_nip_dst_entries; + net->newip.nip_null_entry->dst.ops = &net->newip.nip_dst_ops; + 
dst_init_metrics(&net->newip.nip_null_entry->dst, dst_default_metrics.metrics, true); + + net->newip.nip_broadcast_entry = + kmemdup(&nip_broadcast_entry_template, + sizeof(*net->newip.nip_broadcast_entry), + GFP_KERNEL); + if (!net->newip.nip_broadcast_entry) + goto out_nip_null_entry; + net->newip.nip_broadcast_entry->dst.ops = &net->newip.nip_dst_ops; + dst_init_metrics(&net->newip.nip_broadcast_entry->dst, dst_default_metrics.metrics, true); + ret = 0; +out: + return ret; + +out_nip_null_entry: + kfree(net->newip.nip_null_entry); +out_nip_dst_entries: + dst_entries_destroy(&net->newip.nip_dst_ops); + goto out; +} + +static void __net_exit nip_route_net_exit(struct net *net) +{ + kfree(net->newip.nip_broadcast_entry); + kfree(net->newip.nip_null_entry); + dst_entries_destroy(&net->newip.nip_dst_ops); +} + +static struct pernet_operations nip_route_net_ops = { + .init = nip_route_net_init, + .exit = nip_route_net_exit, +}; + +static int nip_route_dev_notify(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct net *net = dev_net(dev); + + if (!(dev->flags & IFF_LOOPBACK)) + return NOTIFY_OK; + + if (event == NETDEV_REGISTER) { + net->newip.nip_null_entry->dst.dev = dev; + net->newip.nip_null_entry->rt_idev = nin_dev_get(dev); + + net->newip.nip_broadcast_entry->dst.dev = dev; + net->newip.nip_broadcast_entry->rt_idev = nin_dev_get(dev); + } else if (event == NETDEV_UNREGISTER && + dev->reg_state != NETREG_UNREGISTERED) { + nin_dev_put_clear(&net->newip.nip_null_entry->rt_idev); + nin_dev_put_clear(&net->newip.nip_broadcast_entry->rt_idev); + } + + return NOTIFY_OK; +} + +static void seq_printf_nipaddr_to_proc(struct seq_file *seq, + struct nip_addr *addr) +{ + int i = 0; + + for (i = 0; i < addr->bitlen / NIP_ADDR_BIT_LEN_8; i++) + seq_printf(seq, "%02x", addr->nip_addr_field8[i]); + + seq_puts(seq, "\t"); +} + +static void nip_route_show_table(struct seq_file *seq, + struct 
nip_fib_table *table) +{ + struct nip_fib_node *fn; + int i; + + rcu_read_lock_bh(); + for (i = 0; i < NIN_ROUTE_HSIZE; i++) { + hlist_for_each_entry_rcu(fn, &table->nip_tb_head[i], + fib_hlist) { + struct nip_rt_info *rt = fn->nip_route_info; + + seq_printf_nipaddr_to_proc(seq, &rt->rt_dst); + seq_printf_nipaddr_to_proc(seq, &rt->gateway); + seq_printf(seq, "%4u %4s\n", rt->rt_flags, + rt->dst.dev ? rt->dst.dev->name : ""); + } + } + rcu_read_unlock_bh(); +} + +static int nip_route_proc_show(struct seq_file *seq, void *v) +{ + struct net *net = seq->private; + + nip_route_show_table(seq, net->newip.nip_fib_main_tbl); + nip_route_show_table(seq, net->newip.nip_fib_local_tbl); + + return 0; +} + +static int __net_init nip_route_net_init_late(struct net *net) +{ + proc_create_net_single("nip_route", 0444, net->proc_net, + nip_route_proc_show, NULL); + return 0; +} + +static void __net_exit nip_route_net_exit_late(struct net *net) +{ + remove_proc_entry("nip_route", net->proc_net); +} + +static struct pernet_operations nip_route_net_late_ops = { + .init = nip_route_net_init_late, + .exit = nip_route_net_exit_late, +}; + +static struct notifier_block nip_route_dev_notifier = { + .notifier_call = nip_route_dev_notify, + .priority = ADDRCONF_NOTIFY_PRIORITY - 10, +}; + +int __init nip_route_init(void) +{ + int ret; + + ret = -ENOMEM; + + nip_dst_ops_template.kmem_cachep = + kmem_cache_create("nip_dst_cache", sizeof(struct nip_rt_info), 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!nip_dst_ops_template.kmem_cachep) + goto out; + + ret = register_pernet_subsys(&nip_route_net_ops); + if (ret) + goto out_kmem_cache; + + ret = nip_fib_init(); + if (ret) + goto out_register_subsys; + + ret = register_pernet_subsys(&nip_route_net_late_ops); + if (ret) + goto out_nip_fib_init; + + ret = register_netdevice_notifier(&nip_route_dev_notifier); + if (ret) + goto out_register_late_subsys; + +out: + return ret; + +out_register_late_subsys: + unregister_pernet_subsys(&nip_route_net_late_ops); 
+out_nip_fib_init: + nip_fib_gc_cleanup(); +out_register_subsys: + unregister_pernet_subsys(&nip_route_net_ops); +out_kmem_cache: + kmem_cache_destroy(nip_dst_ops_template.kmem_cachep); + goto out; +} + +void nip_route_cleanup(void) +{ + unregister_pernet_subsys(&nip_route_net_late_ops); + nip_fib_gc_cleanup(); + unregister_pernet_subsys(&nip_route_net_ops); + kmem_cache_destroy(nip_dst_ops_template.kmem_cachep); +} + diff --git a/newip/src/linux-5.10/net/newip/tcp_nip.c b/newip/src/linux-5.10/net/newip/tcp_nip.c new file mode 100644 index 0000000000000000000000000000000000000000..5dc18afb3e3f29f0563a44a592809c071c91837f --- /dev/null +++ b/newip/src/linux-5.10/net/newip/tcp_nip.c @@ -0,0 +1,1747 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. + * + * NewIP INET + * An implementation of the TCP/IP protocol suite for the LINUX + * operating system. NewIP INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Implementation of the Transmission Control Protocol(TCP). + * + * TCP over NewIP + * + * Based on net/ipv4/tcp.c + * Based on net/ipv4/tcp_ipv4.c + * Based on net/ipv6/tcp_ipv6.c + * Based on net/core/stream.c + * + * Description of States: + * + * TCP_SYN_SENT sent a connection request, waiting for ack + * + * TCP_SYN_RECV received a connection request, sent ack, + * waiting for final ack in three-way handshake. + * + * TCP_ESTABLISHED connection established + * + * TCP_FIN_WAIT1 our side has shutdown, waiting to complete + * transmission of remaining buffered data + * + * TCP_FIN_WAIT2 all buffered data sent, waiting for remote + * to shutdown + * + * TCP_CLOSING both sides have shutdown but we still have + * data we have to finish sending + * + * TCP_TIME_WAIT timeout to catch resent junk before entering + * closed, can only be entered from FIN_WAIT2 + * or CLOSING. 
Required because the other end + * may not have gotten our last ACK causing it + * to retransmit the data packet (which we ignore) + * + * TCP_CLOSE_WAIT remote side has shutdown and is waiting for + * us to finish writing our data and to shutdown + * (we have to close() to move on to LAST_ACK) + * + * TCP_LAST_ACK out side has shutdown after remote has + * shutdown. There may still be data in our + * buffer that we have to finish sending + * + * TCP_CLOSE socket is finished + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": [%s:%d] " fmt, __func__, __LINE__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "nip_checksum.h" +#include "tcp_nip_parameter.h" + +static const struct inet_connection_sock_af_ops newip_specific; + +static void tcp_nip_push(struct sock *sk, int flags, int mss_now, + int nonagle, int size_goal) +{ + __tcp_nip_push_pending_frames(sk, mss_now, nonagle); +} + +static const unsigned char new_state[16] = { + /* current state: new state: action: */ +[0] = TCP_CLOSE, +[TCP_ESTABLISHED] = TCP_FIN_WAIT1 | TCP_ACTION_FIN, +[TCP_SYN_SENT] = TCP_CLOSE, +[TCP_SYN_RECV] = TCP_FIN_WAIT1 | TCP_ACTION_FIN, +[TCP_FIN_WAIT1] = TCP_FIN_WAIT1, +[TCP_FIN_WAIT2] = TCP_FIN_WAIT2, +[TCP_TIME_WAIT] = TCP_CLOSE, +[TCP_CLOSE] = TCP_CLOSE, +[TCP_CLOSE_WAIT] = TCP_LAST_ACK | TCP_ACTION_FIN, +[TCP_LAST_ACK] = TCP_LAST_ACK, +[TCP_LISTEN] = TCP_CLOSE, +[TCP_CLOSING] = TCP_CLOSING, +[TCP_NEW_SYN_RECV] = TCP_CLOSE, /* should not happen ! 
*/ +}; + +bool nip_get_tcp_input_checksum(struct sk_buff *skb) +{ + struct nip_pseudo_header nph = {0}; + + nph.nexthdr = NIPCB(skb)->nexthdr; + nph.saddr = NIPCB(skb)->srcaddr; + nph.daddr = NIPCB(skb)->dstaddr; + + nph.check_len = htons(skb->len); + return nip_check_sum_parse(skb_transport_header(skb), + skb->len, &nph) + == 0xffff ? true : false; +} + +static int tcp_nip_close_state(struct sock *sk) +{ + int next = (int)new_state[sk->sk_state]; + int ns = next & TCP_STATE_MASK; + + tcp_set_state(sk, ns); + + return next & TCP_ACTION_FIN; +} + +void sk_nip_stream_kill_queues(struct sock *sk) +{ + /* First the read buffer. */ + __skb_queue_purge(&sk->sk_receive_queue); + + /* Next, the error queue. */ + __skb_queue_purge(&sk->sk_error_queue); + + /* Next, the write queue. */ + WARN_ON(!skb_queue_empty(&sk->sk_write_queue)); + + WARN_ON(sk->sk_wmem_queued); +} + +void tcp_nip_shutdown(struct sock *sk, int how) +{ + if (!(how & SEND_SHUTDOWN)) + return; + + /* If we've already sent a FIN, or it's a closed state, skip this. */ + if ((1 << sk->sk_state) & + (TCPF_ESTABLISHED | TCPF_SYN_SENT | + TCPF_SYN_RECV | TCPF_CLOSE_WAIT)) { + /* Clear out any half completed packets. FIN if needed. 
*/ + if (tcp_nip_close_state(sk)) + tcp_nip_send_fin(sk); + } +} + +void tcp_nip_close(struct sock *sk, long timeout) +{ + struct sk_buff *skb; + int data_was_unread = 0; + int state; + u32 sk_ack_backlog; + + lock_sock(sk); + sk->sk_shutdown = SHUTDOWN_MASK; + + nip_dbg("sk_state:%d", sk->sk_state); + + if (sk->sk_state == TCP_LISTEN) { + tcp_set_state(sk, TCP_CLOSE); + + sk_ack_backlog = READ_ONCE(sk->sk_ack_backlog); + inet_csk_listen_stop(sk); + nip_dbg("sk_state CLOSE, sk_ack_backlog=%u to %u, sk_max_ack_backlog=%u", + sk_ack_backlog, READ_ONCE(sk->sk_ack_backlog), + READ_ONCE(sk->sk_max_ack_backlog)); + goto adjudge_to_death; + } + + while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) { + u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq; + + if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) + len--; + data_was_unread += len; + __kfree_skb(skb); + } + + if (sk->sk_state == TCP_CLOSE) + goto adjudge_to_death; + + if (data_was_unread) { + tcp_set_state(sk, TCP_CLOSE); + tcp_nip_send_active_reset(sk, sk->sk_allocation); + } else if (tcp_nip_close_state(sk)) { + /* RED-PEN. Formally speaking, we have broken TCP state + * machine. State transitions: + * + * TCP_ESTABLISHED -> TCP_FIN_WAIT1 + * TCP_SYN_RECV -> TCP_FIN_WAIT1 (forget it, it's impossible) + * TCP_CLOSE_WAIT -> TCP_LAST_ACK + */ + nip_dbg("ready to send fin, sk_state=%d", sk->sk_state); + tcp_nip_send_fin(sk); + } + +adjudge_to_death: + state = sk->sk_state; + sock_hold(sk); + sock_orphan(sk); + + /* It is the last release_sock in its life. It will remove backlog. 
*/ + release_sock(sk); + + local_bh_disable(); + bh_lock_sock(sk); + WARN_ON(sock_owned_by_user(sk)); + + this_cpu_dec(*sk->sk_prot->orphan_count); + + if (state != TCP_CLOSE && sk->sk_state == TCP_CLOSE) + goto out; + + if (sk->sk_state == TCP_CLOSE) + inet_csk_destroy_sock(sk); + +out: + bh_unlock_sock(sk); + local_bh_enable(); + sock_put(sk); +} + +/* These states need RST on ABORT according to RFC793 */ +static inline bool tcp_nip_need_reset(int state) +{ + return (1 << state) & + (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_FIN_WAIT1 | + TCPF_FIN_WAIT2 | TCPF_SYN_RECV); +} + +/* Function + * Initialize some of the parameters in request_sock + * Parameter + * req: Request connection control block + * sk_listener: Transmission control block + * skb: Transfer control block buffer + */ +static void tcp_nip_init_req(struct request_sock *req, + const struct sock *sk_listener, + struct sk_buff *skb) +{ + struct inet_request_sock *ireq = inet_rsk(req); + + ireq->ir_nip_rmt_addr = NIPCB(skb)->srcaddr; + ireq->ir_nip_loc_addr = NIPCB(skb)->dstaddr; +} + +/* Function + * Initialize The initialization number SEQ. 
Calculate the initial serial number of + * the server based on part of the source address source port, part of the destination + * address, and destination port + * Parameter + * skb: Transfer control block buffer + */ +static __u32 tcp_nip_init_sequence(const struct sk_buff *skb) +{ + return secure_tcp_nip_sequence_number(NIPCB(skb)->dstaddr.nip_addr_field32, + NIPCB(skb)->srcaddr.nip_addr_field32, + tcp_hdr(skb)->dest, + tcp_hdr(skb)->source); +} + +static struct dst_entry *tcp_nip_route_req(const struct sock *sk, + struct flowi *fl, + const struct request_sock *req) +{ + struct dst_entry *dst; + struct inet_request_sock *ireq = inet_rsk(req); + struct flow_nip fln; + + fln.daddr = ireq->ir_nip_rmt_addr; + dst = nip_route_output(sock_net(sk), sk, &fln); + return dst; +} + +/* Function + * Functions used by the client transport layer to connect requests + * This parameter is used to set the source address, destination address and interface + * Parameter + * sk: Transmission control block + * uaddr:The destination address + * addr_len:Destination address Length + */ +static int tcp_nip_connect(struct sock *sk, struct sockaddr *uaddr, + int addr_len) +{ + struct sockaddr_nin *usin = (struct sockaddr_nin *)uaddr; + struct inet_sock *inet = inet_sk(sk); + struct tcp_sock *tp = tcp_sk(sk); + __be16 orig_dport; + struct nip_addr *daddr; + struct dst_entry *dst; + int err; + struct ip_options_rcu *inet_opt; + struct inet_timewait_death_row *tcp_death_row; + struct flow_nip fln; + + fln.daddr = usin->sin_addr; + + if (addr_len < sizeof(struct sockaddr_nin)) + return -EINVAL; + + if (usin->sin_family != AF_NINET) + return -EAFNOSUPPORT; + + inet_opt = rcu_dereference_protected(inet->inet_opt, + lockdep_sock_is_held(sk)); + /* Destination ADDRESS and port */ + daddr = &usin->sin_addr; + orig_dport = usin->sin_port; + + /* Find the route and obtain the source address */ + nip_dbg("sk->sk_bound_dev_if is %d", sk->sk_bound_dev_if); + fln.flowin_oif = sk->sk_bound_dev_if; + dst 
= nip_dst_lookup_flow(sock_net(sk), sk, &fln, NULL); + if (IS_ERR(dst)) { + nip_dbg("cannot find dst"); + err = PTR_ERR(dst); + goto failure; + } + + /* find the actual source addr for sk->sk_nip_rcv_saddr */ + if (nip_addr_eq(&sk->sk_nip_rcv_saddr, &nip_any_addr)) + sk->sk_nip_rcv_saddr = fln.saddr; + fln.saddr = sk->sk_nip_rcv_saddr; + + if (nip_addr_invalid(&fln.daddr)) { + nip_dbg("nip daddr invalid, bitlen=%u", fln.daddr.bitlen); + err = -EFAULT; + goto failure; + } + + if (nip_addr_invalid(&fln.saddr)) { + nip_dbg("nip saddr invalid, bitlen=%u", fln.saddr.bitlen); + err = -EFAULT; + goto failure; + } + + /* The destination address and port are set to the transport control block */ + inet->inet_dport = usin->sin_port; + sk->sk_nip_daddr = usin->sin_addr; + + inet_csk(sk)->icsk_ext_hdr_len = 0; + if (inet_opt) + inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen; + + tcp_set_state(sk, TCP_SYN_SENT); + sk_set_txhash(sk); + sk_dst_set(sk, dst); + + /* Dynamically bind local ports */ + tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row; + err = ninet_hash_connect(tcp_death_row, sk); + if (err) + goto late_failure; + + /* Class if the transport control block has already been linked */ + if (tp->rx_opt.ts_recent_stamp) { + /* Reset inherited state */ + tp->rx_opt.ts_recent = 0; + tp->rx_opt.ts_recent_stamp = 0; + if (likely(!tp->repair)) + tp->write_seq = 0; + } + + if (!tp->write_seq) + tp->write_seq = + secure_tcp_nip_sequence_number(sk->sk_nip_rcv_saddr.nip_addr_field32, + sk->sk_nip_daddr.nip_addr_field32, + inet->inet_sport, + usin->sin_port); + + inet->inet_id = prandom_u32(); + + /* Call tcp_connect to send the SYN field */ + err = __tcp_nip_connect(sk); + if (err) + goto late_failure; + + return 0; + +/* failure after tcp_set_state(sk, TCP_SYN_SENT) */ +late_failure: + tcp_set_state(sk, TCP_CLOSE); +failure: + sk->sk_route_caps = 0; + inet->inet_dport = 0; + return err; +} + +static void tcp_nip_send_reset(struct sock *sk, struct sk_buff *skb) +{ + const 
struct tcphdr *th = tcp_hdr(skb); + u32 seq = 0; + u32 ack_seq = 0; + u32 priority = gfp_any(); + + /* Never send a reset in response to a reset. */ + if (th->rst) + return; + + nip_dbg("send rst"); + if (th->ack) + seq = ntohl(th->ack_seq); + else + ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len - + (th->doff << 2); + + tcp_nip_actual_send_reset(sk, skb, seq, ack_seq, 0, 1, priority); +} + +/* Function + * function used by the server to send SYN+ACK segments + * Parameter + * sk: Transmission control block + * dst: routing。 + * flowi: Flow control block + * req: Request connection control block + * foc: Fast open options + * synack_type: Type of the SYN+ACK segment + */ +static int tcp_nip_send_synack(const struct sock *sk, struct dst_entry *dst, + struct flowi *fl, + struct request_sock *req, + struct tcp_fastopen_cookie *foc, + enum tcp_synack_type synack_type, + struct sk_buff *syn_skb) +{ + struct sk_buff *skb; + int err = -ENOMEM; + + skb = tcp_nip_make_synack(sk, dst, req, foc, synack_type); + if (skb) { + nip_dbg("TCP server create SYN+ACK skb successfully"); + rcu_read_lock(); + err = nip_send_synack(req, skb); + rcu_read_unlock(); + } + + return err; +} + +static void tcp_nip_reqsk_destructor(struct request_sock *req) +{ + ; +} + +struct request_sock_ops tcp_nip_request_sock_ops __read_mostly = { + .family = AF_NINET, + .obj_size = sizeof(struct tcp_nip_request_sock), + .rtx_syn_ack = tcp_nip_rtx_synack, + .send_ack = NULL, + .destructor = tcp_nip_reqsk_destructor, + .send_reset = NULL, + .syn_ack_timeout = NULL, +}; + +static const struct tcp_request_sock_ops tcp_request_sock_newip_ops = { + .mss_clamp = TCP_BASE_MSS, +#ifdef CONFIG_TCP_MD5SIG + .req_md5_lookup = NULL, + .calc_md5_hash = NULL, +#endif + .init_req = tcp_nip_init_req, +#ifdef CONFIG_SYN_COOKIES + .cookie_init_seq = NULL, +#endif + .route_req = tcp_nip_route_req, + .init_seq = tcp_nip_init_sequence, + .send_synack = tcp_nip_send_synack, +}; + +/* Function + * The route cache saves 
the transport control block from the SKB + * Parameter + * sk: Transmission control block + * skb: Transfer control block buffer + * req: Request connection control block + * dst: routing + * req_unhash: Request connection control block + */ +void ninet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + + if (dst && dst_hold_safe(dst)) { + sk->sk_rx_dst = dst; + inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; + } +} + +/* Function + * A function used by the server to process client connection requests + * Parameter + * sk: Transmission control block + * skb: Transfer control block buffer + */ +static int tcp_nip_conn_request(struct sock *sk, struct sk_buff *skb) +{ + return _tcp_nip_conn_request(&tcp_nip_request_sock_ops, + &tcp_request_sock_newip_ops, sk, skb); +} + +/* Function + * Create child control blocks + * Parameter + * sk: Transmission control block + * skb: Transfer control block buffer + * req: Request connection control block + * dst: routing + * req_unhash: Request connection control block + */ +static struct sock *tcp_nip_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, + struct request_sock *req, + struct dst_entry *dst, + struct request_sock *req_unhash, + bool *own_req) +{ + struct tcp_nip_request_sock *niptreq = tcp_nip_rsk(req); + struct inet_request_sock *ireq = inet_rsk(req); + bool found_dup_sk = false; + struct tcp_nip_sock *newtcpnipsk; + struct inet_sock *newinet; + struct tcp_sock *newtp; + struct sock *newsk; + struct flow_nip fln; + + if (sk_acceptq_is_full(sk)) + goto out_overflow; + + fln.daddr = ireq->ir_nip_rmt_addr; + if (!dst) { + dst = nip_route_output(sock_net(sk), sk, &fln); + if (!dst) + goto out; + } + + newsk = tcp_nip_create_openreq_child(sk, req, skb); + if (!newsk) + goto out_nonewsk; + + /* Save the received route cache */ + ninet_sk_rx_dst_set(newsk, skb); + + newtcpnipsk = (struct tcp_nip_sock *)newsk; + newtcpnipsk->common = niptreq->common; + + newtp = 
tcp_sk(newsk); + newinet = inet_sk(newsk); + + newsk->sk_nip_daddr = ireq->ir_nip_rmt_addr; + newsk->sk_nip_rcv_saddr = ireq->ir_nip_loc_addr; + + newinet->inet_opt = NULL; + + inet_csk(newsk)->icsk_ext_hdr_len = 0; + + newtp->retrans_stamp = jiffies; + + /* Negotiate MSS */ + newtp->mss_cache = TCP_BASE_MSS; + newtp->out_of_order_queue = RB_ROOT; + newtp->advmss = dst_metric_advmss(dst); + if (tcp_sk(sk)->rx_opt.user_mss && + tcp_sk(sk)->rx_opt.user_mss < newtp->advmss) + newtp->advmss = tcp_sk(sk)->rx_opt.user_mss; + + tcp_nip_initialize_rcv_mss(newsk); + if (__inet_inherit_port(sk, newsk) < 0) + goto put_and_exit; + /* Deleting the old sock from the ehash table and adding the new sock to the + * ehash table succeeds *own_req equals true + */ + *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), + &found_dup_sk); + + /* newip newsk doesn't save this dst. release it. */ + dst_release(dst); + return newsk; + +out_overflow: + __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); +out_nonewsk: +out: + /* newip newsk doesn't save this dst. release it. 
*/ + dst_release(dst); + tcp_listendrop(sk); + return NULL; +put_and_exit: + newinet->inet_opt = NULL; + inet_csk_prepare_forced_close(newsk); + tcp_nip_done(newsk); + goto out; +} + +static const struct inet_connection_sock_af_ops newip_specific = { + .queue_xmit = tcp_nip_queue_xmit, + .send_check = NULL, + .rebuild_header = NULL, + .sk_rx_dst_set = ninet_sk_rx_dst_set, + .conn_request = tcp_nip_conn_request, + .syn_recv_sock = tcp_nip_syn_recv_sock, + .net_header_len = 0, + .net_frag_header_len = 0, + .setsockopt = nip_setsockopt, + .getsockopt = nip_getsockopt, + .addr2sockaddr = NULL, + .sockaddr_len = sizeof(struct sockaddr_nin), + + .mtu_reduced = NULL, +}; + +#if IS_ENABLED(CONFIG_NEWIP_FAST_KEEPALIVE) +#define MAX_NIP_TCP_KEEPIDLE 32767 +#define MAX_NIP_TCP_KEEPINTVL 32767 +#define MAX_NIP_TCP_KEEPCNT 255 +static int tcp_nip_keepalive_para_update(struct sock *sk, + u32 keepalive_time, + u32 keepalive_intvl, + u8 keepalive_probes) +{ + int val; + struct tcp_sock *tp = tcp_sk(sk); + + /* set keep idle (TCP_KEEPIDLE) */ + val = keepalive_time; + if (val < 1 || val > MAX_NIP_TCP_KEEPIDLE) { + nip_dbg("keepalive_time(%u) invalid", val); + return -EINVAL; + } + + tp->keepalive_time = val; + if (sock_flag(sk, SOCK_KEEPOPEN) && + !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) { + u32 elapsed = keepalive_time_elapsed(tp); + + if (tp->keepalive_time > elapsed) + elapsed = tp->keepalive_time - elapsed; + else + elapsed = 0; + inet_csk_reset_keepalive_timer(sk, elapsed); + } + + /* set keep intvl (TCP_KEEPINTVL) */ + val = keepalive_intvl; + if (val < 1 || val > MAX_NIP_TCP_KEEPINTVL) { + nip_dbg("keepalive_intvl(%u) invalid", val); + return -EINVAL; + } + tp->keepalive_intvl = val; + + /* set keep cnt (TCP_KEEPCNT) */ + val = keepalive_probes; + if (val < 1 || val > MAX_NIP_TCP_KEEPCNT) { + nip_dbg("keepalive_probes(%u) invalid", val); + return -EINVAL; + } + tp->keepalive_probes = val; + + /* enable keepalive (SO_KEEPALIVE) */ + if (sk->sk_prot->keepalive) { + 
sk->sk_prot->keepalive(sk, 1); + sock_valbool_flag(sk, SOCK_KEEPOPEN, 1); + } else { + nip_dbg("keepalive func is null"); + } + + return 0; +} +#endif + +#define NIP_PKT_TOTAL_LEN_BOUNDARY 100000 // 100K +#define NIP_KEEPALIVE_PROBES 255 +void tcp_nip_keepalive_enable(struct sock *sk) +{ +#if IS_ENABLED(CONFIG_NEWIP_FAST_KEEPALIVE) + int ret; + struct tcp_sock *tp = tcp_sk(sk); + struct tcp_nip_common *ntp = &tcp_nip_sk(sk)->common; + struct sk_buff *skb = tcp_nip_send_head(sk); + + if (!skb) + return; + + if (ntp->nip_keepalive_enable) { + /* If keepalive set by setsockopt, backup para and change para to nip para */ + if (tp->keepalive_time > HZ) { + ntp->keepalive_time_bak = tp->keepalive_time; + ntp->keepalive_probes_bak = tp->keepalive_probes; + ntp->keepalive_intvl_bak = tp->keepalive_intvl; + + nip_dbg("HZ=%u, change time/probes/intvl [%u, %u, %u] to [%u, %u, %u]", + HZ, tp->keepalive_time, tp->keepalive_probes, + tp->keepalive_intvl, get_nip_keepalive_time(), + NIP_KEEPALIVE_PROBES, get_nip_keepalive_intvl()); + + tp->keepalive_time = get_nip_keepalive_time(); + tp->keepalive_probes = NIP_KEEPALIVE_PROBES; + tp->keepalive_intvl = get_nip_keepalive_intvl(); + inet_csk_reset_keepalive_timer(sk, tp->keepalive_time); + } + return; + } + + /* If keepalive set by setsockopt, backup para */ + if (sock_flag(sk, SOCK_KEEPOPEN)) { + ntp->keepalive_time_bak = tp->keepalive_time; + ntp->keepalive_probes_bak = tp->keepalive_probes; + ntp->keepalive_intvl_bak = tp->keepalive_intvl; + nip_dbg("HZ=%u, backup normal time/probes/intvl [%u, %u, %u]", + HZ, tp->keepalive_time, tp->keepalive_probes, tp->keepalive_intvl); + } + + /* change para to nip para */ + ret = tcp_nip_keepalive_para_update(sk, get_nip_keepalive_time(), + get_nip_keepalive_intvl(), + NIP_KEEPALIVE_PROBES); + if (ret != 0) { + nip_dbg("fail, HZ=%u, time/probes/intvl [%u, %u, %u]", + HZ, tp->keepalive_time, tp->keepalive_probes, tp->keepalive_intvl); + return; + } + + nip_dbg("ok, HZ=%u, time/probes/intvl 
[%u, %u, %u]", + HZ, tp->keepalive_time, tp->keepalive_probes, tp->keepalive_intvl); + ntp->nip_keepalive_enable = true; +#endif +} + +void tcp_nip_keepalive_disable(struct sock *sk) +{ +#if IS_ENABLED(CONFIG_NEWIP_FAST_KEEPALIVE) + struct tcp_sock *tp = tcp_sk(sk); + struct tcp_nip_common *ntp = &tcp_nip_sk(sk)->common; + + if (!ntp->nip_keepalive_enable) + return; + + if (!sock_flag(sk, SOCK_KEEPOPEN)) { + ntp->nip_keepalive_enable = false; + nip_dbg("ok, HZ=%u, normal ka has disable", HZ); + return; + } + + if (ntp->idle_ka_probes_out < get_nip_idle_ka_probes_out()) + return; + + /* newip keepalive change to normal keepalive */ + if (ntp->keepalive_time_bak) { + nip_dbg("HZ=%u, change normal time/probes/intvl [%u, %u, %u] to [%u, %u, %u]", + HZ, tp->keepalive_time, tp->keepalive_probes, + tp->keepalive_intvl, ntp->keepalive_time_bak, ntp->keepalive_probes_bak, + ntp->keepalive_intvl_bak); + tp->keepalive_time = ntp->keepalive_time_bak; + tp->keepalive_probes = ntp->keepalive_probes_bak; + tp->keepalive_intvl = ntp->keepalive_intvl_bak; + inet_csk_reset_keepalive_timer(sk, tp->keepalive_time); + return; + } + + ntp->keepalive_time_bak = 0; + ntp->keepalive_probes_bak = 0; + ntp->keepalive_intvl_bak = 0; + + /* enable keepalive (SO_KEEPALIVE) */ + if (sk->sk_prot->keepalive) + sk->sk_prot->keepalive(sk, 0); + sock_valbool_flag(sk, SOCK_KEEPOPEN, 0); + + nip_dbg("ok, HZ=%u, idle_ka_probes_out=%u", HZ, get_nip_idle_ka_probes_out()); + ntp->nip_keepalive_enable = false; +#endif +} + +static void _tcp_sock_priv_init(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct tcp_nip_common *ntp = &tcp_nip_sk(sk)->common; + + memset(ntp, 0, sizeof(*ntp)); + ntp->nip_ssthresh = get_nip_ssthresh_default(); + tp->sacked_out = 0; + tp->rcv_tstamp = 0; + tp->selective_acks[0].start_seq = 0; + tp->selective_acks[0].end_seq = 0; + tp->keepalive_time = 0; + tp->keepalive_probes = 0; + tp->keepalive_intvl = 0; +} + +static void tcp_sock_priv_init(struct sock *sk) +{ + 
_tcp_sock_priv_init(sk); +} + +/* Function + * Example Initialize sock information in TCP + * Parameter + * sk: Sock to be initialized + * Note: Currently, this function does not initialize timer, pre-queue, and congestion control, + * and does not allow fast retransmission. No function is set to adjust MSS + */ +static int tcp_nip_init_sock(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); + + tcp_sock_priv_init(sk); + + tp->out_of_order_queue = RB_ROOT; + tcp_nip_init_xmit_timers(sk); + INIT_LIST_HEAD(&tp->tsq_node); + + icsk->icsk_rto = get_nip_rto() == 0 ? TCP_TIMEOUT_INIT : (HZ / get_nip_rto()); + icsk->icsk_rto_min = TCP_RTO_MIN; + icsk->icsk_delack_max = TCP_DELACK_MAX; + tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); + minmax_reset(&tp->rtt_min, tcp_jiffies32, ~0U); + + tp->snd_cwnd = TCP_INIT_CWND; + tp->app_limited = ~0U; + tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; + tp->snd_cwnd_clamp = ~0; + tp->mss_cache = TCP_MSS_DEFAULT; + + tp->reordering = sock_net(sk)->ipv4.sysctl_tcp_reordering; + tp->tsoffset = 0; + sk->sk_state = TCP_CLOSE; + sk->sk_write_space = sk_stream_write_space; + sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); + + icsk->icsk_sync_mss = tcp_nip_sync_mss; + + WRITE_ONCE(sk->sk_sndbuf, get_nip_sndbuf()); // sock_net(sk)->ipv4.sysctl_tcp_wmem[1] + WRITE_ONCE(sk->sk_rcvbuf, get_nip_rcvbuf()); // sock_net(sk)->ipv4.sysctl_tcp_rmem[1] + + local_bh_disable(); + sk_sockets_allocated_inc(sk); + local_bh_enable(); + + icsk->icsk_af_ops = &newip_specific; + + return 0; +} + +static void skb_nip_entail(struct sock *sk, struct sk_buff *skb) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); + + skb->csum = 0; + tcb->seq = tp->write_seq; + tcb->end_seq = tp->write_seq; + tcb->tcp_flags = TCPHDR_ACK; + tcb->sacked = 0; + + tcp_nip_add_write_queue_tail(sk, skb); + + sk->sk_wmem_queued += skb->truesize; + sk_mem_charge(sk, skb->truesize); +} + +static unsigned int 
tcp_xmit_size_goal(struct sock *sk, u32 mss_now, + int large_allowed) +{ + struct tcp_sock *tp = tcp_sk(sk); + u32 new_size_goal, size_goal; + + if (!large_allowed) + return mss_now; + + /* Note : tcp_tso_autosize() will eventually split this later */ + new_size_goal = sk->sk_gso_max_size - 1 - MAX_TCP_HEADER; + new_size_goal = tcp_bound_to_half_wnd(tp, new_size_goal); + + /* We try hard to avoid divides here */ + size_goal = tp->gso_segs * mss_now; + if (unlikely(new_size_goal < size_goal || + new_size_goal >= size_goal + mss_now)) { + tp->gso_segs = min_t(u16, new_size_goal / mss_now, + sk->sk_gso_max_segs); + size_goal = tp->gso_segs * mss_now; + } + + return max(size_goal, mss_now); +} + +int tcp_nip_send_mss(struct sock *sk, int *size_goal, int flags) +{ + int mss_now; + + mss_now = tcp_nip_current_mss(sk); + *size_goal = tcp_xmit_size_goal(sk, mss_now, !(flags & MSG_OOB)); + return mss_now; +} + +int tcp_nip_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct sk_buff *skb; + int flags; + int err; + int copied = 0; + int mss_now = 0; + int size_goal; + bool process_backlog = false; + long timeo; + + lock_sock(sk); + + flags = msg->msg_flags; + + timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); + + if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) && + !tcp_passive_fastopen(sk)) { + err = sk_stream_wait_connect(sk, &timeo); + if (err != 0) + goto do_error; + } + + /* This should be in poll */ + sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); + + copied = 0; + +restart: + mss_now = tcp_nip_send_mss(sk, &size_goal, flags); + + nip_dbg("mss_now=%d", mss_now); + + err = -EPIPE; + if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) + goto do_error; + + while (msg_data_left(msg)) { + int copy = 0; + int max = mss_now; + + bool first_skb; + + if (!sk_stream_memory_free(sk)) + goto wait_for_sndbuf; + + if (process_backlog && sk_flush_backlog(sk)) { + process_backlog = false; + goto restart; + } + first_skb 
= skb_queue_empty(&sk->sk_write_queue); + skb = sk_stream_alloc_skb(sk, mss_now, sk->sk_allocation, first_skb); + if (!skb) + goto wait_for_memory; + + skb->tstamp = 0; + process_backlog = true; + + skb_nip_entail(sk, skb); + copy = mss_now; + max = mss_now; + + /* Try to append data to the end of skb. */ + if (copy > msg_data_left(msg)) + copy = msg_data_left(msg); + + if (skb_availroom(skb) > 0) { + /* We have some space in skb head. Superb! */ + copy = min_t(int, copy, skb_availroom(skb)); + err = skb_add_data_nocache(sk, skb, &msg->msg_iter, copy); + if (err) + goto do_fault; + } else { + nip_dbg("msg too big, tcp cannot devide packet now"); + goto out; + } + + if (!copied) + TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH; + + tp->write_seq += copy; + TCP_SKB_CB(skb)->end_seq += copy; + tcp_skb_pcount_set(skb, 0); + copied += copy; + if (!msg_data_left(msg)) { + if (unlikely(flags & MSG_EOR)) + TCP_SKB_CB(skb)->eor = 1; + goto out; + } + + continue; + +wait_for_sndbuf: + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); +wait_for_memory: + if (copied) + tcp_nip_push(sk, flags & ~MSG_MORE, mss_now, + TCP_NAGLE_PUSH, size_goal); + + err = sk_stream_wait_memory(sk, &timeo); + if (err != 0) + goto do_error; + + mss_now = tcp_nip_send_mss(sk, &size_goal, flags); + } + +out: + if (copied) + tcp_nip_push(sk, flags, mss_now, tp->nonagle, size_goal); + release_sock(sk); + return copied; + +do_fault: + if (!skb->len) { + tcp_unlink_write_queue(skb, sk); + sk_wmem_free_skb(sk, skb); + } + +do_error: + if (copied) + goto out; + + err = sk_stream_error(sk, flags, err); + /* make sure we wake any epoll edge trigger waiter */ + if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && err == -EAGAIN)) + sk->sk_write_space(sk); + release_sock(sk); + return err; +} + +/* Clean up the receive buffer for full frames taken by the user, + * then send an ACK if necessary. 
COPIED is the number of bytes + * tcp_recvmsg has given to the user so far, it speeds up the + * calculation of whether or not we must ACK for the sake of + * a window update. + */ +void tcp_nip_cleanup_rbuf(struct sock *sk, int copied) +{ + struct tcp_sock *tp = tcp_sk(sk); + bool time_to_ack = false; + + struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); + + WARN(skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq), + "cleanup rbuf bug: copied %X seq %X rcvnxt %X", + tp->copied_seq, TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt); + + if (inet_csk_ack_scheduled(sk)) { + const struct inet_connection_sock *icsk = inet_csk(sk); + + if (tp->rcv_nxt - tp->rcv_wup > (get_ack_num() * 20 * icsk->icsk_ack.rcv_mss) || + /* If this read emptied read buffer, we send ACK, if + * connection is not bidirectional, user drained + * receive buffer and there was a small segment + * in queue. + */ + (copied > 0 && + ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED2) || + ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED) && + !inet_csk_in_pingpong_mode(sk))) && + !atomic_read(&sk->sk_rmem_alloc))) + time_to_ack = true; + } + + /* We send an ACK if we can now advertise a non-zero window + * which has been raised "significantly". + * + * Even if window raised up to infinity, do not send window open ACK + * in states, where we will not receive more. It is useless. + */ + if (copied > 0 && !time_to_ack && !(sk->sk_shutdown & RCV_SHUTDOWN)) { + __u32 rcv_window_now = tcp_receive_window(tp); + + /* Optimize, __nip_tcp_select_window() is not cheap. */ + if (2 * rcv_window_now <= tp->window_clamp) { + __u32 new_window = __nip_tcp_select_window(sk); + + /* Send ACK now, if this read freed lots of space + * in our buffer. Certainly, new_window is new window. + * We can advertise it now, if it is not less than current one. + * "Lots" means "at least twice" here. 
+ */ + if (new_window && new_window >= 2 * rcv_window_now) + time_to_ack = true; + } + } + if (time_to_ack) + tcp_nip_send_ack(sk); +} + +int tcp_nip_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, + int flags, int *addr_len) +{ + struct tcp_sock *tp = tcp_sk(sk); + int copied = 0; + u32 *seq; + unsigned long used; + int err = 0; + int target; + long timeo; + size_t len_tmp = len; + struct sk_buff *skb, *last; + + lock_sock(sk); + + if (sk->sk_state == TCP_LISTEN) + goto out; + + timeo = sock_rcvtimeo(sk, nonblock); + + seq = &tp->copied_seq; + + target = sock_rcvlowat(sk, flags & MSG_WAITALL, len_tmp); + + do { + u32 offset; + /* Next get a buffer. */ + last = skb_peek_tail(&sk->sk_receive_queue); + skb_queue_walk(&sk->sk_receive_queue, skb) { + last = skb; + /* Now that we have two receive queues this + * shouldn't happen. + */ + if (WARN(before(*seq, TCP_SKB_CB(skb)->seq), + "TCP recvmsg seq # bug: copied %X, seq %X, rcvnxt %X, fl %X", + *seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt, + flags)) + break; + offset = *seq - TCP_SKB_CB(skb)->seq; + if (unlikely(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) { + pr_err_once("found a SYN, please report"); + offset--; + } + if (offset < skb->len) + goto found_ok_skb; + if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) + goto found_fin_ok; + /* If the first SKB in the current SK_receive_queue is not the SKB to + * be replicated, then MSG_PEEK should be set in flags + */ + WARN(!(flags & MSG_PEEK), + "TCP recvmsg seq # bug 2: copied %X, seq %X, rcvnxt %X, fl %X", + *seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt, flags); + } + + /* If the program is executed at this point, the SK_receive_queue is finished */ + /* If there is no data in the backlog, stop reading at target */ + if (copied >= target && !sk->sk_backlog.tail) + break; + + if (copied) { + if (sk->sk_err || + sk->sk_state == TCP_CLOSE || + (sk->sk_shutdown & RCV_SHUTDOWN) || + !timeo || + signal_pending(current)) + break; + } else { + if (sock_flag(sk, 
SOCK_DONE)) + break; + + if (sk->sk_err) { + copied = sock_error(sk); + break; + } + + if (sk->sk_shutdown & RCV_SHUTDOWN) + break; + + if (sk->sk_state == TCP_CLOSE) { + if (!sock_flag(sk, SOCK_DONE)) { + /* This occurs when user tries to read + * from never connected socket. + */ + copied = -ENOTCONN; + break; + } + break; + } + + if (!timeo) { + copied = -EAGAIN; + break; + } + + if (signal_pending(current)) { + copied = sock_intr_errno(timeo); + break; + } + } + + tcp_nip_cleanup_rbuf(sk, copied); + + if (copied >= target) { + /* Do not sleep, just process backlog. */ + release_sock(sk); + lock_sock(sk); + } else { + nip_dbg("no enough data receive queue, wait"); + sk_wait_data(sk, &timeo, last); + } + continue; +found_ok_skb: + used = skb->len - offset; + if (len_tmp < used) + used = len_tmp; + nip_dbg("copy data into msg, len=%ld", used); + if (!(flags & MSG_TRUNC)) { + err = skb_copy_datagram_msg(skb, offset, msg, used); + if (err) { + nip_dbg("copy data failed"); + if (!copied) + copied = -EFAULT; + break; + } + } + *seq += used; + len_tmp -= used; + copied += used; + + if (used + offset < skb->len) + continue; + + if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) + goto found_fin_ok; + if (!(flags & MSG_PEEK)) + sk_eat_skb(sk, skb); + continue; + +found_fin_ok: + /* Process the FIN. */ + ++*seq; + if (!(flags & MSG_PEEK)) + sk_eat_skb(sk, skb); + break; + } while (len_tmp > 0); + + /* Clean up data we have read: This will do ACK frames. 
*/ + tcp_nip_cleanup_rbuf(sk, copied); + + release_sock(sk); + return copied; + +out: + release_sock(sk); + return err; +} + +static void skb_nip_rbtree_purge(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + + skb_rbtree_purge(&tp->out_of_order_queue); +} + +void tcp_nip_destroy_sock(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + + tcp_nip_clear_xmit_timers(sk); + + tcp_nip_write_queue_purge(sk); + + skb_nip_rbtree_purge(sk); + + if (inet_csk(sk)->icsk_bind_hash) + inet_put_port(sk); + + tcp_saved_syn_free(tp); + local_bh_disable(); + sk_sockets_allocated_dec(sk); + local_bh_enable(); +} + +/* Function + * The sock handler for THE LISTEN and ESTABLISHED states is called by tcp_nip_rCV + * Parameter + * skb: Packets received from the network layer + * sk: A SOCK instance needs to be processed + */ +static int tcp_nip_do_rcv(struct sock *sk, struct sk_buff *skb) +{ + nip_dbg("received newip tcp skb, sk_state=%d", sk->sk_state); + + if (sk->sk_state == TCP_ESTABLISHED) { + struct dst_entry *dst = sk->sk_rx_dst; + + if (dst) { + /* Triggered when processing newly received skb after deleting routes */ + if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif || + !dst->ops->check(dst, 0)) { + dst_release(dst); + sk->sk_rx_dst = NULL; + } + } + tcp_nip_rcv_established(sk, skb, tcp_hdr(skb), skb->len); + return 0; + } + + /* The connection is established in cookie mode to defend against SYN-flood attacks */ + if (sk->sk_state == TCP_LISTEN) + nip_dbg("found TCP_LISTEN SOCK"); + + if (tcp_nip_rcv_state_process(sk, skb)) + goto discard; + return 0; + +discard: + kfree_skb(skb); + return 0; +} + +/* Function: + * Fill the TCP header field in SKB into the TCP private control block, + * because the TCP header field in SKB is the network byte order, + * in order to facilitate later call, need to convert the host byte order + * and store in the TCP control block. 
+ * Parameter: + * skb:Packets delivered by the network layer + * th:TCP header field in a packet + */ +static void tcp_nip_fill_cb(struct sk_buff *skb, const struct tcphdr *th) +{ + barrier(); + + TCP_SKB_CB(skb)->seq = ntohl(th->seq); + TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + + skb->len - th->doff * TCP_NUM_4); + + TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); + TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th); + TCP_SKB_CB(skb)->tcp_tw_isn = 0; + TCP_SKB_CB(skb)->sacked = 0; +} + +static bool tcp_nip_add_backlog(struct sock *sk, struct sk_buff *skb) +{ + u32 limit = READ_ONCE(sk->sk_rcvbuf) + READ_ONCE(sk->sk_sndbuf); + + /* Only socket owner can try to collapse/prune rx queues + * to reduce memory overhead, so add a little headroom here. + * Few sockets backlog are possibly concurrently non empty. + */ + limit += 64 * 1024; + + /* In case all data was pulled from skb frags (in __pskb_pull_tail()), + * we can fix skb->truesize to its real value to avoid future drops. + * This is valid because skb is not yet charged to the socket. + * It has been noticed pure SACK packets were sometimes dropped + * (if cooked by drivers without copybreak feature). 
+ */ + skb_condense(skb); + + if (unlikely(sk_add_backlog(sk, skb, limit))) { + bh_unlock_sock(sk); + __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPBACKLOGDROP); + nip_dbg("insert backlog fail"); + return true; + } + return false; +} + +/* Function + * TCP is the gateway from the network layer to the transport layer + * and receives data packets from the network layer + * Parameter + * skb:Packets delivered by the network layer + */ +static int tcp_nip_rcv(struct sk_buff *skb) +{ + const struct tcphdr *th; + bool refcounted; + struct sock *sk; + int ret; + int dif = skb->skb_iif; + + if (skb->pkt_type != PACKET_HOST) { + nip_dbg("unknown pkt-type(%u), drop skb", skb->pkt_type); + goto discard_it; + } + + if (!nip_get_tcp_input_checksum(skb)) { + nip_dbg("checksum fail, drop skb"); + goto discard_it; + } + + th = (const struct tcphdr *)skb->data; + + if (unlikely(th->doff < sizeof(struct tcphdr) / TCP_NUM_4)) { + nip_dbg("non-four byte alignment, drop skb"); + goto discard_it; + } + + sk = __ninet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), + th->source, th->dest, dif, &refcounted); + if (!sk) { + nip_dbg("can`t find related sock for skb, will disconnect"); + goto no_tcp_socket; + } + + if (sk->sk_state == TCP_TIME_WAIT) { + /* Handles the SK portion of the interrupt state */ + nip_dbg("sk_state is TCP_TIME_WAIT, drop skb"); + goto discard_it; + } + if (sk->sk_state == TCP_NEW_SYN_RECV) { + struct request_sock *req = inet_reqsk(sk); + struct sock *nsk; + + nip_dbg("TCP server into third shake hands, sk->sk_state:%d", sk->sk_state); + sk = req->rsk_listener; + + sock_hold(sk); + refcounted = true; + nsk = NULL; + /* You need to create a new SOCK and enter TCP_SYN_RECV, + * which is then set to Established + */ + if (!tcp_filter(sk, skb)) { + th = (const struct tcphdr *)skb->data; + tcp_nip_fill_cb(skb, th); + nsk = tcp_nip_check_req(sk, skb, req); + } + if (!nsk || nsk == sk) { + nip_dbg("skb info error and create newsk failure, drop skb"); + reqsk_put(req); + goto 
discard_and_relse; + } + if (tcp_nip_child_process(sk, nsk, skb)) { + nip_dbg("child process fail, drop skb"); + goto discard_and_relse; + } else { + sock_put(sk); + return 0; + } + } + + tcp_nip_fill_cb(skb, th); + + if (tcp_filter(sk, skb)) { + nip_dbg("tcp filter fail, drop skb"); + goto discard_and_relse; + } + th = (const struct tcphdr *)skb->data; + skb->dev = NULL; + + if (sk->sk_state == TCP_LISTEN) { + nip_dbg("TCP server into first shake hands! sk->sk_state:%d", sk->sk_state); + ret = tcp_nip_do_rcv(sk, skb); + goto put_and_return; + } + bh_lock_sock_nested(sk); + + ret = 0; + if (!sock_owned_by_user(sk)) { + ret = tcp_nip_do_rcv(sk, skb); + } else { + nip_dbg("sock locked by user, put packet into backlog"); + if (tcp_nip_add_backlog(sk, skb)) { + nip_dbg("add backlog fail, drop skb"); + goto discard_and_relse; + } + } + + bh_unlock_sock(sk); + +put_and_return: + if (refcounted) + sock_put(sk); + return ret ? -1 : 0; + +no_tcp_socket: + tcp_nip_send_reset(NULL, skb); + goto discard_it; +discard_it: + kfree_skb(skb); + return 0; + +discard_and_relse: + sk_drops_add(sk, skb); + if (refcounted) + sock_put(sk); + goto discard_it; +} + +static void tcp_nip_early_demux(struct sk_buff *skb) +{ + const struct tcphdr *th; + struct sock *sk; + + if (skb->pkt_type != PACKET_HOST) + return; + + if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr))) + return; + + th = tcp_hdr(skb); + if (th->doff < sizeof(struct tcphdr) / 4) + return; + + sk = __ninet_lookup_established(dev_net(skb->dev), &tcp_hashinfo, + &NIPCB(skb)->srcaddr, th->source, + &NIPCB(skb)->dstaddr, ntohs(th->dest), skb->skb_iif); + if (sk) { + skb->sk = sk; + skb->destructor = sock_edemux; + if (sk_fullsock(sk)) { + struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst); + + if (dst) + dst = dst_check(dst, 0); + if (dst && inet_sk(sk)->rx_dst_ifindex == skb->skb_iif) { + nip_dbg("find sock in ehash, set dst for skb"); + skb_dst_set_noref(skb, dst); + } + } + } +} + +void 
tcp_nip_done(struct sock *sk) +{ + struct request_sock *req = tcp_sk(sk)->fastopen_rsk; + + if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV) + TCP_INC_STATS(sock_net(sk), TCP_MIB_ATTEMPTFAILS); + + tcp_set_state(sk, TCP_CLOSE); + inet_csk_clear_xmit_timers(sk); + if (req) + reqsk_fastopen_remove(sk, req, false); + + sk->sk_shutdown = SHUTDOWN_MASK; + + if (!sock_flag(sk, SOCK_DEAD)) { + sk->sk_state_change(sk); + } else { + WARN_ON(sk->sk_state != TCP_CLOSE); + WARN_ON(!sock_flag(sk, SOCK_DEAD)); + + /* It cannot be in hash table! */ + WARN_ON(!sk_unhashed(sk)); + + /* If it has not 0 inet_sk(sk)->inet_num, it must be bound */ + WARN_ON(inet_sk(sk)->inet_num && !inet_csk(sk)->icsk_bind_hash); + sk->sk_prot->destroy(sk); + + sk_nip_stream_kill_queues(sk); + + local_bh_disable(); + this_cpu_dec(*sk->sk_prot->orphan_count); + local_bh_enable(); + sock_put(sk); + nip_dbg("close sock done"); + } +} + +/* Function + * Disconnect the connection to the peer end, non-blocking + * Release read/write queue, send RST (not sent yet), clear timer + * Parameter + * sk: Transmission control block + */ +int tcp_nip_disconnect(struct sock *sk, int flags) +{ + struct inet_sock *inet = inet_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); + int err = 0; + int old_state = sk->sk_state; + u32 sk_ack_backlog; + + nip_dbg("old_state=%u", old_state); + if (old_state != TCP_CLOSE) + tcp_set_state(sk, TCP_CLOSE); + + if (old_state == TCP_LISTEN) { + sk_ack_backlog = READ_ONCE(sk->sk_ack_backlog); + inet_csk_listen_stop(sk); + nip_dbg("sk_state CLOSE, sk_ack_backlog=%u to %u, sk_max_ack_backlog=%u", + sk_ack_backlog, READ_ONCE(sk->sk_ack_backlog), + READ_ONCE(sk->sk_max_ack_backlog)); + } else if (tcp_nip_need_reset(old_state) || (tp->snd_nxt != tp->write_seq && + (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) { + tcp_nip_send_active_reset(sk, gfp_any()); + sk->sk_err = ECONNRESET; + } else if (old_state == TCP_SYN_SENT) { + 
sk->sk_err = ECONNRESET; + } + + tcp_nip_clear_xmit_timers(sk); + __skb_queue_purge(&sk->sk_receive_queue); + tcp_write_queue_purge(sk); + + _tcp_sock_priv_init(sk); + + inet->inet_dport = 0; + sk->sk_shutdown = 0; + sock_reset_flag(sk, SOCK_DONE); + tp->srtt_us = 0; + tp->write_seq += tp->max_window + TCP_NUM_2; + if (tp->write_seq == 0) + tp->write_seq = 1; + tp->snd_cwnd = TCP_NUM_2; + icsk->icsk_backoff = 0; + icsk->icsk_probes_out = 0; + icsk->icsk_probes_tstamp = 0; + icsk->icsk_rto = get_nip_rto() == 0 ? TCP_TIMEOUT_INIT : (HZ / get_nip_rto()); + icsk->icsk_rto_min = TCP_RTO_MIN; + icsk->icsk_delack_max = TCP_DELACK_MAX; + tp->packets_out = 0; + tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; + tp->snd_cwnd_cnt = 0; + tp->window_clamp = 0; + tp->delivered = 0; + tcp_clear_retrans(tp); + tp->total_retrans = 0; + inet_csk_delack_init(sk); + + icsk->icsk_ack.rcv_mss = TCP_MIN_MSS; + sk->sk_send_head = NULL; + memset(&tp->rx_opt, 0, sizeof(tp->rx_opt)); + __sk_dst_reset(sk); + dst_release(sk->sk_rx_dst); + sk->sk_rx_dst = NULL; + tp->segs_in = 0; + tp->segs_out = 0; + tp->bytes_acked = 0; + tp->bytes_received = 0; + tp->data_segs_in = 0; + tp->data_segs_out = 0; + + WARN_ON(inet->inet_num && !icsk->icsk_bind_hash); + + if (sk->sk_frag.page) { + put_page(sk->sk_frag.page); + sk->sk_frag.page = NULL; + sk->sk_frag.offset = 0; + } + + sk->sk_error_report(sk); + return err; +} + +struct sock *ninet_csk_accept(struct sock *sk, int flags, int *err, bool kern) +{ + struct sock *newsk; + u32 sk_ack_backlog_last = READ_ONCE(sk->sk_ack_backlog); + u32 sk_max_ack_backlog = READ_ONCE(sk->sk_max_ack_backlog); + + newsk = inet_csk_accept(sk, flags, err, kern); + nip_dbg("accept %s, sk_ack_backlog_last=%u, sk_max_ack_backlog=%u", + (newsk ? 
"ok" : "fail"), sk_ack_backlog_last, sk_max_ack_backlog); + + return newsk; +} + +struct proto tcp_nip_prot = { + .name = "NIP_TCP", + .owner = THIS_MODULE, + .close = tcp_nip_close, + .connect = tcp_nip_connect, + .disconnect = tcp_nip_disconnect, + .accept = ninet_csk_accept, + .ioctl = tcp_ioctl, + .init = tcp_nip_init_sock, + .destroy = tcp_nip_destroy_sock, + .shutdown = tcp_nip_shutdown, + .setsockopt = tcp_setsockopt, + .getsockopt = tcp_getsockopt, + .keepalive = tcp_set_keepalive, + .recvmsg = tcp_nip_recvmsg, + .sendmsg = tcp_nip_sendmsg, + .sendpage = NULL, + .backlog_rcv = tcp_nip_do_rcv, + .release_cb = tcp_nip_release_cb, + .hash = ninet_hash, + .unhash = ninet_unhash, + .get_port = inet_csk_get_port, + .sockets_allocated = &tcp_sockets_allocated, + .orphan_count = &tcp_orphan_count, + .memory_allocated = &tcp_memory_allocated, + .memory_pressure = &tcp_memory_pressure, + .sysctl_mem = sysctl_tcp_mem, + .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem), + .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem), + .max_header = MAX_TCP_HEADER, + .obj_size = sizeof(struct tcp_nip_sock), + .rsk_prot = &tcp_nip_request_sock_ops, + .h.hashinfo = &tcp_hashinfo, + .no_autobind = true, +}; + +static const struct ninet_protocol tcp_nip_protocol = { + .early_demux = tcp_nip_early_demux, + .handler = tcp_nip_rcv, + .flags = 0, +}; + +static struct inet_protosw tcp_nip_protosw = { + .type = SOCK_STREAM, + .protocol = IPPROTO_TCP, + .prot = &tcp_nip_prot, + .ops = &ninet_stream_ops, + .flags = INET_PROTOSW_PERMANENT | + INET_PROTOSW_ICSK, +}; + +int __init tcp_nip_init(void) +{ + int ret; + + ret = ninet_add_protocol(&tcp_nip_protocol, IPPROTO_TCP); + if (ret) + goto out; + + /* register ninet protocol */ + ret = ninet_register_protosw(&tcp_nip_protosw); + if (ret) + goto out_nip_tcp_protocol; + +out: + return ret; + +out_nip_tcp_protocol: + ninet_del_protocol(&tcp_nip_protocol, IPPROTO_TCP); + goto out; +} + +void tcp_nip_exit(void) +{ + 
ninet_unregister_protosw(&tcp_nip_protosw); + ninet_del_protocol(&tcp_nip_protocol, IPPROTO_TCP); +} diff --git a/newip/src/linux-5.10/net/newip/tcp_nip_input.c b/newip/src/linux-5.10/net/newip/tcp_nip_input.c new file mode 100644 index 0000000000000000000000000000000000000000..c8ef3a6c056632993bac5fb1e8e0e3e3977ef38f --- /dev/null +++ b/newip/src/linux-5.10/net/newip/tcp_nip_input.c @@ -0,0 +1,1996 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. + * + * NewIP INET + * An implementation of the TCP/IP protocol suite for the LINUX + * operating system. NewIP INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Implementation of the Transmission Control Protocol(TCP). + * + * Based on net/ipv4/tcp_input.c + * Based on net/ipv4/tcp_output.c + * Based on net/ipv4/tcp_minisocks.c + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": [%s:%d] " fmt, __func__, __LINE__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "tcp_nip_parameter.h" + +#define FLAG_DATA 0x01 /* Incoming frame contained data. */ +#define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ +#define FLAG_DATA_ACKED 0x04 /* This ACK acknowledged new data. */ +#define FLAG_RETRANS_DATA_ACKED 0x08 /* some of which was retransmitted. */ +#define FLAG_SYN_ACKED 0x10 /* This ACK acknowledged SYN. */ +#define FLAG_DATA_SACKED 0x20 /* New SACK. 
*/ +#define FLAG_ECE 0x40 /* ECE in this ACK */ +#define FLAG_LOST_RETRANS 0x80 /* This ACK marks some retransmission lost */ +#define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update */ +#define FLAG_ORIG_SACK_ACKED 0x200 /* Never retransmitted data are (s)acked */ +#define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */ +#define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */ +#define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */ +#define FLAG_UPDATE_TS_RECENT 0x4000 /* tcp_replace_ts_recent() */ +#define FLAG_NO_CHALLENGE_ACK 0x8000 /* do not call tcp_send_challenge_ack() */ + +#define FLAG_ACKED (FLAG_DATA_ACKED | FLAG_SYN_ACKED) +#define FLAG_NOT_DUP (FLAG_DATA | FLAG_WIN_UPDATE | FLAG_ACKED) +#define FLAG_CA_ALERT (FLAG_DATA_SACKED | FLAG_ECE) +#define FLAG_FORWARD_PROGRESS (FLAG_ACKED | FLAG_DATA_SACKED) + +#define TCP_REMNANT (TCP_FLAG_FIN | TCP_FLAG_URG | TCP_FLAG_SYN | TCP_FLAG_PSH) +#define TCP_HP_BITS (~(TCP_RESERVED_BITS | TCP_FLAG_PSH)) + +#define REXMIT_NONE 0 /* no loss recovery to do */ +#define REXMIT_LOST 1 /* retransmit packets marked lost */ +#define REXMIT_NEW 2 /* FRTO-style transmit of unsent/new packets */ + +#define TCP_MAX_MSS 1460 + +void tcp_nip_fin(struct sock *sk) +{ + inet_csk_schedule_ack(sk); + + sk->sk_shutdown |= RCV_SHUTDOWN; + sock_set_flag(sk, SOCK_DONE); + + switch (sk->sk_state) { + case TCP_SYN_RECV: + case TCP_ESTABLISHED: + /* Move to CLOSE_WAIT */ + tcp_set_state(sk, TCP_CLOSE_WAIT); + inet_csk(sk)->icsk_ack.pingpong = 1; + break; + + case TCP_CLOSE_WAIT: + case TCP_CLOSING: + /* Received a retransmission of the FIN, do + * nothing. + */ + break; + case TCP_LAST_ACK: + /* RFC793: Remain in the LAST-ACK state. */ + break; + + case TCP_FIN_WAIT1: + /* This case occurs when a simultaneous close + * happens, we must ack the received FIN and + * enter the CLOSING state. 
+ */ + tcp_nip_send_ack(sk); + tcp_set_state(sk, TCP_CLOSING); + break; + case TCP_FIN_WAIT2: + /* Received a FIN -- send ACK and enter TIME_WAIT. */ + tcp_nip_send_ack(sk); + inet_csk_reset_keepalive_timer(sk, TCP_TIMEWAIT_LEN); + break; + default: + /* Only TCP_LISTEN and TCP_CLOSE are left, in these + * cases we should never reach this piece of code. + */ + nip_dbg("Impossible, sk->sk_state=%d", sk->sk_state); + break; + } + + if (!sock_flag(sk, SOCK_DEAD)) + sk->sk_state_change(sk); +} + +static void tcp_nip_drop(struct sock *sk, struct sk_buff *skb) +{ + sk_drops_add(sk, skb); + __kfree_skb(skb); +} + +static void tcp_nip_overlap_handle(struct tcp_sock *tp, struct sk_buff *skb) +{ + u32 diff = tp->rcv_nxt - TCP_SKB_CB(skb)->seq; + struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); + + skb->data += diff; + skb->len -= diff; + tcb->seq += diff; +} + +static void tcp_nip_left_overlap(struct sk_buff *cur, struct sk_buff *skb) +{ + u32 diff = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(cur)->seq; + struct tcp_skb_cb *tcb = TCP_SKB_CB(cur); + + cur->data += diff; + cur->len -= diff; + tcb->seq += diff; +} + +static void tcp_nip_right_overlap(struct sk_buff *cur, struct sk_buff *skb) +{ + u32 diff = TCP_SKB_CB(cur)->end_seq - TCP_SKB_CB(skb)->seq; + struct tcp_skb_cb *tcb = TCP_SKB_CB(cur); + unsigned int len; + + len = cur->len - diff; + /* At present NewIP only uses linear regions, uses skb_trim to remove end from a buffer; + * If the nonlinear region is also used later, use pskb_trim to remove end from a buffer; + */ + skb_trim(cur, len); + tcb->end_seq -= diff; +} + +/* If we update tp->rcv_nxt, also update tp->bytes_received */ +static void tcp_nip_rcv_nxt_update(struct tcp_sock *tp, u32 seq) +{ + u32 delta = seq - tp->rcv_nxt; + + sock_owned_by_me((struct sock *)tp); + tp->bytes_received += delta; + WRITE_ONCE(tp->rcv_nxt, seq); +} + +/* tcp_nip_try_coalesce - try to merge skb to prior one + * @sk: socket + * @to: prior buffer + * @from: buffer to add in queue + * 
 * @fragstolen: pointer to boolean
 *
 * Before queueing skb @from after @to, try to merge them
 * to reduce overall memory use and queue lengths, if cost is small.
 * Packets in ofo or receive queues can stay a long time.
 * Better try to coalesce them right now to avoid future collapses.
 * Returns true if caller should free @from instead of queueing it
 */
static bool tcp_nip_try_coalesce(struct sock *sk,
				 struct sk_buff *to,
				 struct sk_buff *from,
				 bool *fragstolen)
{
	int delta;

	*fragstolen = false;

	/* Its possible this segment overlaps with prior segment in queue */
	if (TCP_SKB_CB(from)->seq != TCP_SKB_CB(to)->end_seq)
		return false;

	if (!skb_try_coalesce(to, from, fragstolen, &delta)) {
		nip_dbg("try to merge skb to the previous one failed");
		return false;
	}

	/* Merge succeeded: charge the grown truesize to the socket and
	 * extend @to's control block to cover @from's sequence range.
	 */
	atomic_add(delta, &sk->sk_rmem_alloc);
	sk_mem_charge(sk, delta);
	NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVCOALESCE);
	TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq;
	TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq;
	TCP_SKB_CB(to)->tcp_flags |= TCP_SKB_CB(from)->tcp_flags;

	/* Carry the rx timestamp over so it is not lost with @from */
	if (TCP_SKB_CB(from)->has_rxtstamp) {
		TCP_SKB_CB(to)->has_rxtstamp = true;
		to->tstamp = from->tstamp;
		skb_hwtstamps(to)->hwtstamp = skb_hwtstamps(from)->hwtstamp;
	}

	return true;
}

/* Coalesce variant for the out-of-order queue: additionally folds @from's
 * gso_segs into @to (capped at u16 max) so segment accounting stays right.
 */
static bool tcp_nip_ooo_try_coalesce(struct sock *sk,
				     struct sk_buff *to,
				     struct sk_buff *from,
				     bool *fragstolen)
{
	bool res = tcp_nip_try_coalesce(sk, to, from, fragstolen);

	/* In case tcp_nip_drop() is called later, update to->gso_segs */
	if (res) {
		u32 gso_segs = max_t(u16, 1, skb_shinfo(to)->gso_segs) +
			       max_t(u16, 1, skb_shinfo(from)->gso_segs);
		u32 to_gso_segs = skb_shinfo(to)->gso_segs;

		nip_dbg("(to)->gso_segs %u, (from)->gso_segs %u", skb_shinfo(to)->gso_segs,
			skb_shinfo(from)->gso_segs);
		skb_shinfo(to)->gso_segs = min_t(u32, gso_segs, 0xFFFF);
		nip_dbg("gso_segs %u to %u", to_gso_segs, skb_shinfo(to)->gso_segs);
	}
	return res;
}

/* This one checks to see if we can put data from the
 * out_of_order queue into the receive_queue.
 */
static void tcp_nip_ofo_queue(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	bool fin;
	bool fragstolen;
	bool eaten;
	struct sk_buff *skb;
	struct sk_buff *tail;
	struct rb_node *p;

	/* Walk the ofo rbtree in sequence order, moving every segment that
	 * is now contiguous with rcv_nxt onto the receive queue.
	 */
	p = rb_first(&tp->out_of_order_queue);
	while (p) {
		skb = rb_to_skb(p);
		if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
			nip_dbg("nodes are all after rcv_nxt");
			break;
		}

		p = rb_next(p);
		rb_erase(&skb->rbnode, &tp->out_of_order_queue);

		/* Segment entirely below rcv_nxt: duplicate, discard */
		if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) {
			nip_dbg("this node is before rcv_nxt, drop skb");
			tcp_nip_drop(sk, skb);
			continue;
		}

		tail = skb_peek_tail(&sk->sk_receive_queue);
		eaten = tail && tcp_nip_try_coalesce(sk, tail, skb, &fragstolen);
		tcp_nip_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
		fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
		if (!eaten)
			__skb_queue_tail(&sk->sk_receive_queue, skb);
		else
			kfree_skb_partial(skb, fragstolen);

		if (unlikely(fin)) {
			nip_dbg("will send fin");
			tcp_nip_fin(sk);
			/* tcp_fin() purges tp->out_of_order_queue,
			 * so we must end this loop right now.
			 */
			break;
		}
	}
}

/* The tcp_nip_data_queue function is responsible for receiving the socket data. For the packets
 * whose start sequence number is after the sequence to be received by the socket and whose
 * start sequence number is within the receiving window, current function is called to add them
 * to the TCP out-of-order queue.
 */
/* Insert out-of-order segment @skb into tp->out_of_order_queue (an rbtree
 * keyed by sequence number), coalescing with neighbours and trimming
 * overlaps on the way.  @sk: receiving socket; @skb: in-window segment
 * whose start is beyond tp->rcv_nxt.
 */
static void tcp_nip_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct rb_node **p;
	struct rb_node *parent;
	struct sk_buff *skb1;
	struct sk_buff *skb2;
	u32 seq;
	u32 end_seq;
	bool fragstolen;

	/* Receive buffer already over budget: drop instead of queueing */
	if (unlikely(atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)) {
		nip_dbg("no memory, drop pkt");
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFODROP);
		sk->sk_data_ready(sk);
		tcp_nip_drop(sk, skb);
		return;
	}

	/* Disable header prediction. */
	tp->pred_flags = 0;
	/* set the ICSK_ACK_SCHED flag bit to indicate that an ACK needs to be sent. */
	inet_csk_schedule_ack(sk);

	tp->rcv_ooopack += max_t(u16, 1, skb_shinfo(skb)->gso_segs);
	NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOQUEUE);
	seq = TCP_SKB_CB(skb)->seq;
	end_seq = TCP_SKB_CB(skb)->end_seq;

	/* If it is the first out-of-order packet to be added, the out_of_order_queue queue is
	 * empty, insert it into the queue, and update the last skb pointer ooo_last_skb.
	 */
	p = &tp->out_of_order_queue.rb_node;
	if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
		nip_dbg("add first ofo pkt");
		rb_link_node(&skb->rbnode, NULL, p);
		rb_insert_color(&skb->rbnode, &tp->out_of_order_queue);
		tp->ooo_last_skb = skb;
		goto end;
	}

	/* In the typical case, we are adding an skb to the end of the list.
	 * Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup.
	 */
	if (tcp_nip_ooo_try_coalesce(sk, tp->ooo_last_skb, skb, &fragstolen)) {
coalesce_done:
		/* fragstolen indicates that the data in the linear cache portion of the data
		 * packet is merged, but the data in the shared cache is still in use,
		 * so it cannot be released
		 */
		nip_dbg("ofo skb coalesce done");
		kfree_skb_partial(skb, fragstolen);
		skb = NULL;
		goto end;
	}
	/* Can avoid an rbtree lookup if we are adding skb after ooo_last_skb */
	if (!before(seq, TCP_SKB_CB(tp->ooo_last_skb)->end_seq)) {
		nip_dbg("add skb after ooo_last_skb");
		parent = &tp->ooo_last_skb->rbnode;
		p = &parent->rb_right;
		goto insert;
	}

	/* Segment starts inside ooo_last_skb: trim skb's left overlap and
	 * retry the tail coalesce; on failure the segment is dropped.
	 */
	if (after(seq, TCP_SKB_CB(tp->ooo_last_skb)->seq)) {
		tcp_nip_left_overlap(skb, tp->ooo_last_skb);
		if (tcp_nip_ooo_try_coalesce(sk, tp->ooo_last_skb, skb, &fragstolen)) {
			nip_dbg("ofo skb coalesce ooo_last_skb done");
			goto coalesce_done;
		} else {
			nip_dbg("ofo skb coalesce ooo_last_skb failed, drop pkt");
			tcp_nip_drop(sk, skb);
			skb = NULL;
			goto end;
		}
	}

	/* Find place to insert this segment. Handle overlaps on the way. */
	parent = NULL;
	while (*p) {
		parent = *p;
		skb1 = rb_to_skb(parent);
		if (before(seq, TCP_SKB_CB(skb1)->seq)) {
			p = &parent->rb_left;
			continue;
		}
		if (before(seq, TCP_SKB_CB(skb1)->end_seq)) {
			if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
				/* skb1->seq <= seq, end_seq <= skb1->end_seq */
				nip_dbg("completely overlapping, drop pkt");
				NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
				tcp_nip_drop(sk, skb);
				skb = NULL;
				goto end;
			}
			if (after(seq, TCP_SKB_CB(skb1)->seq)) {
				/* skb1->seq < seq, end_seq > skb1->end_seq */
				tcp_nip_left_overlap(skb, skb1);
				/* NOTE(review): skb_rb_next() can return NULL for the
				 * right-most node; the earlier ooo_last_skb branches
				 * appear to exclude that case here -- confirm skb2
				 * cannot be NULL before the dereference below.
				 */
				skb2 = skb_rb_next(skb1);
				if (before(TCP_SKB_CB(skb2)->seq, TCP_SKB_CB(skb)->end_seq))
					tcp_nip_right_overlap(skb, skb2);
				if (tcp_nip_ooo_try_coalesce(sk, skb1, skb, &fragstolen)) {
					nip_dbg("partial overlap, ofo skb coalesce done");
					goto coalesce_done;
				} else {
					nip_dbg("partial overlap, ofo skb coalesce failed, drop pkt");
					tcp_nip_drop(sk, skb);
					skb = NULL;
					goto end;
				}
			} else {
				/* skb1->seq == seq, end_seq > skb1->end_seq
				 * partial overlap, skb covers skb1, replace skb1 with skb.
				 */
				nip_dbg("partial overlap, replace old skb node");
				rb_replace_node(&skb1->rbnode, &skb->rbnode,
						&tp->out_of_order_queue);
				NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
				tcp_nip_drop(sk, skb1);
				goto merge_right;
			}
		} else if (tcp_nip_ooo_try_coalesce(sk, skb1, skb, &fragstolen)) {
			nip_dbg("ofo skb coalesce done while scan ofo queue");
			goto coalesce_done;
		}
		p = &parent->rb_right;
	}
insert:
	/* Insert segment into RB tree. */
	nip_dbg("add skb into ofo queue");
	rb_link_node(&skb->rbnode, parent, p);
	rb_insert_color(&skb->rbnode, &tp->out_of_order_queue);

merge_right:
	/* Remove other segments covered by skb.
 */
	/* Erase fully-covered nodes to skb's right; a partially-covered
	 * last node trims skb's own tail instead.
	 */
	while ((skb1 = skb_rb_next(skb)) != NULL) {
		if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
			break;
		if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
			tcp_nip_right_overlap(skb, skb1);
			nip_dbg("partial overlap, compress the right side of the current package");
			break;
		}
		nip_dbg("del overlapping nodes on the right");
		rb_erase(&skb1->rbnode, &tp->out_of_order_queue);
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
		tcp_nip_drop(sk, skb1);
	}
	/* If there is no skb after us, we are the last_skb ! */
	if (!skb1)
		tp->ooo_last_skb = skb;

end:
	if (skb) {
		/* Try space compression for the skb. if the skb has enough space left in its
		 * linear space, the page fragment from its shared space can be copied into the
		 * linear space to free the page fragment. If the remaining amount of linear space
		 * is less than the length of the page fragment, or if the skb has been cloned
		 * (the page fragment is shared with other SKBS), no compression is performed.
		 */
		skb_condense(skb);
		skb_set_owner_r(skb, sk);
	}
}

#define PKT_DISCARD_MAX 500
/* Main receive-path queueing: validate @skb against the receive window,
 * deliver in-order data to sk_receive_queue (trimming any left overlap),
 * and hand out-of-order segments to tcp_nip_data_queue_ofo().
 */
static void tcp_nip_data_queue(struct sock *sk, struct sk_buff *skb)
{
	int mss = tcp_nip_current_mss(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	u32 cur_win = tcp_receive_window(tp);
	u32 seq_max = tp->rcv_nxt + cur_win;

	/* Newip Urg_ptr is disabled. Urg_ptr is used to carry the number of discarded packets */
	tp->snd_up = (TCP_SKB_CB(skb)->seq - tcp_sk(sk)->rcv_nxt) / mss;
	tp->snd_up = tp->snd_up > PKT_DISCARD_MAX ? 0 : tp->snd_up;

	/* Zero-length segment: pure ACK, nothing to queue */
	if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) {
		nip_dbg("no data, only handle ack");
		__kfree_skb(skb);
		return;
	}

	if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
		if (cur_win == 0) {
			nip_dbg("rcv window is 0");
			goto out_of_window;
		}
	}

	/* Out of window. F.e. zero window probe. */
	if (!before(TCP_SKB_CB(skb)->seq, seq_max)) {
		nip_dbg("out of rcv win, seq=[%u-%u], rcv_nxt=%u, seq_max=%u",
			TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
			tp->rcv_nxt, seq_max);
		goto out_of_window;
	}

	if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
		/* A retransmit, 2nd most common case. Force an immediate ack. */
		nip_dbg("rcv retransmit pkt, seq=[%u-%u], rcv_nxt=%u",
			TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt);
out_of_window:
		inet_csk_schedule_ack(sk);
		tcp_nip_drop(sk, skb);
		return;
	}
	icsk->icsk_ack.lrcvtime = tcp_jiffies32;
	/* Strip the TCP header (doff is in 32-bit words) */
	__skb_pull(skb, tcp_hdr(skb)->doff * TCP_NUM_4);

	/* Segment extends past the advertised window: refuse it */
	if (cur_win == 0 || after(TCP_SKB_CB(skb)->end_seq, seq_max)) {
		nip_dbg("win lack, drop pkt, seq=[%u-%u], seq_max=%u, rmem_alloc/rbuf=[%u:%u]",
			TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
			seq_max, atomic_read(&sk->sk_rmem_alloc), sk->sk_rcvbuf);
		/* wake up processes that are blocked for lack of data */
		sk->sk_data_ready(sk);
		inet_csk_schedule_ack(sk);
		tcp_nip_drop(sk, skb);
		return;
	}

	/* case1: seq == rcv_next
	 * case2: seq -- rcv_next -- end_seq ==> rcv_next(seq) -- end_seq
	 */
	if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt ||
	    (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt) &&
	     after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) {
		if (TCP_SKB_CB(skb)->seq != tp->rcv_nxt)
			tcp_nip_overlap_handle(tp, skb);

		nip_dbg("packet received. seq=[%u-%u], rcv_nxt=%u, skb->len=%u",
			TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
			tp->rcv_nxt, skb->len);

		__skb_queue_tail(&sk->sk_receive_queue, skb);
		skb_set_owner_r(skb, sk);
		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
		inet_csk_schedule_ack(sk);
		if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
			tcp_nip_fin(sk);
		/* In-order arrival may release queued ofo segments */
		if (!RB_EMPTY_ROOT(&tp->out_of_order_queue))
			tcp_nip_ofo_queue(sk);
		if (!sock_flag(sk, SOCK_DEAD))
			sk->sk_data_ready(sk);
		return;
	}

	nip_dbg("ofo packet received. seq=[%u-%u], rcv_nxt=%u, skb->len=%u",
		TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
		tp->rcv_nxt, skb->len);

	tcp_nip_data_queue_ofo(sk, skb);
}

/* Push pending frames if there is unsent data at the send head. */
static inline void tcp_nip_push_pending_frames(struct sock *sk)
{
	if (tcp_nip_send_head(sk)) {
		struct tcp_sock *tp = tcp_sk(sk);
		u32 cur_mss = tcp_nip_current_mss(sk); // TCP_BASE_MSS

		__tcp_nip_push_pending_frames(sk, cur_mss, tp->nonagle);
	}
}

/* Notify writers that buffer space became available. */
static void tcp_nip_new_space(struct sock *sk)
{
	sk->sk_write_space(sk);
}

/* Wake writers only if they previously reported SOCK_NOSPACE. */
static void tcp_nip_check_space(struct sock *sk)
{
	/* Invoke memory barrier (annotated prior to checkpatch requirements) */
	smp_mb();
	if (sk->sk_socket && test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
		tcp_nip_new_space(sk);
}

static inline void tcp_nip_data_snd_check(struct sock *sk)
{
	tcp_nip_push_pending_frames(sk);
	tcp_nip_check_space(sk);
}

#define TCP_NIP_DELACK_MIN (HZ / 50)
/* Arm (or service) the delayed-ACK timer; if an already-armed timer would
 * fire almost immediately, send the ACK right now instead.
 */
void tcp_nip_send_delayed_ack(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	int ato = TCP_NIP_DELACK_MIN;
	unsigned long timeout;

	icsk->icsk_ack.ato = TCP_DELACK_MIN;

	/* Stay within the limit we were given */
	timeout = jiffies + ato;

	/* Use new timeout only if there wasn't a older one earlier. */
	if (icsk->icsk_ack.pending & ICSK_ACK_TIMER) {
		if (time_before_eq(icsk->icsk_ack.timeout,
				   jiffies + (ato >> TCP_NIP_4BYTE_PAYLOAD))) {
			nip_dbg("ok");
			tcp_nip_send_ack(sk);
			return;
		}

		if (!time_before(timeout, icsk->icsk_ack.timeout))
			timeout = icsk->icsk_ack.timeout;
	}

	icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
	icsk->icsk_ack.timeout = timeout;
	sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout);
}

/* Decide between an immediate ACK (enough new data received, or
 * out-of-order data present -- rate-limited via dup_ack_cnt) and a
 * delayed ACK.
 */
static void __tcp_nip_ack_snd_check(struct sock *sk, int ofo_possible)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_nip_common *ntp = &tcp_nip_sk(sk)->common;

	inet_csk(sk)->icsk_ack.rcv_mss = tcp_nip_current_mss(sk); // TCP_BASE_MSS

	/* More than n full frame received...
 */
	if (((tp->rcv_nxt - tp->rcv_wup) > get_ack_num() * inet_csk(sk)->icsk_ack.rcv_mss &&
	     __nip_tcp_select_window(sk) >= tp->rcv_wnd) ||
	    /* We have out of order data. */
	    (ofo_possible && (!RB_EMPTY_ROOT(&tp->out_of_order_queue)))) {
		if (ofo_possible && (!RB_EMPTY_ROOT(&tp->out_of_order_queue))) {
			/* Count consecutive dup-ACKs for the same rcv_nxt so
			 * their transmission can be throttled below.
			 */
			if (tp->rcv_nxt == ntp->last_rcv_nxt) {
				ntp->dup_ack_cnt++;
			} else {
				ntp->dup_ack_cnt = 0;
				ntp->last_rcv_nxt = tp->rcv_nxt;
			}
			/* Send the first few dup-ACKs freely, then only every
			 * get_dup_ack_snd_max()-th one.
			 */
			if (ntp->dup_ack_cnt < get_dup_ack_snd_max())
				tcp_nip_send_ack(sk);
			else if (ntp->dup_ack_cnt % get_dup_ack_snd_max() == 0)
				tcp_nip_send_ack(sk);
		} else {
			tcp_nip_send_ack(sk);
		}
	} else {
		/* Else, send delayed ack. */
		tcp_nip_send_delayed_ack(sk);
	}
}

/* ACK-send check entry point: skip if no ACK is currently scheduled. */
static inline void tcp_nip_ack_snd_check(struct sock *sk)
{
	if (!inet_csk_ack_scheduled(sk)) {
		/* We sent a data segment already. */
		nip_dbg("We sent a data segment already");
		return;
	}
	__tcp_nip_ack_snd_check(sk, 1);
}

/* Advance snd_una to @ack and account the newly acked bytes. */
static void tcp_nip_snd_una_update(struct tcp_sock *tp, u32 ack)
{
	u32 delta = ack - tp->snd_una;

	sock_owned_by_me((struct sock *)tp);
	tp->bytes_acked += delta;
	tp->snd_una = ack;
}

/* Stop the retransmit timer when nothing is in flight, otherwise re-arm
 * it with the current RTO.
 */
void tcp_nip_rearm_rto(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	if (!tp->packets_out) {
		int icsk_backoff = icsk->icsk_backoff;

		inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
		icsk->icsk_backoff = 0; /* V4 no modified this line */
		nip_dbg("stop tcp retrans timer, icsk_backoff %u to %u",
			icsk_backoff, icsk->icsk_backoff);
	} else {
		u32 rto = inet_csk(sk)->icsk_rto;

		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto, TCP_RTO_MAX);
	}
}

/* Remove fully acked skbs from the head of the write queue.
 * @skb_snd_tstamp: out-param; set to the send timestamp of the first
 * processed skb if still 0.  Always returns 0; the FLAG_* bits are only
 * used internally to decide whether to re-arm the RTO timer.
 */
static int tcp_nip_clean_rtx_queue(struct sock *sk, ktime_t *skb_snd_tstamp)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;
	int flag = 0;
	struct inet_connection_sock *icsk = inet_csk(sk);

	while ((skb = tcp_write_queue_head(sk)) && skb != tcp_nip_send_head(sk)) {
		struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
		u32 acked_pcount = 0;

		if (after(scb->end_seq, tp->snd_una)) {
			/* Head not fully acked: stop unless it is a multi-
			 * segment skb partially covered by snd_una.
			 */
			if (tcp_skb_pcount(skb) == 1 || !after(tp->snd_una, scb->seq))
				break;
			nip_dbg("ack error");
		} else {
			prefetchw(skb->next);
			acked_pcount = tcp_skb_pcount(skb);
		}

		if (likely(!(scb->tcp_flags & TCPHDR_SYN))) {
			flag |= FLAG_DATA_ACKED;
		} else {
			flag |= FLAG_SYN_ACKED;
			tp->retrans_stamp = 0;
		}

		tp->packets_out -= acked_pcount;

		if (*skb_snd_tstamp == 0)
			*skb_snd_tstamp = skb->tstamp;

		tcp_unlink_write_queue(skb, sk);
		sk_wmem_free_skb(sk, skb);
	}
	/* V4 no modified this line */
	icsk->icsk_rto = get_nip_rto() == 0 ? TCP_TIMEOUT_INIT : (HZ / get_nip_rto());
	if (flag & FLAG_ACKED)
		tcp_nip_rearm_rto(sk);
	return 0;
}

/* Function
 * Allocate a connection request block that holds connection request information.
 * At the same time, initialize the set of operations used to send ACK/RST segments
 * during connection, so that these interfaces can be easily called during establishment.
 * Set the socket state to TCP_NEW_SYN_RECV
 * Parameter
 * ops: Request the functional interface of the control block
 * sk_listener: Transmission control block
 * attach_listener: Whether to set cookies
 */
struct request_sock *ninet_reqsk_alloc(const struct request_sock_ops *ops,
				       struct sock *sk_listener,
				       bool attach_listener)
{
	struct request_sock *req = reqsk_alloc(ops, sk_listener,
					       attach_listener);

	if (req) {
		struct inet_request_sock *ireq = inet_rsk(req);

		ireq->ireq_opt = NULL;
		atomic64_set(&ireq->ir_cookie, 0);
		ireq->ireq_state = TCP_NEW_SYN_RECV;
		write_pnet(&ireq->ireq_net, sock_net(sk_listener));
		ireq->ireq_family = sk_listener->sk_family;
	}

	return req;
}

/* Parse the MSS option.  Only honoured on a SYN when the connection is
 * not yet established; the advertised value is clamped by user_mss when
 * that is set.
 */
void tcp_nip_parse_mss(struct tcp_options_received *opt_rx,
		       const struct tcphdr *th,
		       const unsigned char *ptr,
		       int opsize,
		       int estab)
{
	if (opsize == TCPOLEN_MSS && th->syn && !estab) {
		u16 in_mss = get_unaligned_be16(ptr);

		nip_dbg("in_mss %d", in_mss);

		if (in_mss) {
			if (opt_rx->user_mss &&
			    opt_rx->user_mss < in_mss)
				in_mss = opt_rx->user_mss;
			opt_rx->mss_clamp = in_mss;
		}
	}
}

/* Function
 * Look for tcp options. Normally only called on SYN and SYNACK packets.
 * Parsing of TCP options in SKB
 * Parameter
 * skb: Transfer control block buffer
 * opt_rx: Saves the structure for TCP options
 * estab: WANTCOOKIE
 * foc: Len field
 */
void tcp_nip_parse_options(const struct sk_buff *skb,
			   struct tcp_options_received *opt_rx, int estab,
			   struct tcp_fastopen_cookie *foc)
{
	const unsigned char *ptr;
	const struct tcphdr *th = tcp_hdr(skb);
	/* The length of the TCP option = Length of TCP header - The length of the TCP structure */
	int length = (th->doff * 4) - sizeof(struct tcphdr);

	/* A pointer to the option position */
	ptr = (const unsigned char *)(th + 1);
	opt_rx->saw_tstamp = 0;

	while (length > 0) {
		int opcode = *ptr++;
		int opsize;

		switch (opcode) {
		case TCPOPT_EOL:
			return;
		case TCPOPT_NOP:
			length--;
			continue;
		default:
			opsize = *ptr++;
			if (opsize < 2) /* "2 - silly options" */
				return;
			if (opsize > length)
				return; /* don't parse partial options */
			/* Only the MSS option is interpreted; others skipped */
			switch (opcode) {
			case TCPOPT_MSS:
				tcp_nip_parse_mss(opt_rx, th, ptr, opsize, estab);
				break;
			default:
				break;
			}
			ptr += opsize - TCP_NUM_2;
			length -= opsize;
		}
	}
}

/* Zero the NewIP-private part of the request sock and seed its ssthresh
 * with the configured default.
 */
static void tcp_nip_common_init(struct request_sock *req)
{
	struct tcp_nip_request_sock *niptreq = tcp_nip_rsk(req);
	struct tcp_nip_common *ntp = &niptreq->common;

	memset(ntp, 0, sizeof(*ntp));
	ntp->nip_ssthresh = get_nip_ssthresh_default();
}

/* Function
 * Initializes the connection request block information based
 * on the options and sequence number in the received SYN segment
 * Parameter
 * req: Request connection control block
 * rx_opt: Saves the structure for TCP options
 * skb: Transfer control block buffer.
 * sk: transmission control block.
 */
static void tcp_nip_openreq_init(struct request_sock *req,
				 const struct tcp_options_received *rx_opt,
				 struct sk_buff *skb, const struct sock *sk)
{
	struct inet_request_sock *ireq = inet_rsk(req);

	tcp_nip_common_init(req);

	/* rcv_isn is the client's ISN from the SYN; rcv_nxt expects seq+1 */
	req->rsk_rcv_wnd = 0;
	tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq;
	tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
	tcp_rsk(req)->snt_synack = tcp_clock_us();
	tcp_rsk(req)->last_oow_ack_time = 0;
	req->mss = rx_opt->mss_clamp;
	req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
	ireq->tstamp_ok = rx_opt->tstamp_ok;
	ireq->snd_wscale = rx_opt->snd_wscale;

	/* When enabled, force the configured window scale in both directions */
	if (get_wscale_enable()) {
		ireq->wscale_ok = 1;
		ireq->snd_wscale = get_wscale(); // rx_opt->snd_wscale;
		ireq->rcv_wscale = get_wscale();
	}

	ireq->acked = 0;
	ireq->ecn_ok = 0;
	ireq->ir_rmt_port = tcp_hdr(skb)->source;
	ireq->ir_num = ntohs(tcp_hdr(skb)->dest);
	ireq->ir_mark = sk->sk_mark;
}

/* Function
 * Based on listening SOCK and REQ, create a transport control block
 * for the new connection and initialize it.
 * Parameter
 * sk: the listening transmission control block.
 * req: Request connection control block
 * skb: Transfer control block buffer.
 */
struct sock *tcp_nip_create_openreq_child(const struct sock *sk,
					  struct request_sock *req,
					  struct sk_buff *skb)
{
	/* Clone a transport control block and lock the new transport control block */
	struct sock *newsk = inet_csk_clone_lock(sk, req, GFP_ATOMIC);

	if (newsk) {
		const struct inet_request_sock *ireq = inet_rsk(req);
		struct tcp_request_sock *treq = tcp_rsk(req);
		struct inet_connection_sock *newicsk = inet_csk(newsk);
		struct tcp_sock *newtp = tcp_sk(newsk);

		/* Now setup tcp_sock */
		newtp->pred_flags = 0;

		/* The variables related to the receiving and sending serial numbers
		 * are initialized. The second handshake sends an ACK in the SYN+ACK segment
		 */
		newtp->rcv_wup = treq->rcv_isn + 1;
		newtp->copied_seq = treq->rcv_isn + 1;
		newtp->rcv_nxt = treq->rcv_isn + 1;
		newtp->segs_in = 1;
		/* The second handshake sends seq+1 in the SYN+ACK segment */
		newtp->snd_sml = treq->snt_isn + 1;
		newtp->snd_una = treq->snt_isn + 1;
		newtp->snd_nxt = treq->snt_isn + 1;
		newtp->snd_up = treq->snt_isn + 1;

		INIT_LIST_HEAD(&newtp->tsq_node);

		/* The ACK segment number of the send window that
		 * received the first handshake update
		 */
		tcp_init_wl(newtp, treq->rcv_isn);

		/* Initialization of delay-related variables */
		minmax_reset(&newtp->rtt_min, tcp_jiffies32, ~0U);
		newicsk->icsk_rto = get_nip_rto() == 0 ? TCP_TIMEOUT_INIT : (HZ / get_nip_rto());
		newicsk->icsk_ack.lrcvtime = tcp_jiffies32;

		/* The congestion control-related variables are initialized */
		newtp->packets_out = 0;

		newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH;

		newtp->lsndtime = tcp_jiffies32;

		newtp->total_retrans = req->num_retrans;

		newtp->snd_cwnd = TCP_INIT_CWND;

		/* There's a bubble in the pipe until at least the first ACK. */
		newtp->app_limited = ~0U;

		/* Initialize several timers */
		tcp_nip_init_xmit_timers(newsk);
		newtp->write_seq = treq->snt_isn + 1;
		newtp->pushed_seq = treq->snt_isn + 1;

		/* TCP option correlation */
		newtp->rx_opt.saw_tstamp = 0;

		newtp->rx_opt.dsack = 0;
		newtp->rx_opt.num_sacks = 0;

		newtp->urg_data = 0;

		newtp->rx_opt.tstamp_ok = ireq->tstamp_ok;
		newtp->window_clamp = req->rsk_window_clamp;
		newtp->rcv_ssthresh = req->rsk_rcv_wnd;
		newtp->rcv_wnd = req->rsk_rcv_wnd;
		newtp->rx_opt.wscale_ok = ireq->wscale_ok;
		if (newtp->rx_opt.wscale_ok) {
			newtp->rx_opt.snd_wscale = ireq->snd_wscale;
			newtp->rx_opt.rcv_wscale = ireq->rcv_wscale;
		} else {
			newtp->rx_opt.snd_wscale = 0;
			newtp->rx_opt.rcv_wscale = 0;
			/* Without window scaling the window is capped at 64K */
			newtp->window_clamp = min(newtp->window_clamp, 65535U);
		}
		newtp->snd_wnd = (ntohs(tcp_hdr(skb)->window) <<
				  newtp->rx_opt.snd_wscale);
		newtp->max_window = newtp->snd_wnd;

		if (newtp->rx_opt.tstamp_ok) {
			newtp->rx_opt.ts_recent = req->ts_recent;
			newtp->rx_opt.ts_recent_stamp = get_seconds();
			newtp->tcp_header_len = sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
		} else {
			newtp->rx_opt.ts_recent_stamp = 0;
			newtp->tcp_header_len = sizeof(struct tcphdr);
		}
		newtp->tsoffset = 0;

		/* Determines the size of the last passed segment */
		if (skb->len >= TCP_MSS_DEFAULT + newtp->tcp_header_len)
			newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len;
		newtp->rx_opt.mss_clamp = req->mss;
		newtp->fastopen_req = NULL;
		newtp->fastopen_rsk = NULL;
		newtp->syn_data_acked = 0;
		newtp->rack.mstamp = 0;
		newtp->rack.advanced = 0;

		__TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS);
	}
	return newsk;
}

/* Compute the initial receive window and window clamp for a passive open
 * and store them in @req; also selects the receive window scale.
 */
void tcp_nip_openreq_init_rwin(struct request_sock *req,
			       const struct sock *sk_listener,
			       const struct dst_entry *dst)
{
	struct inet_request_sock *ireq = inet_rsk(req);
	const struct tcp_sock *tp = tcp_sk(sk_listener);
	int full_space = tcp_full_space(sk_listener);
	int mss;
	u32 window_clamp;
	__u8 rcv_wscale;

	mss = tcp_mss_clamp(tp, dst_metric_advmss(dst));

	window_clamp = READ_ONCE(tp->window_clamp);
	/* Set this up on the first call only */
	req->rsk_window_clamp = window_clamp ? : dst_metric(dst, RTAX_WINDOW);

	/* limit the window selection if the user enforce a smaller rx buffer */
	if (sk_listener->sk_userlocks & SOCK_RCVBUF_LOCK &&
	    (req->rsk_window_clamp > full_space || req->rsk_window_clamp == 0))
		req->rsk_window_clamp = full_space;

	/* tcp_full_space because it is guaranteed to be the first packet */
	tcp_select_initial_window(sk_listener, full_space,
				  mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
				  &req->rsk_rcv_wnd,
				  &req->rsk_window_clamp,
				  0,
				  &rcv_wscale,
				  0);
	ireq->rcv_wscale = get_wscale_enable() ? get_wscale() : rcv_wscale;
}

/* Function
 * A function used by the server to process client connection requests.
 * Parameter
 * rsk_ops: Functional interface to request control blocks.
 * af_ops: The functional interface of the TCP request block.
 * sk: transmission control block.
 * skb: Transfer control block buffer.
 */
int _tcp_nip_conn_request(struct request_sock_ops *rsk_ops,
			  const struct tcp_request_sock_ops *af_ops,
			  struct sock *sk, struct sk_buff *skb)
{
	struct tcp_fastopen_cookie foc = { .len = -1 };

	__u32 isn = TCP_SKB_CB(skb)->tcp_tw_isn;
	/* All received TCP options are resolved into this structure */
	struct tcp_options_received tmp_opt;
	struct tcp_sock *tp = tcp_sk(sk);
	struct dst_entry *dst = NULL;
	struct request_sock *req;

	/* If the half-connection queue length has reached the upper limit,
	 * the current request is discarded
	 */
	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
		nip_dbg("inet_csk_reqsk_queue_is_full");
		goto drop;
	}

	/* If the queue holds the socket that has completed the connection (full connection queue)
	 * The length has reached its upper limit
	 * The current request is discarded
	 */
	if (sk_acceptq_is_full(sk)) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
		nip_dbg("sk_acceptq_is_full, sk_ack_backlog=%u, sk_max_ack_backlog=%u",
			READ_ONCE(sk->sk_ack_backlog),
			READ_ONCE(sk->sk_max_ack_backlog));
		goto drop;
	}

	/* Allocate a connection request block that holds connection request information
	 * While initializing the connection process
	 * The set of operations that send ACK/RST segments
	 * These interfaces can be easily invoked during the setup process.
	 */
	req = ninet_reqsk_alloc(rsk_ops, sk, true);
	if (!req)
		goto drop;

	tcp_rsk(req)->af_specific = af_ops;

	tcp_clear_options(&tmp_opt);
	/* Maximum MSS negotiated during connection establishment */
	tmp_opt.mss_clamp = af_ops->mss_clamp;
	/* The best way to do this is to prink the value of user_mss and see if it is 0 */
	tmp_opt.user_mss = tp->rx_opt.user_mss;
	/* Parsing of TCP options in SKB */
	tcp_nip_parse_options(skb, &tmp_opt, 0, NULL);

	/* Tstamp_ok indicates the TIMESTAMP seen on the received SYN packet */
	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
	/* Initializes the connection request block information based on the options
	 * and sequence number in the received SYN segment
	 */
	tcp_nip_openreq_init(req, &tmp_opt, skb, sk);

	inet_rsk(req)->ir_iif = sk->sk_bound_dev_if;

	af_ops->init_req(req, sk, skb);

	if (!isn)
		isn = af_ops->init_seq(skb);

	if (!dst) {
		dst = af_ops->route_req(sk, NULL, req);
		if (!dst)
			goto drop_and_free;
	}

	tcp_rsk(req)->snt_isn = isn;
	tcp_rsk(req)->txhash = net_tx_rndhash();
	/* Initialize the receive window */
	tcp_nip_openreq_init_rwin(req, sk, dst);
	/* Record the syn */
	tcp_rsk(req)->tfo_listener = false;
	/* Add a timer to add reQ to the ehash table */
	ninet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);

	af_ops->send_synack(sk, dst, NULL, req, &foc, TCP_SYNACK_NORMAL, NULL);

	reqsk_put(req);
	return 0;

drop_and_free:
	reqsk_free(req);
drop:
	tcp_listendrop(sk);
	return 0;
}

/* PAWS check: returns true when the segment timestamp is acceptable --
 * within @paws_win of ts_recent, ts_recent older than 24 days, or no
 * timestamp recorded yet.
 */
static inline bool tcp_nip_paws_check(const struct tcp_options_received *rx_opt,
				      int paws_win)
{
	if ((s32)(rx_opt->ts_recent - rx_opt->rcv_tsval) <= paws_win)
		return true;
	if (unlikely(get_seconds() >= rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS))
		return true;

	if (!rx_opt->ts_recent)
		return true;
	return false;
}

/* RFC 793 window-update test: newer ack, newer ack_seq, or same ack_seq
 * announcing a larger window.
 */
static inline bool tcp_nip_may_update_window(const struct tcp_sock *tp,
					     const u32 ack, const u32 ack_seq,
					     const u32 nwin)
{
	return after(ack, 
tp->snd_una) ||
	       after(ack_seq, tp->snd_wl1) ||
	       (ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd);
}

/* Apply the send-window advertised in @skb when the RFC 793 checks in
 * tcp_nip_may_update_window() allow it.
 */
static void tcp_nip_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 ack,
				      u32 ack_seq)
{
	struct tcp_sock *tp = tcp_sk(sk);
	u32 nwin = ntohs(tcp_hdr(skb)->window);

	/* The window in a SYN segment is never scaled */
	if (likely(!tcp_hdr(skb)->syn))
		nwin <<= tp->rx_opt.snd_wscale;

	if (tcp_nip_may_update_window(tp, ack, ack_seq, nwin)) {
		tcp_update_wl(tp, ack_seq);

		if (tp->snd_wnd != nwin) {
			nip_dbg("snd_wnd change [%u to %u]", tp->snd_wnd, nwin);
			tp->snd_wnd = nwin;
			tp->pred_flags = 0;
		}
	}
}

/* Check whether the ACK returned by the packet is detected
 * and whether the peer window is opened
 * NOTE(review): dereferences tcp_nip_send_head(sk) unconditionally --
 * callers presumably guarantee a non-NULL send head; confirm.
 */
static void tcp_nip_ack_probe(struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	if (!after(TCP_SKB_CB(tcp_nip_send_head(sk))->end_seq, tcp_wnd_end(tp))) {
		icsk->icsk_backoff = 0;
		icsk->icsk_probes_tstamp = 0;
		nip_dbg("stop probe0 timer");
		inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0);
		/* Socket must be waked up by subsequent tcp_data_snd_check().
		 * This function is not for random using!
		 */
	} else {
		unsigned long when = tcp_probe0_when(sk, TCP_RTO_MAX);
		unsigned long base_when = tcp_probe0_base(sk);
		u8 icsk_backoff = inet_csk(sk)->icsk_backoff;

		nip_dbg("start probe0 timer, when=%lu, RTO MAX=%u, base_when=%lu, backoff=%u",
			when, TCP_RTO_MAX, base_when, icsk_backoff);
		inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, when, TCP_RTO_MAX);
	}
}

#define DUP_ACK 0
#define NOR_ACK 1
#define ACK_DEF 2
/* Retransmit up to @retrans_num segments from the range recorded in
 * tp->selective_acks[0], resuming at ntp->ack_retrans_seq.
 * @ack_type selects the "dup"/"nor" label used in debug output.
 */
static void tcp_nip_ack_retrans(struct sock *sk, u32 ack, int ack_type, u32 retrans_num)
{
	int skb_index = 0;
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_nip_common *ntp = &tcp_nip_sk(sk)->common;
	struct sk_buff *skb, *tmp;
	const char *ack_str[ACK_DEF] = {"dup", "nor"};
	int index = ack_type == DUP_ACK ? 
DUP_ACK : NOR_ACK; + + skb_queue_walk_safe(&sk->sk_write_queue, skb, tmp) { + if (skb == tcp_nip_send_head(sk)) { + ssthresh_dbg("%s ack retrans(%u) end, ack=%u, seq=%u~%u, pkt_out=%u", + ack_str[index], ntp->ack_retrans_num, ack, + tp->selective_acks[0].start_seq, + tp->selective_acks[0].end_seq, tp->packets_out); + tp->selective_acks[0].start_seq = 0; + tp->selective_acks[0].end_seq = 0; + ntp->ack_retrans_seq = 0; + ntp->ack_retrans_num = 0; + break; + } + + if (TCP_SKB_CB(skb)->seq > tp->selective_acks[0].end_seq) { + ssthresh_dbg("%s ack retrans(%u) finish, ack=%u, seq=%u~%u, pkt_out=%u", + ack_str[index], ntp->ack_retrans_num, ack, + tp->selective_acks[0].start_seq, + tp->selective_acks[0].end_seq, tp->packets_out); + + tp->selective_acks[0].start_seq = 0; + tp->selective_acks[0].end_seq = 0; + ntp->ack_retrans_seq = 0; + ntp->ack_retrans_num = 0; + break; + } + + if (TCP_SKB_CB(skb)->seq != ntp->ack_retrans_seq) + continue; + + if (skb_index < retrans_num) { + tcp_nip_retransmit_skb(sk, skb, 1); + skb_index++; + ntp->ack_retrans_num++; + ntp->ack_retrans_seq = TCP_SKB_CB(skb)->end_seq; + } else { + retrans_dbg("%s ack retrans(%u) no end, ack=%u, seq=%u~%u, pkt_out=%u", + ack_str[index], ntp->ack_retrans_num, ack, + tp->selective_acks[0].start_seq, + tp->selective_acks[0].end_seq, tp->packets_out); + break; + } + } +} + +#define DUP_ACK_RETRANS_START_NUM 3 +#define DIVIDEND_UP 3 +#define DIVIDEND_DOWN 5 +static void tcp_nip_dup_ack_retrans(struct sock *sk, const struct sk_buff *skb, + u32 ack, u32 retrans_num) +{ + if (tcp_write_queue_head(sk)) { + struct tcp_sock *tp = tcp_sk(sk); + struct tcp_nip_common *ntp = &tcp_nip_sk(sk)->common; + + tp->sacked_out++; + if (tp->sacked_out == DUP_ACK_RETRANS_START_NUM) { + /* Newip Urg_ptr is disabled. 
Urg_ptr is used to
			 * carry the number of discarded packets
			 */
			int mss = tcp_nip_current_mss(sk);
			struct tcphdr *th = (struct tcphdr *)skb->data;
			/* NOTE(review): reading a wire field is conventionally
			 * ntohs(), not htons(); the two are numerically identical
			 * byte-swaps on all supported arches, but the intent
			 * should be confirmed and the idiomatic form used.
			 */
			u16 discard_num = htons(th->urg_ptr);
			u32 last_nip_ssthresh = ntp->nip_ssthresh;

			/* A previous retransmit range is still outstanding */
			if (tp->selective_acks[0].end_seq)
				ssthresh_dbg("last retans(%u) not end, seq=%u~%u, pkt_out=%u",
					     ntp->ack_retrans_num,
					     tp->selective_acks[0].start_seq,
					     tp->selective_acks[0].end_seq,
					     tp->packets_out);

			/* Record the new retransmit range: discard_num segments
			 * of mss bytes starting at the duplicated ack point.
			 */
			tp->selective_acks[0].start_seq = ack;
			tp->selective_acks[0].end_seq = ack + discard_num * mss;
			ntp->ack_retrans_seq = ack;
			ntp->ack_retrans_num = 0;

			/* Loss signal: collapse the NewIP send threshold */
			ntp->nip_ssthresh = get_ssthresh_low();
			ssthresh_dbg("new dup ack, win %u to %u, discard_num=%u, seq=%u~%u",
				     last_nip_ssthresh, ntp->nip_ssthresh, discard_num,
				     tp->selective_acks[0].start_seq,
				     tp->selective_acks[0].end_seq);

			tcp_nip_ack_retrans(sk, ack, DUP_ACK, retrans_num);
		}
	}
}

/* Handle a normal (advancing) ACK while a retransmit range may be pending.
 * If the ACK covers the whole recorded range, the range and the dup-ACK
 * counter are cleared; otherwise retransmission of the remainder continues.
 */
static void tcp_nip_nor_ack_retrans(struct sock *sk, u32 ack, u32 retrans_num)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_nip_common *ntp = &tcp_nip_sk(sk)->common;

	if (tp->selective_acks[0].end_seq != 0) {
		if (ack >= tp->selective_acks[0].end_seq) {
			/* Entire range acknowledged: stop retransmitting */
			ssthresh_dbg("nor ack retrans(%u) resume, seq=%u~%u, pkt_out=%u, ack=%u",
				     ntp->ack_retrans_num,
				     tp->selective_acks[0].start_seq,
				     tp->selective_acks[0].end_seq, tp->packets_out, ack);
			tp->selective_acks[0].start_seq = 0;
			tp->selective_acks[0].end_seq = 0;
			ntp->ack_retrans_seq = 0;
			ntp->ack_retrans_num = 0;

			tp->sacked_out = 0;
			return;
		}

		tcp_nip_ack_retrans(sk, ack, NOR_ACK, retrans_num);
	}

	/* Any advancing ACK resets the duplicate-ACK counter */
	tp->sacked_out = 0;
}

/* Recompute the NewIP send threshold (nip_ssthresh) from the measured
 * RTT of the most recently acked segment, using tiered thresholds from
 * tcp_nip_parameter accessors. Also resets the threshold to its low value
 * once per get_nip_ssthresh_reset() bytes of acked sequence space.
 * @icsk_rto_last:  RTO value before the rtx-queue cleanup (logging only)
 * @skb_snd_tstamp: send timestamp of the acked skb, 0 if unavailable
 */
static void tcp_nip_ack_calc_ssthresh(struct sock *sk, u32 ack, int icsk_rto_last,
				      ktime_t skb_snd_tstamp)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_nip_common *ntp = &tcp_nip_sk(sk)->common;
	struct inet_connection_sock *icsk = inet_csk(sk);
	int ack_reset = ack / get_nip_ssthresh_reset();
	u32 nip_ssthresh;

	if (ntp->nip_ssthresh_reset !=
ack_reset) {
		/* Crossed a reset boundary: drop the threshold to its floor */
		ssthresh_dbg("ack reset win %u to %u, ack=%u",
			     ntp->nip_ssthresh, get_ssthresh_low(), ack);
		ntp->nip_ssthresh_reset = ack_reset;
		ntp->nip_ssthresh = get_ssthresh_low();
	} else {
		if (skb_snd_tstamp) {
			/* NOTE(review): rcv_tstamp is tcp_jiffies32 while
			 * skb_snd_tstamp arrives as ktime_t from
			 * tcp_nip_clean_rtx_queue() — confirm both carry the
			 * same clock/units before trusting this difference.
			 */
			u32 rtt_tstamp = tp->rcv_tstamp - skb_snd_tstamp;

			/* Tiered response, worst RTT first: each branch maps an
			 * RTT band to a (smaller) threshold value.
			 */
			if (rtt_tstamp >= get_rtt_tstamp_rto_up()) {
				ssthresh_dbg("rtt %u >= %u, win %u to %u, rto %u to %u, ack=%u",
					     rtt_tstamp, get_rtt_tstamp_rto_up(),
					     ntp->nip_ssthresh, get_ssthresh_low_min(),
					     icsk_rto_last, icsk->icsk_rto, ack);

				ntp->nip_ssthresh = get_ssthresh_low_min();
			} else if (rtt_tstamp >= get_rtt_tstamp_high()) {
				ssthresh_dbg("rtt %u >= %u, win %u to %u, ack=%u",
					     rtt_tstamp, get_rtt_tstamp_high(),
					     ntp->nip_ssthresh, get_ssthresh_low(), ack);

				ntp->nip_ssthresh = get_ssthresh_low();
			} else if (rtt_tstamp >= get_rtt_tstamp_mid_high()) {
				ssthresh_dbg("rtt %u >= %u, win %u to %u, ack=%u",
					     rtt_tstamp, get_rtt_tstamp_mid_high(),
					     ntp->nip_ssthresh, get_ssthresh_mid_low(), ack);

				ntp->nip_ssthresh = get_ssthresh_mid_low();
			} else if (rtt_tstamp >= get_rtt_tstamp_mid_low()) {
				/* Mid band: interpolate linearly between the band
				 * edges, then blend with the previous threshold
				 * (weighted moving average, step:1).
				 */
				u32 rtt_tstamp_scale = get_rtt_tstamp_mid_high() - rtt_tstamp;
				int half_mid_high = get_ssthresh_mid_high() / 2;

				nip_ssthresh = half_mid_high + rtt_tstamp_scale * half_mid_high /
					       (get_rtt_tstamp_mid_high() -
						get_rtt_tstamp_mid_low());

				/* Clamp an over-large previous threshold first */
				ntp->nip_ssthresh = ntp->nip_ssthresh > get_ssthresh_mid_high() ?
						    half_mid_high : ntp->nip_ssthresh;
				nip_ssthresh = (ntp->nip_ssthresh * get_ssthresh_high_step() +
						nip_ssthresh) / (get_ssthresh_high_step() + 1);

				ssthresh_dbg("rtt %u >= %u, win %u to %u, ack=%u",
					     rtt_tstamp, get_rtt_tstamp_mid_low(),
					     ntp->nip_ssthresh, nip_ssthresh, ack);

				ntp->nip_ssthresh = nip_ssthresh;
			} else if (rtt_tstamp != 0) {
				/* Good RTT: drift the threshold up toward the
				 * high value with the same moving average.
				 */
				nip_ssthresh = (ntp->nip_ssthresh * get_ssthresh_high_step() +
						get_ssthresh_high()) /
					       (get_ssthresh_high_step() + 1);

				ssthresh_dbg("rtt %u < %u, win %u to %u, ack=%u",
					     rtt_tstamp, get_rtt_tstamp_mid_low(),
					     ntp->nip_ssthresh, nip_ssthresh, ack);

				ntp->nip_ssthresh = nip_ssthresh;
			}
		}
	}
}

/* Core incoming-ACK handler for NewIP TCP.
 * Returns 0 for an old ACK (before snd_una), -1 for an ACK beyond snd_nxt,
 * and 1 for an ACK that was processed (window update, rtx-queue cleanup,
 * ssthresh recalculation and retransmission handling).
 */
static int tcp_nip_ack(struct sock *sk, const struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_nip_common *ntp = &tcp_nip_sk(sk)->common;
	struct inet_connection_sock *icsk = inet_csk(sk);
	u32 prior_snd_una = tp->snd_una;
	u32 ack_seq = TCP_SKB_CB(skb)->seq;
	u32 ack = TCP_SKB_CB(skb)->ack_seq;
	int prior_packets = tp->packets_out;
	ktime_t skb_snd_tstamp = 0;

	/* Stale ACK: acknowledges data already acked */
	if (before(ack, prior_snd_una))
		return 0;
	/* ACK for data we never sent */
	if (after(ack, tp->snd_nxt))
		return -1;

	tcp_nip_ack_update_window(sk, skb, ack, ack_seq);
	icsk->icsk_probes_out = 0; /* probe0 cnt */
	ntp->nip_keepalive_out = 0; /* keepalive cnt */
	tp->rcv_tstamp = tcp_jiffies32;

	/* maybe zero window probe */
	if (!prior_packets) {
		nip_dbg("no unack pkt, seq=[%u-%u], rcv_nxt=%u, ack=%u",
			TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt, ack);
		if (tcp_nip_send_head(sk))
			tcp_nip_ack_probe(sk);
		return 1;
	}

	if (after(ack, prior_snd_una)) {
		/* Advancing ACK: clear retransmit backoff, advance snd_una,
		 * free acked skbs, then recompute ssthresh and resume any
		 * pending range retransmission.
		 */
		int icsk_rto_last;

		icsk->icsk_retransmits = 0;
		tp->retrans_stamp = tcp_time_stamp(tp);
		tp->rcv_tstamp = tcp_jiffies32;
		tcp_nip_snd_una_update(tp, ack);

		/* Capture RTO before cleanup so the change can be logged */
		icsk_rto_last = icsk->icsk_rto;
		tcp_nip_clean_rtx_queue(sk, &skb_snd_tstamp);

		tcp_nip_ack_calc_ssthresh(sk, ack, icsk_rto_last, skb_snd_tstamp);
		tcp_nip_nor_ack_retrans(sk, ack,
					get_ack_retrans_num());
		return 1;
	}

	// dup ack: ack == tp->snd_una
	tcp_nip_dup_ack_retrans(sk, skb, ack, get_dup_ack_retrans_num());

	return 1;
}

/* Sequence-number acceptance test for an incoming segment (RFC 793 style):
 * the segment must end at/after the left window edge and must not start
 * beyond the right window edge.
 */
static inline bool tcp_nip_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq)
{
	/* False is returned if end_seq has been received,
	 * or if seq is not behind the receive window
	 */
	return !before(end_seq, tp->rcv_wup) &&
	       !after(seq, tp->rcv_nxt + tcp_receive_window(tp));
}

/* When we get a reset we do this. */
void tcp_nip_reset(struct sock *sk)
{
	nip_dbg("handle rst");

	/* We want the right error as BSD sees it (and indeed as we do). */
	switch (sk->sk_state) {
	case TCP_SYN_SENT:
		sk->sk_err = ECONNREFUSED;
		break;
	case TCP_CLOSE_WAIT:
		sk->sk_err = EPIPE;
		break;
	case TCP_CLOSE:
		return;
	default:
		sk->sk_err = ECONNRESET;
	}
	/* This barrier is coupled with smp_rmb() in tcp_poll() */
	smp_wmb();

	tcp_nip_write_queue_purge(sk);
	tcp_nip_done(sk);

	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_error_report(sk);
}

/* Reack some incorrect packets, because if you do not ACK these packets,
 * they may be retransmitted frequently
 */
static void tcp_nip_send_dupack(struct sock *sk, const struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);

	/* Count a delayed-ACK-lost event for data segments that overlap
	 * already-received sequence space.
	 */
	if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
	    before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
		NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);

	nip_dbg("send dup ack");
	tcp_nip_send_ack(sk);
}

/* A RST one byte to the left of rcv_nxt is acceptable in the closing
 * states (CLOSE_WAIT/LAST_ACK/CLOSING) — keepalive-style off-by-one RSTs.
 */
static bool tcp_nip_reset_check(const struct sock *sk, const struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);

	return unlikely(TCP_SKB_CB(skb)->seq == (tp->rcv_nxt - 1) &&
			(1 << sk->sk_state) & (TCPF_CLOSE_WAIT | TCPF_LAST_ACK |
					       TCPF_CLOSING));
}

/* This function is used to process the SYN received in RST packets
 * and illegal SEQ packets in ESTABLISHED state.
Currently only seQ checks are included + */ +static bool tcp_nip_validate_incoming(struct sock *sk, struct sk_buff *skb, + const struct tcphdr *th, int syn_inerr) +{ + struct tcp_sock *tp = tcp_sk(sk); + bool rst_seq_match = false; + + /* Step 1: check sequence number */ + /* 01.Check for unexpected packets. For some probe packets, + * unexpected packets do not need to be processed, but reply for an ACK. + * 02.Enter this branch when the receive window is 0 + */ + if (!tcp_nip_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) { + nip_dbg("receive unexpected pkt, drop it. seq=[%u-%u], rec_win=[%u-%u]", + TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq, + tp->rcv_wup, tp->rcv_nxt + tcp_receive_window(tp)); + if (!th->rst) + tcp_nip_send_dupack(sk, skb); + else if (tcp_nip_reset_check(sk, skb)) + tcp_nip_reset(sk); + goto discard; + } + + /* Step 2: check RST bit */ + if (th->rst) { + if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt || tcp_nip_reset_check(sk, skb)) + rst_seq_match = true; + if (rst_seq_match) + tcp_nip_reset(sk); + goto discard; + } + + return true; + +discard: + tcp_nip_drop(sk, skb); + return false; +} + +void tcp_nip_rcv_established(struct sock *sk, struct sk_buff *skb, + const struct tcphdr *th, unsigned int len) +{ + struct tcp_sock *tp = tcp_sk(sk); + + tcp_mstamp_refresh(tp); + if (unlikely(!sk->sk_rx_dst)) + inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb); + + if (!tcp_nip_validate_incoming(sk, skb, th, 1)) + return; + + if (tcp_nip_ack(sk, skb) < 0) + goto discard; + + tcp_nip_data_queue(sk, skb); + tcp_nip_data_snd_check(sk); + tcp_nip_ack_snd_check(sk); + + return; + +discard: + tcp_nip_drop(sk, skb); +} + +static u32 tcp_default_init_rwnd(u32 mss) +{ + u32 init_rwnd = TCP_INIT_CWND * 2; + + if (mss > TCP_MAX_MSS) + init_rwnd = max((TCP_MAX_MSS * init_rwnd) / mss, 2U); + return init_rwnd; +} + +static void tcp_nip_fixup_rcvbuf(struct sock *sk) +{ + u32 mss = TCP_BASE_MSS; + int rcvmem; + + rcvmem = TCP_NUM_2 * SKB_TRUESIZE(mss + 
MAX_TCP_HEADER) * + tcp_default_init_rwnd(mss); + + if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) + rcvmem <<= TCP_NIP_4BYTE_PAYLOAD; + + if (sk->sk_rcvbuf < rcvmem) + sk->sk_rcvbuf = min(rcvmem, + sock_net(sk)->ipv4.sysctl_tcp_rmem[TCP_ARRAY_INDEX_2]); +} + +#define TCP_NIP_SND_BUF_SIZE 30720 +void tcp_nip_init_buffer_space(struct sock *sk) +{ + int tcp_app_win = sock_net(sk)->ipv4.sysctl_tcp_app_win; + struct tcp_sock *tp = tcp_sk(sk); + int maxwin; + + if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) + tcp_nip_fixup_rcvbuf(sk); + + tp->rcvq_space.space = tp->rcv_wnd; + tcp_mstamp_refresh(tp); + tp->rcvq_space.time = jiffies; + tp->rcvq_space.seq = tp->copied_seq; + maxwin = tcp_full_space(sk); + if (tp->window_clamp >= maxwin) { + tp->window_clamp = maxwin; + if (tcp_app_win && maxwin > TCP_NUM_4 * tp->advmss) + tp->window_clamp = max(maxwin - + (maxwin >> tcp_app_win), + TCP_NUM_4 * tp->advmss); + } + /* Force reservation of one segment. */ + if (tcp_app_win && + tp->window_clamp > TCP_NUM_2 * tp->advmss && + tp->window_clamp + tp->advmss > maxwin) + tp->window_clamp = max(TCP_NUM_2 * tp->advmss, maxwin - tp->advmss); + tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp); + tp->snd_cwnd_stamp = tcp_jiffies32; +} + +void tcp_nip_finish_connect(struct sock *sk, struct sk_buff *skb) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); + + tcp_set_state(sk, TCP_ESTABLISHED); + icsk->icsk_ack.lrcvtime = tcp_jiffies32; + if (skb) { + icsk->icsk_af_ops->sk_rx_dst_set(sk, skb); + security_inet_conn_established(sk, skb); + } + + tp->lsndtime = tcp_jiffies32; + + tcp_nip_init_buffer_space(sk); +} + +/* Function: + * A function that handles the second handshake + * Parameter: + * sk: transmission control block + * skb: Transfer control block buffer + * Th: TCP header field + */ +static int tcp_nip_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, + const struct tcphdr *th) +{ + struct inet_connection_sock *icsk = 
inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); + int saved_clamp = tp->rx_opt.mss_clamp; + + /* TCP Option Parsing */ + tcp_nip_parse_options(skb, &tp->rx_opt, 0, NULL); + /* Rcv_tsecr saves the timestamp of the last TCP segment received from the peer end */ + if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) + tp->rx_opt.rcv_tsecr -= tp->tsoffset; + + if (th->ack) { + /* Whether the ACK value is between the initial send sequence number + * and the next sequence number + */ + if (!after(TCP_SKB_CB(skb)->ack_seq, tp->snd_una) || + after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt)) + goto reset_and_undo; + /* Must be within the corresponding time */ + if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && + !between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp, tcp_time_stamp(tp))) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSACTIVEREJECTED); + goto reset_and_undo; + } + + if (th->rst) { + tcp_nip_reset(sk); + goto discard; + } + + if (!th->syn) + goto discard_and_undo; + + tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); + + tcp_nip_ack(sk, skb); + tp->out_of_order_queue = RB_ROOT; + /* The next data number expected to be accepted is +1 */ + tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1; + /* Accept the left margin of the window +1 */ + tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1; + tp->snd_wnd = ntohs(th->window); + + if (get_wscale_enable()) { + tp->rx_opt.wscale_ok = 1; + tp->rx_opt.snd_wscale = get_wscale(); + tp->rx_opt.rcv_wscale = get_wscale(); + } + + if (!tp->rx_opt.wscale_ok) { + tp->rx_opt.snd_wscale = 0; + tp->rx_opt.rcv_wscale = 0; + tp->window_clamp = min(tp->window_clamp, 65535U); + } + + if (tp->rx_opt.saw_tstamp) { + tp->rx_opt.tstamp_ok = 1; + tp->tcp_header_len = + sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED; + tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; + tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval; + tp->rx_opt.ts_recent_stamp = get_seconds(); + } else { + tp->tcp_header_len = sizeof(struct tcphdr); + } + + tp->copied_seq = tp->rcv_nxt; + /* Invoke memory barrier (annotated 
prior to checkpatch requirements) */ + smp_mb(); + + tcp_nip_sync_mss(sk, icsk->icsk_pmtu_cookie); + tcp_nip_initialize_rcv_mss(sk); + + tcp_nip_finish_connect(sk, skb); + /* Wake up the process */ + if (!sock_flag(sk, SOCK_DEAD)) { + sk->sk_state_change(sk); + rcu_read_lock(); + sock_wake_async(rcu_dereference(sk->sk_wq), SOCK_WAKE_IO, POLL_OUT); + rcu_read_unlock(); + } + + tcp_nip_send_ack(sk); + return -1; +discard: + tcp_nip_drop(sk, skb); + return 0; + } + +discard_and_undo: + tcp_clear_options(&tp->rx_opt); + tp->rx_opt.mss_clamp = saved_clamp; + goto discard; + +reset_and_undo: + tcp_clear_options(&tp->rx_opt); + tp->rx_opt.mss_clamp = saved_clamp; + return 1; +} + +/* Function: + * TCP processing function that is differentiated according to + * different states after receiving data packets + * Parameter: + * sk: transmission control block + * skb: Transfer control block buffer + * Note: Currently this function only has code for handling the first handshake packet + * Implementation of the third handshake ACK to handle the code + */ +int tcp_nip_rcv_state_process(struct sock *sk, struct sk_buff *skb) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); + const struct tcphdr *th = tcp_hdr(skb); + int queued = 0; + bool acceptable; + + /* Step 1: Connect handshake packet processing */ + switch (sk->sk_state) { + case TCP_CLOSE: + goto discard; + + case TCP_LISTEN: + if (th->ack) + return 1; + + if (th->rst) + goto discard; + + if (th->syn) { + if (th->fin) + goto discard; + + rcu_read_lock(); + local_bh_disable(); + acceptable = icsk->icsk_af_ops->conn_request(sk, skb) >= 0; + local_bh_enable(); + rcu_read_unlock(); + + if (!acceptable) + return 1; + consume_skb(skb); + return 0; + } + goto discard; + case TCP_SYN_SENT: + nip_dbg("TCP_SYN_SENT"); + tp->rx_opt.saw_tstamp = 0; + tcp_mstamp_refresh(tp); + queued = tcp_nip_rcv_synsent_state_process(sk, skb, th); + if (queued >= 0) + return queued; + __kfree_skb(skb); + return 
0; + } + tcp_mstamp_refresh(tp); + tp->rx_opt.saw_tstamp = 0; + + if (!th->ack && !th->rst && !th->syn) + goto discard; + + if (!tcp_nip_validate_incoming(sk, skb, th, 0)) + return 0; + + acceptable = tcp_nip_ack(sk, skb); + /* If the third handshake ACK is invalid, 1 is returned + * and the SKB is discarded in tcp_nip_rcv + */ + if (!acceptable) { + if (sk->sk_state == TCP_SYN_RECV) + return 1; + goto discard; + } + + switch (sk->sk_state) { + case TCP_SYN_RECV: + tp->copied_seq = tp->rcv_nxt; + tcp_nip_init_buffer_space(sk); + /* Invoke memory barrier (annotated prior to checkpatch requirements) */ + smp_mb(); + tcp_set_state(sk, TCP_ESTABLISHED); + nip_dbg("TCP_ESTABLISHED"); + sk->sk_state_change(sk); + + /* Sets the part to be sent, and the size of the send window */ + tp->snd_una = TCP_SKB_CB(skb)->ack_seq; + tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale; + tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); + + tp->lsndtime = tcp_jiffies32; + + tcp_initialize_rcv_mss(sk); + break; + case TCP_FIN_WAIT1: { + if (tp->snd_una != tp->write_seq) { + nip_dbg("tp->snd_una != tp->write_seq"); + break; + } + + tcp_set_state(sk, TCP_FIN_WAIT2); + sk->sk_shutdown |= SEND_SHUTDOWN; + + if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && + after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) { + tcp_nip_done(sk); + nip_dbg("received payload packets, call tcp_nip_done"); + return 1; + } + + nip_dbg("TCP_FIN_WAIT1: recvd ack for fin.Wait for fin from other side"); + inet_csk_reset_keepalive_timer(sk, TCP_NIP_CSK_KEEPALIVE_CYCLE * HZ); + + break; + } + + case TCP_CLOSING: + if (tp->snd_una == tp->write_seq) { + nip_dbg("TCP_CLOSING: recvd ack for fin.Ready to destroy"); + inet_csk_reset_keepalive_timer(sk, TCP_TIMEWAIT_LEN); + goto discard; + } + break; + case TCP_LAST_ACK: + nip_dbg("tcp_nip_rcv_state_process_2: TCP_LAST_ACK"); + if (tp->snd_una == tp->write_seq) { + nip_dbg("LAST_ACK: recvd ack for fin.Directly destroy"); + tcp_nip_done(sk); + goto discard; + } + 
break; + } + + switch (sk->sk_state) { + case TCP_CLOSE_WAIT: + nip_dbg("into TCP_CLOSE_WAIT, rst = %d, seq = %u, end_seq = %u, rcv_nxt = %u", + th->rst, TCP_SKB_CB(skb)->seq, + TCP_SKB_CB(skb)->seq, tp->rcv_nxt); + fallthrough; + case TCP_CLOSING: + case TCP_LAST_ACK: + if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { + nip_dbg("break in TCP_LAST_ACK"); + break; + } + nip_dbg("tcp_nip_rcv_state_process_3: TCP_LAST_ACK_2"); + fallthrough; + case TCP_FIN_WAIT1: + case TCP_FIN_WAIT2: + /* Reset is required according to RFC 1122. + * Do not enter the reset process temporarily + */ + if (sk->sk_shutdown & RCV_SHUTDOWN) { + if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && + after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) { + tcp_nip_reset(sk); + nip_dbg("call tcp_nip_reset"); + return 1; + } + } + fallthrough; + case TCP_ESTABLISHED: + tcp_nip_data_queue(sk, skb); + queued = 1; + break; + } + + if (sk->sk_state != TCP_CLOSE) { + tcp_nip_data_snd_check(sk); + tcp_nip_ack_snd_check(sk); + } + + if (!queued) { +discard: + tcp_nip_drop(sk, skb); + } + return 0; +} + +/* Function + * Initialize RCV_MSS + * Parameter + * sk: transmission control block + */ +void tcp_nip_initialize_rcv_mss(struct sock *sk) +{ + const struct tcp_sock *tp = tcp_sk(sk); + unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache); + + hint = min(hint, tp->rcv_wnd / TCP_NUM_2); + hint = min(hint, TCP_MSS_DEFAULT); + hint = max(hint, TCP_MIN_MSS); + + inet_csk(sk)->icsk_ack.rcv_mss = hint; +} + +/* Function + * Handle the third handshake ACK and return the new control block successfully. + * Is the core process for handling ACKS. + * (1)Create a child control block. Note that the state of the child control + * block is TCP_SYN_RECV + * This is different from the TCP_NEW_SYN_RECV control block created when syn was received. 
+ * (2)Remove the request control block from the incomplete connection queue + * and add it to the completed connection queue + * Parameter + * sk: transmission control block + * skb: Transfer control block buffer + * req: Request connection control block + */ +struct sock *tcp_nip_check_req(struct sock *sk, struct sk_buff *skb, + struct request_sock *req) +{ + struct tcp_options_received tmp_opt; + struct sock *child; + const struct tcphdr *th = tcp_hdr(skb); + __be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST | TCP_FLAG_SYN | TCP_FLAG_ACK); + bool own_req; + + tmp_opt.saw_tstamp = 0; + /* Check whether the TCP option exists */ + if (th->doff > (sizeof(struct tcphdr) >> TCP_NIP_4BYTE_PAYLOAD)) + /* Parsing TCP options */ + tcp_nip_parse_options(skb, &tmp_opt, 0, NULL); + + /* ACK but the serial number does not match, + * return to the original control block, no processing outside + */ + if ((flg & TCP_FLAG_ACK) && + (TCP_SKB_CB(skb)->ack_seq != + tcp_rsk(req)->snt_isn + 1)) { + nip_dbg("ack_seq is wrong"); + return sk; + } + + /* The above process guarantees that there is an ACK, if not, return directly */ + if (!(flg & TCP_FLAG_ACK)) { + nip_dbg("No TCP_FLAG_ACK"); + return NULL; + } + + /* The ack is valid and the child control block is created. 
+ * Note that the state of the child control block is TCP_SYN_RECV + */ + child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL, req, &own_req); + if (!child) { + nip_dbg("No listen_overflow"); + goto listen_overflow; + } + nip_dbg("creat child sock successfully"); + + sock_rps_save_rxhash(child, skb); + /* Calculate the time spent synack-ack in three handshakes */ + tcp_synack_rtt_meas(child, req); + /* Delete the original control block from the incomplete queue + * and add it to the completed queue + */ + return inet_csk_complete_hashdance(sk, child, req, own_req); + +listen_overflow: + if (!sock_net(sk)->ipv4.sysctl_tcp_abort_on_overflow) { + inet_rsk(req)->acked = 1; + return NULL; + } + return NULL; +} + diff --git a/newip/src/linux-5.10/net/newip/tcp_nip_output.c b/newip/src/linux-5.10/net/newip/tcp_nip_output.c new file mode 100644 index 0000000000000000000000000000000000000000..d0a205d7547920a7a8f9262dbfdc9b7209c3485c --- /dev/null +++ b/newip/src/linux-5.10/net/newip/tcp_nip_output.c @@ -0,0 +1,1323 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. + * + * NewIP INET + * An implementation of the TCP/IP protocol suite for the LINUX + * operating system. NewIP INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Implementation of the Transmission Control Protocol(TCP). 
+ * + * Based on net/ipv4/tcp_output.c + * Based on net/ipv4/tcp_minisocks.c + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": [%s:%d] " fmt, __func__, __LINE__ + +#include +#include +#include +#include +#include +#include +#include +#include "nip_hdr.h" +#include "nip_checksum.h" +#include "tcp_nip_parameter.h" + +#define OPTION_SACK_ADVERTISE BIT(0) +#define OPTION_TS BIT(1) +#define OPTION_MD5 BIT(2) +#define OPTION_WSCALE BIT(3) +#define OPTION_FAST_OPEN_COOKIE BIT(8) + +/* Store the options contained in TCP when sending TCP packets */ +struct tcp_nip_out_options { + u16 options; /* bit field of OPTION_* */ + u16 mss; /* If it is zero, the MSS option is disabled */ + + u8 ws; /* window scale, 0 to disable */ + __u32 tsval, tsecr; /* need to include OPTION_TS */ +}; + +static bool tcp_nip_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, + int push_one, gfp_t gfp); + +/* Calculate MSS not accounting any TCP options. */ +static inline int __tcp_nip_mtu_to_mss(struct sock *sk, int pmtu) +{ + const struct tcp_sock *tp = tcp_sk(sk); + const struct inet_connection_sock *icsk = inet_csk(sk); + int mss_now; + int nip_hdr_len = get_nip_hdr_len(NIP_HDR_COMM, &sk->sk_nip_rcv_saddr, &sk->sk_nip_daddr); + + /* Calculate base mss without TCP options: It is MMS_S - sizeof(tcphdr) of rfc1122 */ + nip_hdr_len = nip_hdr_len == 0 ? 
NIP_HDR_MAX : nip_hdr_len; + mss_now = pmtu - nip_hdr_len - sizeof(struct tcphdr); + + /* IPv6 adds a frag_hdr in case RTAX_FEATURE_ALLFRAG is set */ + if (icsk->icsk_af_ops->net_frag_header_len) { + const struct dst_entry *dst = __sk_dst_get(sk); + + if (dst && dst_allfrag(dst)) + mss_now -= icsk->icsk_af_ops->net_frag_header_len; + } + + /* Clamp it (mss_clamp does not include tcp options) */ + if (mss_now > tp->rx_opt.mss_clamp) + mss_now = tp->rx_opt.mss_clamp; + + /* Now subtract optional transport overhead */ + mss_now -= icsk->icsk_ext_hdr_len; + + /* Then reserve room for full set of TCP options and 8 bytes of data */ + mss_now = max(mss_now, sock_net(sk)->ipv4.sysctl_tcp_min_snd_mss); + return mss_now; +} + +/* Calculate MSS. Not accounting for SACKs here. */ +int tcp_nip_mtu_to_mss(struct sock *sk, int pmtu) +{ + /* Subtract TCP options size, not including SACKs */ + return __tcp_nip_mtu_to_mss(sk, pmtu) - + (tcp_sk(sk)->tcp_header_len - sizeof(struct tcphdr)); +} + +static inline void tcp_advance_send_head(struct sock *sk, const struct sk_buff *skb) +{ + if (tcp_skb_is_last(sk, skb)) + sk->sk_send_head = NULL; + else + sk->sk_send_head = skb_queue_next(&sk->sk_write_queue, skb); +} + +static void tcp_nip_event_new_data_sent(struct sock *sk, struct sk_buff *skb) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); + unsigned int prior_packets = tp->packets_out; + + tcp_advance_send_head(sk, skb); + tp->snd_nxt = TCP_SKB_CB(skb)->end_seq; + tp->packets_out += tcp_skb_pcount(skb); + if (!prior_packets || icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || + icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) + tcp_nip_rearm_rto(sk); +} + +/* check probe0 timer */ +static void tcp_nip_check_probe_timer(struct sock *sk) +{ + unsigned long when; + + if (!tcp_sk(sk)->packets_out && !inet_csk(sk)->icsk_pending) { + when = tcp_probe0_base(sk); + nip_dbg("start probe0 timer, when=%lu, RTO MAX=%u", when, TCP_RTO_MAX); + 
inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, when, TCP_RTO_MAX); + } else if (inet_csk(sk)->icsk_pending != ICSK_TIME_PROBE0) { + nip_dbg("can`t start probe0 timer, packets_out=%u, icsk_pending=%u", + tcp_sk(sk)->packets_out, inet_csk(sk)->icsk_pending); + } +} + +void __tcp_nip_push_pending_frames(struct sock *sk, unsigned int cur_mss, + int nonagle) +{ + if (unlikely(sk->sk_state == TCP_CLOSE)) + return; + + if (tcp_nip_write_xmit(sk, cur_mss, nonagle, 0, sk_gfp_mask(sk, GFP_ATOMIC))) + tcp_nip_check_probe_timer(sk); +} + +u32 __nip_tcp_select_window(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); + struct tcp_nip_common *ntp = &tcp_nip_sk(sk)->common; + int mss = tcp_nip_current_mss(sk); /* TCP_BASE_MSS */ + int allowed_space = tcp_full_space(sk); + int full_space = min_t(int, tp->window_clamp, allowed_space); /* Total receive cache */ + int free_space = tcp_space(sk); /* 3/4 remaining receive cache */ + int window; + + if (unlikely(mss > full_space)) { + mss = full_space; + if (mss <= 0) + return 0; + } + + /* receive buffer is half full */ + if (free_space < (full_space >> 1)) { + icsk->icsk_ack.quick = 0; + + free_space = round_down(free_space, 1 << tp->rx_opt.rcv_wscale); + if (free_space < (allowed_space >> TCP_NUM_4) || free_space < mss) { + nip_dbg("rcv_wnd is 0, [allowed|full|free]space=[%u, %u, %u], mss=%u", + allowed_space, full_space, free_space, mss); + return 0; + } + } + + if (get_nip_tcp_rcv_win_enable()) { + if (get_ssthresh_enable()) + free_space = free_space > ntp->nip_ssthresh ? + ntp->nip_ssthresh : free_space; + else + free_space = free_space > tp->rcv_ssthresh ? tp->rcv_ssthresh : free_space; + } else { + free_space = free_space > get_ssthresh_high() ? get_ssthresh_high() : free_space; + } + + /* Don't do rounding if we are using window scaling, since the + * scaled window will not line up with the MSS boundary anyway. 
+ * tp->rx_opt.rcv_wscale is always true + */ + window = free_space; + + /* Advertise enough space so that it won't get scaled away. + * Import case: prevent zero window announcement if + * 1< mss. + */ + window = ALIGN(window, (1 << tp->rx_opt.rcv_wscale)); + nip_dbg("wscale(%u) win change [%u to %u], [allowed|free]space=[%u, %u], mss=%u", + tp->rx_opt.rcv_wscale, free_space, window, allowed_space, free_space, mss); + return window; +} + +/* The basic algorithm of window size selection: + * 1. Calculate the remaining size of the receiving window cur_win. + * 2. Calculate the new receive window size NEW_win, which is 3/4 of the remaining receive + * cache and cannot exceed RCV_SSTHresh. + * 3. Select the receiving window size with the larger median value of cur_win and new_win. + */ +static u16 nip_tcp_select_window(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + u32 old_win = tp->rcv_wnd; + /* The remaining size of the front receive window */ + u32 cur_win = tcp_receive_window(tp); + /* Calculate the size of the new receive window based on the remaining receive cache */ + u32 new_win = __nip_tcp_select_window(sk); + u32 new_win_bak; + + /* Never shrink the offered window */ + if (new_win < cur_win) { + /* Danger Will Robinson! + * Don't update rcv_wup/rcv_wnd here or else + * we will not be able to advertise a zero + * window in time. --DaveM + * + * Relax Will Robinson. + */ + if (new_win == 0) + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPWANTZEROWINDOWADV); + new_win_bak = new_win; + new_win = ALIGN(cur_win, 1 << tp->rx_opt.rcv_wscale); + nip_dbg("when new_win(%u) < cur_win(%u), win change [%u to %u]", + new_win_bak, cur_win, new_win_bak, new_win); + } + tp->rcv_wnd = new_win; + tp->rcv_wup = tp->rcv_nxt; + + /* Make sure we do not exceed the maximum possible + * scaled window. 
+ */ + if (!tp->rx_opt.rcv_wscale && sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows) + new_win = min(new_win, MAX_TCP_WINDOW); + else + new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale)); + + /* RFC1323 Scaling Applied. + * Scaling the receive window so that it can represent up to 30 bits + */ + new_win_bak = new_win; + new_win >>= tp->rx_opt.rcv_wscale; + nip_dbg("wscale(%u) win change [%u to %u]", tp->rx_opt.rcv_wscale, new_win_bak, new_win); + if (new_win == 0) { + tp->pred_flags = 0; + if (old_win) + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPTOZEROWINDOWADV); + } else if (old_win == 0) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFROMZEROWINDOWADV); + } + + return new_win; +} + +/* Function + * Initialize transport layer parameters. + * Parameter + * sk: transmission control block. + */ +static void tcp_nip_connect_init(struct sock *sk) +{ + const struct dst_entry *dst = __sk_dst_get(sk); + struct tcp_sock *tp = tcp_sk(sk); + __u8 rcv_wscale = 0; + + /* Header structure length + timestamp length */ + tp->tcp_header_len = sizeof(struct tcphdr); + if (sock_net(sk)->ipv4.sysctl_tcp_timestamps) + tp->tcp_header_len += TCPOLEN_TSTAMP_ALIGNED; + + if (tp->rx_opt.user_mss) + tp->rx_opt.mss_clamp = tp->rx_opt.user_mss; + tp->max_window = 0; + + tcp_mtup_init(sk); + tp->rx_opt.mss_clamp = tcp_nip_sync_mss(sk, dst_mtu(dst)); + + if (!tp->window_clamp) + tp->window_clamp = dst_metric(dst, RTAX_WINDOW); + tp->advmss = tcp_mss_clamp(tp, dst_metric_advmss(dst)); + + tcp_initialize_rcv_mss(sk); + + /* Initialization window */ + tcp_select_initial_window(sk, tcp_full_space(sk), + tp->advmss - (tp->rx_opt.ts_recent_stamp ? + tp->tcp_header_len - sizeof(struct tcphdr) : 0), + &tp->rcv_wnd, + &tp->window_clamp, + 0, + &rcv_wscale, + 0); + + tp->rx_opt.rcv_wscale = get_wscale_enable() ? 
get_wscale() : rcv_wscale; + tp->rcv_ssthresh = tp->rcv_wnd; + + sk->sk_err = 0; + sock_reset_flag(sk, SOCK_DONE); + tp->snd_wnd = 0; + tp->snd_wl1 = 0; + tcp_write_queue_purge(sk); + + tp->snd_una = tp->write_seq; + tp->snd_sml = tp->write_seq; + tp->snd_up = tp->write_seq; + tp->snd_nxt = tp->write_seq; + + tp->rcv_nxt = 0; + tp->rcv_wup = tp->rcv_nxt; + tp->copied_seq = tp->rcv_nxt; + inet_csk(sk)->icsk_rto = get_nip_rto() == 0 ? TCP_TIMEOUT_INIT : (HZ / get_nip_rto()); + inet_csk(sk)->icsk_retransmits = 0; + tcp_clear_retrans(tp); +} + +static void tcp_nip_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags) +{ + skb->ip_summed = CHECKSUM_PARTIAL; + skb->csum = 0; + + TCP_SKB_CB(skb)->tcp_flags = flags; + TCP_SKB_CB(skb)->sacked = 0; + + tcp_skb_pcount_set(skb, 1); + + TCP_SKB_CB(skb)->seq = seq; + if (flags & (TCPHDR_SYN | TCPHDR_FIN)) + seq++; + TCP_SKB_CB(skb)->end_seq = seq; +} + +#define OPTION_TS BIT(1) +#define OPTION_WSCALE BIT(3) + +static void tcp_nip_connect_queue_skb(struct sock *sk, struct sk_buff *skb) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); + + tcb->end_seq += skb->len; + __skb_header_release(skb); + __skb_queue_tail(&sk->sk_write_queue, skb); + sk->sk_wmem_queued += skb->truesize; + sk_mem_charge(sk, skb->truesize); + WRITE_ONCE(tp->write_seq, tcb->end_seq); + tp->packets_out += tcp_skb_pcount(skb); +} + +static __u16 tcp_nip_advertise_mss(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + const struct dst_entry *dst = __sk_dst_get(sk); + int mss = tp->advmss; + u32 mtu; + + if (dst) { + int nip_hdr_len; + int nip_mss; + unsigned int metric = dst_metric_advmss(dst); + + if (metric < mss) { + mss = metric; + tp->advmss = mss; + } + + mtu = dst_mtu(dst); + nip_hdr_len = get_nip_hdr_len(NIP_HDR_COMM, &sk->sk_nip_rcv_saddr, + &sk->sk_nip_daddr); + nip_hdr_len = nip_hdr_len == 0 ? 
NIP_HDR_MAX : nip_hdr_len; + nip_mss = mtu - nip_hdr_len - sizeof(struct tcphdr); + if (nip_mss > mss) { + mss = nip_mss; + tp->advmss = mss; + } + } + + return (__u16)mss; +} + +/* Compute TCP options for SYN packets. This is not the final + * network wire format yet. + */ +static unsigned int tcp_nip_syn_options(struct sock *sk, struct sk_buff *skb, + struct tcp_nip_out_options *opts) +{ + unsigned int remaining = MAX_TCP_OPTION_SPACE; + + opts->mss = tcp_nip_advertise_mss(sk); + nip_dbg("advertise mss %d", opts->mss); + remaining -= TCPOLEN_MSS_ALIGNED; + + return MAX_TCP_OPTION_SPACE - remaining; +} + +/* Compute TCP options for ESTABLISHED sockets. This is not the + * final wire format yet. + */ +static unsigned int tcp_nip_established_options(struct sock *sk, struct sk_buff *skb, + struct tcp_nip_out_options *opts) +{ + struct tcp_sock *tp = tcp_sk(sk); + unsigned int size = 0; + + opts->options = 0; + + if (likely(tp->rx_opt.tstamp_ok)) { + opts->options |= OPTION_TS; + opts->tsval = skb ? tcp_skb_timestamp(skb) + tp->tsoffset : 0; + opts->tsecr = tp->rx_opt.ts_recent; + size += TCPOLEN_TSTAMP_ALIGNED; + } + return size; +} + +/* Function + * Put the parameters from the TCP option into SKB. + * Write previously computed TCP options to the packet. + * Parameter + * ptr: pointer to TCP options in SKB. + * tp: transmission control block. + * opts: structure to be sent to temporarily load TCP options. 
+ */ +static void tcp_nip_options_write(__be32 *ptr, struct tcp_sock *tp, + struct tcp_nip_out_options *opts) +{ + if (unlikely(opts->mss)) + *ptr++ = htonl((TCPOPT_MSS << TCP_OPT_MSS_PAYLOAD) | + (TCPOLEN_MSS << TCP_OLEN_MSS_PAYLOAD) | + opts->mss); +} + +static inline void tcp_nip_event_ack_sent(struct sock *sk, unsigned int pkts, + u32 rcv_nxt) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (unlikely(rcv_nxt != tp->rcv_nxt)) + return; + inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); +} + +unsigned short nip_get_output_checksum_tcp(struct sk_buff *skb, struct nip_addr src_addr, + struct nip_addr dst_addr) +{ + struct nip_pseudo_header nph = {0}; + u8 *tcp_hdr = skb_transport_header(skb); + + nph.nexthdr = IPPROTO_TCP; + nph.saddr = src_addr; + nph.daddr = dst_addr; + + nph.check_len = htons(skb->len); + return nip_check_sum_build(tcp_hdr, skb->len, &nph); +} + +static int __tcp_nip_transmit_skb(struct sock *sk, struct sk_buff *skb, + int clone_it, gfp_t gfp_mask, u32 rcv_nxt) +{ + const struct inet_connection_sock *icsk = inet_csk(sk); + struct inet_sock *inet; + struct tcp_sock *tp = tcp_sk(sk); + struct tcp_skb_cb *tcb; + struct tcp_nip_out_options opts; + unsigned int tcp_options_size, tcp_header_size; + struct sk_buff *oskb = NULL; + struct tcphdr *th; + int err = 0; + __be16 len; + unsigned short check = 0; + bool ack; + + if (skb->tstamp == 0) + skb->tstamp = tcp_jiffies32; + + if (clone_it) { + TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq + - tp->snd_una; + oskb = skb; + + tcp_skb_tsorted_save(oskb) { + if (unlikely(skb_cloned(oskb))) + skb = pskb_copy(oskb, gfp_mask); + else + skb = skb_clone(oskb, gfp_mask); + } tcp_skb_tsorted_restore(oskb); + + if (unlikely(!skb)) + return -ENOBUFS; + } + + inet = inet_sk(sk); + tcb = TCP_SKB_CB(skb); + memset(&opts, 0, sizeof(opts)); + + if (unlikely(tcb->tcp_flags & TCPHDR_SYN)) + tcp_options_size = tcp_nip_syn_options(sk, skb, &opts); + else + tcp_options_size = tcp_nip_established_options(sk, skb, 
&opts); + tcp_header_size = tcp_options_size + sizeof(struct tcphdr); + + skb->ooo_okay = sk_wmem_alloc_get(sk) < SKB_TRUESIZE(1); + /* The data pointer moves up */ + skb_push(skb, tcp_header_size); + skb_reset_transport_header(skb); + + /* Disassociate the control block */ + skb_orphan(skb); + + /* Establishes associations with control blocks */ + skb->sk = sk; + skb->destructor = skb_is_tcp_pure_ack(skb) ? __sock_wfree : tcp_wfree; + skb_set_hash_from_sk(skb, sk); + /* Increase allocated memory */ + refcount_add(skb->truesize, &sk->sk_wmem_alloc); + + /* Build TCP header and checksum it. */ + th = (struct tcphdr *)skb->data; + th->source = inet->inet_sport; + th->dest = inet->inet_dport; + th->seq = htonl(tcb->seq); + th->ack_seq = htonl(rcv_nxt); + /* TCP's header offset is measured in 4 bytes, so moving two to the right + * means dividing by 4. In addition, according to the position of the offset + * field in the packet, the offset field is at the beginning of a short type, + * accounting for 4 bits. Therefore, the offset field should be shifted 12 bits + * to the left + */ + len = htons(((tcp_header_size >> TCP_NIP_4BYTE_PAYLOAD) << TCP_HDR_LEN_POS_PAYLOAD) | + tcb->tcp_flags); + *(((__be16 *)th) + TCP_HDR_LEN_OFFSET) = len; + + th->check = 0; + /* Newip Urg_ptr is disabled. 
Urg_ptr is used to carry the number of discarded packets */ + th->urg_ptr = htons(tp->snd_up); + + /* Write TCP option */ + tcp_nip_options_write((__be32 *)(th + 1), tp, &opts); + + /* Window Settings */ + if (likely(!(tcb->tcp_flags & TCPHDR_SYN))) + th->window = htons(nip_tcp_select_window(sk)); + else + th->window = htons(min(tp->rcv_wnd, TCP_NIP_WINDOW_MAX)); + + ack = tcb->tcp_flags & TCPHDR_ACK; + nip_dbg("sport=%u, dport=%u, win=%u, rcvbuf=%d, sk_rmem_alloc=%d, ack=%u, skb->len=%u", + ntohs(inet->inet_sport), ntohs(inet->inet_dport), ntohs(th->window), + sk->sk_rcvbuf, atomic_read(&sk->sk_rmem_alloc), ack, skb->len); + + /* Fill in checksum */ + check = nip_get_output_checksum_tcp(skb, sk->sk_nip_rcv_saddr, sk->sk_nip_daddr); + th->check = htons(check); + + if (likely(tcb->tcp_flags & TCPHDR_ACK)) + tcp_nip_event_ack_sent(sk, tcp_skb_pcount(skb), rcv_nxt); + + /* There's data to send */ + if (skb->len != tcp_header_size) + tp->data_segs_out += tcp_skb_pcount(skb); + + memset(skb->cb, 0, sizeof(struct ninet_skb_parm)); + err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl); + return err; +} + +/* Function + * TCP's transport layer sends code that builds and initializes the TCP header + * Construct the SK_buff call transport layer to network layer interface + * Parameter + * sk: Transmission control block. + * skb: Structure stores all information about network datagrams + */ +int tcp_nip_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, + gfp_t gfp_mask) +{ + return __tcp_nip_transmit_skb(sk, skb, clone_it, gfp_mask, + tcp_sk(sk)->rcv_nxt); +} + +static void tcp_nip_queue_skb(struct sock *sk, struct sk_buff *skb) +{ + struct tcp_sock *tp = tcp_sk(sk); + + /* Advance write_seq and place onto the write_queue. 
*/ + tp->write_seq = TCP_SKB_CB(skb)->end_seq; + tcp_nip_add_write_queue_tail(sk, skb); + sk->sk_wmem_queued += skb->truesize; + sk_mem_charge(sk, skb->truesize); +} + +/* Function + * A function used by the client transport layer to connect requests. + * Parameter + * sk: transmission control block. + */ +int __tcp_nip_connect(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct sk_buff *buff; + int err; + + tcp_nip_connect_init(sk); + buff = sk_stream_alloc_skb(sk, 0, sk->sk_allocation, true); + if (unlikely(!buff)) + return -ENOBUFS; + + /* Initializes the SYN flag bit */ + tcp_nip_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN); + tcp_mstamp_refresh(tp); + tp->retrans_stamp = tcp_time_stamp(tp); + tcp_nip_init_xmit_timers(sk); + + tcp_nip_connect_queue_skb(sk, buff); + + /* Send off SYN */ + err = tcp_nip_transmit_skb(sk, buff, 1, sk->sk_allocation); + if (err == -ECONNREFUSED) + return err; + + tp->snd_nxt = tp->write_seq; + tp->pushed_seq = tp->write_seq; + + TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS); + + /* Timer for repeating the SYN until an answer. 
*/ + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto, TCP_RTO_MAX); + + return 0; +} + +unsigned int tcp_nip_sync_mss(struct sock *sk, u32 pmtu) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); + int mss_now; + + if (icsk->icsk_mtup.search_high > pmtu) + icsk->icsk_mtup.search_high = pmtu; + + mss_now = tcp_nip_mtu_to_mss(sk, pmtu); + nip_dbg("sync mtu_to_mss %d", mss_now); + mss_now = tcp_bound_to_half_wnd(tp, mss_now); + nip_dbg("sync bound to half wnd %d", mss_now); + + /* And store cached results */ + icsk->icsk_pmtu_cookie = pmtu; + if (icsk->icsk_mtup.enabled) + mss_now = min(mss_now, tcp_nip_mtu_to_mss(sk, icsk->icsk_mtup.search_low)); + tp->mss_cache = mss_now; + + nip_dbg("sync final mss %d", mss_now); + + return mss_now; +} + +unsigned int tcp_nip_current_mss(struct sock *sk) +{ + const struct tcp_sock *tp = tcp_sk(sk); + const struct dst_entry *dst = __sk_dst_get(sk); + u32 mss_now; + unsigned int header_len; + struct tcp_nip_out_options opts; + + mss_now = tp->mss_cache; + + if (dst) { + u32 mtu = dst_mtu(dst); + + if (mtu != inet_csk(sk)->icsk_pmtu_cookie) + mss_now = tcp_nip_sync_mss(sk, mtu); + } + + header_len = tcp_nip_established_options(sk, NULL, &opts) + sizeof(struct tcphdr); + if (header_len != tp->tcp_header_len) { + int delta = (int)header_len - tp->tcp_header_len; + + mss_now -= delta; + } + + return mss_now; +} + +/* Function: + * Set up TCP options for SYN-ACKs. + * Initializes the TCP option for the SYN-ACK segment. Returns the SIZE of the TCP header. + * Parameter + * req: Request connection control block. + * mss: maximum segment length. + * skb: Transfer control block buffer. + * opts: stores the options contained in TCP packets when they are sent. + * foc: Fast Open option. + * synack_type: type of SYN+ACK segment. 
+ */ +static unsigned int tcp_nip_synack_options(struct request_sock *req, + unsigned int mss, struct sk_buff *skb, + struct tcp_nip_out_options *opts, + const struct tcp_md5sig_key *md5, + struct tcp_fastopen_cookie *foc, + enum tcp_synack_type synack_type) +{ + struct inet_request_sock *ireq = inet_rsk(req); + unsigned int remaining = MAX_TCP_OPTION_SPACE; + + /* We always send an MSS option. */ + opts->mss = mss; + remaining -= TCPOLEN_MSS_ALIGNED; + + if (likely(ireq->tstamp_ok)) { + opts->options |= OPTION_TS; + opts->tsval = tcp_skb_timestamp(skb); + opts->tsecr = req->ts_recent; + remaining -= TCPOLEN_TSTAMP_ALIGNED; + } + return MAX_TCP_OPTION_SPACE - remaining; +} + +static int get_nip_mss(const struct sock *sk, struct dst_entry *dst, struct request_sock *req) +{ + struct inet_request_sock *ireq = inet_rsk(req); + struct tcp_sock *tp = tcp_sk(sk); + u16 user_mss; + int mss; + int nip_hdr_len; + int nip_mss; + u32 mtu; + + mss = dst_metric_advmss(dst); + user_mss = READ_ONCE(tp->rx_opt.user_mss); + if (user_mss && user_mss < mss) + mss = user_mss; + + mtu = dst_mtu(dst); + nip_hdr_len = get_nip_hdr_len(NIP_HDR_COMM, &ireq->ir_nip_loc_addr, &ireq->ir_nip_rmt_addr); + nip_hdr_len = nip_hdr_len == 0 ? NIP_HDR_MAX : nip_hdr_len; + nip_mss = mtu - nip_hdr_len - sizeof(struct tcphdr); + + if (nip_mss > mss) { + mss = nip_mss; + tp->advmss = mss; + } + + return mss; +} + +/* Function + * The SYN + ACK segment is constructed based on the current transport control block, + * routing information, and request information. + * Parameter + * sk: transmission control block. + * dst: routing. + * req: Request connection control block. + * foc: Fast Open option. + * synack_type: type of SYN+ACK segment. 
+ */ +struct sk_buff *tcp_nip_make_synack(const struct sock *sk, struct dst_entry *dst, + struct request_sock *req, + struct tcp_fastopen_cookie *foc, + enum tcp_synack_type synack_type) +{ + struct inet_request_sock *ireq = inet_rsk(req); + struct tcp_md5sig_key *md5 = NULL; + struct tcp_nip_out_options opts; + struct sk_buff *skb; + int tcp_header_size; + struct tcphdr *th; + int mss; + unsigned short check = 0; + + skb = alloc_skb(MAX_TCP_HEADER, 0); + if (unlikely(!skb)) { + dst_release(dst); + return NULL; + } + + /* Reserve space for headers. */ + skb_reserve(skb, MAX_TCP_HEADER); + + switch (synack_type) { + case TCP_SYNACK_NORMAL: + /* Release the original SKB and treat itself as the SKB of the current SK */ + skb_set_owner_w(skb, req_to_sk(req)); + break; + default: + break; + } + skb_dst_set(skb, dst); + /* set skb priority from sk */ + skb->priority = sk->sk_priority; + + mss = get_nip_mss(sk, dst, req); + + /* Clear the options and set the associated timestamp */ + memset(&opts, 0, sizeof(opts)); + skb->skb_mstamp_ns = tcp_clock_us(); + + /* Get the TCP header size, then set the size and reset the transport layer header */ + skb_set_hash(skb, tcp_rsk(req)->txhash, PKT_HASH_TYPE_L4); + tcp_header_size = tcp_nip_synack_options(req, mss, skb, &opts, md5, + foc, synack_type) + sizeof(*th); + skb_push(skb, tcp_header_size); + skb_reset_transport_header(skb); + + /* Clear the TCP header and set the fields of the TCP header */ + th = (struct tcphdr *)skb->data; + memset(th, 0, sizeof(struct tcphdr)); + th->syn = 1; + th->ack = 1; + if (inet_rsk(req)->ecn_ok) + th->ece = 1; + th->source = htons(ireq->ir_num); + th->dest = ireq->ir_rmt_port; + skb->ip_summed = CHECKSUM_PARTIAL; + th->seq = htonl(tcp_rsk(req)->snt_isn); + th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt); + th->check = 0; + + th->window = htons(min(req->rsk_rcv_wnd, 65535U)); + + tcp_nip_options_write((__be32 *)(th + 1), NULL, &opts); + /* TCP data offset, divided by 4 because doff is a 32-bit word + * 
That is, words four bytes long are counted in units + */ + th->doff = (tcp_header_size >> 2); + __TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS); + + /* Fill in checksum */ + check = nip_get_output_checksum_tcp(skb, ireq->ir_nip_loc_addr, ireq->ir_nip_rmt_addr); + th->check = htons(check); + + /* Do not fool tcpdump (if any), clean our debris */ + skb->tstamp = 0; + return skb; +} + +/* Function + * Send SKB packets with SYN+ACK segments to the network layer. + * Parameter + * req: Request connection control block. + * skb: Transfer control block buffer. + */ +int __nip_send_synack(struct request_sock *req, struct sk_buff *skb) +{ + struct inet_request_sock *ireq = inet_rsk(req); /* 连接请求块 */ + int err; + int csummode = CHECKSUM_NONE; + struct nip_addr *saddr, *daddr; + struct nip_hdr_encap head = {0}; + unsigned char hdr_buf[NIP_HDR_MAX]; /* Cache the newIP header */ + + skb->protocol = htons(ETH_P_NEWIP); + skb->ip_summed = csummode; + skb->csum = 0; + saddr = &ireq->ir_nip_loc_addr; + daddr = &ireq->ir_nip_rmt_addr; + + head.saddr = *saddr; + head.daddr = *daddr; + head.ttl = NIP_DEFAULT_TTL; + head.nexthdr = IPPROTO_TCP; + head.hdr_buf = hdr_buf; + nip_hdr_comm_encap(&head); + head.total_len = head.hdr_buf_pos + skb->len; + nip_update_total_len(&head, htons(head.total_len)); + + skb_push(skb, head.hdr_buf_pos); + memcpy(skb->data, head.hdr_buf, head.hdr_buf_pos); + skb_reset_network_header(skb); + NIPCB(skb)->srcaddr = *saddr; + NIPCB(skb)->dstaddr = *daddr; + NIPCB(skb)->nexthdr = head.nexthdr; + + head.total_len = skb->len; + err = nip_send_skb(skb); + if (err) + nip_dbg("failed to send skb, skb->len=%u", head.total_len); + else + nip_dbg("send skb ok, skb->len=%u", head.total_len); + + return err; +} + +int nip_send_synack(struct request_sock *req, struct sk_buff *skb) +{ + return __nip_send_synack(req, skb); +} + +/* Function: + * Creates a subtransport block to complete the establishment of the three-way handshake + * Parameter: + * parent: indicates the 
parent transmission control block + * child: indicates the child transmission control block + * skb: Transfer control block buffer + */ +int tcp_nip_child_process(struct sock *parent, struct sock *child, + struct sk_buff *skb) +{ + int ret = 0; + int state = child->sk_state; + /* Child is not occupied by the user process */ + if (!sock_owned_by_user(child)) { + ret = tcp_nip_rcv_state_process(child, skb); + /* At this point the state of the child has been migrated, + * waking up the process on the listening socket, + * which may be blocked due to Accept + */ + if (state == TCP_SYN_RECV && child->sk_state != state) + parent->sk_data_ready(parent); + } else { + __sk_add_backlog(child, skb); + } + bh_unlock_sock(child); + sock_put(child); + return ret; +} + +static inline __u32 tcp_nip_acceptable_seq(const struct sock *sk) +{ + const struct tcp_sock *tp = tcp_sk(sk); + + if (!before(tcp_wnd_end(tp), tp->snd_nxt)) + return tp->snd_nxt; + else + return tcp_wnd_end(tp); +} + +/* Function: + * The client sends an ACK + * Parameter: + * sk: transmission control block + * rcv_nxt: serial number to be accepted + */ +void __tcp_nip_send_ack(struct sock *sk, u32 rcv_nxt) +{ + struct sk_buff *buff; + + if (sk->sk_state == TCP_CLOSE) + return; + + buff = alloc_skb(MAX_TCP_HEADER, + sk_gfp_mask(sk, GFP_ATOMIC | __GFP_NOWARN)); + + /* Reserve space for the header. */ + skb_reserve(buff, MAX_TCP_HEADER); + /* Initialize SKB without data */ + tcp_nip_init_nondata_skb(buff, tcp_nip_acceptable_seq(sk), TCPHDR_ACK); + + /* Mark pure ack,skb->truesize set to 2 */ + skb_set_tcp_pure_ack(buff); + + /* Record the timestamp and send the SKB. 
*/ + __tcp_nip_transmit_skb(sk, buff, 0, (__force gfp_t)0, rcv_nxt); +} + +void tcp_nip_send_ack(struct sock *sk) +{ + __tcp_nip_send_ack(sk, tcp_sk(sk)->rcv_nxt); +} + +void tcp_nip_send_fin(struct sock *sk) +{ + struct sk_buff *skb; + struct sk_buff *tskb = tcp_write_queue_tail(sk); + struct tcp_sock *tp = tcp_sk(sk); + u32 cur_mss; + + nip_dbg("send fin"); + /* Set the fin position of the last packet to 1 */ + if (tskb && tcp_nip_send_head(sk)) { +coalesce: + TCP_SKB_CB(tskb)->tcp_flags |= TCPHDR_FIN; + TCP_SKB_CB(tskb)->end_seq++; + tp->write_seq++; + } else { + skb = alloc_skb_fclone(MAX_TCP_HEADER, sk->sk_allocation); + if (unlikely(!skb)) { + if (tskb) + goto coalesce; + return; + } + skb_reserve(skb, MAX_TCP_HEADER); + + tcp_nip_init_nondata_skb(skb, tp->write_seq, + TCPHDR_ACK | TCPHDR_FIN); + tcp_nip_queue_skb(sk, skb); + } + + cur_mss = tcp_nip_current_mss(sk); // TCP_BASE_MSS + __tcp_nip_push_pending_frames(sk, cur_mss, TCP_NAGLE_OFF); +} + +void tcp_nip_send_active_reset(struct sock *sk, gfp_t priority) +{ + struct sk_buff *skb; + + nip_dbg("send rst"); + /* NOTE: No TCP options attached and we never retransmit this. */ + skb = alloc_skb(MAX_TCP_HEADER, priority); + if (!skb) + /* If you add log here, there will be an alarm: + * WARNING: Possible unnecessary 'out of memory' message + */ + return; + + /* Reserve space for headers and prepare control bits. */ + skb_reserve(skb, MAX_TCP_HEADER); + tcp_nip_init_nondata_skb(skb, tcp_nip_acceptable_seq(sk), + TCPHDR_ACK | TCPHDR_RST); + /* Send it off. 
*/ + tcp_nip_transmit_skb(sk, skb, 0, priority); +} + +static bool tcp_nip_snd_wnd_test(const struct tcp_sock *tp, + const struct sk_buff *skb, + unsigned int cur_mss) +{ + u32 end_seq = TCP_SKB_CB(skb)->end_seq; + + if (skb->len > cur_mss) + end_seq = TCP_SKB_CB(skb)->seq + cur_mss; + + return !after(end_seq, tcp_wnd_end(tp)); +} + +static void tcp_nip_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_now) +{ + if (skb->len <= mss_now || skb->ip_summed == CHECKSUM_NONE) { + /* Avoid the costly divide in the normal + * non-TSO case. + */ + tcp_skb_pcount_set(skb, 1); + TCP_SKB_CB(skb)->tcp_gso_size = 0; + } else { + tcp_skb_pcount_set(skb, DIV_ROUND_UP(skb->len, mss_now)); + TCP_SKB_CB(skb)->tcp_gso_size = mss_now; + } +} + +static int tcp_nip_init_tso_segs(struct sk_buff *skb, unsigned int mss_now) +{ + int tso_segs = tcp_skb_pcount(skb); + + if (!tso_segs || (tso_segs > 1 && tcp_skb_mss(skb) != mss_now)) { + tcp_nip_set_skb_tso_segs(skb, mss_now); + tso_segs = tcp_skb_pcount(skb); + } + return tso_segs; +} + +static bool tcp_nip_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, + int push_one, gfp_t gfp) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct tcp_nip_common *ntp = &tcp_nip_sk(sk)->common; + struct sk_buff *skb; + u32 snd_num = get_nip_tcp_snd_win_enable() ? 
(ntp->nip_ssthresh / mss_now) : 0xFFFFFFFF; + u32 last_nip_ssthresh = ntp->nip_ssthresh; + static const char * const str[] = {"can`t send pkt because no window", + "have window to send pkt"}; + + tcp_nip_keepalive_enable(sk); + ntp->idle_ka_probes_out = 0; + + tcp_mstamp_refresh(tp); + + if (tp->rcv_tstamp) { + u32 tstamp = tcp_jiffies32 - tp->rcv_tstamp; + + if (tstamp >= get_ack_to_nxt_snd_tstamp()) { + ntp->nip_ssthresh = get_ssthresh_low_min(); + snd_num = ntp->nip_ssthresh / mss_now; + ssthresh_dbg("new snd tstamp %u >= %u, ssthresh %u to %u, snd_num=%u", + tstamp, get_ack_to_nxt_snd_tstamp(), + last_nip_ssthresh, ntp->nip_ssthresh, snd_num); + } + } + + while ((skb = tcp_nip_send_head(sk)) && (snd_num--)) { + bool snd_wnd_ready; + + tcp_nip_init_tso_segs(skb, mss_now); + snd_wnd_ready = tcp_nip_snd_wnd_test(tp, skb, mss_now); + nip_dbg("%s, skb->len=%u", (snd_wnd_ready ? str[1] : str[0]), skb->len); + if (unlikely(!snd_wnd_ready)) + break; + + if (unlikely(tcp_nip_transmit_skb(sk, skb, 1, gfp))) + break; + + tcp_nip_event_new_data_sent(sk, skb); + + if (push_one) + break; + } + return !tp->packets_out && tcp_nip_send_head(sk); +} + +int tcp_nip_rtx_synack(const struct sock *sk, struct request_sock *req) +{ + const struct tcp_request_sock_ops *af_ops = tcp_rsk(req)->af_specific; + int res; + struct dst_entry *dst; + + dst = af_ops->route_req(sk, NULL, req); + tcp_rsk(req)->txhash = net_tx_rndhash(); + + res = af_ops->send_synack(sk, dst, NULL, req, NULL, TCP_SYNACK_NORMAL, NULL); + + return res; +} + +static void tcp_nip_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int decr) +{ + struct tcp_sock *tp = tcp_sk(sk); + + tp->packets_out -= decr; +} + +int __tcp_nip_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) +{ + struct tcp_sock *tp = tcp_sk(sk); + unsigned int cur_mss; + int len, err; + + if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) { + if (unlikely(before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))) { + WARN_ON_ONCE(1); + return 
-EINVAL; + } + if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq)) + return -ENOMEM; + } + + cur_mss = tcp_nip_current_mss(sk); + + if (!before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp)) && + TCP_SKB_CB(skb)->seq != tp->snd_una) + return -EAGAIN; + + len = cur_mss * segs; + if (skb->len > len) { + if (tcp_fragment(sk, TCP_FRAG_IN_WRITE_QUEUE, skb, len, cur_mss, GFP_ATOMIC)) + return -ENOMEM; /* We'll try again later. */ + } else { + int diff = tcp_skb_pcount(skb); + + tcp_nip_set_skb_tso_segs(skb, cur_mss); + diff -= tcp_skb_pcount(skb); + if (diff) + tcp_nip_adjust_pcount(sk, skb, diff); + } + + err = tcp_nip_transmit_skb(sk, skb, 1, GFP_ATOMIC); + if (likely(!err)) { + segs = tcp_skb_pcount(skb); + + tp->total_retrans += segs; + } + return err; +} + +int tcp_nip_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) +{ + struct tcp_sock *tp = tcp_sk(sk); + int err = __tcp_nip_retransmit_skb(sk, skb, segs); + + if (err == 0) { + TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS; + tp->retrans_out += tcp_skb_pcount(skb); + + /* Save stamp of the first retransmit. 
*/ + if (!tp->retrans_stamp) + tp->retrans_stamp = tcp_skb_timestamp(skb); + } else if (err != -EBUSY) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL); + } + + return err; +} + +#define TCP_NIP_DEFERRED_ALL ((1UL << TCP_TSQ_DEFERRED) | \ + (1UL << TCP_NIP_WRITE_TIMER_DEFERRED) | \ + (1UL << TCP_NIP_DELACK_TIMER_DEFERRED) | \ + (1UL << TCP_MTU_REDUCED_DEFERRED)) + +void tcp_nip_release_cb(struct sock *sk) +{ + unsigned long flags, nflags; + + /* perform an atomic operation only if at least one flag is set */ + do { + flags = sk->sk_tsq_flags; + if (!(flags & TCP_NIP_DEFERRED_ALL)) + return; + nflags = flags & ~TCP_NIP_DEFERRED_ALL; + } while (cmpxchg(&sk->sk_tsq_flags, flags, nflags) != flags); + + sock_release_ownership(sk); + if (flags & (1UL << TCP_NIP_WRITE_TIMER_DEFERRED)) { + tcp_nip_write_timer_handler(sk); + __sock_put(sk); + } + if (flags & (1UL << TCP_NIP_DELACK_TIMER_DEFERRED)) { + tcp_nip_delack_timer_handler(sk); + __sock_put(sk); + } + if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) { + inet_csk(sk)->icsk_af_ops->mtu_reduced(sk); + __sock_put(sk); + } +} + +enum nip_probe_type { + NIP_PROBE0 = 0, + NIP_KEEPALIVE = 1, + NIP_UNKNOWN = 2, + NIP_PROBE_MAX, +}; + +static int tcp_nip_xmit_probe_skb(struct sock *sk, int urgent, int mib) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct sk_buff *skb; + int ret; + int probe_type; + const char *str[NIP_PROBE_MAX] = {"probe0", "keepalive", "unknown"}; + + if (mib == LINUX_MIB_TCPWINPROBE) + probe_type = NIP_PROBE0; + else if (mib == LINUX_MIB_TCPKEEPALIVE) + probe_type = NIP_KEEPALIVE; + else + probe_type = NIP_UNKNOWN; + + /* We don't queue it, tcp_transmit_skb() sets ownership. */ + skb = alloc_skb(MAX_TCP_HEADER, + sk_gfp_mask(sk, GFP_ATOMIC | __GFP_NOWARN)); + if (!skb) + return -1; + + /* Reserve space for headers and set control bits. 
*/ + skb_reserve(skb, MAX_TCP_HEADER); + + tcp_nip_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK); + + NET_INC_STATS(sock_net(sk), mib); + ret = tcp_nip_transmit_skb(sk, skb, 0, (__force gfp_t)0); + nip_dbg("send %s probe packet, ret=%d", str[probe_type], ret); + return ret; +} + +int tcp_nip_write_wakeup(struct sock *sk, int mib) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct sk_buff *skb; + + if (sk->sk_state == TCP_CLOSE) { + nip_dbg("no probe0 when tcp close"); + return -1; + } + + skb = tcp_nip_send_head(sk); + /* If the serial number of the next packet is in the sending window */ + if (skb && before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))) { + int err; + unsigned int mss = tcp_nip_current_mss(sk); + unsigned int seg_size = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq; + + if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq)) + tp->pushed_seq = TCP_SKB_CB(skb)->end_seq; + /* If the current window size is not enough to send a complete packet */ + if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq) { + TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH; + err = tcp_fragment(sk, TCP_FRAG_IN_WRITE_QUEUE, + skb, seg_size, mss, GFP_ATOMIC); + if (err) { + nip_dbg("tcp_fragment return err=%d", err); + return -1; + } + } + err = tcp_nip_transmit_skb(sk, skb, 1, GFP_ATOMIC); + if (!err) + tcp_nip_event_new_data_sent(sk, skb); + nip_dbg("transmit skb %s", (!err ? 
"ok" : "fail")); + return err; + } else { + return tcp_nip_xmit_probe_skb(sk, 0, mib); + } +} + +/* The 0 window probe packet is sent */ +void tcp_nip_send_probe0(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); + struct net *net = sock_net(sk); + unsigned long when; + /* An ACK packet with snd_UNa-1 and length 0 is sent as a zero-window detection packet */ + int err = tcp_nip_write_wakeup(sk, LINUX_MIB_TCPWINPROBE); + + /* If there are packets to be sent on the network and no packets to be + * sent in the send queue, the packet is returned directly + */ + if (tp->packets_out || !tcp_nip_send_head(sk)) { + /* Cancel probe timer, if it is not required. */ + nip_dbg("packets_out(%u) not 0 or send_head is NULL, cancel probe0 timer", + tp->packets_out); + icsk->icsk_probes_out = 0; + icsk->icsk_backoff = 0; + return; + } + + /* Err: 0 succeeded, -1 failed */ + icsk->icsk_probes_out++; /* Number of probes +1 */ + if (err <= 0) { + if (icsk->icsk_backoff < net->ipv4.sysctl_tcp_retries2) + icsk->icsk_backoff++; + when = tcp_probe0_when(sk, TCP_RTO_MAX); + nip_dbg("probe0 %s, probes_out=%u, probe0_base=%lu, icsk_backoff=%u, when=%lu", + (!err ? 
"send ok" : "send fail"), icsk->icsk_probes_out, + tcp_probe0_base(sk), icsk->icsk_backoff, when); + } else { + /* Makes the zero window probe timer time out faster */ + when = TCP_RESOURCE_PROBE_INTERVAL; + nip_dbg("probe0 not sent due to local congestion, make timer out faster"); + } + + nip_dbg("restart probe0 timer, when=%lu, icsk_backoff=%u, probe_max=%u", + when, icsk->icsk_backoff, TCP_RTO_MAX); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, when, TCP_RTO_MAX); +} diff --git a/newip/src/linux-5.10/net/newip/tcp_nip_parameter.c b/newip/src/linux-5.10/net/newip/tcp_nip_parameter.c new file mode 100644 index 0000000000000000000000000000000000000000..9b24e26b9d9b5ba43e96b98e5bebd77dc181de19 --- /dev/null +++ b/newip/src/linux-5.10/net/newip/tcp_nip_parameter.c @@ -0,0 +1,330 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. + * + * NewIP INET + * An implementation of the TCP/IP protocol suite for the LINUX + * operating system. NewIP INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Definitions for the NewIP parameter module. 
+ */ +#define pr_fmt(fmt) KBUILD_MODNAME ": [%s:%d] " fmt, __func__, __LINE__ + +#include +#include +#include +#include +#include +#include +#include +#include + +/*********************************************************************************************/ +/* Newip protocol name */ +/*********************************************************************************************/ +int g_af_ninet = AF_NINET; +module_param_named(af_ninet, g_af_ninet, int, 0444); + +/*********************************************************************************************/ +/* Rto timeout timer period (HZ/n) */ +/*********************************************************************************************/ +/* RTT RTO in the small-delay scenario */ +int g_nip_rto = 5; +module_param_named(nip_rto, g_nip_rto, int, 0644); + +int get_nip_rto(void) +{ + return g_nip_rto; +} + +/*********************************************************************************************/ +/* TCP sending and receiving buffer configuration */ +/*********************************************************************************************/ +int g_nip_sndbuf = 1050000; // 1M +module_param_named(nip_sndbuf, g_nip_sndbuf, int, 0644); + +int get_nip_sndbuf(void) +{ + return g_nip_sndbuf; +} + +int g_nip_rcvbuf = 2000000; // 2M +module_param_named(nip_rcvbuf, g_nip_rcvbuf, int, 0644); + +int get_nip_rcvbuf(void) +{ + return g_nip_rcvbuf; +} + +/*********************************************************************************************/ +/* Window configuration */ +/*********************************************************************************************/ +/* Maximum receiving window */ +bool g_wscale_enable = 1; +module_param_named(wscale_enable, g_wscale_enable, bool, 0644); + +bool get_wscale_enable(void) +{ + return g_wscale_enable; +} + +/* Window scale configuration, 2^n */ +int g_wscale = 7; +module_param_named(wscale, g_wscale, int, 0644); + +int get_wscale(void) +{ + return g_wscale; +} + 
+/*********************************************************************************************/ +/* Enables the debugging of special scenarios */ +/*********************************************************************************************/ +/* After receiving n packets, an ACK packet is sent */ +int g_ack_num = 5; +module_param_named(ack_num, g_ack_num, int, 0644); + +int get_ack_num(void) +{ + return g_ack_num; +} + +/* Reset the packet sending window threshold after receiving n ACK packets */ +int g_nip_ssthresh_reset = 10000000; // 10M +module_param_named(nip_ssthresh_reset, g_nip_ssthresh_reset, int, 0644); + +int get_nip_ssthresh_reset(void) +{ + return g_nip_ssthresh_reset; +} + +/*********************************************************************************************/ +/* Retransmission parameters after ACK */ +/*********************************************************************************************/ +/* Three DUP ACK packets indicates the number of retransmission packets */ +int g_dup_ack_retrans_num = 5; +module_param_named(dup_ack_retrans_num, g_dup_ack_retrans_num, int, 0644); + +int get_dup_ack_retrans_num(void) +{ + return g_dup_ack_retrans_num; +} + +/* Common ACK Indicates the number of retransmissions */ +int g_ack_retrans_num = 5; +module_param_named(ack_retrans_num, g_ack_retrans_num, int, 0644); + +int get_ack_retrans_num(void) +{ + return g_ack_retrans_num; +} + +int g_dup_ack_snd_max = 6; +module_param_named(dup_ack_snd_max, g_dup_ack_snd_max, int, 0644); + +int get_dup_ack_snd_max(void) +{ + return g_dup_ack_snd_max; +} + +/*********************************************************************************************/ +/* RTT timestamp parameters */ +/*********************************************************************************************/ +int g_rtt_tstamp_rto_up = 100; // rtt_tstamp >= 100 ==> shorten rto +module_param_named(rtt_tstamp_rto_up, g_rtt_tstamp_rto_up, int, 0644); + +int get_rtt_tstamp_rto_up(void) +{ + return 
g_rtt_tstamp_rto_up; +} + +int g_rtt_tstamp_high = 30; // rtt_tstamp >= 30 ==> ssthresh = 100K +module_param_named(rtt_tstamp_high, g_rtt_tstamp_high, int, 0644); + +int get_rtt_tstamp_high(void) +{ + return g_rtt_tstamp_high; +} + +int g_rtt_tstamp_mid_high = 20; // rtt_tstamp >= 20 ==> ssthresh = 250K +module_param_named(rtt_tstamp_mid_high, g_rtt_tstamp_mid_high, int, 0644); + +int get_rtt_tstamp_mid_high(void) +{ + return g_rtt_tstamp_mid_high; +} + +/* rtt_tstamp >= 10 ==> ssthresh = 1M (500K ~ 1M) + * rtt_tstamp < 10 ==> ssthresh = 1.5M + */ +int g_rtt_tstamp_mid_low = 10; +module_param_named(rtt_tstamp_mid_low, g_rtt_tstamp_mid_low, int, 0644); + +int get_rtt_tstamp_mid_low(void) +{ + return g_rtt_tstamp_mid_low; +} + +int g_ack_to_nxt_snd_tstamp = 500; +module_param_named(ack_to_nxt_snd_tstamp, g_ack_to_nxt_snd_tstamp, int, 0644); + +int get_ack_to_nxt_snd_tstamp(void) +{ + return g_ack_to_nxt_snd_tstamp; +} + +/*********************************************************************************************/ +/* Window threshold parameters */ +/*********************************************************************************************/ +bool g_ssthresh_enable = 1; +module_param_named(ssthresh_enable, g_ssthresh_enable, bool, 0644); + +bool get_ssthresh_enable(void) +{ + return g_ssthresh_enable; +} + +int g_nip_ssthresh_default = 300000; // 300K +module_param_named(nip_ssthresh_default, g_nip_ssthresh_default, int, 0644); + +int get_nip_ssthresh_default(void) +{ + return g_nip_ssthresh_default; +} + +int g_ssthresh_high = 1500000; // rtt_tstamp < 10 ==> ssthresh = 1.5M +module_param_named(ssthresh_high, g_ssthresh_high, int, 0644); + +int get_ssthresh_high(void) +{ + return g_ssthresh_high; +} + +int g_ssthresh_mid_high = 1000000; // rtt_tstamp >= 10 ==> ssthresh = 1M (500K ~ 1M) +module_param_named(ssthresh_mid_high, g_ssthresh_mid_high, int, 0644); + +int get_ssthresh_mid_high(void) +{ + return g_ssthresh_mid_high; +} + +int g_ssthresh_mid_low = 250000; // 
rtt_tstamp >= 20 ==> ssthresh = 250K +module_param_named(ssthresh_mid_low, g_ssthresh_mid_low, int, 0644); + +int get_ssthresh_mid_low(void) +{ + return g_ssthresh_mid_low; +} + +int g_ssthresh_low = 100000; // rtt_tstamp >= 30 ==> ssthresh = 100K +module_param_named(ssthresh_low, g_ssthresh_low, int, 0644); + +int get_ssthresh_low(void) +{ + return g_ssthresh_low; +} + +int g_ssthresh_low_min = 10000; // rtt_tstamp >= 100 ==> ssthresh = 10K +module_param_named(ssthresh_low_min, g_ssthresh_low_min, int, 0644); + +int get_ssthresh_low_min(void) +{ + return g_ssthresh_low_min; +} + +int g_ssthresh_high_step = 1; +module_param_named(ssthresh_high_step, g_ssthresh_high_step, int, 0644); + +int get_ssthresh_high_step(void) +{ + return g_ssthresh_high_step; +} + +/*********************************************************************************************/ +/* keepalive parameters */ +/*********************************************************************************************/ +int g_nip_idle_ka_probes_out = 20; +module_param_named(nip_idle_ka_probes_out, g_nip_idle_ka_probes_out, int, 0644); + +int get_nip_idle_ka_probes_out(void) +{ + return g_nip_idle_ka_probes_out; +} + +int g_nip_keepalive_time = 25; +module_param_named(nip_keepalive_time, g_nip_keepalive_time, int, 0644); + +int get_nip_keepalive_time(void) +{ + return g_nip_keepalive_time; +} + +int g_nip_keepalive_intvl = 25; +module_param_named(nip_keepalive_intvl, g_nip_keepalive_intvl, int, 0644); + +int get_nip_keepalive_intvl(void) +{ + return g_nip_keepalive_intvl; +} + +/*********************************************************************************************/ +/* probe parameters */ +/*********************************************************************************************/ +int g_nip_probe_max = 2000; +module_param_named(nip_probe_max, g_nip_probe_max, int, 0644); + +int get_nip_probe_max(void) +{ + return g_nip_probe_max; +} + 
+/*********************************************************************************************/ +/* window mode parameters */ +/*********************************************************************************************/ +bool g_nip_tcp_snd_win_enable; +module_param_named(nip_tcp_snd_win_enable, g_nip_tcp_snd_win_enable, bool, 0644); + +bool get_nip_tcp_snd_win_enable(void) +{ + return g_nip_tcp_snd_win_enable; +} + +bool g_nip_tcp_rcv_win_enable = true; +module_param_named(nip_tcp_rcv_win_enable, g_nip_tcp_rcv_win_enable, bool, 0644); + +bool get_nip_tcp_rcv_win_enable(void) +{ + return g_nip_tcp_rcv_win_enable; +} + +/*********************************************************************************************/ +/* nip debug parameters */ +/*********************************************************************************************/ +/* Debugging for control DEBUG */ +bool g_nip_debug; +module_param_named(nip_debug, g_nip_debug, bool, 0644); + +bool get_nip_debug(void) +{ + return g_nip_debug; +} + +/* Debugging of threshold change */ +bool g_rtt_ssthresh_debug; +module_param_named(rtt_ssthresh_debug, g_rtt_ssthresh_debug, bool, 0644); + +bool get_rtt_ssthresh_debug(void) +{ + return g_rtt_ssthresh_debug; +} + +/* Debugging of packet retransmission after ACK */ +bool g_ack_retrans_debug; +module_param_named(ack_retrans_debug, g_ack_retrans_debug, bool, 0644); + +bool get_ack_retrans_debug(void) +{ + return g_ack_retrans_debug; +} + diff --git a/newip/src/linux-5.10/net/newip/tcp_nip_parameter.h b/newip/src/linux-5.10/net/newip/tcp_nip_parameter.h new file mode 100644 index 0000000000000000000000000000000000000000..a9cea6cdb8a25d94763e0f396e305e634a78e429 --- /dev/null +++ b/newip/src/linux-5.10/net/newip/tcp_nip_parameter.h @@ -0,0 +1,71 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. + * + * NewIP INET + * An implementation of the TCP/IP protocol suite for the LINUX + * operating system. 
NewIP INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Definitions for the NewIP parameter module. + */ +#ifndef _TCP_NIP_PARAMETER_H +#define _TCP_NIP_PARAMETER_H + +int get_nip_rto(void); +int get_nip_sndbuf(void); +int get_nip_rcvbuf(void); +bool get_wscale_enable(void); +int get_wscale(void); +int get_ack_num(void); +int get_nip_ssthresh_reset(void); +int get_dup_ack_retrans_num(void); +int get_ack_retrans_num(void); +int get_dup_ack_snd_max(void); +int get_rtt_tstamp_rto_up(void); +int get_rtt_tstamp_high(void); +int get_rtt_tstamp_mid_high(void); +int get_rtt_tstamp_mid_low(void); +int get_ack_to_nxt_snd_tstamp(void); +bool get_ssthresh_enable(void); +int get_nip_ssthresh_default(void); +int get_ssthresh_high(void); +int get_ssthresh_mid_high(void); +int get_ssthresh_mid_low(void); +int get_ssthresh_low(void); +int get_ssthresh_low_min(void); +int get_ssthresh_high_step(void); +int get_nip_idle_ka_probes_out(void); +int get_nip_keepalive_time(void); +int get_nip_keepalive_intvl(void); +int get_nip_probe_max(void); +bool get_nip_tcp_snd_win_enable(void); +bool get_nip_tcp_rcv_win_enable(void); +bool get_nip_debug(void); +bool get_rtt_ssthresh_debug(void); +bool get_ack_retrans_debug(void); + +/*********************************************************************************************/ +/* nip debug parameters */ +/*********************************************************************************************/ +#define nip_dbg(fmt, ...) \ +do { \ + if (get_nip_debug()) \ + pr_crit(fmt, ##__VA_ARGS__); \ +} while (0) + +/* Debugging of threshold change */ +#define ssthresh_dbg(fmt, ...) \ +do { \ + if (get_rtt_ssthresh_debug()) \ + pr_crit(fmt, ##__VA_ARGS__); \ +} while (0) + +/* Debugging of packet retransmission after ACK */ +#define retrans_dbg(fmt, ...) 
\ +do { \ + if (get_ack_retrans_debug()) \ + pr_crit(fmt, ##__VA_ARGS__); \ +} while (0) + +#endif /* _TCP_NIP_PARAMETER_H */ diff --git a/newip/src/linux-5.10/net/newip/tcp_nip_timer.c b/newip/src/linux-5.10/net/newip/tcp_nip_timer.c new file mode 100644 index 0000000000000000000000000000000000000000..ad913505a7c31a236e68f44fcf2d0f4a3b939c3b --- /dev/null +++ b/newip/src/linux-5.10/net/newip/tcp_nip_timer.c @@ -0,0 +1,414 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. + * + * NewIP INET + * An implementation of the TCP/IP protocol suite for the LINUX + * operating system. NewIP INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Implementation of the Transmission Control Protocol(TCP). + * + * Based on net/ipv4/tcp_timer.c + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": [%s:%d] " fmt, __func__, __LINE__ + +#include +#include +#include "tcp_nip_parameter.h" + +/** + * tcp_nip_orphan_retries() - Returns maximal number of retries on an orphaned socket + * @sk: Pointer to the current socket. + * @alive: bool, socket alive state + */ +static int tcp_nip_orphan_retries(struct sock *sk, bool alive) +{ + int retries = sock_net(sk)->ipv4.sysctl_tcp_orphan_retries; /* May be zero. */ + + /* We know from an ICMP that something is wrong. */ + if (sk->sk_err_soft && !alive) + retries = 0; + + /* However, if socket sent something recently, select some safe + * number of retries. 8 corresponds to >100 seconds with minimal + * RTO of 200msec. 
 + */
+	if (retries == 0 && alive)
+		retries = 8;
+	return retries;
+}
+
+void tcp_nip_delack_timer_handler(struct sock *sk)
+{
+	struct inet_connection_sock *icsk = inet_csk(sk);
+
+	if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) ||
+	    !(icsk->icsk_ack.pending & ICSK_ACK_TIMER))
+		goto out;
+
+	if (time_after(icsk->icsk_ack.timeout, jiffies)) {
+		sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout);
+		goto out;
+	}
+	icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER;
+
+	if (inet_csk_ack_scheduled(sk)) {
+		icsk->icsk_ack.ato = TCP_ATO_MIN;
+		tcp_mstamp_refresh(tcp_sk(sk));
+		tcp_nip_send_ack(sk);
+		__NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKS);
+	}
+
+out:;
+}
+
+static void tcp_nip_write_err(struct sock *sk)
+{
+	sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT;
+	sk->sk_error_report(sk);
+	/* Releasing TCP Resources */
+	tcp_nip_done(sk);
+	__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONTIMEOUT);
+}
+
+static void tcp_nip_delack_timer(struct timer_list *t)
+{
+	struct inet_connection_sock *icsk =
+			from_timer(icsk, t, icsk_delack_timer);
+	struct sock *sk = &icsk->icsk_inet.sk;
+
+	bh_lock_sock(sk);
+	if (!sock_owned_by_user(sk)) {
+		tcp_nip_delack_timer_handler(sk);
+	} else {
+		__NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
+		/* delegate our work to tcp_release_cb() */
+		if (!test_and_set_bit(TCP_NIP_DELACK_TIMER_DEFERRED, &sk->sk_tsq_flags))
+			sock_hold(sk);
+	}
+	bh_unlock_sock(sk);
+	sock_put(sk);
+}
+
+static bool retransmits_nip_timed_out(struct sock *sk,
+				      unsigned int boundary,
+				      unsigned int timeout,
+				      bool syn_set)
+{
+	/* Newip does not support the calculation of the timeout period based on the timestamp. 
+ * Currently, it determines whether the timeout period is based on + * the retransmission times + */ + nip_dbg("icsk->retransmits=%u, boundary=%u", + inet_csk(sk)->icsk_retransmits, boundary); + return inet_csk(sk)->icsk_retransmits > boundary; +} + +static int tcp_nip_write_timeout(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + struct net *net = sock_net(sk); + int retry_until; + bool syn_set = false; + + if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { + retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries; + syn_set = true; + } else { + retry_until = net->ipv4.sysctl_tcp_retries2; + if (sock_flag(sk, SOCK_DEAD)) { + const bool alive = icsk->icsk_rto < TCP_RTO_MAX; + + /* In the case of SOCK_DEAD, the retry_until value is smaller */ + retry_until = tcp_nip_orphan_retries(sk, alive); + } + } + + if (retransmits_nip_timed_out(sk, retry_until, + syn_set ? 0 : icsk->icsk_user_timeout, syn_set)) { + nip_dbg("tcp retransmit time out"); + tcp_nip_write_err(sk); + return 1; + } + return 0; +} + +void tcp_nip_retransmit_timer(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct tcp_nip_common *ntp = &tcp_nip_sk(sk)->common; + struct inet_connection_sock *icsk = inet_csk(sk); + struct sk_buff *skb = tcp_write_queue_head(sk); + struct tcp_skb_cb *scb = TCP_SKB_CB(skb); + struct net *net = sock_net(sk); + u32 icsk_rto_last; + + if (!tp->packets_out) + return; + + if (tcp_nip_write_queue_empty(sk)) + return; + + tp->tlp_high_seq = 0; + + if (tcp_nip_write_timeout(sk)) + return; + + if (tcp_nip_retransmit_skb(sk, skb, 1) > 0) { + if (!icsk->icsk_retransmits) + icsk->icsk_retransmits = 1; + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + min(icsk->icsk_rto, TCP_RESOURCE_PROBE_INTERVAL), + TCP_RTO_MAX); + + nip_dbg("seq %u retransmit fail, win=%u, rto=%u, pkt_out=%u, icsk_backoff=%u", + scb->seq, ntp->nip_ssthresh, + icsk->icsk_rto, tp->packets_out, icsk->icsk_backoff); + return; + } + + if 
(icsk->icsk_backoff < net->ipv4.sysctl_tcp_retries2) + icsk->icsk_backoff++; + icsk->icsk_retransmits++; + + icsk_rto_last = icsk->icsk_rto; + /* If stream is thin, use linear timeouts. Since 'icsk_backoff' is + * used to reset timer, set to 0. Recalculate 'icsk_rto' as this + * might be increased if the stream oscillates between thin and thick, + * thus the old value might already be too high compared to the value + * set by 'tcp_set_rto' in tcp_input.c which resets the rto without + * backoff. Limit to TCP_THIN_LINEAR_RETRIES before initiating + * exponential backoff behaviour to avoid continue hammering + * linear-timeout retransmissions into a black hole + */ + if (sk->sk_state == TCP_ESTABLISHED && + (tp->thin_lto || net->ipv4.sysctl_tcp_thin_linear_timeouts) && + tcp_stream_is_thin(tp) && + icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) { + icsk->icsk_backoff = 0; + icsk->icsk_rto = min(__tcp_set_rto(tp), TCP_RTO_MAX); + } else { + /* Use normal (exponential) backoff */ + icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); + } + + nip_dbg("seq %u, win[%u-%u] rto[%u-%u] pkt_out=%u, icsk_backoff=%u, retransmits=%u", + scb->seq, ntp->nip_ssthresh, get_ssthresh_low(), + icsk_rto_last, icsk->icsk_rto, tp->packets_out, icsk->icsk_backoff, + icsk->icsk_retransmits); + + ntp->nip_ssthresh = get_ssthresh_low(); + + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); +} + +void tcp_nip_probe_timer(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); + int max_probes; + int icsk_backoff; + int icsk_probes_out; + + if (tp->packets_out || !tcp_nip_send_head(sk)) { + icsk->icsk_probes_out = 0; + icsk->icsk_probes_tstamp = 0; + icsk->icsk_backoff = 0; /* V4 no modified this line */ + nip_dbg("packets_out(%u) not 0 or send_head is NULL, cancel probe0 timer", + tp->packets_out); + return; + } + + /* default: sock_net(sk)->ipv4.sysctl_tcp_retries2 */ + max_probes = get_nip_probe_max(); /* 
fix session auto close */ + + if (sock_flag(sk, SOCK_DEAD)) { + const bool alive = inet_csk_rto_backoff(icsk, TCP_RTO_MAX) < TCP_RTO_MAX; + + max_probes = tcp_nip_orphan_retries(sk, alive); + nip_dbg("sock dead, icsk_backoff=%u, max_probes=%u, alive=%u", + icsk->icsk_backoff, max_probes, alive); + if (!alive && icsk->icsk_backoff >= max_probes) { + nip_dbg("will close session, icsk_backoff=%u, max_probes=%u", + icsk->icsk_backoff, max_probes); + goto abort; + } + } + + if (icsk->icsk_probes_out >= max_probes) { +abort: icsk_backoff = icsk->icsk_backoff; + icsk_probes_out = icsk->icsk_probes_out; + nip_dbg("close session, probes_out=%u, icsk_backoff=%u, max_probes=%u", + icsk_probes_out, icsk_backoff, max_probes); + tcp_nip_write_err(sk); + } else { + icsk_backoff = icsk->icsk_backoff; + icsk_probes_out = icsk->icsk_probes_out; + nip_dbg("will send probe0, probes_out=%u, icsk_backoff=%u, max_probes=%u", + icsk_probes_out, icsk_backoff, max_probes); + /* Only send another probe if we didn't close things up. 
*/ + tcp_nip_send_probe0(sk); + } +} + +void tcp_nip_write_timer_handler(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + int event; + + if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) || !icsk->icsk_pending) + return; + + if (time_after(icsk->icsk_timeout, jiffies)) { + sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout); + return; + } + tcp_mstamp_refresh(tcp_sk(sk)); + event = icsk->icsk_pending; + + switch (event) { + case ICSK_TIME_RETRANS: + icsk->icsk_pending = 0; + tcp_nip_retransmit_timer(sk); + break; + case ICSK_TIME_PROBE0: + icsk->icsk_pending = 0; + tcp_nip_probe_timer(sk); + break; + default: + break; + } +} + +static void tcp_nip_write_timer(struct timer_list *t) +{ + struct inet_connection_sock *icsk = + from_timer(icsk, t, icsk_retransmit_timer); + struct sock *sk = &icsk->icsk_inet.sk; + + bh_lock_sock(sk); + if (!sock_owned_by_user(sk)) { + tcp_nip_write_timer_handler(sk); + } else { + /* delegate our work to tcp_release_cb() */ + if (!test_and_set_bit(TCP_NIP_WRITE_TIMER_DEFERRED, &sk->sk_tsq_flags)) + sock_hold(sk); + } + bh_unlock_sock(sk); + sock_put(sk); +} + +static bool tcp_nip_keepalive_is_timeout(struct sock *sk, u32 elapsed) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); + struct tcp_nip_common *ntp = &tcp_nip_sk(sk)->common; + u32 keepalive_time = keepalive_time_when(tp); + bool is_timeout = false; + + /* keepalive set by setsockopt */ + if (keepalive_time > HZ) { + /* If the TCP_USER_TIMEOUT option is enabled, use that + * to determine when to timeout instead. 
+ */ + if ((icsk->icsk_user_timeout != 0 && + elapsed >= msecs_to_jiffies(icsk->icsk_user_timeout) && + ntp->nip_keepalive_out > 0) || + (icsk->icsk_user_timeout == 0 && + ntp->nip_keepalive_out >= keepalive_probes(tp))) { + nip_dbg("normal keepalive timeout, keepalive_out=%u", + ntp->nip_keepalive_out); + tcp_nip_write_err(sk); + is_timeout = true; + } + } + + return is_timeout; +} + +static void tcp_nip_keepalive_timer(struct timer_list *t) +{ + struct sock *sk = from_timer(sk, t, sk_timer); + struct tcp_sock *tp = tcp_sk(sk); + struct tcp_nip_common *ntp = &tcp_nip_sk(sk)->common; + u32 elapsed; + + /* Only process if socket is not in use. */ + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { + /* Try again later. */ + inet_csk_reset_keepalive_timer(sk, HZ / TCP_NIP_KEEPALIVE_CYCLE_MS_DIVISOR); + goto out; + } + + if (sk->sk_state == TCP_LISTEN) { + nip_dbg("keepalive on a LISTEN"); + goto out; + } + tcp_mstamp_refresh(tp); + /* 2022-02-18 + * NewIP TCP doesn't have TIME_WAIT state, so socket in TCP_CLOSING + * uses keepalive timer to release socket. + */ + if ((sk->sk_state == TCP_FIN_WAIT2 || sk->sk_state == TCP_CLOSING) && + sock_flag(sk, SOCK_DEAD)) { + nip_dbg("finish wait, close sock, sk_state=%u", sk->sk_state); + goto death; + } + + if (!sock_flag(sk, SOCK_KEEPOPEN) || + ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT))) + goto out; + + elapsed = keepalive_time_when(tp); + + /* It is alive without keepalive 8) */ + if (tp->packets_out || !tcp_write_queue_empty(sk)) + goto resched; + + elapsed = keepalive_time_elapsed(tp); + if (elapsed >= keepalive_time_when(tp)) { + if (tcp_nip_keepalive_is_timeout(sk, elapsed)) + goto out; + + if (tcp_nip_write_wakeup(sk, LINUX_MIB_TCPKEEPALIVE) <= 0) { + ntp->nip_keepalive_out++; + ntp->idle_ka_probes_out++; + elapsed = keepalive_intvl_when(tp); + } else { + /* If keepalive was lost due to local congestion, + * try harder. 
+ */ + elapsed = TCP_RESOURCE_PROBE_INTERVAL; + } + } else { + /* It is tp->rcv_tstamp + keepalive_time_when(tp) */ + elapsed = keepalive_time_when(tp) - elapsed; + } + + sk_mem_reclaim(sk); + +resched: + inet_csk_reset_keepalive_timer(sk, elapsed); + goto out; + +death: + tcp_nip_done(sk); + +out: + tcp_nip_keepalive_disable(sk); + bh_unlock_sock(sk); + sock_put(sk); +} + +void tcp_nip_init_xmit_timers(struct sock *sk) +{ + inet_csk_init_xmit_timers(sk, &tcp_nip_write_timer, &tcp_nip_delack_timer, + &tcp_nip_keepalive_timer); +} + +void tcp_nip_clear_xmit_timers(struct sock *sk) +{ + inet_csk_clear_xmit_timers(sk); +} diff --git a/newip/src/linux-5.10/net/newip/udp.c b/newip/src/linux-5.10/net/newip/udp.c new file mode 100644 index 0000000000000000000000000000000000000000..276019b5944d11f73c722f6e08f75bb94c5910e9 --- /dev/null +++ b/newip/src/linux-5.10/net/newip/udp.c @@ -0,0 +1,464 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. + * + * NewIP INET + * An implementation of the TCP/IP protocol suite for the LINUX + * operating system. NewIP INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * The User Datagram Protocol (NewIP UDP). 
+ * + * Based on net/ipv4/udp.c + * Based on net/ipv6/udp.c + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": [%s:%d] " fmt, __func__, __LINE__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "nip_hdr.h" +#include "nip_checksum.h" +#include "tcp_nip_parameter.h" + +static u32 nip_udp_portaddr_hash(const struct net *net, + const struct nip_addr *niaddr, + u_short port) +{ + u32 hash; + u32 mix = net_hash_mix(net); + + /* use nip_addr_hash() to obtain a hash result of nip_addr */ + hash = jhash_1word(nip_addr_hash(niaddr), mix); + + return hash ^ port; +} + +/* Called during the bind & sendto procedure, bind ports */ +int nip_udp_get_port(struct sock *sk, unsigned short snum) +{ + unsigned int hash2_nulladdr, hash2_partial; + + hash2_nulladdr = nip_udp_portaddr_hash(sock_net(sk), &nip_any_addr, snum); + /* hash2_partial is the hash result of nip_addr only */ + hash2_partial = nip_udp_portaddr_hash(sock_net(sk), &sk->sk_nip_rcv_saddr, 0); + + /* precompute partial secondary hash */ + udp_sk(sk)->udp_portaddr_hash = hash2_partial; + return udp_lib_get_port(sk, snum, hash2_nulladdr); +} + +static int nip_udp_compute_score(struct sock *sk, struct net *net, + const struct nip_addr *saddr, __be16 sport, + const struct nip_addr *daddr, unsigned short hnum, + int dif, int sdif) +{ + int score = 0; + struct inet_sock *inet; + + if (!net_eq(sock_net(sk), net) || + udp_sk(sk)->udp_port_hash != hnum || + sk->sk_family != PF_NINET) + return -1; + + /* Destination port of the peer device + * In the header sent by the peer end, it is the source port + */ + inet = inet_sk(sk); + if (inet->inet_dport) { + if (inet->inet_dport != sport) + return -1; + score++; + } + + /* Source ADDRESS of the local device + * In the header sent by the peer device, it is the destination address + */ + if 
(!nip_addr_any(&sk->sk_nip_rcv_saddr)) { + if (!nip_addr_eq(&sk->sk_nip_rcv_saddr, daddr)) + return -1; + score++; + } + + /* Address of the peer device + * In the packet header sent by the peer device, is the source ADDRESS + */ + if (!nip_addr_any(&sk->sk_nip_daddr)) { + if (!nip_addr_eq(&sk->sk_nip_daddr, saddr)) + return -1; + score++; + } + + /* Check the dev index */ + if (sk->sk_bound_dev_if) { + bool dev_match = dif == sk->sk_bound_dev_if || sdif == sk->sk_bound_dev_if; + + if (!dev_match) + return -1; + score++; + } + + if (sk->sk_incoming_cpu == raw_smp_processor_id()) + score++; + return score; +} + +static struct sock *nip_udp_lib_lookup2(struct net *net, + const struct nip_addr *saddr, + u_short sport, + const struct nip_addr *daddr, + unsigned short hnum, + int dif, int sdif, + struct udp_hslot *hslot2, + struct sk_buff *skb) +{ + struct sock *sk; + struct sock *result = NULL; + int badness = -1; + + udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { + int score = nip_udp_compute_score(sk, net, saddr, sport, daddr, hnum, dif, sdif); + + if (score > badness) { + result = sk; + badness = score; + } + } + return result; +} + +/* rcu_read_lock() must be held */ +struct sock *__nip_udp_lib_lookup(struct net *net, + const struct nip_addr *saddr, __be16 sport, + const struct nip_addr *daddr, __be16 dport, + int dif, int sdif, struct udp_table *udptable, + struct sk_buff *skb) +{ + unsigned short hnum = ntohs(dport); + unsigned int hash2, slot2; + struct udp_hslot *hslot2; + struct sock *result; + + hash2 = nip_udp_portaddr_hash(net, daddr, hnum); + slot2 = hash2 & udptable->mask; + hslot2 = &udptable->hash2[slot2]; + + /* Lookup connected or non-wildcard sockets */ + result = nip_udp_lib_lookup2(net, saddr, sport, + daddr, hnum, dif, sdif, + hslot2, skb); + if (!IS_ERR_OR_NULL(result)) + goto done; + + /* Lookup wildcard sockets */ + hash2 = nip_udp_portaddr_hash(net, &nip_any_addr, hnum); + slot2 = hash2 & udptable->mask; + hslot2 = 
&udptable->hash2[slot2]; + + result = nip_udp_lib_lookup2(net, saddr, sport, + &nip_any_addr, hnum, dif, sdif, + hslot2, skb); +done: + if (IS_ERR(result)) + return NULL; + return result; +} + +static struct sock *__nip_udp_lib_lookup_skb(struct sk_buff *skb, + __be16 sport, __be16 dport, + struct udp_table *udptable) +{ + return __nip_udp_lib_lookup(dev_net(skb->dev), + &NIPCB(skb)->srcaddr, sport, + &NIPCB(skb)->dstaddr, dport, skb->skb_iif, + 0, udptable, skb); +} + +void udp_table_del(struct sock *sk) +{ + udp_lib_unhash(sk); +} + +int nip_udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + int noblock, int flags, int *addr_len) +{ + struct sk_buff *skb; + unsigned int ulen, copied; + int peeking, off, datalen; + int err; + + off = sk_peek_offset(sk, flags); + peeking = off; /* Fetch the SKB from the queue */ + skb = __skb_recv_udp(sk, flags, noblock, &off, &err); + if (!skb) + return err; + ulen = skb->len; + copied = len; + if (copied > ulen - off) + copied = ulen - off; + else if (copied < ulen) + msg->msg_flags |= MSG_TRUNC; + + /* copy data */ + datalen = copy_to_iter(skb->data, copied, &msg->msg_iter); + if (datalen < 0) { + nip_dbg("copy to iter in failure, len=%d", datalen); + err = -EFAULT; + return err; + } + + sock_recv_ts_and_drops(msg, sk, skb); + /* Update information such as the timestamp received + * by the last datagram in the transport control block + */ + /* copy the address */ + if (msg->msg_name) { + DECLARE_SOCKADDR(struct sockaddr_nin *, sin, msg->msg_name); + + sin->sin_family = AF_NINET; + sin->sin_port = udp_hdr(skb)->source; + sin->sin_addr = NIPCB(skb)->srcaddr; + *addr_len = sizeof(*sin); + } + + err = copied; + if (flags & MSG_TRUNC) + err = ulen; + + skb_consume_udp(sk, skb, peeking ? 
-err : err); + return err; +} + +static void nip_udp_err(struct sk_buff *skb, + struct ninet_skb_parm *opt, + u8 type, + u8 code, int offset, + __be32 info) +{ +} + +static int __nip_udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +{ + int rc; + + sk_incoming_cpu_update(sk); + + rc = __udp_enqueue_schedule_skb(sk, skb); + if (rc < 0) { + kfree_skb(skb); + return -1; + } + return 0; +} + +bool nip_get_udp_input_checksum(struct sk_buff *skb) +{ + struct nip_pseudo_header nph = {0}; + struct udphdr *udphead = udp_hdr(skb); + unsigned short check_len = ntohs(udphead->len); + + nph.nexthdr = NIPCB(skb)->nexthdr; + nph.saddr = NIPCB(skb)->srcaddr; + nph.daddr = NIPCB(skb)->dstaddr; + nph.check_len = udphead->len; + + return nip_check_sum_parse(skb_transport_header(skb), check_len, &nph) + == 0xffff ? true : false; +} + +/* Udp packets are received at the network layer */ +int nip_udp_input(struct sk_buff *skb) +{ + struct sock *sk; + int rc = 0; + struct udphdr *udphead = udp_hdr(skb); + + if (!nip_get_udp_input_checksum(skb)) { + nip_dbg("checksum failed, drop the packet"); + kfree_skb(skb); + rc = -1; + goto end; + } + + sk = __nip_udp_lib_lookup_skb(skb, udphead->source, + udphead->dest, &udp_table); + if (!sk) { + nip_dbg("dport not match, drop the packet. sport=%u, dport=%u, data_len=%u", + ntohs(udphead->source), ntohs(udphead->dest), ntohs(udphead->len)); + kfree_skb(skb); + rc = -1; + goto end; + } + + skb_pull(skb, sizeof(struct udphdr)); + skb->len = ntohs(udphead->len) - sizeof(struct udphdr); + + skb_dst_drop(skb); + /* enqueue */ + rc = __nip_udp_queue_rcv_skb(sk, skb); +end: + return rc; +} + +int nip_udp_output(struct sock *sk, struct msghdr *msg, size_t len) +{ + DECLARE_SOCKADDR(struct sockaddr_nin *, sin, msg->msg_name); + struct flow_nip fln; + u_short sport, dport; + struct dst_entry *dst; + int err = 0; + struct inet_sock *inet; + + if (!sin) + /* Currently, udp socket Connect function is not implemented. 
+ * The destination address and port must be directly provided by Sendto + */ + return -EDESTADDRREQ; + + if (sin->sin_family != AF_NINET) { + nip_dbg("sin_family false"); + return -EAFNOSUPPORT; + } + if (nip_addr_invalid(&sin->sin_addr)) { + nip_dbg("sin_addr false"); + return -EFAULT; + } + + inet = inet_sk(sk); + /* Destination address, port (network order) must be specified when sendto */ + dport = sin->sin_port; + fln.daddr = sin->sin_addr; + sport = htons(inet->inet_num); + + /* Check the dev index */ + fln.flowin_oif = sk->sk_bound_dev_if; + + /* Query the route & Obtain the Saddr */ + dst = nip_sk_dst_lookup_flow(sk, &fln); + if (IS_ERR(dst)) { + err = PTR_ERR(dst); + dst = NULL; + goto out; + } + + err = _nip_udp_output(sk, msg, len, + sizeof(struct udphdr), &fln.saddr, + sport, &fln.daddr, + dport, dst); + +out: + dst_release(dst); + if (!err) + return len; + + return err; +} + +/* Close the connection using */ +void nip_udp_destroy_sock(struct sock *sk) +{ + udp_table_del(sk); + ninet_destroy_sock(sk); +} + +/* socket option code for udp */ +int nip_udp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, + unsigned int optlen) +{ + if (level == SOL_UDP || level == SOL_UDPLITE) + return 0; + return nip_setsockopt(sk, level, optname, optval, optlen); +} + +int nip_udp_getsockopt(struct sock *sk, int level, + int optname, char __user *optval, + int __user *optlen) +{ + if (level == SOL_UDP || level == SOL_UDPLITE) + return 0; + return nip_getsockopt(sk, level, optname, optval, optlen); +} + +static const struct ninet_protocol nip_udp_protocol = { + .handler = nip_udp_input, + .err_handler = nip_udp_err, + .flags = 0, +}; + +/* Newip Udp related operations */ +struct proto nip_udp_prot = { + .name = "nip_udp", + .owner = THIS_MODULE, + .close = udp_lib_close, + .disconnect = udp_disconnect, + .ioctl = udp_ioctl, + .init = udp_init_sock, + .destroy = nip_udp_destroy_sock, + .setsockopt = nip_udp_setsockopt, + .getsockopt = 
nip_udp_getsockopt, + .sendmsg = nip_udp_output, + .recvmsg = nip_udp_recvmsg, + .backlog_rcv = __nip_udp_queue_rcv_skb, + .hash = udp_lib_hash, + .unhash = udp_lib_unhash, + .get_port = nip_udp_get_port, + .memory_allocated = &udp_memory_allocated, + .sysctl_mem = sysctl_udp_mem, + .obj_size = sizeof(struct nip_udp_sock), + .h.udp_table = &udp_table, + .diag_destroy = udp_abort, +}; + +/* Example Create newip socket information */ +static struct inet_protosw nip_udp_protosw = { + .type = SOCK_DGRAM, + .protocol = IPPROTO_UDP, + .prot = &nip_udp_prot, + .ops = &ninet_dgram_ops, + .flags = INET_PROTOSW_PERMANENT, +}; + +/* Af_NINET initializes the call */ +int __init nip_udp_init(void) +{ + int ret; + + ret = ninet_add_protocol(&nip_udp_protocol, IPPROTO_UDP); + if (ret) + goto out; + + ret = ninet_register_protosw(&nip_udp_protosw); + if (ret) + goto out_nip_udp_protocol; +out: + return ret; + +out_nip_udp_protocol: + ninet_del_protocol(&nip_udp_protocol, IPPROTO_UDP); + goto out; +} + +void nip_udp_exit(void) +{ + ninet_unregister_protosw(&nip_udp_protosw); + ninet_del_protocol(&nip_udp_protocol, IPPROTO_UDP); +} diff --git a/newip/tools/wireshark_cfg_for_newip.lua b/newip/tools/wireshark_cfg_for_newip.lua new file mode 100644 index 0000000000000000000000000000000000000000..f9f9fab6e1337a1f0fc8371785d9323fbb9aa76e --- /dev/null +++ b/newip/tools/wireshark_cfg_for_newip.lua @@ -0,0 +1,327 @@ +require "bit32" +--[[ +SPDX-License-Identifier: GPL-2.0-or-later +Copyright (c) 2022 Huawei Device Co., Ltd. 
+Function : wireshark lua configure for NewIP protocol stack +Author : yangyanjun +Edit Date : 2022/5/27 +--]] + +do -- lua begin + +--协议名称为NewIP,在Packet Details窗格显示为NewIP +-- create a new protocol +local nip_proto_name = "NewIP" +local nip_proto_desc = "NewIP Protocol" +local nip_proto_obj = Proto(nip_proto_name, nip_proto_desc) + +--[[ +NewIP协议字段定义 + ProtoField 参数: + para1 [必选] - 字段的缩写名称(过滤器中使用的字符串) + para2 [可选] - 字段的实际名称(出现在树中的字符串) + para3 [可选] - 字段类型 +--]] +--[[ +ProtoField.{type}(abbr, [name], [base], [valuestring], [mask], [desc]) +·type包括:uint8, uint16, uint24, uint32, uint64, framenum, float, double, string, stringz, bytes, bool, ipv4, ipv6, ether,oid, guid + +abbr 字段的缩写名称(过滤器中使用的字符串)。 +name (optional) 字段的实际名称(出现在树中的字符串)。 +base (optional) base.DEC,base.HEX或base.OCT,base.DEC_HEX,base.HEX_DEC,base.UNIT_STRING或base.RANGE_STRING。 +valuestring (optional) 包含与值对应的文本的表,或包含与值 ({min, max, “string”}) 对应的范围字符串值表的表(如果基数为 )base.RANGE_STRING, + 或包含单位名称的表如果 base 是base.UNIT_STRING. +mask (optional) 此字段的整数掩码。 +desc (optional) 字段说明。 + +--]] +local _ttl = ProtoField.uint8 (nip_proto_name .. ".ttl", "ttl ( 1 Byte)", base.DEC) +local _total_len = ProtoField.uint16(nip_proto_name .. ".total_len", "total_len ( 2 Byte)", base.DEC) +local _nexthdr = ProtoField.uint8 (nip_proto_name .. ".nexthdr", "nexthdr ( 1 Byte)", base.DEC) +local _daddr = ProtoField.bytes (nip_proto_name .. ".daddr", "daddr (1~8 Byte)", base.SPACE) +local _saddr = ProtoField.bytes (nip_proto_name .. ".saddr", "saddr (1~8 Byte)", base.SPACE) +local _hdr_len = ProtoField.uint8 (nip_proto_name .. ".hdr_len", "hdr_len ( 1 Byte)", base.DEC) +local _trans_data = ProtoField.bytes (nip_proto_name .. 
".trans_data", "trans_data", base.SPACE) + +-- 将字段添加都协议中 +nip_proto_obj.fields = { + _ttl, + _total_len, + _nexthdr, + _daddr, + _saddr, + _hdr_len, + _trans_data +} +--获取 _trans_data 解析器 +local _unknown_data_dis = Dissector.get("data") + +--定义 bitmap1 子菜单 +-- create a new protocol +local bitmap1_name = "bitmap1" +local bitmap1_desc = "bitmap1" +local bitmap1_obj = Proto(bitmap1_name, bitmap1_desc) + +--[[ +bitmap1 子菜单字段定义 + ProtoField 参数: + para1 [必选] - 字段的缩写名称(过滤器中使用的字符串) + para2 [可选] - 字段的实际名称(出现在树中的字符串) + para3 [可选] - 字段类型 +--]] +local _bitmap1 = ProtoField.uint8(bitmap1_name .. ".bitmap1", "bitmap1", base.HEX) +local _pkt_hdr_type = ProtoField.uint8(bitmap1_name .. ".pkt_hdr_type", "pkt_hdr_type ", base.DEC, Payload_type, 0x80) --_bitmap1的8bit +local _include_ttl = ProtoField.uint8(bitmap1_name .. ".include_ttl", "include_ttl ", base.DEC, Payload_type, 0x40) --_bitmap1的7bit +local _include_total_len = ProtoField.uint8(bitmap1_name .. ".include_total_len", "include_total_len", base.DEC, Payload_type, 0x20) --_bitmap1的6bit +local _include_nexthdr = ProtoField.uint8(bitmap1_name .. ".include_nexthdr", "include_nexthdr ", base.DEC, Payload_type, 0x10) --_bitmap1的5bit +local _include_reserve1 = ProtoField.uint8(bitmap1_name .. ".include_reserve1", "include_reserve1 ", base.DEC, Payload_type, 0x08) --_bitmap1的4bit +local _include_daddr = ProtoField.uint8(bitmap1_name .. ".include_daddr", "include_daddr ", base.DEC, Payload_type, 0x04) --_bitmap1的3bit +local _include_saddr = ProtoField.uint8(bitmap1_name .. ".include_saddr", "include_saddr ", base.DEC, Payload_type, 0x02) --_bitmap1的2bit +local _include_bitmap2 = ProtoField.uint8(bitmap1_name .. 
".include_bitmap2", "include_bitmap2 ", base.DEC, Payload_type, 0x01) --_bitmap1的1bit + +-- 将字段添加都协议中 +bitmap1_obj.fields = { + _bitmap1, _pkt_hdr_type, _include_ttl, _include_total_len, _include_nexthdr, + _include_reserve1, _include_daddr, _include_saddr, _include_bitmap2 +} + +--定义 bitmap2 子菜单 +-- create a new protocol +local bitmap2_name = "bitmap2" +local bitmap2_desc = "bitmap2" +local bitmap2_obj = Proto(bitmap2_name, bitmap2_desc) + +--[[ +bitmap2 子菜单字段定义 + ProtoField 参数: + para1 [必选] - 字段的缩写名称(过滤器中使用的字符串) + para2 [可选] - 字段的实际名称(出现在树中的字符串) + para3 [可选] - 字段类型 +--]] +local _bitmap2 = ProtoField.uint8(bitmap2_name .. ".bitmap2", "bitmap2", base.HEX) +local _include_hdr_len = ProtoField.uint8(bitmap2_name .. ".include_hdr_len", "include_hdr_len ", base.DEC, Payload_type, 0x80) --_bitmap2的8bit +local _include_reserve2 = ProtoField.uint8(bitmap2_name .. ".include_reserve2", "include_reserve2", base.DEC, Payload_type, 0x40) --_bitmap2的7bit +local _include_reserve3 = ProtoField.uint8(bitmap2_name .. ".include_reserve3", "include_reserve3", base.DEC, Payload_type, 0x20) --_bitmap2的6bit +local _include_reserve4 = ProtoField.uint8(bitmap2_name .. ".include_reserve4", "include_reserve4", base.DEC, Payload_type, 0x10) --_bitmap2的5bit +local _include_reserve5 = ProtoField.uint8(bitmap2_name .. ".include_reserve5", "include_reserve5", base.DEC, Payload_type, 0x08) --_bitmap2的4bit +local _include_reserve6 = ProtoField.uint8(bitmap2_name .. ".include_reserve6", "include_reserve6", base.DEC, Payload_type, 0x04) --_bitmap2的3bit +local _include_reserve7 = ProtoField.uint8(bitmap2_name .. ".include_reserve7", "include_reserve7", base.DEC, Payload_type, 0x02) --_bitmap2的2bit +local _include_bitmap3 = ProtoField.uint8(bitmap2_name .. 
".include_bitmap3", "include_bitmap3 ", base.DEC, Payload_type, 0x01) --_bitmap2的1bit + +-- 将字段添加都协议中 +bitmap2_obj.fields = { + _bitmap2, _include_hdr_len, _include_reserve2, _include_reserve3, _include_reserve4, + _include_reserve5, _include_reserve6, _include_reserve7, _include_bitmap3 +} + +--定义 nd icmp 子菜单 +-- create a new protocol +local nd_icmp_name = "nd_icmp" +local nd_icmp_desc = "nd_icmp" +local nd_icmp_obj = Proto(nd_icmp_name, nd_icmp_desc) + +--[[ +nd_icmp 子菜单字段定义 + ProtoField 参数: + para1 [必选] - 字段的缩写名称(过滤器中使用的字符串) + para2 [可选] - 字段的实际名称(出现在树中的字符串) + para3 [可选] - 字段类型 +--]] +local _type = ProtoField.uint8 (nd_icmp_name .. ".type", "type ( 1 Byte)", base.DEC) +local _code = ProtoField.uint8 (nd_icmp_name .. ".code", "code ( 1 Byte)", base.DEC) +local _checksum = ProtoField.uint16(nd_icmp_name .. ".checksum", "checksum( 2 Byte)", base.HEX) +local _rs_daddr = ProtoField.bytes (nd_icmp_name .. ".rs_daddr", "rs_daddr(1~8 Byte)", base.SPACE) +local _mac_len = ProtoField.uint8 (nd_icmp_name .. ".mac_len", "mac_len ( 1 Byte)", base.DEC) +local _mac = ProtoField.bytes (nd_icmp_name .. 
".mac", "mac ( 6 Byte)", base.SPACE) + +-- 将字段添加都协议中 +nd_icmp_obj.fields = { + _type, _code, _checksum, _rs_daddr, _mac_len, _mac +} + +--[[ + 下面定义 newip 解析器的主函数 + 第一个参数是 tvb 类型,表示的是需要此解析器解析的数据 + 第二个参数是 pinfo 类型,是协议解析树上的信息,包括 UI 上的显示 + 第三个参数是 treeitem 类型,表示上一级解析树 +--]] +function nip_dissector(tvb, pinfo, treeitem) + --设置一些 UI 上面的信息 + pinfo.cols.protocol:set(nip_proto_name) + pinfo.cols.info:set(nip_proto_desc) + + local offset = 0 + local tvb_len = tvb:len() + local nexthdr = 0 + + -- 在上一级解析树上创建 nip 的根节点 + local nip_tree = treeitem:add(nip_proto_obj, tvb:range(tvb_len)) + + local bitmap1 = tvb(offset, 1):uint() --表示从报文缓冲区0开始取1个字节 + local pkt_hdr_type = bit.band(bit.rshift(bitmap1, 7), 0x00000001) --右移 7 位 与 0x01 相与,获取 pkt_hdr_type 位 + local include_ttl = bit.band(bit.rshift(bitmap1, 6), 0x00000001) --右移 6 位 与 0x01 相与,获取 include_ttl 位 + local include_total_len = bit.band(bit.rshift(bitmap1, 5), 0x00000001) --右移 5 位 与 0x01 相与,获取 include_total_len 位 + local include_nexthdr = bit.band(bit.rshift(bitmap1, 4), 0x00000001) --右移 4 位 与 0x01 相与,获取 include_nexthdr 位 + local include_daddr = bit.band(bit.rshift(bitmap1, 2), 0x00000001) --右移 2 位 与 0x01 相与,获取 include_daddr 位 + local include_saddr = bit.band(bit.rshift(bitmap1, 1), 0x00000001) --右移 1 位 与 0x01 相与,获取 include_saddr 位 + local include_bitmap2 = bit.band(bitmap1, 0x00000001) --获取 include_bitmap2 位 + + --nip报头无效(0表示有效) + if pkt_hdr_type ~= 0 then + return false + else + --bitmap1子菜单 + local bitmap1_tree = nip_tree:add(bitmap1_obj, tvb:range(tvb_len)) + bitmap1_tree:add(_bitmap1, bitmap1) + bitmap1_tree:add(_pkt_hdr_type, bitmap1):append_text(" (".."0-New IP; 1-Reserve"..")") + bitmap1_tree:add(_include_ttl, bitmap1) + bitmap1_tree:add(_include_total_len, bitmap1) + bitmap1_tree:add(_include_nexthdr, bitmap1) + bitmap1_tree:add(_include_reserve1, bitmap1) + bitmap1_tree:add(_include_daddr, bitmap1) + bitmap1_tree:add(_include_saddr, bitmap1) + bitmap1_tree:add(_include_bitmap2, bitmap1) + offset = offset + 1 --_bitmap1 
占用1字节 + end + + local include_hdr_len = 0 + if include_bitmap2 ~= 0 then + --bitmap2子菜单 + local bitmap2_tree = nip_tree:add(bitmap2_obj, tvb:range(tvb_len)) + local bitmap2 = tvb(offset, 1):uint() + include_hdr_len = bit.band(bit.rshift(bitmap2, 7), 0x00000001) --右移 7 位 与 0x01 相与,获取 include_hdr_len 位 + offset = offset + 1 --_bitmap2 占用1字节 + + bitmap2_tree:add(_bitmap2, bitmap2) + bitmap2_tree:add(_include_hdr_len, bitmap2) + bitmap2_tree:add(_include_reserve2, bitmap2) + bitmap2_tree:add(_include_reserve3, bitmap2) + bitmap2_tree:add(_include_reserve4, bitmap2) + bitmap2_tree:add(_include_reserve5, bitmap2) + bitmap2_tree:add(_include_reserve6, bitmap2) + bitmap2_tree:add(_include_reserve7, bitmap2) + bitmap2_tree:add(_include_bitmap3, bitmap2) + end + + if include_ttl ~= 0 then + nip_tree:add(_ttl, tvb(offset, 1)) + offset = offset + 1 --_ttl 占用1字节 + end + + if include_total_len ~= 0 then + nip_tree:add(_total_len, tvb(offset, 2)) + offset = offset + 2 --_total_len 占用2字节 + end + + if include_nexthdr ~= 0 then + nexthdr = tvb(offset, 1):uint() + nip_tree:add(_nexthdr, tvb(offset, 1)) + offset = offset + 1 --_nexthdr 占用1字节 + end + + if include_daddr ~= 0 then + local first_addr = tvb(offset, 1):uint() + local addr_len = get_nip_addr_len (first_addr) + if addr_len == 0 then + return false + end + nip_tree:add(_daddr, tvb(offset, addr_len)) + offset = offset + addr_len --_daddr 占用 addr_len 字节 + end + + if include_saddr ~= 0 then + local first_addr = tvb(offset, 1):uint() + local addr_len = get_nip_addr_len (first_addr) + if addr_len == 0 then + return false + end + nip_tree:add(_saddr, tvb(offset, addr_len)) + offset = offset + addr_len --_daddr 占用 addr_len 字节 + end + + if include_hdr_len ~= 0 then + nip_tree:add(_hdr_len, tvb(offset, 1)) + offset = offset + 1 --_hdr_len 占用1字节 + end + + --根据next header 确定上层协议 + local trans_data = tvb(offset, tvb_len - offset) + if (nexthdr == 177) then + local nd_icmp_tree = nip_tree:add(nd_icmp_obj, tvb:range(tvb_len)) + local type = 
tvb(offset, 1):uint() + nd_icmp_tree:add(_type, tvb(offset, 1)) + offset = offset + 1 + nd_icmp_tree:add(_code, tvb(offset, 1)) + offset = offset + 1 + nd_icmp_tree:add(_checksum, tvb(offset, 1)) + offset = offset + 1 + if type == 1 then + local first_addr = tvb(offset, 1):uint() + local addr_len = get_nip_addr_len (first_addr) + if addr_len == 0 then + return false + end + nd_icmp_tree:add(_rs_daddr, tvb(offset, addr_len)) + offset = offset + addr_len --_rs_daddr 占用 addr_len 字节 + pinfo.cols.protocol = "ND request based NewIP" + else + nd_icmp_tree:add(_mac_len, tvb(offset, 1)) + offset = offset + 1 + nd_icmp_tree:add(_mac, tvb(offset, 6)) + offset = offset + 6 + pinfo.cols.protocol = "ND response based NewIP" + end + elseif (nexthdr == 6) then + Dissector.get("tcp"):call(trans_data:tvb(), pinfo, treeitem) + pinfo.cols.protocol = "TCP based NewIP" + elseif (nexthdr == 17) then + Dissector.get("udp"):call(trans_data:tvb(), pinfo, treeitem) + pinfo.cols.protocol = "UDP based NewIP" + else + nip_tree:add(_trans_data, trans_data) + end +end + +--[[ + 下面定义 NewIP 解析器的主函数,这个函数由 wireshark调用 + 第一个参数是 Tvb 类型,表示的是需要此解析器解析的数据 + 第二个参数是 Pinfo 类型,是协议解析树上的信息,包括 UI 上的显示 + 第三个参数是 TreeItem 类型,表示上一级解析树 +--]] +function nip_proto_obj.dissector(tvb, pinfo, treeitem) + if nip_dissector(tvb, pinfo, treeitem) then + --valid NewIP diagram + else + --不是NewIP协议(其他未知协议)时,直接输出报文数据 + _unknown_data_dis:call(tvb, pinfo, treeitem) + end + +end + + +--向 wireshark 注册协议插件被调用的条件 +local ipn_encap_table = DissectorTable.get("ethertype") +ipn_encap_table:add(0xEADD, nip_proto_obj) + +--NewIP地址长度计算 +function get_nip_addr_len (first_addr) + if first_addr <= 0xDC then + return 1 + elseif first_addr >= 0xDD and first_addr <= 0xF0 then + return 2 + elseif first_addr == 0xF1 then + return 3 + elseif first_addr == 0xF2 then + return 5 + elseif first_addr == 0xF3 then + return 7 + elseif first_addr == 0xFE then + return 8 + elseif first_addr == 0xFF then + return 2 + else + return 0 + end +end + +end -- lua end